Exemplo n.º 1
0
 def __init__(self, index_file_name: str, **kwargs):
     """Set up the executor and restore any previously saved documents.

     :param index_file_name: name of the index file; stored on the instance
         before ``self.save_path`` is read (``save_path`` is defined outside
         this block and presumably derives from it -- confirm).
     :param kwargs: forwarded unchanged to the parent constructor.
     """
     super().__init__(**kwargs)
     self.index_file_name = index_file_name
     stored_at = self.save_path
     # Reuse the saved documents when present, otherwise start empty.
     self._docs = (
         DocumentArray.load(stored_at)
         if os.path.exists(stored_at)
         else DocumentArray()
     )
Exemplo n.º 2
0
 def __init__(self, dump_path: Optional[str] = None, *args, **kwargs):
     """Create the executor, loading documents from ``dump_path`` if possible.

     :param dump_path: optional path of a previously dumped ``DocumentArray``.
     :param args: positional arguments forwarded to the parent constructor.
     :param kwargs: keyword arguments forwarded to the parent constructor.
     """
     super().__init__(*args, **kwargs)
     self.logger = JinaLogger('CompoundQueryExecutor')
     self._dump_path = dump_path
     # No path given, or nothing on disk at that path: start with an empty index.
     if self._dump_path is None or not os.path.exists(self._dump_path):
         self._docs = DocumentArray()
     else:
         self._docs = DocumentArray.load(self._dump_path)
Exemplo n.º 3
0
 def __init__(self, dump_path=None, *args, **kwargs):
     """Create the executor and load this shard's dump file when given a path.

     :param dump_path: directory containing per-shard ``<pea_id>.ndjson``
         files, or ``None`` to start with an empty ``DocumentArray``.
     :param args: positional arguments forwarded to the parent constructor.
     :param kwargs: keyword arguments forwarded to the parent constructor;
         must contain a ``runtime_args`` mapping with a ``dump_path`` key.
     """
     super().__init__(*args, **kwargs)
     # backwards compatibility
     # NOTE(review): this check is an `assert`, so it is skipped under `python -O`.
     runtime_arg_names = kwargs['runtime_args'].keys()
     assert 'dump_path' in runtime_arg_names
     if dump_path is None:
         self._docs = DocumentArray()
     else:
         # The shard file is named after this instance's `pea_id` runtime arg.
         pea_id = getattr(self.runtime_args, 'pea_id', None)
         shard_file = os.path.join(dump_path, f'{pea_id}.ndjson')
         self._docs = DocumentArray.load(shard_file)
Exemplo n.º 4
0
 def __init__(self, **kwargs):
     """Create the indexer and restore documents from the workspace if present.

     Documents are looked up at ``<metas.workspace>/docs``; the outcome
     (loaded count or nothing found) is logged at info level.

     :param kwargs: forwarded unchanged to the parent constructor.
     """
     super().__init__(**kwargs)
     self.logger = JinaLogger('CrudIndexer')
     self._docs = DocumentArray()
     self._dump_location = os.path.join(self.metas.workspace, 'docs')
     if not os.path.exists(self._dump_location):
         # Nothing persisted yet: keep the empty array created above.
         self.logger.info(f'No data found at {self._dump_location}')
         return
     self._docs = DocumentArray.load(self._dump_location)
     self.logger.info(f'Loaded {len(self._docs)} from {self._dump_location}')
Exemplo n.º 5
0
    def __init__(self, metas, **kwargs):
        """Create the executor and load this shard's dump file if one is configured.

        :param metas: accepted but not used in this constructor.
            NOTE(review): it is not forwarded to the parent constructor --
            confirm this is intentional.
        :param kwargs: forwarded to the parent constructor; must contain a
            ``runtime_args`` mapping, whose optional ``dump_path`` entry names
            the directory holding per-shard ``<pea_id>.ndjson`` files.
        """
        super().__init__(**kwargs)
        runtime_settings = kwargs['runtime_args']
        dump_path = runtime_settings.get('dump_path')

        if dump_path is None:
            self._docs = DocumentArray()
        else:
            # The shard file is named after this instance's `pea_id` runtime arg.
            pea_id = getattr(self.runtime_args, 'pea_id', None)
            shard_file = os.path.join(dump_path, f'{pea_id}.ndjson')
            self._docs = DocumentArray.load(shard_file)
Exemplo n.º 6
0
 def __init__(self, dump_path: Optional[str] = None, *args, **kwargs):
     """Create the executor, loading documents from a dump path when available.

     The dump path is taken from the ``dump_path`` argument or, when that is
     falsy, from the ``dump_path`` entry of ``kwargs['runtime_args']``.

     :param dump_path: optional path of a previously dumped ``DocumentArray``.
     :param args: positional arguments forwarded to the parent constructor.
     :param kwargs: keyword arguments forwarded to the parent constructor;
         ``runtime_args`` may be absent or ``None``.
     """
     super().__init__(*args, **kwargs)
     self.logger = JinaLogger('QueryExecutor')
     # `runtime_args` can be present but explicitly None; `or {}` covers both
     # the missing and the None case so the fallback lookup cannot crash.
     runtime_args = kwargs.get('runtime_args') or {}
     self._dump_path = dump_path or runtime_args.get('dump_path', None)
     if self._dump_path is None:
         self.logger.warning('no dump path passed. Loading an empty index')
         self._docs = DocumentArray()
     elif not os.path.exists(self._dump_path):
         # A path was supplied but nothing is there: say so instead of
         # claiming that no path was passed.
         self.logger.warning(
             f'dump path {self._dump_path} does not exist. '
             f'Loading an empty index')
         self._docs = DocumentArray()
     else:
         self.logger.success(
             f'loading Executor from dump path: {self._dump_path}')
         self._docs = DocumentArray.load(self._dump_path)
Exemplo n.º 7
0
def test_document_save_load(method, tmp_path):
    """Round-trip DocumentArrays through ``save``/``load`` for one file format.

    Two arrays are checked: one built directly from 1000 random documents and
    one filled by appending 10 random documents. After reloading, length,
    ids, embeddings and content must match the source array.

    :param method: value passed as ``file_format`` to ``save`` and ``load``.
    :param tmp_path: directory in which the test file is written.
    """
    bulk_built = DocumentArray(random_docs(1000))
    appended = DocumentArray()
    for item in random_docs(10):
        appended.append(item)

    # Both arrays are written to (and read back from) the same path.
    target = os.path.join(tmp_path, 'test')
    for source in (bulk_built, appended):
        with TimeContext(f'w/{method}'):
            source.save(target, file_format=method)
        with TimeContext(f'r/{method}'):
            restored = DocumentArray.load(target, file_format=method)

        assert len(source) == len(restored)
        for expected, actual in zip(source, restored):
            assert expected.id == actual.id
            np.testing.assert_equal(expected.embedding, actual.embedding)
            assert expected.content == actual.content
Exemplo n.º 8
0
 def __init__(self, *args, **kwargs):
     """Create the executor and restore saved documents when they exist.

     Documents are loaded from ``self.save_path`` (defined outside this
     block) if that path exists; otherwise an empty ``DocumentArray`` is used.

     :param args: positional arguments forwarded to the parent constructor.
     :param kwargs: keyword arguments forwarded to the parent constructor.
     """
     super().__init__(*args, **kwargs)
     stored_at = self.save_path
     self._docs = (
         DocumentArray.load(stored_at)
         if os.path.exists(stored_at)
         else DocumentArray()
     )