def __init__(self, index_file_name: str, **kwargs):
    """Set up the executor and restore any previously saved documents.

    :param index_file_name: name of the index file; stored on the instance
    :param kwargs: forwarded unchanged to the parent initializer
    """
    super().__init__(**kwargs)
    self.index_file_name = index_file_name
    # Reuse whatever lives at ``save_path`` when it exists, else start empty.
    has_saved_docs = os.path.exists(self.save_path)
    self._docs = (
        DocumentArray.load(self.save_path) if has_saved_docs else DocumentArray()
    )
def __init__(self, dump_path: Optional[str] = None, *args, **kwargs):
    """Create the executor, loading documents from ``dump_path`` if possible.

    :param dump_path: optional location of a saved ``DocumentArray``
    :param args: positional arguments forwarded to the parent initializer
    :param kwargs: keyword arguments forwarded to the parent initializer
    """
    super().__init__(*args, **kwargs)
    self.logger = JinaLogger('CompoundQueryExecutor')
    self._dump_path = dump_path
    # The ``None`` check comes first so ``os.path.exists`` never receives ``None``.
    if self._dump_path is None or not os.path.exists(self._dump_path):
        self._docs = DocumentArray()
    else:
        self._docs = DocumentArray.load(self._dump_path)
def __init__(self, dump_path=None, *args, **kwargs):
    """Create the executor and load this shard's dump when a path is given.

    :param dump_path: directory holding one ``<shard_id>.ndjson`` file per
        shard; when ``None`` the executor starts with an empty index
    :param args: positional arguments forwarded to the parent initializer
    :param kwargs: keyword arguments forwarded to the parent initializer;
        ``kwargs['runtime_args']`` must contain the key ``'dump_path'``
    """
    super().__init__(*args, **kwargs)
    # backwards compatibility
    assert 'dump_path' in kwargs['runtime_args'].keys()
    if dump_path is None:
        self._docs = DocumentArray()
        return
    # The shard file is named after ``runtime_args.pea_id`` (``None`` if unset).
    pea_id = getattr(self.runtime_args, 'pea_id', None)
    shard_file = f'{pea_id}.ndjson'
    self._docs = DocumentArray.load(os.path.join(dump_path, shard_file))
def __init__(self, **kwargs):
    """Create the indexer and reload documents stored in the workspace.

    Documents are looked up under ``<metas.workspace>/docs``; when nothing is
    found there the indexer starts with an empty ``DocumentArray``.

    :param kwargs: forwarded unchanged to the parent initializer
    """
    super().__init__(**kwargs)
    self.logger = JinaLogger('CrudIndexer')
    self._docs = DocumentArray()
    self._dump_location = os.path.join(self.metas.workspace, 'docs')
    if not os.path.exists(self._dump_location):
        self.logger.info(f'No data found at {self._dump_location}')
        return
    self._docs = DocumentArray.load(self._dump_location)
    self.logger.info(f'Loaded {len(self._docs)} from {self._dump_location}')
def __init__(self, metas, **kwargs):
    """Create the executor and load this shard's dump if one is configured.

    The dump location is read from ``kwargs['runtime_args']['dump_path']``.

    :param metas: accepted for signature compatibility; it is not forwarded
        to the parent initializer
    :param kwargs: forwarded to the parent initializer; must contain a
        ``runtime_args`` mapping
    """
    super().__init__(**kwargs)
    runtime_args = kwargs['runtime_args']
    dump_path = runtime_args.get('dump_path', None)
    if dump_path is None:
        docs = DocumentArray()
    else:
        # Each shard reads its own ``<pea_id>.ndjson`` file inside ``dump_path``.
        shard_id = getattr(self.runtime_args, 'pea_id', None)
        docs = DocumentArray.load(os.path.join(dump_path, f'{shard_id}.ndjson'))
    self._docs = docs
def __init__(self, dump_path: Optional[str] = None, *args, **kwargs):
    """Create the executor, loading documents from a dump when available.

    The dump path is taken from ``dump_path`` or, when that is falsy, from
    ``kwargs['runtime_args']['dump_path']``. If no path is configured, or the
    configured path does not exist, the executor starts with an empty index
    and logs a warning that says which of the two happened.

    :param dump_path: optional location of a saved ``DocumentArray``
    :param args: positional arguments forwarded to the parent initializer
    :param kwargs: keyword arguments forwarded to the parent initializer
    """
    super().__init__(*args, **kwargs)
    self.logger = JinaLogger('QueryExecutor')
    # ``runtime_args`` may be missing or explicitly ``None``; treat both as empty
    # so the fallback lookup cannot raise ``AttributeError``.
    runtime_args = kwargs.get('runtime_args') or {}
    self._dump_path = dump_path or runtime_args.get('dump_path', None)
    if self._dump_path is None:
        self.logger.warning('no dump path passed. Loading an empty index')
        self._docs = DocumentArray()
    elif not os.path.exists(self._dump_path):
        # A path was configured but is missing on disk: report that instead
        # of claiming that no path was passed.
        self.logger.warning(
            f'dump path {self._dump_path} does not exist. Loading an empty index'
        )
        self._docs = DocumentArray()
    else:
        self.logger.success(f'loading Executor from dump path: {self._dump_path}')
        self._docs = DocumentArray.load(self._dump_path)
def test_document_save_load(method, tmp_path):
    """Round-trip DocumentArrays through save/load and compare each document.

    Two arrays are exercised: one built directly from 1000 random documents
    and one filled by appending 10 random documents one at a time.

    :param method: value passed as ``file_format`` to ``save`` and ``load``
    :param tmp_path: directory in which the temporary file is written
    """
    bulk_built = DocumentArray(random_docs(1000))
    appended = DocumentArray()
    for doc in random_docs(10):
        appended.append(doc)

    # Both arrays are written to the same file, one after the other.
    target = os.path.join(tmp_path, 'test')
    for original in (bulk_built, appended):
        with TimeContext(f'w/{method}'):
            original.save(target, file_format=method)
        with TimeContext(f'r/{method}'):
            restored = DocumentArray.load(target, file_format=method)

        assert len(original) == len(restored)
        for saved_doc, loaded_doc in zip(original, restored):
            assert saved_doc.id == loaded_doc.id
            np.testing.assert_equal(saved_doc.embedding, loaded_doc.embedding)
            assert saved_doc.content == loaded_doc.content
def __init__(self, *args, **kwargs):
    """Create the executor and restore documents saved at ``save_path``.

    :param args: positional arguments forwarded to the parent initializer
    :param kwargs: keyword arguments forwarded to the parent initializer
    """
    super().__init__(*args, **kwargs)
    # Nothing saved yet: start from an empty collection.
    if not os.path.exists(self.save_path):
        self._docs = DocumentArray()
        return
    self._docs = DocumentArray.load(self.save_path)