Exemplo n.º 1
0
def _create_test_data_message(counter=0):
    """Build a ``Message`` around the first request generated for one document.

    :param counter: value whose ``str()`` becomes the text of the document
    :return: ``Message(None, request, 'test', '123')``
    """
    single_doc = DocumentArray([Document(text=str(counter))])
    # Materialise the whole generator, exactly as before, then keep the head.
    generated = list(request_generator('/', single_doc))
    return Message(None, generated[0], 'test', '123')
Exemplo n.º 2
0
def input_docs():
    """Return a ``DocumentArray`` holding 50 default-constructed documents."""
    blank_docs = [Document() for _ in range(50)]
    return DocumentArray(blank_docs)
Exemplo n.º 3
0
 def foo(self, docs: DocumentArray, **kwargs):
     """Append a document whose text is ``str(self.shard_id)`` and return ``docs``."""
     shard_marker = Document(text=str(self.shard_id))
     docs.append(shard_marker)
     return docs
Exemplo n.º 4
0
def da_and_dam(N):
    """Return ``(DocumentArray.empty(N), DocumentArrayMemmap.empty(N))``.

    :param N: argument forwarded to both ``empty`` constructors
    """
    return DocumentArray.empty(N), DocumentArrayMemmap.empty(N)
Exemplo n.º 5
0
 def __init__(self, *args, **kwargs):
     """Initialise the parent class, then attach a logger and an empty doc store."""
     super().__init__(*args, **kwargs)
     self.logger = JinaLogger('IndexExecutor')
     self._docs = DocumentArray()
Exemplo n.º 6
0
 def docs(self):
     """Return a ``DocumentArray`` built from the list of ``random_docs(10)``."""
     generated = list(random_docs(10))
     return DocumentArray(generated)
Exemplo n.º 7
0
def test_set_embeddings_multi_kind(array):
    """Assigning ``array`` to ``embeddings`` of a 10-document array must not raise."""
    blank_docs = [Document() for _ in range(10)]
    doc_array = DocumentArray(blank_docs)
    doc_array.embeddings = array
Exemplo n.º 8
0
def test_da_get_embeddings_slice():
    """``_get_embeddings(slice(10, 20))`` must match the sliced ``embedding`` attributes."""
    da = DocumentArray(random_docs(100))
    expected = da.get_attributes('embedding')[10:20]
    actual = da._get_embeddings(slice(10, 20))
    np.testing.assert_almost_equal(expected, actual)
Exemplo n.º 9
0
def docarray_for_cache():
    """Return a ``DocumentArray`` with two documents created with ids ``1`` and ``'2'``."""
    cache_docs = DocumentArray()
    cache_docs.extend([Document(id=1), Document(id='2')])
    return cache_docs
Exemplo n.º 10
0
def test_traversal_path():
    """Traversal paths given as a list are accepted; a bare string raises ``ValueError``."""
    da = DocumentArray([Document() for _ in range(6)])
    assert len(da) == 6

    # A list of paths is accepted by ``traverse_flat`` ...
    da.traverse_flat(['r'])

    # ... while a bare string is rejected.
    with pytest.raises(ValueError):
        da.traverse_flat('r')

    # Same contract for ``traverse``. The result is iterated inside the
    # ``raises`` block so the check also holds if the error surfaces lazily.
    da.traverse(['r'])
    with pytest.raises(ValueError):
        for _ in da.traverse('r'):
            pass
Exemplo n.º 11
0
def test_da_get_embeddings():
    """The ``embeddings`` property must match ``get_attributes('embedding')``."""
    da = DocumentArray(random_docs(100))
    per_doc = da.get_attributes('embedding')
    np.testing.assert_almost_equal(per_doc, da.embeddings)
Exemplo n.º 12
0
def docarray(docs):
    """Wrap ``docs`` in a ``DocumentArray`` and return it."""
    wrapped = DocumentArray(docs)
    return wrapped
Exemplo n.º 13
0
def test_delete_by_id(docarray: DocumentArray, document_factory):
    """Deleting a document by its id shrinks the array and removes that id.

    :param docarray: array under test; the length assertion below expects it
        to hold three documents before the extra one is appended
    :param document_factory: object whose ``create`` builds the extra document
    """
    doc = document_factory.create(4, 'test 4')
    docarray.append(doc)
    del docarray[doc.id]
    assert len(docarray) == 3
    # Comparing the array with itself cannot detect a wrong deletion; it is
    # kept only so ``__eq__`` is still exercised after the delete.
    assert docarray == docarray
    # The meaningful check: the deleted id must no longer be present.
    assert all(remaining.id != doc.id for remaining in docarray)
Exemplo n.º 14
0
 def fake_reduce(self, **kwargs):
     """Return a one-element ``DocumentArray`` whose document id is ``'fake_document'``."""
     placeholder = Document(id='fake_document')
     return DocumentArray([placeholder])
Exemplo n.º 15
0
def _create_test_data_message(counter=0):
    """Return the first request generated for one document with text ``str(counter)``.

    :param counter: value whose ``str()`` becomes the document text
    """
    single_doc = DocumentArray([Document(text=str(counter))])
    # The generator is fully materialised before the first item is taken.
    generated = list(request_generator('/', single_doc))
    return generated[0]
Exemplo n.º 16
0
def test_blobs_getter_da():
    """The ``blobs`` property must match ``get_attributes('blob')``."""
    random_blobs = np.random.random((100, 10, 10))
    blob_docs = [Document(blob=one_blob) for one_blob in random_blobs]
    da = DocumentArray(blob_docs)
    assert len(da) == 100
    per_doc = da.get_attributes('blob')
    np.testing.assert_almost_equal(per_doc, da.blobs)
Exemplo n.º 17
0
 def foo(self, docs, **kwargs):
     """Bump ``self._count`` and return it as the text of a single new document.

     Even-numbered calls sleep for 0.1 seconds before returning.
     """
     self._count += 1
     call_number = self._count
     is_even_call = call_number % 2 == 0
     if is_even_call:
         time.sleep(0.1)
     reply = Document(text=str(call_number))
     return DocumentArray([reply])
Exemplo n.º 18
0
 def index(self, docs: DocumentArray, **kwargs):
     """Pass the result of ``docs.get_attributes('tags')`` to ``self.db.insert_multiple``."""
     all_tags = docs.get_attributes('tags')
     self.db.insert_multiple(all_tags)
Exemplo n.º 19
0
 def __init__(self, *args, **kwargs):
     """Initialise the parent class, then attach a logger and an empty doc store."""
     super().__init__(*args, **kwargs)
     self.logger = JinaLogger('KeyValueDBMSIndexer')
     self._docs = DocumentArray()
Exemplo n.º 20
0
 def filter(self, docs: DocumentArray, **kwargs):
     """Return the documents on traversal path ``'c'`` whose MIME type is ``text/plain``."""
     candidates = docs.traverse_flat(['c'])
     # Still handed over as a lazy generator, not a list.
     plain_text = (doc for doc in candidates if doc.mime_type == 'text/plain')
     return DocumentArray(plain_text)
Exemplo n.º 21
0
def segmenter_doc_array():
    """Return a ``DocumentArray`` of two documents tagged with ``caption`` and ``image``."""
    tag_sets = (
        {'caption': 'hello', 'image': '1.jpg'},
        {'caption': 'world', 'image': '2.jpg'},
    )
    return DocumentArray([Document(tags=tags) for tags in tag_sets])
Exemplo n.º 22
0
    def craft(self, docs, *args, **kwargs):
        """Append the id of ``docs[0]`` to ``<TEST_EVAL_FLOW_TMPDIR>/<self.tag>.txt``.

        The directory comes from the ``TEST_EVAL_FLOW_TMPDIR`` environment
        variable; the file is opened in append mode. Returns ``None``.
        """
        tmp_dir = os.environ.get('TEST_EVAL_FLOW_TMPDIR')
        target = f'{tmp_dir}/{self.tag}.txt'
        with open(target, 'a') as handle:
            # Looked up after the file is opened, matching the original order.
            handle.write(f'{docs[0].id}')


class DummyEvaluator2(DummyEvaluator1):
    """Subclass of ``DummyEvaluator1`` that only overrides ``tag`` with ``2``."""

    tag = 2


class DummyEvaluator3(DummyEvaluator1):
    """Subclass of ``DummyEvaluator1`` that only overrides ``tag`` with ``3``."""

    tag = 3


# Module-level inputs: a DocumentArray built from ``random_docs(1)``.
docs = DocumentArray(list(random_docs(1)))
# NOTE(review): presumably parametrize values for tests outside this view - confirm.
params = ['HANG', 'REMOVE', 'COLLECT']


def validate(ids, expect):
    """Assert that ``<TEST_EVAL_FLOW_TMPDIR>/<id>.txt`` exists (or not) for every id.

    :param ids: non-empty sized collection of file stems to check
    :param expect: value compared against ``os.path.exists`` for each file;
        when truthy, each file must additionally be non-empty
    """
    assert len(ids) > 0
    tmp_dir = os.environ.get('TEST_EVAL_FLOW_TMPDIR')
    for stem in ids:
        path = f'{tmp_dir}/{stem}.txt'
        exists = os.path.exists(path)
        assert exists == expect
        if not expect:
            continue
        with open(path) as handle:
            content = handle.read()
        assert content != ''


@pytest.fixture
Exemplo n.º 23
0
 def foo(self, **kwargs):
     """Return a ``DocumentArray`` containing two default-constructed documents."""
     pair = [Document() for _ in range(2)]
     return DocumentArray(pair)
Exemplo n.º 24
0
class CrudIndexer(Executor):
    """Simple indexer with ``/index``, ``/update``, ``/delete`` and ``/search`` endpoints.

    Documents live in a ``DocumentArray`` that is loaded from
    ``<workspace>/docs`` on construction (when that path exists) and saved
    back to the same path in :meth:`close`.
    """

    def __init__(self, **kwargs):
        """Set up the logger and load previously dumped documents, if any."""
        super().__init__(**kwargs)
        self.logger = JinaLogger('CrudIndexer')
        self._docs = DocumentArray()
        self._dump_location = os.path.join(self.metas.workspace, 'docs')
        if os.path.exists(self._dump_location):
            self._docs = DocumentArray.load(self._dump_location)
            self.logger.info(f'Loaded {len(self._docs)} from {self._dump_location}')
        else:
            self.logger.info(f'No data found at {self._dump_location}')

    @requests(on='/index')
    def index(self, docs: 'DocumentArray', **kwargs):
        """Add ``docs`` to the stored documents.

        :param docs: documents to append
        :param kwargs: ignored
        """
        self._docs.extend(docs)

    @requests(on='/update')
    def update(self, docs: 'DocumentArray', **kwargs):
        """Replace stored documents that share an id with ``docs``.

        Implemented as :meth:`delete` followed by :meth:`index`, so documents
        whose id was not stored before are simply added.

        :param docs: new versions of the documents
        :param kwargs: ignored
        """
        self.delete(docs)
        self.index(docs)

    def close(self) -> None:
        """Save the stored documents to the dump location."""
        self.logger.info(f'Dumping {len(self._docs)} to {self._dump_location}')
        self._docs.save(self._dump_location)

    @requests(on='/delete')
    def delete(self, docs: 'DocumentArray', **kwargs):
        """Remove every stored document whose id appears in ``docs``.

        :param docs: documents whose ids should be removed
        :param kwargs: ignored
        """
        # TODO we can do del _docs[d.id] once
        # tests.unit.types.arrays.test_documentarray.test_delete_by_id is fixed
        # A set makes each membership test O(1) instead of scanning a list.
        ids_to_delete = {d.id for d in docs}
        idx_to_delete = [
            i for i, doc in enumerate(self._docs) if doc.id in ids_to_delete
        ]
        # Indices are ascending; delete from the back so the earlier ones
        # stay valid while the array shrinks.
        for i in reversed(idx_to_delete):
            del self._docs[i]

    @requests(on='/search')
    def search(self, docs: 'DocumentArray', parameters: Dict, **kwargs):
        """Attach the ``top_k`` closest stored documents to each query as matches.

        Each match is a copy of the stored document with
        ``scores['cosine']`` set to ``1 - distance``.

        :param docs: query documents; their ``embedding`` attributes are used
        :param parameters: request parameters; ``top_k`` defaults to 1
        :param kwargs: ignored
        """
        # ``np.stack`` raises on an empty sequence, so with no queries or an
        # empty index there is nothing to match: leave ``docs`` untouched.
        if len(docs) == 0 or len(self._docs) == 0:
            return

        top_k = int(parameters.get('top_k', 1))
        a = np.stack(docs.get_attributes('embedding'))
        b = np.stack(self._docs.get_attributes('embedding'))
        q_emb = _ext_A(_norm(a))
        d_emb = _ext_B(_norm(b))
        dists = _cosine(q_emb, d_emb)
        idx, dist = self._get_sorted_top_k(dists, top_k)
        for _q, _ids, _dists in zip(docs, idx, dist):
            for _id, _dist in zip(_ids, _dists):
                d = Document(self._docs[int(_id)], copy=True)
                d.scores['cosine'] = 1 - _dist
                _q.matches.append(d)

    @staticmethod
    def _get_sorted_top_k(
        dist: 'np.ndarray', top_k: int
    ) -> Tuple['np.ndarray', 'np.ndarray']:
        """Return, per row, the indices and values of the ``top_k`` smallest entries.

        :param dist: 2-D array; it is indexed along ``axis=1``
        :param top_k: number of entries to keep per row
        :return: ``(idx, dist)``, both ordered by ascending value within each row
        """
        if top_k >= dist.shape[1]:
            # Every column is wanted: a full sort is the whole job.
            idx = dist.argsort(axis=1)[:, :top_k]
            dist = np.take_along_axis(dist, idx, axis=1)
        else:
            # Partition first to isolate the top_k smallest (unordered), then
            # sort only that slice.
            idx_ps = dist.argpartition(kth=top_k, axis=1)[:, :top_k]
            dist = np.take_along_axis(dist, idx_ps, axis=1)
            idx_fs = dist.argsort(axis=1)
            idx = np.take_along_axis(idx_ps, idx_fs, axis=1)
            dist = np.take_along_axis(dist, idx_fs, axis=1)

        return idx, dist
Exemplo n.º 25
0
 def __init__(self, **kwargs):
     """Initialise the parent class and start with an empty ``DocumentArray``."""
     super().__init__(**kwargs)
     # Per-instance document store, empty at construction.
     self._docs = DocumentArray()
Exemplo n.º 26
0
 def __init__(self, *args, **kwargs):
     """Initialise the parent class, then load ``self._docs`` from ``self.save_path``.

     When ``self.save_path`` does not exist, an empty ``DocumentArray`` is used.
     """
     super().__init__(*args, **kwargs)
     has_saved_docs = os.path.exists(self.save_path)
     self._docs = (
         DocumentArray.load(self.save_path) if has_saved_docs else DocumentArray()
     )
Exemplo n.º 27
0
def test_pca_plot_generated(embeddings, tmpdir):
    """After ``visualize(output=...)`` the requested output file must exist."""
    embedded_docs = [Document(embedding=x) for x in embeddings]
    doc_array = DocumentArray(embedded_docs)
    file_path = os.path.join(tmpdir, 'pca_plot.png')
    doc_array.visualize(output=file_path)
    plot_written = os.path.exists(file_path)
    assert plot_written
Exemplo n.º 28
0
 def status(self, **kwargs):
     """Return a ``DocumentArray`` built from one document carrying ids in its tags."""
     # ids of all docs, stored under the ``ids`` tag
     all_ids = self.docs[:, 'id']
     summary = Document(tags={'ids': all_ids})
     return DocumentArray(summary)
Exemplo n.º 29
0
 def no_polling(self, docs: DocumentArray, **kwargs):
     """Append a document with text ``'added'`` to ``docs`` and return ``docs``."""
     marker = Document(text='added')
     docs.append(marker)
     return docs
Exemplo n.º 30
0
def test_input_lines_with_empty_filepath_and_lines():
    """``from_lines`` with neither ``lines`` nor ``filepath`` must raise ``ValueError``."""
    with pytest.raises(ValueError):
        # Both the call and the iteration sit inside the block, so the error
        # is caught whether it is raised eagerly or only on iteration.
        for _ in DocumentArray.from_lines(lines=None, filepath=None):
            pass