示例#1
0
    def validate_fn(resp):
        """Check the lone search result, first its chunks and then its matches.

        ``text``, ``buffer`` and ``random_np_array`` are free names resolved
        outside this function. Both child collections must hold three entries
        with ids 10, 20 and 30 carrying, in that order, text plus embedding,
        a blob, and a raw buffer.
        """
        results = resp.search.docs
        assert len(results) == 1
        result = results[0]
        assert result.id == 1

        # Chunks are verified completely before matches are even looked up.
        for field in ('chunks', 'matches'):
            children = getattr(result, field)
            assert len(children) == 3

            embedded = children[0]
            assert embedded.id == 10
            assert embedded.text == text
            np.testing.assert_almost_equal(random_np_array,
                                           pb2array(embedded.embedding))

            blobbed = children[1]
            assert blobbed.id == 20
            np.testing.assert_almost_equal(random_np_array,
                                           pb2array(blobbed.blob))

            raw = children[2]
            assert raw.id == 30
            assert raw.buffer == buffer
示例#2
0
    def test_request_generate_numpy_arrays_iterator(self):
        """Feed a generator of ten rows to ``_generate`` and check both batches.

        With ``batch_size=5`` two requests are pulled; each must carry five
        docs whose ids continue the 1-based numbering across batches.
        """
        rows = np.random.random([10, 10])

        def row_stream():
            yield from rows

        requests = _generate(data=row_stream(), batch_size=5)

        # id_offset is the number of docs already seen in earlier batches
        for id_offset in (0, 5):
            batch = next(requests)
            self.assertEqual(len(batch.index.docs), 5)
            for position, doc in enumerate(batch.index.docs, 1):
                self.assertEqual(doc.id, id_offset + position)
                self.assertEqual(doc.length, 5)
                self.assertEqual(doc.granularity, 0)
                self.assertEqual(pb2array(doc.blob).shape, (10, ))
                self.assertEqual(doc.blob.shape, [10])
示例#3
0
    def test_direct_concat(self):
        """Appending raw buffers and bumping ``shape[0]`` must match ``np.concatenate``."""
        left, right = input_fn()

        # Decode both embeddings before ``left`` is mutated below.
        decoded = [pb2array(left.embedding), pb2array(right.embedding)]
        expected = np.concatenate(decoded, axis=0)

        # Concatenate directly on the protobuf message: bytes first, then the leading dim.
        left.embedding.buffer += right.embedding.buffer
        left.embedding.shape[0] += right.embedding.shape[0]
        actual = pb2array(left.embedding)

        np.testing.assert_almost_equal(expected, actual)
示例#4
0
def test_multimodal_driver_with_shuffled_order(simple_multimodal_driver, mock_multimodal_encoder_shuffled,
                                               doc_with_multimodal_chunks):
    """Apply the driver with the shuffled encoder and check the resulting doc embedding.

    The document embedding must equal the embeddings of chunks 0, 1 and 2
    concatenated in that order.
    """
    doc = doc_with_multimodal_chunks
    simple_multimodal_driver.attach(executor=mock_multimodal_encoder_shuffled, pea=None)
    simple_multimodal_driver._apply_all([doc])

    assert len(doc.chunks) == 3
    # Original naming: chunk 0 -> visual2, chunk 1 -> textual, chunk 2 -> visual1.
    visual2, textual, visual1 = doc.chunks[0], doc.chunks[1], doc.chunks[2]
    expected = np.concatenate([
        pb2array(visual2.embedding),
        pb2array(textual.embedding),
        pb2array(visual1.embedding),
    ])
    actual = pb2array(doc.embedding)
    np.testing.assert_array_equal(expected, actual)
示例#5
0
 def validate(req):
     """Check that each embedding in the request is twice as long as its source array.

     ``e1`` .. ``e4`` are free names resolved outside this function. Only the
     first doc and its first chunk are additionally compared value by value.
     """
     docs = req.docs
     assert len(docs) == 2
     first, second = docs[0], docs[1]

     # Shape checks: doc-level embeddings first, then the first chunk of each doc.
     assert first.embedding.shape == [e1.shape[0] * 2]
     assert second.embedding.shape == [e3.shape[0] * 2]
     assert first.chunks[0].embedding.shape == [e2.shape[0] * 2]
     assert second.chunks[0].embedding.shape == [e4.shape[0] * 2]

     # Value checks against the source array stacked with itself.
     doubled_e1 = np.concatenate([e1, e1], axis=0)
     np.testing.assert_almost_equal(pb2array(first.embedding), doubled_e1, decimal=4)
     doubled_e2 = np.concatenate([e2, e2], axis=0)
     np.testing.assert_almost_equal(pb2array(first.chunks[0].embedding), doubled_e2, decimal=4)
示例#6
0
文件: test_quant.py 项目: vcaca/jina
def get_output(req):
    """Print the accumulated reconstruction error of all embeddings in ``req``.

    The numpy RNG is reseeded with ``rseed``, then one fresh random vector of
    length ``embed_dim`` is drawn per doc and per chunk (doc first, then its
    chunks) and compared with the decoded embedding.
    NOTE(review): presumably the sender drew the same sequence from the same
    seed -- confirm against the code that builds the request.
    """
    np.random.seed(rseed)

    def _mean_abs_diff(message):
        # Decode first, then draw, so the RNG is consumed in the original order.
        recv = pb2array(message.embedding)
        send = np.random.random([embed_dim])
        return np.sum(np.abs(recv - send)) / embed_dim

    err = 0
    for d in req.docs:
        err += _mean_abs_diff(d)
        for c in d.chunks:
            err += _mean_abs_diff(c)

    print(f'reconstruction error: {err / num_docs:.6f}')
示例#7
0
 def validate(req):
     """Check that each embedding in the request is twice as long as its source array.

     ``self`` and ``e1`` .. ``e4`` are free names resolved outside this
     function. Only the first doc and its first chunk are additionally
     compared value by value.
     """
     docs = req.docs
     self.assertEqual(len(docs), 2)
     first, second = docs[0], docs[1]

     # Shape checks: doc-level embeddings first, then the first chunk of each doc.
     self.assertEqual(first.embedding.shape, [e1.shape[0] * 2])
     self.assertEqual(second.embedding.shape, [e3.shape[0] * 2])
     self.assertEqual(first.chunks[0].embedding.shape, [e2.shape[0] * 2])
     self.assertEqual(second.chunks[0].embedding.shape, [e4.shape[0] * 2])

     # Value checks against the source array stacked with itself.
     doubled_e1 = np.concatenate([e1, e1], axis=0)
     np.testing.assert_almost_equal(pb2array(first.embedding), doubled_e1, decimal=4)
     doubled_e2 = np.concatenate([e2, e2], axis=0)
     np.testing.assert_almost_equal(pb2array(first.chunks[0].embedding), doubled_e2, decimal=4)
示例#8
0
def test_array2pb():
    """Round-trip ``e4`` through ``array2pb``/``pb2array`` with quantization unset."""
    env = os.environ
    # NOTE: it is unclear why this variable can be set at this point; an
    # environment change should only be visible in the process that made it.
    if 'JINA_ARRAY_QUANT' in env:
        print(f'quant is on: {os.environ["JINA_ARRAY_QUANT"]}')
        env.pop('JINA_ARRAY_QUANT')

    restored = pb2array(array2pb(e4))
    np.testing.assert_almost_equal(restored, e4)
示例#9
0
def print_embed(req):
    """Print one line per chunk: a blue text preview plus the embedding shape and first two values."""
    every_chunk = (c for d in req.docs for c in d.chunks)
    for c in every_chunk:
        embed = pb2array(c.embedding)
        # Texts longer than ten characters are cut and marked with an ellipsis.
        if len(c.text) > 10:
            preview = f'{c.text[:10]}...'
        else:
            preview = c.text
        text = colored(preview, 'blue')
        print(
            f'{text} embed to {embed.shape} [{embed[0]:.3f}, {embed[1]:.3f}...]'
        )
示例#10
0
def test_request_generate_numpy_arrays():
    """Feed a 10x10 array to ``_generate`` and check the first two batches of five docs."""
    rows = np.random.random([10, 10])
    requests = _generate(data=rows, batch_size=5)

    # Exactly two requests are pulled; both must satisfy the same checks.
    for _ in range(2):
        batch = next(requests)
        assert len(batch.index.docs) == 5
        for doc in batch.index.docs:
            assert doc.length == 5
            assert pb2array(doc.blob).shape == (10, )
            assert doc.blob.shape == [10]
示例#11
0
    def test_request_generate_numpy_arrays(self):
        """Feed a 10x10 array to ``_generate`` and check the first two batches of five docs."""
        rows = np.random.random([10, 10])
        requests = _generate(data=rows, batch_size=5)

        # Exactly two requests are pulled; both must satisfy the same checks.
        for _ in range(2):
            batch = next(requests)
            self.assertEqual(len(batch.index.docs), 5)
            for doc in batch.index.docs:
                self.assertEqual(doc.length, 5)
                self.assertEqual(pb2array(doc.blob).shape, (10, ))
                self.assertEqual(doc.blob.shape, [10])
示例#12
0
    def validate_fn(resp):
        """Check the lone search result and its three chunks, identified via ``tags['id']``.

        ``text``, ``buffer`` and ``random_np_array`` are free names resolved
        outside this function.
        """
        results = resp.search.docs
        assert len(results) == 1
        result = results[0]
        assert int(result.tags['id']) == 1

        chunks = result.chunks
        assert len(chunks) == 3

        # chunk 0: text plus embedding
        embedded = chunks[0]
        assert int(embedded.tags['id']) == 10
        assert embedded.text == text
        np.testing.assert_almost_equal(random_np_array,
                                       pb2array(embedded.embedding))

        # chunk 1: blob
        blobbed = chunks[1]
        assert int(blobbed.tags['id']) == 20
        np.testing.assert_almost_equal(random_np_array,
                                       pb2array(blobbed.blob))

        # chunk 2: raw buffer
        raw = chunks[2]
        assert int(raw.tags['id']) == 30
        assert raw.buffer == buffer
示例#13
0
    def validate_fn(resp):
        """Check the lone search result and its three matches, identified via ``tags['id']``.

        ``text``, ``buffer`` and ``random_np_array`` are free names resolved
        outside this function.
        """
        results = resp.search.docs
        assert len(results) == 1
        result = results[0]
        assert int(result.tags['id']) == 1

        matches = result.matches
        assert len(matches) == 3

        # match 0: text plus embedding
        embedded = matches[0]
        assert int(embedded.tags['id']) == 10
        assert embedded.text == text
        np.testing.assert_almost_equal(random_np_array,
                                       pb2array(embedded.embedding))

        # match 1: blob
        blobbed = matches[1]
        assert int(blobbed.tags['id']) == 20
        np.testing.assert_almost_equal(random_np_array,
                                       pb2array(blobbed.blob))

        # match 2: raw buffer
        raw = matches[2]
        assert int(raw.tags['id']) == 30
        assert raw.buffer == buffer
示例#14
0
def test_vectorsearch_driver_mock_indexer_with_matches_on_chunks():
    """Run the KV search driver on path ``'cm'`` and check every match of the single chunk.

    After traversal each match must hold a non-empty embedding buffer that
    decodes to a one-element array containing the match id as an integer.
    """
    kv_driver = SimpleKVSearchDriver(traversal_paths=('cm', ))
    indexer = MockIndexer()
    kv_driver.attach(executor=indexer, pea=None)
    query = create_document_to_search_with_matches_on_chunks()

    kv_driver._traverse_apply([query])

    assert len(query.chunks) == 1
    matches = query.chunks[0].matches
    assert len(matches) == 3
    for hit in matches:
        assert hit.embedding.buffer != b''
        decoded = pb2array(hit.embedding)
        # ids are converted with int() before being compared with the array
        np.testing.assert_equal(decoded, np.array([int(hit.id)]))
示例#15
0
def test_vectorsearch_driver_mock_indexer_with_matches_on_chunks():
    """Run the KV search driver over granularity 1 / adjacency 0-1 and check the chunk's matches.

    After traversal each match must hold a non-empty embedding buffer that
    decodes to a one-element array containing the match id.
    """
    kv_driver = SimpleKVSearchDriver(granularity_range=[1, 1], adjacency_range=[0, 1])
    indexer = MockIndexer()
    kv_driver.attach(executor=indexer, pea=None)
    query = create_document_to_search_with_matches_on_chunks()

    kv_driver._traverse_apply([query])

    assert len(query.chunks) == 1
    matches = query.chunks[0].matches
    # NOTE(review): the original comment here read "2 missed"; its exact meaning is unclear.
    assert len(matches) == 3
    for hit in matches:
        assert hit.embedding.buffer != b''
        decoded = pb2array(hit.embedding)
        np.testing.assert_equal(decoded, np.array([hit.id]))
示例#16
0
    def test_vectorsearch_driver_mock_indexer(self):
        """Apply the KV search driver to a doc's chunks and check the embeddings it fills in.

        Before the call all five chunks have an empty embedding buffer; after
        it four chunks remain, each decoding to a one-element array holding
        the chunk id.
        """
        query = create_document_to_search()
        kv_driver = SimpleKVSearchDriver(top_k=2)
        indexer = MockIndexer()
        kv_driver.attach(executor=indexer, pea=None)

        # Precondition: nothing is embedded yet.
        self.assertEqual(len(query.chunks), 5)
        for piece in query.chunks:
            self.assertEqual(piece.embedding.buffer, b'')

        kv_driver._apply_all(query.chunks)

        # The chunk with idx 5 had no match and was removed as a missing idx.
        self.assertEqual(len(query.chunks), 4)
        for piece in query.chunks:
            self.assertNotEqual(piece.embedding.buffer, b'')
            decoded = pb2array(piece.embedding)
            np.testing.assert_equal(decoded, np.array([piece.id]))
示例#17
0
def test_vectorsearch_driver_mock_indexer_traverse_apply():
    """Traverse a doc's chunks with the KV search driver and check the embeddings it fills in.

    Before the call all five chunks have an empty embedding buffer; after it
    four chunks remain, each decoding to a one-element array holding the
    chunk id as an integer.
    """
    query = create_document_to_search()
    kv_driver = SimpleKVSearchDriver()

    indexer = MockIndexer()
    kv_driver.attach(executor=indexer, pea=None)

    # Precondition: nothing is embedded yet.
    assert len(query.chunks) == 5
    for piece in query.chunks:
        assert piece.embedding.buffer == b''

    kv_driver._traverse_apply(query.chunks)

    # The chunk with idx 5 had no match and was removed as a missing idx.
    assert len(query.chunks) == 4
    for piece in query.chunks:
        assert piece.embedding.buffer != b''
        decoded = pb2array(piece.embedding)
        np.testing.assert_equal(decoded, np.array([int(piece.id)]))
示例#18
0
 def validate_response(resp):
     """Check that the doc at position ``idx`` embeds to five copies of ``idx`` followed by 3, 3, 4, 4."""
     docs = resp.index.docs
     assert len(docs) == NUM_DOCS
     for idx, doc in enumerate(docs):
         expected = np.array([idx] * 5 + [3, 3, 4, 4])
         np.testing.assert_almost_equal(pb2array(doc.embedding), expected)
示例#19
0
def test_array_protobuf_conversions_with_quantize(quantize, type):
    """Round-trip a random 2-D array through ``array2pb``/``pb2array`` with ``quantize``.

    The array has 0-49 rows and 0-19 columns (either dimension may be zero),
    is cast to ``type``, and must match to two decimals after the round trip.
    """
    n_rows = random.randrange(0, 50)
    n_cols = random.randrange(0, 20)
    random_array = np.random.rand(n_rows, n_cols).astype(type)

    restored = pb2array(array2pb(random_array, quantize))
    np.testing.assert_almost_equal(restored, random_array, decimal=2)
示例#20
0
def test_array_protobuf_conversions(type):
    """Round-trip a random 2-D array through ``array2pb``/``pb2array`` with quantization ``None``.

    The array has 0-49 rows and 0-19 columns (either dimension may be zero)
    and is cast to ``type`` before conversion.
    """
    n_rows = random.randrange(0, 50)
    n_cols = random.randrange(0, 20)
    random_array = np.random.rand(n_rows, n_cols).astype(type)

    restored = pb2array(array2pb(random_array, None))
    np.testing.assert_almost_equal(restored, random_array)