def validate_fn(resp):
    """Verify the single search result and the payloads of its chunks and matches.

    ``text``, ``buffer`` and ``random_np_array`` are free variables that must be
    defined outside this function.
    """

    def _check_children(children):
        # Each collection holds three items with ids 10, 20, 30 carrying,
        # in that order, a text + embedding, a blob, and a raw buffer.
        assert len(children) == 3

        with_embedding = children[0]
        assert with_embedding.id == 10
        assert with_embedding.text == text
        np.testing.assert_almost_equal(random_np_array,
                                       pb2array(with_embedding.embedding))

        with_blob = children[1]
        assert with_blob.id == 20
        np.testing.assert_almost_equal(random_np_array,
                                       pb2array(with_blob.blob))

        with_buffer = children[2]
        assert with_buffer.id == 30
        assert with_buffer.buffer == buffer

    found = resp.search.docs
    assert len(found) == 1
    doc = found[0]
    assert doc.id == 1

    # Chunks are validated before matches, same as the expanded original.
    _check_children(doc.chunks)
    _check_children(doc.matches)
def test_request_generate_numpy_arrays_iterator(self):
    """Requests built from a generator of array rows come out in two batches of five."""
    input_array = np.random.random([10, 10])

    def generator():
        for row in input_array:
            yield row

    req = _generate(data=generator(), batch_size=5)

    # Document ids keep counting across batches: 1..5, then 6..10.
    for id_offset in (0, 5):
        request = next(req)
        batch = request.index.docs
        self.assertEqual(len(batch), 5)
        for position, doc in enumerate(batch, 1):
            self.assertEqual(doc.id, id_offset + position)
            self.assertEqual(doc.length, 5)
            self.assertEqual(doc.granularity, 0)
            self.assertEqual(pb2array(doc.blob).shape, (10, ))
            self.assertEqual(doc.blob.shape, [10])
def test_direct_concat(self):
    """Appending buffers and summing the leading shape entry matches ``np.concatenate``."""
    doc1, doc2 = input_fn()

    # Reference result, computed before doc1 is modified in place.
    left = pb2array(doc1.embedding)
    right = pb2array(doc2.embedding)
    expected = np.concatenate([left, right], axis=0)

    # Concatenate directly on the protobuf message: raw bytes + first dimension.
    doc1.embedding.buffer += doc2.embedding.buffer
    doc1.embedding.shape[0] += doc2.embedding.shape[0]

    merged = pb2array(doc1.embedding)
    np.testing.assert_almost_equal(expected, merged)
def test_multimodal_driver_with_shuffled_order(simple_multimodal_driver,
                                               mock_multimodal_encoder_shuffled,
                                               doc_with_multimodal_chunks):
    """The document embedding equals the chunk embeddings concatenated in chunk order 0, 1, 2."""
    simple_multimodal_driver.attach(executor=mock_multimodal_encoder_shuffled,
                                    pea=None)
    simple_multimodal_driver._apply_all([doc_with_multimodal_chunks])

    doc = doc_with_multimodal_chunks
    assert len(doc.chunks) == 3

    # Positional order used for the control vector: chunks[0] (a visual chunk),
    # chunks[1] (the textual chunk), chunks[2] (the other visual chunk).
    ordered_chunks = [doc.chunks[0], doc.chunks[1], doc.chunks[2]]
    control = np.concatenate(
        [pb2array(chunk.embedding) for chunk in ordered_chunks])

    test = pb2array(doc.embedding)
    np.testing.assert_array_equal(control, test)
def validate(req):
    """Check that doc and chunk embeddings have doubled length and that the first doc's values are self-concatenations.

    ``e1`` .. ``e4`` are free variables that must be defined outside this function.
    """

    def _doubled_shape(source):
        # Shape expected after an array is concatenated with itself on axis 0.
        return [source.shape[0] * 2]

    def _twice(source):
        return np.concatenate([source, source], axis=0)

    docs = req.docs
    assert len(docs) == 2

    # Shapes are checked for both documents and their first chunk.
    assert docs[0].embedding.shape == _doubled_shape(e1)
    assert docs[1].embedding.shape == _doubled_shape(e3)
    assert docs[0].chunks[0].embedding.shape == _doubled_shape(e2)
    assert docs[1].chunks[0].embedding.shape == _doubled_shape(e4)

    # Values are only compared for the first document and its first chunk.
    np.testing.assert_almost_equal(pb2array(docs[0].embedding),
                                   _twice(e1),
                                   decimal=4)
    np.testing.assert_almost_equal(pb2array(docs[0].chunks[0].embedding),
                                   _twice(e2),
                                   decimal=4)
def get_output(req):
    """Print the accumulated per-dimension absolute error of all doc and chunk embeddings.

    The global NumPy RNG is re-seeded with ``rseed`` and one random vector of
    length ``embed_dim`` is drawn per embedding, in doc-then-chunks order --
    presumably replaying the vectors that were originally sent (verify against
    the sender). ``rseed``, ``embed_dim`` and ``num_docs`` come from outside
    this function.
    """
    np.random.seed(rseed)

    def _embedding_error(embedding):
        # Decode first, then draw: keeps the RNG consumption order unchanged.
        recv = pb2array(embedding)
        send = np.random.random([embed_dim])
        return np.sum(np.abs(recv - send)) / embed_dim

    err = 0
    for d in req.docs:
        err += _embedding_error(d.embedding)
        for c in d.chunks:
            err += _embedding_error(c.embedding)

    print(f'reconstruction error: {err / num_docs:.6f}')
def validate(req):
    """Check doubled embedding shapes for both docs and self-concatenated values for the first doc.

    ``self`` and ``e1`` .. ``e4`` are free variables supplied by the enclosing scope.
    """

    def _doubled_shape(source):
        # Shape expected after an array is concatenated with itself on axis 0.
        return [source.shape[0] * 2]

    def _twice(source):
        return np.concatenate([source, source], axis=0)

    docs = req.docs
    self.assertEqual(len(docs), 2)

    # Shapes are checked for both documents and their first chunk.
    self.assertEqual(docs[0].embedding.shape, _doubled_shape(e1))
    self.assertEqual(docs[1].embedding.shape, _doubled_shape(e3))
    self.assertEqual(docs[0].chunks[0].embedding.shape, _doubled_shape(e2))
    self.assertEqual(docs[1].chunks[0].embedding.shape, _doubled_shape(e4))

    # Values are only compared for the first document and its first chunk.
    np.testing.assert_almost_equal(pb2array(docs[0].embedding),
                                   _twice(e1),
                                   decimal=4)
    np.testing.assert_almost_equal(pb2array(docs[0].chunks[0].embedding),
                                   _twice(e2),
                                   decimal=4)
def test_array2pb():
    """Round-trip ``e4`` through ``array2pb``/``pb2array`` with JINA_ARRAY_QUANT removed from the environment."""
    # Note kept from the original author: it is unclear why this variable is
    # set at this point; an environment variable should only be visible to the
    # process context that set it.
    env_key = 'JINA_ARRAY_QUANT'
    if env_key in os.environ:
        print(f'quant is on: {os.environ["JINA_ARRAY_QUANT"]}')
        del os.environ[env_key]

    restored = pb2array(array2pb(e4))
    np.testing.assert_almost_equal(restored, e4)
def print_embed(req):
    """Print, for every chunk of every doc, a coloured text preview with its embedding shape and first two values."""
    for d in req.docs:
        for c in d.chunks:
            embed = pb2array(c.embedding)

            # Texts longer than 10 characters are cut to 10 plus an ellipsis.
            if len(c.text) > 10:
                preview = f'{c.text[:10]}...'
            else:
                preview = c.text
            text = colored(preview, 'blue')

            print(
                f'{text} embed to {embed.shape} [{embed[0]:.3f}, {embed[1]:.3f}...]'
            )
def test_request_generate_numpy_arrays():
    """A 10x10 array with ``batch_size=5`` yields two requests of five docs, each with a length-10 blob."""
    input_array = np.random.random([10, 10])
    req = _generate(data=input_array, batch_size=5)

    # Both batches are checked with exactly the same expectations.
    for _ in range(2):
        request = next(req)
        batch = request.index.docs
        assert len(batch) == 5
        for doc in batch:
            assert doc.length == 5
            assert pb2array(doc.blob).shape == (10, )
            assert doc.blob.shape == [10]
def test_request_generate_numpy_arrays(self):
    """A 10x10 array with ``batch_size=5`` yields two requests of five docs, each with a length-10 blob."""
    input_array = np.random.random([10, 10])
    req = _generate(data=input_array, batch_size=5)

    # Both batches are checked with exactly the same expectations.
    for _ in range(2):
        request = next(req)
        batch = request.index.docs
        self.assertEqual(len(batch), 5)
        for doc in batch:
            self.assertEqual(doc.length, 5)
            self.assertEqual(pb2array(doc.blob).shape, (10, ))
            self.assertEqual(doc.blob.shape, [10])
def validate_fn(resp):
    """Verify the single search result and its three chunks, identified through their ``'id'`` tag.

    ``text``, ``buffer`` and ``random_np_array`` are free variables that must be
    defined outside this function.
    """

    def _tag_id(item):
        # Ids are read from the tags and converted with int() before comparing.
        return int(item.tags['id'])

    found = resp.search.docs
    assert len(found) == 1
    doc = found[0]
    assert _tag_id(doc) == 1
    assert len(doc.chunks) == 3

    # Chunk 0: text + embedding.
    text_chunk = doc.chunks[0]
    assert _tag_id(text_chunk) == 10
    assert text_chunk.text == text
    np.testing.assert_almost_equal(random_np_array,
                                   pb2array(text_chunk.embedding))

    # Chunk 1: blob.
    blob_chunk = doc.chunks[1]
    assert _tag_id(blob_chunk) == 20
    np.testing.assert_almost_equal(random_np_array, pb2array(blob_chunk.blob))

    # Chunk 2: raw buffer.
    buffer_chunk = doc.chunks[2]
    assert _tag_id(buffer_chunk) == 30
    assert buffer_chunk.buffer == buffer
def validate_fn(resp):
    """Verify the single search result and its three matches, identified through their ``'id'`` tag.

    ``text``, ``buffer`` and ``random_np_array`` are free variables that must be
    defined outside this function.
    """

    def _tag_id(item):
        # Ids are read from the tags and converted with int() before comparing.
        return int(item.tags['id'])

    found = resp.search.docs
    assert len(found) == 1
    doc = found[0]
    assert _tag_id(doc) == 1
    assert len(doc.matches) == 3

    # Match 0: text + embedding.
    text_match = doc.matches[0]
    assert _tag_id(text_match) == 10
    assert text_match.text == text
    np.testing.assert_almost_equal(random_np_array,
                                   pb2array(text_match.embedding))

    # Match 1: blob.
    blob_match = doc.matches[1]
    assert _tag_id(blob_match) == 20
    np.testing.assert_almost_equal(random_np_array, pb2array(blob_match.blob))

    # Match 2: raw buffer.
    buffer_match = doc.matches[2]
    assert _tag_id(buffer_match) == 30
    assert buffer_match.buffer == buffer
def test_vectorsearch_driver_mock_indexer_with_matches_on_chunks():
    """With traversal path ``'cm'``, every match of the single chunk gets an embedding equal to ``[int(match.id)]``."""
    driver = SimpleKVSearchDriver(traversal_paths=('cm', ))
    driver.attach(executor=MockIndexer(), pea=None)

    doc = create_document_to_search_with_matches_on_chunks()
    driver._traverse_apply([doc])

    assert len(doc.chunks) == 1
    matches = doc.chunks[0].matches
    assert len(matches) == 3

    for match in matches:
        # A non-empty buffer shows that an embedding was written to the match.
        assert match.embedding.buffer != b''
        filled = pb2array(match.embedding)
        np.testing.assert_equal(filled, np.array([int(match.id)]))
def test_vectorsearch_driver_mock_indexer_with_matches_on_chunks():
    """With granularity range [1, 1] and adjacency range [0, 1], every match of the single chunk gets an embedding equal to ``[match.id]``."""
    driver = SimpleKVSearchDriver(granularity_range=[1, 1],
                                  adjacency_range=[0, 1])
    driver.attach(executor=MockIndexer(), pea=None)

    doc = create_document_to_search_with_matches_on_chunks()
    driver._traverse_apply([doc])

    assert len(doc.chunks) == 1
    matches = doc.chunks[0].matches
    assert len(matches) == 3  # 2 missed

    for match in matches:
        # A non-empty buffer shows that an embedding was written to the match.
        assert match.embedding.buffer != b''
        filled = pb2array(match.embedding)
        np.testing.assert_equal(filled, np.array([match.id]))
def test_vectorsearch_driver_mock_indexer(self):
    """``_apply_all`` fills chunk embeddings with ``[chunk.id]`` and leaves four of the five chunks."""
    doc = create_document_to_search()
    driver = SimpleKVSearchDriver(top_k=2)
    driver.attach(executor=MockIndexer(), pea=None)

    # Before the driver runs: five chunks, none with an embedding buffer.
    self.assertEqual(len(doc.chunks), 5)
    for pending in doc.chunks:
        self.assertEqual(pending.embedding.buffer, b'')

    driver._apply_all(doc.chunks)

    # chunk idx: 5 had no matched and is removed as missing idx
    self.assertEqual(len(doc.chunks), 4)
    for filled in doc.chunks:
        self.assertNotEqual(filled.embedding.buffer, b'')
        np.testing.assert_equal(pb2array(filled.embedding),
                                np.array([filled.id]))
def test_vectorsearch_driver_mock_indexer_traverse_apply():
    """``_traverse_apply`` fills chunk embeddings with ``[int(chunk.id)]`` and leaves four of the five chunks."""
    doc = create_document_to_search()
    driver = SimpleKVSearchDriver()
    driver.attach(executor=MockIndexer(), pea=None)

    # Before the driver runs: five chunks, none with an embedding buffer.
    assert len(doc.chunks) == 5
    for pending in doc.chunks:
        assert pending.embedding.buffer == b''

    driver._traverse_apply(doc.chunks)

    # chunk idx: 5 had no matched and is removed as missing idx
    assert len(doc.chunks) == 4
    for filled in doc.chunks:
        assert filled.embedding.buffer != b''
        np.testing.assert_equal(pb2array(filled.embedding),
                                np.array([int(filled.id)]))
def validate_response(resp):
    """Check that ``NUM_DOCS`` docs come back and each embedding is its index five times followed by 3, 3, 4, 4."""
    docs = resp.index.docs
    assert len(docs) == NUM_DOCS

    constant_tail = [3, 3, 4, 4]
    for idx, doc in enumerate(docs):
        actual = pb2array(doc.embedding)
        # The first five entries repeat the document's position in the response.
        expected = np.array([idx] * 5 + constant_tail)
        np.testing.assert_almost_equal(actual, expected)
def test_array_protobuf_conversions_with_quantize(quantize, type):
    """A random 2-D array survives ``array2pb`` -> ``pb2array`` with quantization to two decimals.

    :param quantize: quantization argument forwarded to ``array2pb``
    :param type: dtype the random array is cast to before conversion
    """
    # Dimensions are drawn rows-first; either may be 0, giving an empty array.
    n_rows = random.randrange(0, 50)
    n_cols = random.randrange(0, 20)
    random_array = np.random.rand(n_rows, n_cols).astype(type)

    round_tripped = pb2array(array2pb(random_array, quantize))
    np.testing.assert_almost_equal(round_tripped, random_array, decimal=2)
def test_array_protobuf_conversions(type):
    """A random 2-D array survives ``array2pb`` -> ``pb2array`` when no quantization is requested.

    :param type: dtype the random array is cast to before conversion
    """
    # Dimensions are drawn rows-first; either may be 0, giving an empty array.
    n_rows = random.randrange(0, 50)
    n_cols = random.randrange(0, 20)
    random_array = np.random.rand(n_rows, n_cols).astype(type)

    round_tripped = pb2array(array2pb(random_array, None))
    np.testing.assert_almost_equal(round_tripped, random_array)