def test_index_remote_rpi(test_workspace):
    f_args = set_gateway_parser().parse_args(['--host', '0.0.0.0'])

    def start_gateway():
        with GatewayPod(f_args):
            time.sleep(3)

    t = mp.Process(target=start_gateway)
    t.daemon = True
    t.start()

    f = Flow(optimize_level=FlowOptimizeLevel.IGNORE_GATEWAY).add(
        uses=os.path.join(cur_dir, 'yaml/test-index-remote.yml'),
        parallel=3,
        separated_workspace=True,
        host='0.0.0.0',
        port_expose=random_port())

    with f:
        f.index(input_fn=random_docs(1000))

def test_flow_topo_mixed(docker_image_built, _logforward):
    f = (
        Flow()
        .add(
            name='d4',
            uses='docker://jinaai/jina:test-pip',
            entrypoint='jina executor',
        )
        .add(name='d5', uses=_logforward)
        .add(
            name='d6',
            uses='docker://jinaai/jina:test-pip',
            needs='d4',
            entrypoint='jina executor',
        )
        .join(['d6', 'd5'])
    )

    with f:
        f.post(on='/index', inputs=random_docs(10))

def test_shards_insufficient_data(mocker, restful):
    """THIS IS SUPER IMPORTANT FOR TESTING SHARDS

    IF THIS FAILS, DON'T IGNORE IT, DEBUG IT
    """
    index_docs = 3
    parallel = 4

    mock = mocker.Mock()

    def validate(req):
        mock()
        assert len(req.docs) == 1
        assert len(req.docs[0].matches) == index_docs
        for d in req.docs[0].matches:
            assert hasattr(d, 'weight')
            assert d.weight

    f = Flow(restful=restful).add(
        name='doc_pb',
        uses=os.path.join(cur_dir, '../yaml/test-docpb.yml'),
        parallel=parallel)

    with f:
        f.index(input_fn=random_docs(index_docs))
        time.sleep(2)

    # reopen the flow once to make sure the shards reload from disk
    with f:
        pass
    time.sleep(2)

    f = Flow(restful=restful).add(
        name='doc_pb',
        uses=os.path.join(cur_dir, '../yaml/test-docpb.yml'),
        parallel=parallel,
        polling='all',
        uses_after='_merge_chunks')

    with f:
        f.search(input_fn=random_queries(1, index_docs), on_done=validate)
        time.sleep(2)

    rm_files(['test-docshard-tmp'])
    mock.assert_called_once()

def test_shards_insufficient_data(self):
    """THIS IS SUPER IMPORTANT FOR TESTING SHARDS

    IF THIS FAILS, DON'T IGNORE IT, DEBUG IT
    """
    index_docs = 3
    parallel = 4

    def validate(req):
        assert len(req.docs) == 1
        assert len(req.docs[0].matches) == index_docs
        for d in req.docs[0].matches:
            self.assertTrue(hasattr(d, 'weight'))
            self.assertIsNotNone(d.weight)
            assert d.meta_info == b'hello world'

    f = Flow().add(name='doc_pb',
                   uses=os.path.join(cur_dir, '../yaml/test-docpb.yml'),
                   parallel=parallel,
                   separated_workspace=True)
    with f:
        f.index(input_fn=random_docs(index_docs), random_doc_id=False)
        time.sleep(2)

    # reopen the flow once to make sure the shards reload from disk
    with f:
        pass
    time.sleep(2)

    f = Flow().add(name='doc_pb',
                   uses=os.path.join(cur_dir, '../yaml/test-docpb.yml'),
                   parallel=parallel,
                   separated_workspace=True,
                   polling='all',
                   uses_reducing='_merge_all')
    with f:
        f.search(input_fn=random_queries(1, index_docs),
                 random_doc_id=False,
                 output_fn=validate,
                 callback_on_body=True)
        time.sleep(2)

    self.add_tmpfile('test-docshard-tmp')

def test_lazy_msg_access_with_envelope():
    envelope_proto = jina_pb2.EnvelopeProto()
    envelope_proto.compression.algorithm = 'NONE'
    envelope_proto.request_type = 'DataRequest'
    messages = [
        Message(
            envelope_proto,
            r.SerializeToString(),
        )
        for r in request_generator('/', random_docs(10))
    ]
    for m in messages:
        # built from a serialized buffer, the request stays lazy
        assert not m.request.is_decompressed
        assert m.envelope
        # dumping the message does not force deserialization
        assert len(m.dump()) == 3
        assert not m.request.is_decompressed
        assert m.request._pb_body is None
        assert m.request._buffer is not None
        # accessing .proto triggers deserialization: the buffer is
        # swapped for the parsed protobuf body
        assert m.proto
        assert m.request.is_decompressed
        assert m.request._pb_body is not None
        assert m.request._buffer is None

def test_exec_fn_return_dict(mocker):
    encode_mock = mocker.Mock()

    class MyExecutor(BaseEncoder):
        def encode(self, id):
            encode_mock()
            return [{'id': 'hello'}] * len(id)

    exec = MyExecutor()
    bd = EncodeDriver()
    bd.attach(exec, runtime=None)

    docs = list(random_docs(10))
    ds = DocumentSet(docs)
    bd._apply_all(ds)

    encode_mock.assert_called()

    for d in ds:
        assert d.id == 'hello'

def test_this_will_fail():
    f = (Flow()
         .add(name='a11', uses='DummySegment')
         .add(name='a12', uses='DummySegment', needs='gateway')
         .add(name='r1', uses='_merge_all', needs=['a11', 'a12'])
         .add(name='a21', uses='DummySegment', needs='gateway')
         .add(name='a22', uses='DummySegment', needs='gateway')
         .add(name='r2', uses='_merge_all', needs=['a21', 'a22'])
         .add(uses='_merge_all', needs=['r1', 'r2']))

    with f:
        f.index(input_fn=random_docs(10, chunks_per_doc=0), output_fn=validate)

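# `DummySegment`, referenced by several tests here, is imported from elsewhere
# in the test suite. As a rough, hypothetical sketch only (the class name,
# base class, and signature are assumptions based on the legacy BaseSegmenter
# API, not the actual fixture), such a segmenter returned a list of chunk
# dicts for each incoming document:
class DummySegmentSketch(BaseSegmenter):
    def segment(self, text, *args, **kwargs):
        # emit two fixed chunks per incoming document
        return [dict(buffer=b'aa'), dict(buffer=b'bb')]
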
def test_extract_multi_fields(mocker):
    encode_mock = mocker.Mock()

    class MyExecutor(BaseEncoder):
        def encode(self, id, embedding):
            encode_mock()
            assert isinstance(id, list)
            assert isinstance(embedding, list)
            assert isinstance(id[0], str)
            assert isinstance(embedding[0], np.ndarray)

    exec = MyExecutor()
    bd = EncodeDriver()
    bd.attach(exec, runtime=None)

    docs = list(random_docs(10))
    ds = DocumentArray(docs)
    bd._apply_all(ds)

    encode_mock.assert_called()

def test_flow_topo1(docker_image_built, _logforward):
    f = (Flow()
         .add(
             name='d0',
             uses='docker://jinaai/jina:test-pip',
             uses_internal=_logforward,
             entrypoint='jina pod',
         )
         .add(
             name='d1',
             uses='docker://jinaai/jina:test-pip',
             uses_internal=_logforward,
             entrypoint='jina pod',
         )
         .add(
             name='d2',
             uses='docker://jinaai/jina:test-pip',
             uses_internal=_logforward,
             needs='d0',
             entrypoint='jina pod',
         )
         .join(['d2', 'd1']))

    with f:
        f.index(inputs=random_docs(10))

def test_flow_topo_parallel():
    f = (
        Flow()
        .add(
            name='d7',
            uses='docker://jinaai/jina:test-pip',
            entrypoint='jina executor',
            parallel=3,
        )
        .add(name='d8', parallel=3)
        .add(
            name='d9',
            uses='docker://jinaai/jina:test-pip',
            entrypoint='jina executor',
            needs='d7',
        )
        .join(['d9', 'd8'])
    )

    with f:
        f.post(on='/index', inputs=random_docs(10))

def test_this_should_work(mocker, restful):
    f = (Flow(restful=restful)
         .add(name='a1')
         .add(name='a11', uses='DummySegment', needs='a1')
         .add(name='a12', uses='DummySegment', needs='a1')
         .add(name='r1', uses='_merge_chunks', needs=['a11', 'a12'])
         .add(name='a2', needs='gateway')
         .add(name='a21', uses='DummySegment', needs='a2')
         .add(name='a22', uses='DummySegment', needs='a2')
         .add(name='r2', uses='_merge_chunks', needs=['a21', 'a22'])
         .add(uses='_merge_chunks', needs=['r1', 'r2']))

    # `wraps` (not `wrap`) is the Mock kwarg that calls through to validate;
    # `wrap=` would silently set an unused attribute on the mock
    response_mock = mocker.Mock(wraps=validate)

    with f:
        f.index(input_fn=random_docs(10, chunks_per_doc=0),
                on_done=response_mock)

    response_mock.assert_called()

def test_exec_fn_return_doc(mocker):
    encode_mock = mocker.Mock()

    class MyExecutor(BaseEncoder):
        def encode(self, id):
            encode_mock()
            return [Document(mime_type='image/png')] * len(id)

    exec = MyExecutor()
    bd = EncodeDriver()
    bd.attach(exec, runtime=None)

    docs = list(random_docs(10))
    ds = DocumentArray(docs)
    bd._apply_all(ds)

    encode_mock.assert_called()

    for d in ds:
        assert d.mime_type == 'image/png'

def test_this_should_work(mocker, protocol):
    f = (Flow(protocol=protocol)
         .add(name='a1')
         .add(name='a11', uses='DummySegment', needs='a1')
         .add(name='a12', uses='DummySegment', needs='a1')
         .add(name='r1', uses=Merger, needs=['a11', 'a12'])
         .add(name='a2', needs='gateway')
         .add(name='a21', uses='DummySegment', needs='a2')
         .add(name='a22', uses='DummySegment', needs='a2')
         .add(name='r2', uses=Merger, needs=['a21', 'a22'])
         .add(uses=Merger, needs=['r1', 'r2']))

    response_mock = mocker.Mock()

    with f:
        f.index(inputs=random_docs(10, chunks_per_doc=0),
                on_done=response_mock)

    validate_callback(response_mock, validate)

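# `Merger` in the test above is assumed to be a small Executor that combines
# the DocumentArrays arriving from the parallel branches. A minimal sketch
# under that assumption (the class name and dedup strategy here are
# illustrative, not the actual fixture):
from jina import DocumentArray, Executor, requests


class MergerSketch(Executor):
    @requests
    def merge(self, docs_matrix, **kwargs):
        # deduplicate by doc id, concatenating matches from every branch
        results = {}
        for docs in docs_matrix:
            for doc in docs:
                if doc.id in results:
                    results[doc.id].matches.extend(doc.matches)
                else:
                    results[doc.id] = doc
        return DocumentArray(list(results.values()))
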
def test_shards_insufficient_data():
    """THIS IS SUPER IMPORTANT FOR TESTING SHARDS

    IF THIS FAILS, DON'T IGNORE IT, DEBUG IT
    """
    index_docs = 3
    parallel = 4

    def validate(req):
        assert len(req.docs) == 1
        assert len(req.docs[0].matches) == index_docs
        for d in req.docs[0].matches:
            assert hasattr(d, 'weight')
            assert d.weight
            assert d.meta_info == b'hello world'

    f = Flow().add(name='doc_pb',
                   uses=str(cur_dir.parent / 'yaml' / 'test-docpb.yml'),
                   parallel=parallel,
                   separated_workspace=True)
    with f:
        f.index(input_fn=random_docs(index_docs), override_doc_id=False)
        time.sleep(2)

    # reopen the flow once to make sure the shards reload from disk
    with f:
        pass
    time.sleep(2)

    f = Flow().add(name='doc_pb',
                   uses=str(cur_dir.parent / 'yaml' / 'test-docpb.yml'),
                   parallel=parallel,
                   separated_workspace=True,
                   polling='all',
                   uses_after='_merge_all')
    with f:
        f.search(input_fn=random_queries(1, index_docs),
                 override_doc_id=False,
                 on_done=validate,
                 callback_on='body')
        time.sleep(2)

    rm_files(['test-docshard-tmp'])

def test_all_sync_clients(protocol, mocker):
    f = Flow(protocol=protocol).add(uses=MyExec)
    docs = list(random_docs(1000))
    m1 = mocker.Mock()
    m2 = mocker.Mock()
    m3 = mocker.Mock()
    m4 = mocker.Mock()

    with f:
        c = Client(
            host='localhost',
            port=f.port,
            protocol=protocol,
        )
        c.post('/', on_done=m1)
        c.post('/foo', docs, on_done=m2)
        c.post('/foo', on_done=m3)
        c.post('/foo', docs, parameters={'hello': 'world'}, on_done=m4)

    m1.assert_called_once()
    m2.assert_called()
    m3.assert_called_once()
    m4.assert_called()

def test_request_docs_chunks_mutable_iterator():
    """Test if weak references work in nested docs"""
    r = Request()
    r.request_type = 'index'
    for d in random_docs(10):
        r.docs.append(d)

    for d in r.docs:
        assert isinstance(d, Document)
        for idx, c in enumerate(d.chunks):
            assert isinstance(c, Document)
            c.text = f'look I changed it! {idx}'

    # iterating again should see the change
    doc_pointers = []
    for d in r.docs:
        assert isinstance(d, Document)
        for idx, c in enumerate(d.chunks):
            assert c.text == f'look I changed it! {idx}'
            doc_pointers.append(c)

    # pb-lizing it should see the change
    rpb = r.proto
    for d in rpb.index.docs:
        assert isinstance(d, DocumentProto)
        for idx, c in enumerate(d.chunks):
            assert isinstance(c, DocumentProto)
            assert c.text == f'look I changed it! {idx}'

    # change again by following the pointers
    for d in doc_pointers:
        d.text = 'now i change it back'

    # iterating again should see the change
    for d in rpb.index.docs:
        assert isinstance(d, DocumentProto)
        for c in d.chunks:
            assert c.text == 'now i change it back'

def test_get_set_item(tmpdir, idx1, idx99):
    dam = DocumentArrayMemmap(tmpdir)
    candidates = list(random_docs(100))
    for d in candidates:
        d.id = f'id_{d.id}'
    dam.extend(candidates)

    dam[idx1] = Document(id='id_1', text='hello')
    assert len(dam) == 100

    with pytest.raises(IndexError):
        dam[100] = Document(text='world')

    dam[idx99] = Document(id='id_99', text='world')
    assert len(dam) == 100

    assert dam[1].text == 'hello'
    assert dam[99].text == 'world'
    assert dam['id_1'].text == 'hello'
    assert dam['id_99'].text == 'world'

    for idx, d in enumerate(dam):
        if idx == 1:
            assert d.text == 'hello'
        if idx == 99:
            assert d.text == 'world'

    with pytest.raises(ValueError):
        dam['unknown_new'] = Document()

def test_jina_document_to_pydantic_document():
    document_proto_model = PROTO_TO_PYDANTIC_MODELS.DocumentProto

    for jina_doc in random_docs(num_docs=10):
        jina_doc = jina_doc.dict()
        pydantic_doc = document_proto_model(**jina_doc)

        assert jina_doc['text'] == pydantic_doc.text
        assert jina_doc['mime_type'] == pydantic_doc.mime_type
        assert (jina_doc['embedding']['dense']['shape']
                == pydantic_doc.embedding.dense.shape)
        assert (jina_doc['embedding']['dense']['dtype']
                == pydantic_doc.embedding.dense.dtype)

        for jina_doc_chunk, pydantic_doc_chunk in zip(jina_doc['chunks'],
                                                      pydantic_doc.chunks):
            assert jina_doc_chunk['id'] == pydantic_doc_chunk.id
            assert jina_doc_chunk['tags'] == pydantic_doc_chunk.tags
            assert jina_doc_chunk['text'] == pydantic_doc_chunk.text
            assert jina_doc_chunk['mime_type'] == pydantic_doc_chunk.mime_type
            assert jina_doc_chunk['parent_id'] == pydantic_doc_chunk.parent_id
            assert jina_doc_chunk['granularity'] == pydantic_doc_chunk.granularity

def test_binarypb_in_flow(test_metas, mocker):
    docs = list(random_docs(10))

    def validate(req):
        assert len(docs) == len(req.docs)
        for d, d0 in zip(req.docs, docs):
            np.testing.assert_almost_equal(d.embedding, d0.embedding)

    f = Flow().add(uses='binarypb.yml')

    with f:
        f.index(docs)

    docs_no_embedding = copy.deepcopy(docs)
    for d in docs_no_embedding:
        d.ClearField('embedding')

    mock = mocker.Mock()
    with f:
        f.search(docs_no_embedding, on_done=mock)

    mock.assert_called_once()
    validate_callback(mock, validate)

def test_client_websocket(mocker, flow_with_websocket):
    with flow_with_websocket:
        time.sleep(0.5)
        client = Client(
            host='localhost',
            port=str(flow_with_websocket.port_expose),
            protocol='websocket',
        )
        # Test that a regular index request triggers the correct callbacks
        on_always_mock = mocker.Mock()
        on_error_mock = mocker.Mock()
        on_done_mock = mocker.Mock()
        client.post(
            '',
            random_docs(1),
            request_size=1,
            on_always=on_always_mock,
            on_error=on_error_mock,
            on_done=on_done_mock,
        )
        on_always_mock.assert_called_once()
        on_done_mock.assert_called_once()
        on_error_mock.assert_not_called()

def test_index_remote(self):
    f_args = set_gateway_parser().parse_args(['--allow-spawn'])

    def start_gateway():
        with GatewayPod(f_args):
            time.sleep(20)

    t = mp.Process(target=start_gateway)
    t.daemon = True
    t.start()

    f = Flow().add(uses=os.path.join(cur_dir, 'yaml/test-index.yml'),
                   parallel=3,
                   separated_workspace=True,
                   host='localhost',
                   port_expose=f_args.port_expose)

    with f:
        f.index(input_fn=random_docs(1000))

    time.sleep(3)
    for j in range(3):
        self.assertTrue(os.path.exists(f'test2-{j + 1}/test2.bin'))
        self.assertTrue(os.path.exists(f'test2-{j + 1}/tmp2'))
        self.add_tmpfile(f'test2-{j + 1}/test2.bin',
                         f'test2-{j + 1}/tmp2',
                         f'test2-{j + 1}')

def test_memmap_get_by_slice(tmpdir):
    def _assert_similar(da1, da2):
        for doc_a, doc_b in zip(da1, da2):
            assert doc_a.id == doc_b.id

    dam = DocumentArrayMemmap(tmpdir)
    candidates = list(random_docs(100))
    for d in candidates:
        d.id = f'id_{d.id}'
    dam.extend(candidates)
    assert len(dam) == 100

    first_10 = dam[:10]
    assert len(first_10) == 10
    _assert_similar(candidates[:10], first_10)

    last_10 = dam[-10:]
    assert len(last_10) == 10
    _assert_similar(candidates[-10:], last_10)

    out_of_bound_1 = dam[-101:-95]
    assert len(out_of_bound_1) == 5
    _assert_similar(candidates[0:5], out_of_bound_1)

    out_of_bound_2 = dam[-101:101]
    assert len(out_of_bound_2) == 100
    _assert_similar(candidates, out_of_bound_2)

    out_of_bound_3 = dam[95:101]
    assert len(out_of_bound_3) == 5
    _assert_similar(candidates[95:], out_of_bound_3)

    assert len(dam[101:105]) == 0
    assert len(dam[-105:-101]) == 0
    assert len(dam[10:0]) == 0

def test_flow_topo1(docker_image_built):
    f = (
        Flow()
        .add(
            name='d0',
            uses='docker://jinaai/jina:test-pip',
            entrypoint='jina executor',
        )
        .add(
            name='d1',
            uses='docker://jinaai/jina:test-pip',
            entrypoint='jina executor',
        )
        .add(
            name='d2',
            uses='docker://jinaai/jina:test-pip',
            needs='d0',
            entrypoint='jina executor',
        )
        .join(['d2', 'd1'])
    )

    with f:
        f.post(on='/index', inputs=random_docs(10))

def test_compression(compress_algo, low_bytes, high_ratio):
    no_comp_sizes = []
    sizes = []
    docs = list(random_docs(100, embed_dim=100))

    kwargs = dict(
        identity='gateway',
        pod_name='123',
        compress_min_bytes=2 * sum(no_comp_sizes) if low_bytes else 0,
        compress_min_ratio=10 if high_ratio else 1,
    )

    with TimeContext('no compress'):
        for r in request_generator(docs):
            m = Message(None, r, compress=CompressAlgo.NONE, **kwargs)
            m.dump()
            no_comp_sizes.append(m.size)

    # recompute the thresholds now that the uncompressed sizes are known;
    # with low_bytes, the min-bytes threshold is set above every message
    # size so compression is always skipped
    kwargs = dict(
        identity='gateway',
        pod_name='123',
        compress_min_bytes=2 * sum(no_comp_sizes) if low_bytes else 0,
        compress_min_ratio=10 if high_ratio else 1,
    )

    with TimeContext(f'compressing with {str(compress_algo)}') as tc:
        for r in request_generator(docs):
            m = Message(None, r, compress=compress_algo, **kwargs)
            m.dump()
            sizes.append(m.size)

    if compress_algo == CompressAlgo.NONE or low_bytes or high_ratio:
        assert sum(sizes) >= sum(no_comp_sizes)
    else:
        assert sum(sizes) < sum(no_comp_sizes)

    print(
        f'{str(compress_algo)}: size {sum(sizes) / len(sizes)} '
        f'(ratio: {sum(no_comp_sizes) / sum(sizes):.2f}) with {tc.duration:.2f}s'
    )

def test_this_will_fail(mocker, restful):
    f = (Flow(restful=restful)
         .add(name='a11', uses='DummySegment')
         .add(name='a12', uses='DummySegment', needs='gateway')
         .add(name='r1', uses='_merge_chunks', needs=['a11', 'a12'])
         .add(name='a21', uses='DummySegment', needs='gateway')
         .add(name='a22', uses='DummySegment', needs='gateway')
         .add(name='r2', uses='_merge_chunks', needs=['a21', 'a22'])
         .add(uses='_merge_chunks', needs=['r1', 'r2']))

    response_mock = mocker.Mock()

    with f:
        f.index(inputs=random_docs(10, chunks_per_doc=0),
                on_done=response_mock)

    validate_callback(response_mock, validate)

def test_all_sync_clients(protocol, mocker):
    from jina import requests

    class MyExec(Executor):
        @requests
        def foo(self, docs, **kwargs):
            pass

    f = Flow(protocol=protocol).add(uses=MyExec)
    docs = list(random_docs(1000))
    m1 = mocker.Mock()
    m2 = mocker.Mock()
    m3 = mocker.Mock()
    m4 = mocker.Mock()

    with f:
        f.post('/', on_done=m1)
        f.post('/foo', docs, on_done=m2)
        f.post('/foo', on_done=m3)
        f.post('/foo', docs, parameters={'hello': 'world'}, on_done=m4)

    m1.assert_called_once()
    m2.assert_called()
    m3.assert_called_once()
    m4.assert_called()

def test_binarypb_in_flow(test_metas, mocker):
    docs = list(random_docs(10))

    def validate(req):
        assert len(docs) == len(req.docs)
        for d, d0 in zip(req.docs, docs):
            np.testing.assert_almost_equal(NdArray(d.embedding).value,
                                           NdArray(d0.embedding).value)

    f = Flow(callback_on='body').add(uses='binarypb.yml')
    # `wraps` (not `wrap`) is the Mock kwarg that calls through to validate
    response_mock = mocker.Mock(wraps=validate)

    with f:
        f.index(docs)

    docs_no_embedding = copy.deepcopy(docs)
    for d in docs_no_embedding:
        d.ClearField('embedding')

    with f:
        f.search(docs_no_embedding, on_done=response_mock)

    response_mock.assert_called()

def test_flow_with_one_container_pod(docker_image_built):
    f = (Flow()
         .add(name='dummyEncoder1', uses=img_name))

    with f:
        f.index(input_fn=random_docs(10))

def test_lazy_request_fields():
    reqs = (LazyRequest(r.SerializeToString(), Envelope())
            for r in _generate(random_docs(10)))
    for r in reqs:
        assert list(r.DESCRIPTOR.fields_by_name.keys())

def test_simple_route(self):
    f = Flow().add(uses='_forward')
    with f:
        f.index(input_fn=random_docs(10))
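

# Every test in this file consumes documents from a shared `random_docs`
# helper whose real implementation lives in the test package. The following
# is only a minimal sketch of the contract the tests rely on (the name and
# defaults here are hypothetical): `num_docs` documents, each with text, a
# dense embedding, and `chunks_per_doc` child chunks.
def _random_docs_sketch(num_docs, chunks_per_doc=5, embed_dim=10):
    import numpy as np
    from jina import Document

    for j in range(num_docs):
        d = Document(text=f'hello world {j}')
        d.embedding = np.random.random([embed_dim])
        for k in range(chunks_per_doc):
            c = Document(text=f'chunk {k} of doc {j}')
            c.embedding = np.random.random([embed_dim])
            d.chunks.append(c)
        yield d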