def test_request_docs_mutable_iterator():
    """Check that edits made through ``r.docs`` items stick to the request.

    Text written on the ``Document`` objects yielded by ``r.docs`` must be
    visible on a later iteration and in the protobuf message, including
    writes made afterwards through references kept from an earlier pass.
    """
    r = Request()
    r.request_type = 'index'
    for doc in random_docs(10):
        r.docs.append(doc)

    # first pass: mutate every document through the iterator
    for pos, doc in enumerate(r.docs):
        assert isinstance(doc, Document)
        doc.text = f'look I changed it! {pos}'

    # iterate it again should see the change; keep the references around
    doc_pointers = []
    for pos, doc in enumerate(r.docs):
        assert isinstance(doc, Document)
        assert doc.text == f'look I changed it! {pos}'
        doc_pointers.append(doc)

    # pb-lize it should see the change
    rpb = r.proto
    for pos, doc_pb in enumerate(rpb.index.docs):
        assert isinstance(doc_pb, DocumentProto)
        assert doc_pb.text == f'look I changed it! {pos}'

    # change again by following the pointers
    for doc in doc_pointers:
        doc.text = 'now i change it back'

    # the protobuf obtained earlier should see the second change as well
    for doc_pb in rpb.index.docs:
        assert isinstance(doc_pb, DocumentProto)
        assert doc_pb.text == 'now i change it back'
def test_request_extend_queryset():
    """Exercise ``Request.queryset`` ``extend``/``append`` with mixed inputs.

    Three equivalent slice queries are supplied as a driver object, a
    ``QueryLang`` wrapper and a raw ``QueryLangProto``; their priorities are
    expected to come back as 0, 1, 2 in insertion order.
    """

    def _assert_slice_queries(queryset):
        # every entry keeps start/end and has priority equal to its position
        for pos, query in enumerate(queryset):
            assert query.priority == pos
            assert query.parameters['start'] == 3
            assert query.parameters['end'] == 4

    q1 = SliceQL(start=3, end=4)
    q2 = QueryLang(SliceQL(start=3, end=4, priority=1))
    q3 = jina_pb2.QueryLangProto()
    q3.name = 'SliceQL'
    q3.parameters['start'] = 3
    q3.parameters['end'] = 4
    q3.priority = 2

    # bulk insertion
    r = Request()
    r.queryset.extend([q1, q2, q3])
    assert isinstance(r.queryset, Sequence)
    _assert_slice_queries(r.queryset)

    # q1 and q2 refer to the same
    assert len({id(q) for q in r.queryset}) == 2

    r2 = Request()
    r2.queryset.extend(r.queryset)
    assert len({id(q) for q in r2.queryset}) == 2

    # one-by-one insertion on a fresh request
    r = Request()
    for query in (q1, q2, q3):
        r.queryset.append(query)
    _assert_slice_queries(r.queryset)

    # a non-iterable argument is rejected
    with pytest.raises(TypeError):
        r.queryset.extend(1)
def test_add_doc_to_type(typ, pb_typ):
    """Append documents and groundtruths to a request of type ``typ``.

    ``pb_typ`` is accepted but not used by this test.
    """
    expected = 10
    request = Request()
    request.request_type = typ

    for _ in range(expected):
        request.docs.append(Document())
        request.groundtruths.append(Document())

    assert len(request.docs) == expected
    assert len(request.groundtruths) == expected
def chunks(document_factory):
    """Return the protobuf docs of an index request holding three documents.

    The documents are built with ``document_factory.create`` using ids 1-3;
    the first two share the text ``'test 1'``.
    """
    specs = [(1, 'test 1'), (2, 'test 1'), (3, 'test 3')]

    request = Request()
    request.request_type = 'index'
    request.docs.extend(
        [document_factory.create(doc_id, text) for doc_id, text in specs]
    )
    return request.proto.index.docs
def test_empty_request_type(typ, pb_typ):
    """A fresh request has no type; its body is only usable once one is set."""
    request = Request()
    assert request.request_type is None

    # accessing the body before a type is chosen must fail
    with pytest.raises(ValueError):
        print(request.body)

    request.request_type = typ
    assert request._request_type == typ
    assert isinstance(request.body, pb_typ)
def matches(document_factory):
    """Return the protobuf docs of an index request holding three documents.

    The documents are built with ``document_factory.create`` using ids 1-3;
    the first two share the text ``'test 1'``.
    """
    specs = [(1, 'test 1'), (2, 'test 1'), (3, 'test 3')]

    request = Request()
    request.request_type = 'index'
    request.docs.extend(
        [document_factory.create(doc_id, text) for doc_id, text in specs]
    )
    return request.as_pb_object.index.docs
def eval_request():
    """Build a search request with five empty documents.

    Document ``n`` (1-based) gets the id ``'0n'`` repeated eight times.
    """
    request = Request()
    request.request_type = 'search'
    # doc: 1
    # doc: 2
    # doc: 3
    # doc: 4
    # doc: 5 - will be missing from KV indexer
    for number in range(1, 6):
        doc = Document()
        doc.id = f'0{number}' * 8
        request.docs.append(doc)
    return request
def test_lazy_request_fields():
    """Requests built from serialized bytes expose a non-empty field list."""
    for raw in request_generator(random_docs(10)):
        request = Request(raw.SerializeToString(), EnvelopeProto())
        field_names = list(request.DESCRIPTOR.fields_by_name.keys())
        assert field_names
def validate_texts(resp: Request, top_k: int = 10, **kwargs):
    """Best-effort validation of a text response; problems are logged, not raised.

    The per-pod timing is logged via ``_log_time_per_pod`` using the timestamp
    tag of the first document, then every search document's matches are
    checked for the expected count and tags.

    :param resp: the response to validate
    :param top_k: number of matches each search document is expected to have
    :param kwargs: forwarded unchanged to ``_log_time_per_pod``
    """
    try:
        from steps import StepItems

        resp_dict = resp.dict()
        task = 'index' if 'index' in resp_dict else 'search'
        docs = resp_dict[task]['docs']
        timestamp_from_tags = float(docs[0]['tags']['timestamp'])
        _log_time_per_pod(
            routes=resp_dict['routes'],
            timestamp=timestamp_from_tags,
            state=StepItems.state,
            task=task,
            num_docs=len(docs),
            **kwargs,
        )
        for d in resp.search.docs:
            if len(d.matches) != top_k:
                logger.error(
                    f'Number of actual matches: {len(d.matches)} vs expected number: {top_k}'
                )
            for m in d.matches:
                tag_keys = m.tags.keys()
                if 'timestamp' not in tag_keys:
                    logger.error(f'timestamp not in tags: {m.tags}')
                # to test that the data from the KV store is retrieved
                if 'filename' not in tag_keys and 'title' not in tag_keys:
                    logger.error(
                        f'did not find "filename/title" in tags: {m.tags}')
    except Exception:
        # deliberately swallow: validation must never abort the caller;
        # logger.exception records the traceback
        logger.exception(
            'Got an exception during `validate_texts`. Continuing (not raising)'
        )
def test_pprint_routes(capfd):
    """Print an error, a chained-error and a success route and check stdout."""
    failed = jina_pb2.RouteProto()
    failed.status.code = jina_pb2.StatusProto.ERROR
    failed.status.exception.stacks.extend(['r1\nline1', 'r2\nline2'])

    chained = jina_pb2.RouteProto()
    chained.status.code = jina_pb2.StatusProto.ERROR_CHAINED
    chained.status.exception.stacks.extend(['line1', 'line2'])

    succeeded = jina_pb2.RouteProto()
    succeeded.status.code = jina_pb2.StatusProto.SUCCESS

    rr = Request()
    rr.routes.extend([failed, chained, succeeded])
    pprint_routes(rr)
    out, _ = capfd.readouterr()

    # status icons, table headers and the stack fragments expected in stdout
    expected_fragments = (
        '⚪',
        '🟢',
        'Pod',
        'Time',
        'Exception',
        'r1',
        'line1r2',
        'line2',
        'line1line2',
    )
    for fragment in expected_fragments:
        assert fragment in out
def test_lazy_append_access():
    """Appending a doc flips ``is_used`` on a request built from bytes."""
    for raw in _generate(random_docs(10)):
        request = Request(raw.SerializeToString(), EnvelopeProto())
        assert not request.is_used
        # write access r.train
        request.docs.append(jina_pb2.DocumentProto())
        # now it is read
        assert request.is_used
def test_lazy_nested_clear_access():
    """Clearing a nested field flips ``is_used`` on a request built from bytes."""
    for raw in request_generator(random_docs(10)):
        request = Request(raw.SerializeToString(), EnvelopeProto())
        assert not request.is_used
        # write access r.train
        request.index.ClearField('docs')
        # now it is read
        assert request.is_used
def test_lazy_nest_access():
    """Writing a nested doc id flips ``is_used`` and the value is kept."""
    new_id = '1' * 16
    for raw in request_generator(random_docs(10)):
        request = Request(raw.SerializeToString(), EnvelopeProto())
        assert not request.is_used
        # write access r.train
        request.docs[0].id = new_id
        # now it is read
        assert request.is_used
        assert request.index.docs[0].id == new_id
def test_lazy_change_message_type():
    """Setting a control command flips ``is_used`` and leaves no index docs."""
    idle = jina_pb2.RequestProto.ControlRequestProto.IDLE
    for raw in request_generator(random_docs(10)):
        request = Request(raw.SerializeToString(), EnvelopeProto())
        assert not request.is_used
        # write access r.train
        request.control.command = idle
        # now it is read
        assert request.is_used
        assert len(request.index.docs) == 0
def test_lazy_append_access():
    """Appending a ``Document`` flips ``is_used`` on a request built from bytes."""
    for raw in request_generator(random_docs(10)):
        request = Request(raw.SerializeToString(), EnvelopeProto())
        assert not request.is_used
        request.request_type = 'index'
        # write access r.train
        request.docs.append(Document())
        # now it is read
        assert request.is_used
def test_lazy_access(field):
    """Reading attribute ``field`` flips ``is_used`` on a request built from bytes."""
    for raw in request_generator(random_docs(10)):
        request = Request(raw.SerializeToString(), EnvelopeProto())
        assert not request.is_used
        # access r.train
        value = getattr(request, field)
        print(value)
        # now it is read
        assert request.is_used
def eval_request():
    """Build a request with ten index docs and ten groundtruths.

    Each doc and each groundtruth gets one new chunk; doc chunks receive the
    embedding ``[1, 1]`` and groundtruth chunks ``[2, 2]``.
    """
    num_docs = 10
    proto = jina_pb2.RequestProto()
    for _ in range(num_docs):
        doc = Document(proto.index.docs.add())
        groundtruth = Document(proto.index.groundtruths.add())
        doc_chunk = doc.chunks.new()
        groundtruth_chunk = groundtruth.chunks.new()
        doc_chunk.embedding = np.array([1, 1])
        groundtruth_chunk.embedding = np.array([2, 2])
    return Request(proto)
def test_multiple_access():
    """Truth-testing a request keeps it unused; reading ``index`` marks it used."""
    requests = []
    for raw in request_generator(random_docs(10)):
        requests.append(Request(raw.SerializeToString(), EnvelopeProto()))

    # truthiness alone must not count as use
    for request in requests:
        assert not request.is_used
        assert request
        assert not request.is_used

    # reading a field does
    for request in requests:
        assert not request.is_used
        assert request.index
        assert request.is_used
def test_request_docs_chunks_mutable_iterator():
    """Check that edits made through nested chunk iterators stick to the request.

    Text written on the chunks yielded by ``d.chunks`` must be visible on a
    later iteration and in the protobuf message, including writes made
    afterwards through chunk references kept from an earlier pass.
    """
    r = Request()
    r.request_type = 'index'
    for d in random_docs(10):
        r.docs.append(d)

    for d in r.docs:
        assert isinstance(d, Document)
        for idx, c in enumerate(d.chunks):
            # check the chunk itself, not the enclosing document
            assert isinstance(c, Document)
            c.text = f'look I changed it! {idx}'

    # iterate it again should see the change
    chunk_pointers = []
    for d in r.docs:
        assert isinstance(d, Document)
        for idx, c in enumerate(d.chunks):
            assert c.text == f'look I changed it! {idx}'
            chunk_pointers.append(c)

    # pb-lize it should see the change
    rpb = r.as_pb_object
    for d in rpb.index.docs:
        assert isinstance(d, DocumentProto)
        for idx, c in enumerate(d.chunks):
            assert isinstance(c, DocumentProto)
            assert c.text == f'look I changed it! {idx}'

    # change again by following the pointers
    for c in chunk_pointers:
        c.text = 'now i change it back'

    # iterate it again should see the change
    for d in rpb.index.docs:
        assert isinstance(d, DocumentProto)
        for c in d.chunks:
            assert c.text == 'now i change it back'
def test_pprint_routes(capfd): result = [] r = jina_pb2.RouteProto() r.status.code = jina_pb2.StatusProto.ERROR r.status.exception.stacks.extend(['r1\nline1', 'r2\nline2']) result.append(r) r = jina_pb2.RouteProto() r.status.code = jina_pb2.StatusProto.ERROR_CHAINED r.status.exception.stacks.extend(['line1', 'line2']) result.append(r) r = jina_pb2.RouteProto() r.status.code = jina_pb2.StatusProto.SUCCESS result.append(r) rr = Request() rr.routes.extend(result) pprint_routes(rr) out, err = capfd.readouterr() assert out == '''+-----+------+------------+
def request(field_type):
    """Build a typed index request with ten docs and ten groundtruths.

    Each doc and each groundtruth gets one chunk at granularity 1. Depending
    on ``field_type`` (``'text'``, ``'buffer'`` or ``'blob'``) the chunk's
    matching field is filled, with the groundtruth value one element longer
    than the doc value. Any other ``field_type`` leaves the chunks empty.
    """
    num_docs = 10
    proto = jina_pb2.RequestProto()
    for _ in range(num_docs):
        doc_pb = proto.index.docs.add()
        groundtruth_pb = proto.index.groundtruths.add()
        doc_chunk = Document(doc_pb.chunks.add())
        groundtruth_chunk = Document(groundtruth_pb.chunks.add())
        doc_chunk.granularity = 1
        groundtruth_chunk.granularity = 1

        if field_type == 'text':
            doc_chunk.text = 'aaa'
            groundtruth_chunk.text = 'aaaa'
        elif field_type == 'buffer':
            doc_chunk.buffer = b'\x01\x02\x03'
            groundtruth_chunk.buffer = b'\x01\x02\x03\x04'
        elif field_type == 'blob':
            doc_chunk.blob = np.array([1, 1, 1])
            groundtruth_chunk.blob = np.array([1, 1, 1, 1])

    wrapped = Request(proto)
    return wrapped.as_typed_request('index')
import numpy as np import pytest from jina import Document, Request, QueryLang, NdArray from jina.types.score import NamedScore from jina.types.arrays import ChunkArray from jina.types.arrays.match import MatchArray @pytest.mark.parametrize( 'obj', [ Document(), Request(), QueryLang(), NamedScore(), NdArray(), MatchArray([Document()], Document()), ChunkArray([Document()], Document()), ], ) def test_builtin_str_repr_no_content(obj): print(obj) print(f'{obj!r}') @pytest.mark.parametrize( 'obj', [ Document(content='123', chunks=[Document(content='abc')]), QueryLang({
def test_request_extend_queryset():
    """Exercise ``Request.extend_queryset`` with a list and with single items.

    Three equivalent slice queries are supplied as a driver object, a
    ``QueryLang`` wrapper and a raw ``QueryLangProto``; their priorities are
    expected to come back as 0, 1, 2 in insertion order.
    """

    def _assert_slice_queries(queryset):
        # every entry keeps start/end and has priority equal to its position
        for pos, query in enumerate(queryset):
            assert query.priority == pos
            assert query.parameters['start'] == 3
            assert query.parameters['end'] == 4

    q1 = SliceQL(start=3, end=4)
    q2 = QueryLang(SliceQL(start=3, end=4, priority=1))
    q3 = jina_pb2.QueryLangProto()
    q3.name = 'SliceQL'
    q3.parameters['start'] = 3
    q3.parameters['end'] = 4
    q3.priority = 2

    # all at once
    r = Request()
    r.extend_queryset([q1, q2, q3])
    _assert_slice_queries(r.queryset)

    # one at a time on a fresh request
    r = Request()
    for query in (q1, q2, q3):
        r.extend_queryset(query)
    _assert_slice_queries(r.queryset)

    # an unsupported argument type is rejected
    with pytest.raises(TypeError):
        r.extend_queryset(1)