Exemplo n.º 1
0
def test_request_docs_mutable_iterator():
    """Check that objects yielded by ``Request.docs`` write through.

    Text set on the yielded documents must be visible on a second
    iteration and in the protobuf message taken from ``Request.proto``,
    including edits made after that message was obtained.
    """
    request = Request()
    request.request_type = 'index'
    for doc in random_docs(10):
        request.docs.append(doc)

    # first pass: overwrite the text of every document in place
    for position, doc in enumerate(request.docs):
        assert isinstance(doc, Document)
        doc.text = f'look I changed it! {position}'

    # second pass: the new text is visible; keep the handles for later
    kept_docs = []
    for position, doc in enumerate(request.docs):
        assert isinstance(doc, Document)
        assert doc.text == f'look I changed it! {position}'
        kept_docs.append(doc)

    # the protobuf view of the request carries the same text
    proto_request = request.proto

    for position, proto_doc in enumerate(proto_request.index.docs):
        assert isinstance(proto_doc, DocumentProto)
        assert proto_doc.text == f'look I changed it! {position}'

    # write once more through the handles collected above
    for doc in kept_docs:
        doc.text = 'now i change it back'

    # the previously obtained protobuf message sees that edit as well
    for proto_doc in proto_request.index.docs:
        assert isinstance(proto_doc, DocumentProto)
        assert proto_doc.text == 'now i change it back'
Exemplo n.º 2
0
def test_request_extend_queryset():
    """Fill ``Request.queryset`` via ``extend`` and ``append``.

    Three query representations are used: a ``SliceQL`` instance, a
    ``QueryLang`` wrapping a ``SliceQL``, and a raw ``QueryLangProto``.
    Each stored entry is expected to have a priority equal to its position.
    """
    driver_query = SliceQL(start=3, end=4)
    wrapped_query = QueryLang(SliceQL(start=3, end=4, priority=1))
    proto_query = jina_pb2.QueryLangProto()
    proto_query.name = 'SliceQL'
    proto_query.parameters['start'] = 3
    proto_query.parameters['end'] = 4
    proto_query.priority = 2

    request = Request()
    request.queryset.extend([driver_query, wrapped_query, proto_query])
    assert isinstance(request.queryset, Sequence)
    for position, query in enumerate(request.queryset):
        assert query.priority == position
        assert query.parameters['start'] == 3
        assert query.parameters['end'] == 4

    # q1 and q2 refer to the same
    # NOTE: keep this as a set comprehension; the count depends on how long
    # each yielded object stays alive while iterating.
    assert len({id(query) for query in request.queryset}) == 2

    # extending a second request from the first one gives the same count
    copied = Request()
    copied.queryset.extend(request.queryset)
    assert len({id(query) for query in copied.queryset}) == 2

    # same three queries, appended one at a time to a fresh request
    request = Request()
    request.queryset.append(driver_query)
    request.queryset.append(wrapped_query)
    request.queryset.append(proto_query)
    for position, query in enumerate(request.queryset):
        assert query.priority == position
        assert query.parameters['start'] == 3
        assert query.parameters['end'] == 4

    # a non-iterable argument is rejected
    with pytest.raises(TypeError):
        request.queryset.extend(1)
Exemplo n.º 3
0
def test_add_doc_to_type(typ, pb_typ):
    """Append documents and groundtruths to a request of type ``typ``.

    ``pb_typ`` is accepted but not used by this test.
    """
    expected_count = 10
    request = Request()
    request.request_type = typ
    for _ in range(expected_count):
        request.docs.append(Document())
        request.groundtruths.append(Document())
    assert len(request.docs) == expected_count
    assert len(request.groundtruths) == expected_count
Exemplo n.º 4
0
def chunks(document_factory):
    """Return the protobuf docs of an 'index' request holding three documents.

    The documents are built with ``document_factory.create`` and the result
    is read from ``Request.proto``.
    """
    creation_args = ((1, 'test 1'), (2, 'test 1'), (3, 'test 3'))
    request = Request()
    request.request_type = 'index'
    request.docs.extend([document_factory.create(*args) for args in creation_args])
    return request.proto.index.docs
Exemplo n.º 5
0
def test_empty_request_type(typ, pb_typ):
    """A fresh request has no type; its body is usable once a type is set."""
    request = Request()

    # before a type is assigned, reading the body must raise ValueError
    assert request.request_type is None
    with pytest.raises(ValueError):
        print(request.body)

    # after assignment, the body is an instance of the matching protobuf type
    request.request_type = typ
    assert request._request_type == typ
    assert isinstance(request.body, pb_typ)
Exemplo n.º 6
0
def matches(document_factory):
    """Return the protobuf docs of an 'index' request holding three documents.

    The documents are built with ``document_factory.create`` and the result
    is read from ``Request.as_pb_object``.
    """
    creation_args = ((1, 'test 1'), (2, 'test 1'), (3, 'test 3'))
    request = Request()
    request.request_type = 'index'
    request.docs.extend([document_factory.create(*args) for args in creation_args])
    return request.as_pb_object.index.docs
Exemplo n.º 7
0
def eval_request():
    """Build a 'search' request with five documents.

    Each id is the two-character string ``0<n>`` repeated eight times,
    for ``n`` from 1 to 5.
    """
    request = Request()
    request.request_type = 'search'
    # docs 1 to 5; doc 5 will be missing from KV indexer
    for number in range(1, 6):
        doc = Document()
        doc.id = f'0{number}' * 8
        request.docs.append(doc)
    return request
Exemplo n.º 8
0
def test_lazy_request_fields():
    """Requests built from serialized bytes expose a non-empty field list."""
    for proto in request_generator(random_docs(10)):
        request = Request(proto.SerializeToString(), EnvelopeProto())
        field_names = list(request.DESCRIPTOR.fields_by_name.keys())
        assert field_names
Exemplo n.º 9
0
def validate_texts(resp: Request, top_k: int = 10, **kwargs):
    """Validate a text response and log per-pod timing; never raises.

    The response is converted with ``resp.dict()``. The task is taken to be
    ``'index'`` when that key is present, otherwise ``'search'``. The
    timestamp tag of the first document and the routes are handed to
    ``_log_time_per_pod``. After that, every document in ``resp.search.docs``
    is checked and problems are reported through ``logger.error``.

    :param resp: the response to validate
    :param top_k: number of matches each search document is expected to have
    :param kwargs: extra keyword arguments forwarded to ``_log_time_per_pod``
    :return: None; any exception is logged and swallowed
    """
    try:
        # imported here so a failing import is logged like any other error
        from steps import StepItems
        resp_dict = resp.dict()
        task = 'index' if 'index' in resp_dict else 'search'
        timestamp_from_tags = float(
            resp_dict[task]['docs'][0]['tags']['timestamp'])
        _log_time_per_pod(routes=resp_dict['routes'],
                          timestamp=timestamp_from_tags,
                          state=StepItems.state,
                          task=task,
                          num_docs=len(resp_dict[task]['docs']),
                          **kwargs)

        # NOTE(review): `resp.search.docs` is read even when `task` resolved
        # to 'index' -- confirm this is intended.
        for d in resp.search.docs:
            if len(d.matches) != top_k:
                logger.error(
                    f'Number of actual matches: {len(d.matches)} vs expected number: {top_k}'
                )
            for m in d.matches:
                if 'timestamp' not in m.tags.keys():
                    logger.error(f'timestamp not in tags: {m.tags}')
                # to test that the data from the KV store is retrieved
                if 'filename' not in m.tags.keys(
                ) and 'title' not in m.tags.keys():
                    logger.error(
                        f'did not find "filename/title" in tags: {m.tags}')
    except Exception:
        # Deliberately best-effort: log with traceback and keep going.
        # The message now names this function (it previously said
        # `validate_images`).
        logger.exception(
            'Got an exception during `validate_texts`. Continuing (not raising)'
        )
Exemplo n.º 10
0
def test_pprint_routes(capfd):
    """Print three routes (error, chained error, success) and check stdout.

    The captured output must contain the status markers, the column
    headings and the exception stack fragments asserted below.
    """
    failed = jina_pb2.RouteProto()
    failed.status.code = jina_pb2.StatusProto.ERROR
    failed.status.exception.stacks.extend(['r1\nline1', 'r2\nline2'])

    chained = jina_pb2.RouteProto()
    chained.status.code = jina_pb2.StatusProto.ERROR_CHAINED
    chained.status.exception.stacks.extend(['line1', 'line2'])

    succeeded = jina_pb2.RouteProto()
    succeeded.status.code = jina_pb2.StatusProto.SUCCESS

    request = Request()
    request.routes.extend([failed, chained, succeeded])
    pprint_routes(request)

    out, _ = capfd.readouterr()
    expected_fragments = (
        '⚪',
        '🟢',
        'Pod',
        'Time',
        'Exception',
        'r1',
        'line1r2',
        'line2',
        'line1line2',
    )
    for fragment in expected_fragments:
        assert fragment in out
Exemplo n.º 11
0
def test_lazy_append_access():
    """Appending a ``DocumentProto`` to ``docs`` flips ``is_used`` to true."""
    for proto in _generate(random_docs(10)):
        request = Request(proto.SerializeToString(), EnvelopeProto())
        # untouched so far
        assert not request.is_used
        # appending to docs counts as an access
        request.docs.append(jina_pb2.DocumentProto())
        assert request.is_used
Exemplo n.º 12
0
def test_lazy_nested_clear_access():
    """Clearing ``docs`` on the index body flips ``is_used`` to true."""
    for proto in request_generator(random_docs(10)):
        request = Request(proto.SerializeToString(), EnvelopeProto())
        # untouched so far
        assert not request.is_used
        # clearing a nested field counts as an access
        request.index.ClearField('docs')
        assert request.is_used
Exemplo n.º 13
0
def test_lazy_nest_access():
    """Writing a nested document id flips ``is_used`` and is readable back."""
    new_id = '1' * 16
    for proto in request_generator(random_docs(10)):
        request = Request(proto.SerializeToString(), EnvelopeProto())
        # untouched so far
        assert not request.is_used
        # assigning through docs[0] counts as an access
        request.docs[0].id = new_id
        assert request.is_used
        # the same value is seen through the index body
        assert request.index.docs[0].id == new_id
Exemplo n.º 14
0
def test_lazy_change_message_type():
    """Setting a control command flips ``is_used`` and leaves index docs empty."""
    idle_command = jina_pb2.RequestProto.ControlRequestProto.IDLE
    for proto in request_generator(random_docs(10)):
        request = Request(proto.SerializeToString(), EnvelopeProto())
        # untouched so far
        assert not request.is_used
        # writing to the control body counts as an access
        request.control.command = idle_command
        assert request.is_used
        # after that write, the index body holds no documents
        assert len(request.index.docs) == 0
Exemplo n.º 15
0
def test_lazy_append_access():
    """Appending a ``Document`` to ``docs`` flips ``is_used`` to true.

    The request type is assigned after the first ``is_used`` check and
    before the append.
    """
    for proto in request_generator(random_docs(10)):
        request = Request(proto.SerializeToString(), EnvelopeProto())
        # untouched so far
        assert not request.is_used
        request.request_type = 'index'
        # appending to docs counts as an access
        request.docs.append(Document())
        assert request.is_used
Exemplo n.º 16
0
def test_lazy_access(field):
    """Reading the attribute named ``field`` flips ``is_used`` to true."""
    for proto in request_generator(random_docs(10)):
        request = Request(proto.SerializeToString(), EnvelopeProto())
        # untouched so far
        assert not request.is_used

        # a plain read of the attribute counts as an access
        print(getattr(request, field))

        assert request.is_used
Exemplo n.º 17
0
def eval_request():
    """Build a request with ten doc/groundtruth pairs, one chunk each.

    Document chunks get the embedding ``[1, 1]`` and groundtruth chunks
    get ``[2, 2]``. The populated ``RequestProto`` is wrapped in a
    ``Request`` before being returned.
    """
    pair_count = 10
    proto = jina_pb2.RequestProto()
    for _ in range(pair_count):
        doc = Document(proto.index.docs.add())
        groundtruth = Document(proto.index.groundtruths.add())
        doc_chunk = doc.chunks.new()
        groundtruth_chunk = groundtruth.chunks.new()
        doc_chunk.embedding = np.array([1, 1])
        groundtruth_chunk.embedding = np.array([2, 2])
    return Request(proto)
Exemplo n.º 18
0
def test_multiple_access():
    """Truth-testing a request leaves ``is_used`` false; reading ``index`` sets it."""
    requests = [
        Request(proto.SerializeToString(), EnvelopeProto())
        for proto in request_generator(random_docs(10))
    ]

    # evaluating the request itself for truthiness does not count as an access
    for request in requests:
        assert not request.is_used
        assert request
        assert not request.is_used

    # reading the index body does
    for request in requests:
        assert not request.is_used
        assert request.index
        assert request.is_used
Exemplo n.º 19
0
def test_request_docs_chunks_mutable_iterator():
    """Test if weak reference work in nested docs.

    Text set on chunks yielded by ``d.chunks`` must be visible on a second
    iteration and in the protobuf message taken from
    ``Request.as_pb_object``, including edits made after that message was
    obtained.
    """
    r = Request()
    r.request_type = 'index'
    for d in random_docs(10):
        r.docs.append(d)

    for d in r.docs:
        assert isinstance(d, Document)
        for idx, c in enumerate(d.chunks):
            # check the chunk itself; this previously re-checked the parent `d`
            assert isinstance(c, Document)
            c.text = f'look I changed it! {idx}'

    # iterate it again should see the change
    chunk_pointers = []
    for d in r.docs:
        assert isinstance(d, Document)
        for idx, c in enumerate(d.chunks):
            assert c.text == f'look I changed it! {idx}'
            chunk_pointers.append(c)

    # pb-lize it should see the change
    rpb = r.as_pb_object

    for d in rpb.index.docs:
        assert isinstance(d, DocumentProto)
        for idx, c in enumerate(d.chunks):
            assert isinstance(c, DocumentProto)
            assert c.text == f'look I changed it! {idx}'

    # change again by following the pointers
    for c in chunk_pointers:
        c.text = 'now i change it back'

    # iterate it again should see the change
    for d in rpb.index.docs:
        assert isinstance(d, DocumentProto)
        for c in d.chunks:
            assert c.text == 'now i change it back'
Exemplo n.º 20
0
def test_pprint_routes(capfd):
    result = []
    r = jina_pb2.RouteProto()
    r.status.code = jina_pb2.StatusProto.ERROR
    r.status.exception.stacks.extend(['r1\nline1', 'r2\nline2'])
    result.append(r)
    r = jina_pb2.RouteProto()
    r.status.code = jina_pb2.StatusProto.ERROR_CHAINED
    r.status.exception.stacks.extend(['line1', 'line2'])
    result.append(r)
    r = jina_pb2.RouteProto()
    r.status.code = jina_pb2.StatusProto.SUCCESS
    result.append(r)
    rr = Request()
    rr.routes.extend(result)
    pprint_routes(rr)
    out, err = capfd.readouterr()
    assert out == '''+-----+------+------------+
Exemplo n.º 21
0
 def request(field_type):
     """Build a typed 'index' request with ten doc/groundtruth pairs.

     Each pair gets one chunk with granularity 1. When ``field_type`` is
     'text', 'buffer' or 'blob', that attribute is set on both chunks with
     different values; any other ``field_type`` leaves the chunks without
     content.
     """
     pair_count = 10
     proto = jina_pb2.RequestProto()
     for _ in range(pair_count):
         doc_proto = proto.index.docs.add()
         groundtruth_proto = proto.index.groundtruths.add()
         doc_chunk = Document(doc_proto.chunks.add())
         groundtruth_chunk = Document(groundtruth_proto.chunks.add())
         doc_chunk.granularity = 1
         groundtruth_chunk.granularity = 1

         # choose the pair of values for the requested content field
         if field_type == 'text':
             doc_value, groundtruth_value = 'aaa', 'aaaa'
         elif field_type == 'buffer':
             doc_value, groundtruth_value = b'\x01\x02\x03', b'\x01\x02\x03\x04'
         elif field_type == 'blob':
             doc_value, groundtruth_value = np.array([1, 1, 1]), np.array([1, 1, 1, 1])
         else:
             # unknown field type: chunks stay without content
             continue
         setattr(doc_chunk, field_type, doc_value)
         setattr(groundtruth_chunk, field_type, groundtruth_value)
     return Request(proto).as_typed_request('index')
Exemplo n.º 22
0
import numpy as np
import pytest

from jina import Document, Request, QueryLang, NdArray
from jina.types.score import NamedScore
from jina.types.arrays import ChunkArray
from jina.types.arrays.match import MatchArray


@pytest.mark.parametrize(
    'obj',
    [
        Document(),
        Request(),
        QueryLang(),
        NamedScore(),
        NdArray(),
        MatchArray([Document()], Document()),
        ChunkArray([Document()], Document()),
    ],
)
def test_builtin_str_repr_no_content(obj):
    """Smoke test: ``str`` and ``repr`` of each parametrized object can be printed."""
    as_text = str(obj)
    print(as_text)
    as_repr = repr(obj)
    print(as_repr)


@pytest.mark.parametrize(
    'obj',
    [
        Document(content='123', chunks=[Document(content='abc')]),
        QueryLang({
Exemplo n.º 23
0
def test_request_extend_queryset():
    """Fill a request's queryset through ``Request.extend_queryset``.

    Three query representations are used: a ``SliceQL`` instance, a
    ``QueryLang`` wrapping a ``SliceQL``, and a raw ``QueryLangProto``.
    They are passed first as one list and then one at a time; each stored
    entry is expected to have a priority equal to its position.
    """
    driver_query = SliceQL(start=3, end=4)
    wrapped_query = QueryLang(SliceQL(start=3, end=4, priority=1))
    proto_query = jina_pb2.QueryLangProto()
    proto_query.name = 'SliceQL'
    proto_query.parameters['start'] = 3
    proto_query.parameters['end'] = 4
    proto_query.priority = 2

    # all three in a single call
    request = Request()
    request.extend_queryset([driver_query, wrapped_query, proto_query])
    for position, query in enumerate(request.queryset):
        assert query.priority == position
        assert query.parameters['start'] == 3
        assert query.parameters['end'] == 4

    # one call per query on a fresh request
    request = Request()
    request.extend_queryset(driver_query)
    request.extend_queryset(wrapped_query)
    request.extend_queryset(proto_query)
    for position, query in enumerate(request.queryset):
        assert query.priority == position
        assert query.parameters['start'] == 3
        assert query.parameters['end'] == 4

    # an unsupported argument type is rejected
    with pytest.raises(TypeError):
        request.extend_queryset(1)