Пример #1
0
 def add_default_kwargs(kwargs: Dict):
     """Add top-k query-language entries to ``kwargs['queryset']`` in place.

     Nothing happens unless ``kwargs`` holds a non-``None`` ``'top_k'`` value.
     Otherwise two ``QueryLang`` objects are built, both with priority 1: a
     ``SliceQL`` entry whose ``end`` is the top-k value and a
     ``VectorSearchDriver`` entry whose ``top_k`` is the top-k value. They are
     stored as a new list under ``'queryset'`` when that key is absent, and
     appended to the existing value (via ``extend``) when it is present.

     :param kwargs: keyword-argument dictionary, mutated in place
     """
     # TODO: refactor it into load from config file
     if 'top_k' not in kwargs or kwargs['top_k'] is None:
         return

     # associate all VectorSearchDriver and SliceQL driver to use top_k
     from jina import QueryLang

     slice_spec = {
         'name': 'SliceQL',
         'priority': 1,
         'parameters': {'end': kwargs['top_k']},
     }
     search_spec = {
         'name': 'VectorSearchDriver',
         'priority': 1,
         'parameters': {'top_k': kwargs['top_k']},
     }
     topk_ql = [QueryLang(slice_spec), QueryLang(search_spec)]

     if 'queryset' in kwargs:
         # the caller's existing collection is extended, not replaced
         kwargs['queryset'].extend(topk_ql)
     else:
         kwargs['queryset'] = topk_ql
Пример #2
0
def test_topk_override(config, mocker):
    """Search with a ``VectorSearchDriver`` queryset entry and check match counts.

    The flow from ``flow.yml`` first indexes ``random_docs(100)``; a second
    instance then searches with 3 query documents and a queryset whose
    ``top_k`` is 11. The response must contain 3 documents, each with
    exactly 11 matches.
    """
    query_count = 3
    expected_matches = 11

    def check_response(resp):
        returned = resp.search.docs
        assert len(returned) == query_count
        for returned_doc in returned:
            assert len(returned_doc.matches) == expected_matches

    # Making queryset
    override_spec = {
        'name': 'VectorSearchDriver',
        'parameters': {'top_k': expected_matches},
        'priority': 1,
    }
    top_k_queryset = QueryLang(override_spec)

    with Flow.load_config('flow.yml') as index_flow:
        index_flow.index(inputs=random_docs(100))

    on_done_mock = mocker.Mock()
    with Flow.load_config('flow.yml') as search_flow:
        search_flow.search(
            inputs=random_docs(query_count),
            on_done=on_done_mock,
            queryset=[top_k_queryset],
        )

    # the callback must fire exactly once before its argument is validated
    on_done_mock.assert_called_once()
    validate_callback(on_done_mock, check_response)
Пример #3
0
def test_request_extend_queryset():
    """Exercise ``Request.queryset`` with ``extend`` and ``append``.

    Three equivalent slice queries are supplied as a ``SliceQL``, a
    ``QueryLang`` wrapping a ``SliceQL``, and a raw ``QueryLangProto``.
    After being added, each entry's priority must equal its position and
    its ``start``/``end`` parameters must be 3 and 4. Extending with a
    non-iterable must raise ``TypeError``.
    """

    def assert_slice_entries(queryset):
        for position, entry in enumerate(queryset):
            assert entry.priority == position
            assert entry.parameters['start'] == 3
            assert entry.parameters['end'] == 4

    from_driver = SliceQL(start=3, end=4)
    from_wrapper = QueryLang(SliceQL(start=3, end=4, priority=1))
    from_proto = jina_pb2.QueryLangProto()
    from_proto.name = 'SliceQL'
    from_proto.parameters['start'] = 3
    from_proto.parameters['end'] = 4
    from_proto.priority = 2

    extended = Request()
    extended.queryset.extend([from_driver, from_wrapper, from_proto])
    assert isinstance(extended.queryset, Sequence)
    assert_slice_entries(extended.queryset)

    # q1 and q2 refer to the same
    assert len({id(entry) for entry in extended.queryset}) == 2

    copied = Request()
    copied.queryset.extend(extended.queryset)
    assert len({id(entry) for entry in copied.queryset}) == 2

    appended = Request()
    for item in (from_driver, from_wrapper, from_proto):
        appended.queryset.append(item)
    assert_slice_entries(appended.queryset)

    with pytest.raises(TypeError):
        appended.queryset.extend(1)
Пример #4
0
def test_queryset_with_struct(random_workspace, mocker):
    """Override a flow's ``FilterQL`` lookups through a request queryset.

    Four documents are built, alternating between ``label1`` and ``label2``
    tags. Indexed without a queryset, the response is expected to hold all
    four; indexed with a ``FilterQL`` queryset on ``label2``, it is expected
    to hold half of them.
    """
    total_docs = 4

    def make_doc(doc_id):
        proto = jina_pb2.DocumentProto()
        proto.text = f'I am doc{doc_id}'
        NdArray(proto.embedding).value = np.array([doc_id])
        proto.tags['label'] = f'label{doc_id % 2 + 1}'
        return proto

    docs = [make_doc(doc_id) for doc_id in range(total_docs)]

    flow = Flow().add(
        uses='- !FilterQL | {lookups: {tags__label__in: [label1, label2]}, traversal_paths: [r]}'
    )

    def validate_all_docs(resp):
        assert len(resp.docs) == total_docs

    def validate_label2_docs(resp):
        assert len(resp.docs) == total_docs / 2

    all_docs_mock = mocker.Mock()
    label2_mock = mocker.Mock()
    with flow:
        # keep all the docs
        flow.index(docs, on_done=all_docs_mock)
        # keep only the docs with label2
        label2_spec = {
            'name': 'FilterQL',
            'priority': 1,
            'parameters': {
                'lookups': {'tags__label': 'label2'},
                'traversal_paths': ['r'],
            },
        }
        qs = QueryLang(label2_spec)
        flow.index(docs, queryset=qs, on_done=label2_mock)

    all_docs_mock.assert_called_once()
    validate_callback(all_docs_mock, validate_all_docs)
    label2_mock.assert_called_once()
    validate_callback(label2_mock, validate_label2_docs)
Пример #5
0
def test_request_extend_queryset():
    """Exercise ``Request.extend_queryset`` with a list and with single items.

    The same slice query is supplied as a ``SliceQL``, a ``QueryLang``
    wrapping a ``SliceQL``, and a raw ``QueryLangProto``. After being added,
    each entry's priority must equal its position and its ``start``/``end``
    parameters must be 3 and 4. Passing an integer must raise ``TypeError``.
    """

    def assert_slice_entries(queryset):
        for position, entry in enumerate(queryset):
            assert entry.priority == position
            assert entry.parameters['start'] == 3
            assert entry.parameters['end'] == 4

    from_driver = SliceQL(start=3, end=4)
    from_wrapper = QueryLang(SliceQL(start=3, end=4, priority=1))
    from_proto = jina_pb2.QueryLangProto()
    from_proto.name = 'SliceQL'
    from_proto.parameters['start'] = 3
    from_proto.parameters['end'] = 4
    from_proto.priority = 2

    # all three at once
    batch_request = Request()
    batch_request.extend_queryset([from_driver, from_wrapper, from_proto])
    assert_slice_entries(batch_request.queryset)

    # one at a time
    single_request = Request()
    for item in (from_driver, from_wrapper, from_proto):
        single_request.extend_queryset(item)
    assert_slice_entries(single_request.queryset)

    with pytest.raises(TypeError):
        single_request.extend_queryset(1)
Пример #6
0
def test_queryset_with_struct(random_workspace):
    """Override a flow's ``FilterQL`` lookups through a request queryset.

    Four documents are built, alternating between ``label1`` and ``label2``
    tags. The validation callbacks are passed as ``output_fn`` with
    ``callback_on='body'``: the first expects all four documents, the second
    (sent with a ``FilterQL`` queryset on ``label2``) expects half of them.
    """
    total_docs = 4

    def make_doc(doc_id):
        proto = jina_pb2.DocumentProto()
        proto.text = f'I am doc{doc_id}'
        NdArray(proto.embedding).value = np.array([doc_id])
        proto.tags['label'] = f'label{doc_id % 2 + 1}'
        return proto

    docs = [make_doc(doc_id) for doc_id in range(total_docs)]

    flow = Flow().add(
        uses='- !FilterQL | {lookups: {tags__label__in: [label1, label2]}, traversal_paths: [r]}'
    )

    def validate_all_docs(resp):
        assert len(resp.docs) == total_docs

    def validate_label2_docs(resp):
        assert len(resp.docs) == total_docs / 2

    with flow:
        # keep all the docs
        flow.index(docs, output_fn=validate_all_docs, callback_on='body')
        # keep only the docs with label2
        label2_filter = FilterQL(
            priority=1,
            lookups={'tags__label': 'label2'},
            traversal_paths=['r'],
        )
        qs = QueryLang(label2_filter)
        flow.index(
            docs,
            queryset=qs,
            output_fn=validate_label2_docs,
            callback_on='body',
        )
Пример #7
0
def test_topk_override(config):
    """Search with a ``top_k`` override taken from the environment.

    ``JINA_TOPK_OVERRIDE`` supplies the ``top_k`` of the
    ``VectorSearchDriver`` queryset entry and ``JINA_NDOCS`` the argument
    passed to ``random_docs`` for the search inputs. Results are handed to
    ``validate_override_results`` as the ``output_fn`` callback.
    """
    # Making queryset
    override = int(os.environ['JINA_TOPK_OVERRIDE'])
    override_driver = VectorSearchDriver(top_k=override, priority=1)
    top_k_queryset = QueryLang(override_driver)

    with Flow.load_config('flow.yml') as index_flow:
        index_flow.index(input_fn=random_docs(100))

    with Flow.load_config('flow.yml') as search_flow:
        query_count = int(os.environ['JINA_NDOCS'])
        search_flow.search(
            input_fn=random_docs(query_count),
            output_fn=validate_override_results,
            queryset=[top_k_queryset],
        )
Пример #8
0
def test_topk_override(config, mocker):
    """Search with a ``VectorSearchDriver`` queryset entry and check match counts.

    The flow from ``flow.yml`` first indexes ``random_docs(100)``; a second
    instance then searches with ``NDOCS`` query documents and a queryset
    whose ``top_k`` is ``TOPK_OVERRIDE``. The response must contain
    ``NDOCS`` documents, each with exactly ``TOPK_OVERRIDE`` matches.

    The mock is used as a "validation completed" marker: it is invoked only
    after every assertion in the callback has passed, so the final
    ``assert_called_once`` fails if the callback never ran or if one of its
    assertions raised.
    """
    NDOCS = 3
    TOPK_OVERRIDE = 11

    mock = mocker.Mock()

    def validate(resp):
        assert len(resp.search.docs) == NDOCS
        for doc in resp.search.docs:
            assert len(doc.matches) == TOPK_OVERRIDE
        # Record the call last: marking it before the assertions would let
        # the test pass even when validation failed inside the callback.
        mock()

    # Making queryset
    top_k_queryset = QueryLang(
        VectorSearchDriver(top_k=TOPK_OVERRIDE, priority=1))

    with Flow.load_config('flow.yml') as index_flow:
        index_flow.index(input_fn=random_docs(100))

    with Flow.load_config('flow.yml') as search_flow:
        search_flow.search(input_fn=random_docs(NDOCS),
                           on_done=validate,
                           queryset=[top_k_queryset])
    mock.assert_called_once()
Пример #9
0
import numpy as np
import pytest

from jina import Document, Request, QueryLang, NdArray
from jina.types.score import NamedScore
from jina.types.arrays import ChunkArray
from jina.types.arrays.match import MatchArray


@pytest.mark.parametrize(
    'obj',
    [
        Document(),
        Request(),
        QueryLang(),
        NamedScore(),
        NdArray(),
        MatchArray([Document()], Document()),
        ChunkArray([Document()], Document()),
    ],
)
def test_builtin_str_repr_no_content(obj):
    """Smoke-test ``str`` and ``repr`` on objects built without content.

    There are no assertions; the test passes when both conversions (and
    printing their results) complete without raising.
    """
    print(str(obj))
    print(repr(obj))


@pytest.mark.parametrize(
    'obj',
    [
        Document(content='123', chunks=[Document(content='abc')]),
        QueryLang({
Пример #10
0
 def queryset(self):
     """Return a one-element list holding a ``QueryLang`` for the mock driver.

     The entry is named ``MockVectorSearchDriverWithQS``, has priority 1 and
     carries a ``top_k`` parameter of 4.
     """
     mock_driver_ql = QueryLang()
     mock_driver_ql.name = 'MockVectorSearchDriverWithQS'
     mock_driver_ql.priority = 1
     mock_driver_ql.parameters['top_k'] = 4
     return [mock_driver_ql]