def test_request_extend_queryset():
    """Check ``Request.extend_queryset`` with a driver, a ``QueryLang`` and a raw proto.

    The three query languages are added once as a list and once one by one;
    in both cases the resulting queryset must keep insertion order and the
    ``start``/``end`` parameters. A non-querylang argument must raise
    ``TypeError``.
    """
    driver_ql = SliceQL(start=3, end=4)
    wrapped_ql = QueryLang(SliceQL(start=3, end=4, priority=1))
    proto_ql = jina_pb2.QueryLangProto()
    proto_ql.name = 'SliceQL'
    proto_ql.parameters['start'] = 3
    proto_ql.parameters['end'] = 4
    proto_ql.priority = 2
    querylangs = [driver_ql, wrapped_ql, proto_ql]

    def _assert_ordered_slices(request):
        # The test expects each priority to equal the position in the queryset.
        for position, item in enumerate(request.queryset):
            assert item.priority == position
            assert item.parameters['start'] == 3
            assert item.parameters['end'] == 4

    # Add all three in a single call.
    batch_request = Request()
    batch_request.extend_queryset(querylangs)
    _assert_ordered_slices(batch_request)

    # Add them one call at a time.
    single_request = Request()
    for querylang in querylangs:
        single_request.extend_queryset(querylang)
    _assert_ordered_slices(single_request)

    with pytest.raises(TypeError):
        single_request.extend_queryset(1)
def test_request_extend_queryset():
    """Check ``Request.queryset.extend`` / ``append`` with mixed querylang inputs.

    A driver, a ``QueryLang`` wrapper and a raw ``QueryLangProto`` are added
    to the queryset, first in bulk and then one at a time. The queryset must
    be a ``Sequence``, keep insertion order, and reject a non-iterable
    argument to ``extend`` with ``TypeError``.
    """
    driver_ql = SliceQL(start=3, end=4)
    wrapped_ql = QueryLang(SliceQL(start=3, end=4, priority=1))
    proto_ql = jina_pb2.QueryLangProto()
    proto_ql.name = 'SliceQL'
    proto_ql.parameters['start'] = 3
    proto_ql.parameters['end'] = 4
    proto_ql.priority = 2
    querylangs = [driver_ql, wrapped_ql, proto_ql]

    def _assert_ordered_slices(request):
        # The test expects each priority to equal the position in the queryset.
        for position, item in enumerate(request.queryset):
            assert item.priority == position
            assert item.parameters['start'] == 3
            assert item.parameters['end'] == 4

    # Bulk extension.
    bulk_request = Request()
    bulk_request.queryset.extend(querylangs)
    assert isinstance(bulk_request.queryset, Sequence)
    _assert_ordered_slices(bulk_request)

    # Iterating the queryset is expected to yield exactly two distinct object ids.
    distinct_ids = {id(item) for item in bulk_request.queryset}
    assert len(distinct_ids) == 2

    # Extending from another request's queryset shows the same id count.
    copied_request = Request()
    copied_request.queryset.extend(bulk_request.queryset)
    copied_ids = {id(item) for item in copied_request.queryset}
    assert len(copied_ids) == 2

    # Element-wise appends.
    append_request = Request()
    for querylang in querylangs:
        append_request.queryset.append(querylang)
    _assert_ordered_slices(append_request)

    with pytest.raises(TypeError):
        append_request.queryset.extend(1)
def test_slice_ql_on_matches_and_chunks():
    """Slice ``[0:2]`` along paths ``cc``, ``c``, ``r``, ``mm``, ``m`` on nested docs.

    Before the driver runs every inspected container holds 10 items.
    Afterwards the root list, first-level chunks/matches and second-level
    chunks hold 2 items, while matches of chunks and chunks of matches are
    expected to still hold 10.
    """
    docs = random_docs_with_chunks_and_matches(10)
    driver = SliceQL(start=0, end=2, traversal_paths=('cc', 'c', 'r', 'mm', 'm'))

    # --- state before slicing: everything has 10 entries ---
    assert len(docs) == 10
    for doc in (docs[0], docs[-1]):
        assert len(doc.chunks) == 10
        assert len(doc.matches) == 10
        assert len(doc.matches[0].chunks) == 10
        assert len(doc.matches[-1].chunks) == 10
        for chunk in (doc.chunks[0], doc.chunks[-1]):
            assert len(chunk.chunks) == 10
            assert len(chunk.matches) == 10
    # chunks-of-matches-of-chunks are only inspected for the first document
    for chunk in (docs[0].chunks[0], docs[0].chunks[-1]):
        assert len(chunk.matches[0].chunks) == 10
        assert len(chunk.matches[-1].chunks) == 10

    driver._traverse_apply(docs)

    # --- state after slicing ---
    assert len(docs) == 2
    for doc in (docs[0], docs[-1]):
        # first level below the root is sliced
        assert len(doc.chunks) == 2
        assert len(doc.matches) == 2
        for chunk in (doc.chunks[0], doc.chunks[-1]):
            # chunks of chunks are sliced
            assert len(chunk.chunks) == 2
            # matches of chunks, and what hangs below them, keep all 10 items
            assert len(chunk.matches) == 10
            assert len(chunk.matches[0].chunks) == 10
            assert len(chunk.matches[-1].chunks) == 10
        # chunks of matches keep all 10 items
        assert len(doc.matches[0].chunks) == 10
        assert len(doc.matches[-1].chunks) == 10
def test_slice_ql_on_matches_and_chunks():
    """Slice ``[0:2]`` with ``granularity_range=(0, 2)`` and ``adjacency_range=(0, 2)``.

    Every inspected container starts with 10 items. After the driver runs,
    the root list, first-level chunks/matches and second-level chunks must
    have 2 items; matches of chunks and chunks of matches must still have 10.
    """
    docs = random_docs_with_chunks_and_matches(10)
    driver = SliceQL(start=0, end=2, granularity_range=(0, 2), adjacency_range=(0, 2))

    first, last = docs[0], docs[-1]
    full_before = [
        docs,
        first.chunks, last.chunks,
        first.matches, last.matches,
        first.matches[0].chunks, first.matches[-1].chunks,
        last.matches[0].chunks, last.matches[-1].chunks,
        first.chunks[0].chunks, first.chunks[0].matches,
        first.chunks[0].matches[0].chunks, first.chunks[0].matches[-1].chunks,
        first.chunks[-1].matches[0].chunks, first.chunks[-1].matches[-1].chunks,
        first.chunks[-1].chunks, first.chunks[-1].matches,
        last.chunks[0].chunks, last.chunks[0].matches,
        last.chunks[-1].chunks, last.chunks[-1].matches,
    ]
    assert all(len(container) == 10 for container in full_before)

    driver._traverse_apply(docs)

    # Re-read everything from ``docs``: the root list itself was sliced, so
    # ``docs[-1]`` now refers to a different document than before.
    sliced = [docs]
    untouched = []
    for doc in (docs[0], docs[-1]):
        # first-level chunks/matches and second-level chunks are sliced
        sliced += [doc.chunks, doc.matches, doc.chunks[0].chunks, doc.chunks[-1].chunks]
        # chunks of matches keep their size
        untouched += [doc.matches[0].chunks, doc.matches[-1].chunks]
        for chunk in (doc.chunks[0], doc.chunks[-1]):
            # matches of chunks (and their chunks) keep their size
            untouched += [chunk.matches, chunk.matches[0].chunks, chunk.matches[-1].chunks]

    assert all(len(container) == 2 for container in sliced)
    assert all(len(container) == 10 for container in untouched)
def test_selection():
    """Slice ``[0:1]`` along paths ``cmm`` and ``mcm`` and inspect the leaves.

    For both paths the remaining leaf must have granularity 1 and
    adjacency 2, and its container must hold exactly one element.
    """
    docs = build_docs()
    SliceQL(start=0, end=1, traversal_paths=['cmm', 'mcm'])._traverse_apply(docs)

    root = docs[0]
    sliced_leaves = (
        root.chunks[0].matches[0].matches,  # path 'cmm'
        root.matches[0].chunks[0].matches,  # path 'mcm'
    )
    for leaves in sliced_leaves:
        assert leaves[0].granularity == 1
        assert leaves[0].adjacency == 2
        assert len(leaves) == 1
def test_match_chunk():
    """Slice ``[0:1]`` along path ``mc`` only.

    Only the chunks of the first-level matches are expected to shrink to one
    element; every other inspected container keeps ``DOCUMENTS_PER_LEVEL``.
    """
    docs = build_docs()
    SliceQL(start=0, end=1, traversal_paths=['mc'])._traverse_apply(docs)

    root = docs[0]
    first_match = root.matches[0]

    # the only container addressed by 'mc'
    assert len(first_match.chunks) == 1

    unaffected = (
        docs,
        root.chunks,
        root.chunks[0].matches,
        root.matches,
        first_match.matches,
        first_match.matches[0].chunks,
    )
    for container in unaffected:
        assert len(container) == DOCUMENTS_PER_LEVEL
def test_slice_ql_on_chunks():
    """Slice ``[0:2]`` with ``recur_on='chunks'`` and ``recur_range=(0, 2)``.

    The root list, the chunks and the chunks of chunks must all be reduced
    to two elements.
    """
    docs = random_docs_with_chunks(10)
    SliceQL(start=0, end=2, recur_on='chunks', recur_range=(0, 2))._traverse_apply(docs)

    assert len(docs) == 2
    for doc in (docs[0], docs[-1]):
        # level 1
        assert len(doc.chunks) == 2
        # level 2
        for chunk in (doc.chunks[0], doc.chunks[-1]):
            assert len(chunk.chunks) == 2
def test_slice_ql_on_chunks():
    """Slice ``[0:2]`` along paths ``cc``, ``c`` and ``r``.

    The root list, the chunks and the chunks of chunks must all be reduced
    to two elements.
    """
    docs = random_docs_with_chunks(10)
    SliceQL(start=0, end=2, traversal_paths=('cc', 'c', 'r'))._traverse_apply(docs)

    assert len(docs) == 2
    for doc in (docs[0], docs[-1]):
        # level 1
        assert len(doc.chunks) == 2
        # level 2
        for chunk in (doc.chunks[0], doc.chunks[-1]):
            assert len(chunk.chunks) == 2
def test_traverse_apply():
    """Slice ``[0:1]`` along path ``mcm`` on a single document without chunks.

    The first built document has its ``chunks`` field cleared, then the
    driver is applied to a one-element list containing it.
    """
    root = build_docs()[0]
    root.ClearField('chunks')
    docs = [root]
    driver = SliceQL(start=0, end=1, traversal_paths=['mcm'])

    # sanity check: the leaf reached via match -> chunk -> match is (g=1, a=2)
    leaf = docs[0].matches[0].chunks[0].matches[0]
    assert leaf.granularity == 1
    assert leaf.adjacency == 2

    driver._traverse_apply(docs)

    assert len(docs[0].matches[0].chunks[0].matches) == 1
def test_only_adjacency():
    """Slice ``[0:1]`` recursing on matches only, with granularity fixed at 0.

    The root list, its matches and the matches of matches must shrink to one
    element; all inspected chunk containers and matches of chunks keep
    ``DOCUMENTS_PER_LEVEL`` elements.
    """
    docs = build_docs()
    driver = SliceQL(
        start=0,
        end=1,
        adjacency_range=(0, 2),
        granularity_range=(0, 0),
        recur_on=["matches"],
    )
    driver._traverse_apply(docs)

    assert len(docs) == 1
    root = docs[0]
    first_match = root.matches[0]

    for sliced in (root.matches, first_match.matches):
        assert len(sliced) == 1

    intact = (
        root.chunks,
        root.chunks[0].matches,
        first_match.chunks,
        first_match.matches[0].chunks,
    )
    for container in intact:
        assert len(container) == DOCUMENTS_PER_LEVEL
def test_both_from_0():
    """Slice ``[0:1]`` along paths ``r``, ``c``, ``m``, ``cc`` and ``mm``.

    The root, chunks, chunks of chunks, matches and matches of matches must
    shrink to one element; mixed chunk/match containers keep
    ``DOCUMENTS_PER_LEVEL`` elements.
    """
    docs = build_docs()
    SliceQL(start=0, end=1, traversal_paths=['r', 'c', 'm', 'cc', 'mm'])._traverse_apply(docs)

    assert len(docs) == 1
    root = docs[0]
    first_chunk = root.chunks[0]
    first_match = root.matches[0]

    sliced = (root.chunks, first_chunk.chunks, root.matches, first_match.matches)
    for container in sliced:
        assert len(container) == 1

    intact = (
        first_chunk.chunks[0].matches,
        first_chunk.matches,
        first_match.chunks,
        first_match.matches[0].chunks,
    )
    for container in intact:
        assert len(container) == DOCUMENTS_PER_LEVEL
def test_as_querylang():
    """Check the ``as_querylang`` view of ``SortQL``, ``SliceQL`` and ``ExcludeQL``.

    Each view must expose the driver's class name, its priority (0 when not
    given) and the constructor arguments under ``parameters``.
    """
    # SortQL with an explicit priority
    sort_view = SortQL(field='field_value', reverse=False, priority=2).as_querylang
    assert sort_view.name == 'SortQL'
    assert sort_view.priority == 2
    assert sort_view.parameters['field'] == 'field_value'
    assert not sort_view.parameters['reverse']

    # SliceQL with the default priority
    slice_view = SliceQL(start=10, end=20).as_querylang
    assert slice_view.name == 'SliceQL'
    assert slice_view.priority == 0
    assert slice_view.parameters['start'] == 10
    assert slice_view.parameters['end'] == 20

    # ExcludeQL built from a tuple of fields
    exclude_many_view = ExcludeQL(fields=('field1', 'field2')).as_querylang
    assert exclude_many_view.name == 'ExcludeQL'
    assert exclude_many_view.priority == 0
    assert list(exclude_many_view.parameters['fields']) == ['field1', 'field2']

    # ExcludeQL built from a single string: parameters still list one field
    exclude_one_view = ExcludeQL(fields='field1').as_querylang
    assert exclude_one_view.name == 'ExcludeQL'
    assert exclude_one_view.priority == 0
    assert list(exclude_one_view.parameters['fields']) == ['field1']
def test_read_from_req(mocker):
    """Check that a request-level queryset overrides the driver's parameters.

    The flow's ``SliceQL`` is configured with ``start=0, end=5``. The test
    indexes ten documents three times:

    * without a queryset: five documents are expected;
    * with a ``SliceQL(start=1, end=4, priority=1)`` queryset: three
      documents are expected;
    * with the same queryset at priority ``-1``: five documents are expected
      again.

    :param mocker: the ``pytest-mock`` fixture used to build call-recording
        wrappers around the validators.
    """

    def validate1(req):
        assert len(req.docs) == 5

    def validate2(req):
        assert len(req.docs) == 3

    # ``wraps`` (not ``wrap``) is the Mock keyword that forwards calls to the
    # wrapped callable. With ``wrap=`` the validators were never executed, so
    # the document counts above were not checked at all.
    response_mock = mocker.Mock(wraps=validate1)
    response_mock_2 = mocker.Mock(wraps=validate2)
    response_mock_3 = mocker.Mock(wraps=validate1)

    qs = QueryLang(SliceQL(start=1, end=4, priority=1))

    f = Flow(callback_on='body').add(uses='- !SliceQL | {start: 0, end: 5}')

    # without queryset
    with f:
        f.index(random_docs(10), on_done=response_mock)

    response_mock.assert_called()

    # with queryset
    with f:
        f.index(random_docs(10), queryset=qs, on_done=response_mock_2)

    response_mock_2.assert_called()

    qs.priority = -1
    # with queryset, but priority is no larger than driver's default
    with f:
        f.index(random_docs(10), queryset=qs, on_done=response_mock_3)

    response_mock_3.assert_called()
def test_adjacency0_granularity1():
    """Slice ``[0:1]`` along paths ``c``, ``cc``, ``cm`` and ``cmm``.

    Only containers reached through the first-level chunks are expected to
    shrink to one element; the root list and everything reached through the
    root's matches keep ``DOCUMENTS_PER_LEVEL`` elements.
    """
    docs = build_docs()
    SliceQL(start=0, end=1, traversal_paths=['c', 'cc', 'cm', 'cmm'])._traverse_apply(docs)

    root = docs[0]
    first_chunk = root.chunks[0]
    chunk_match = first_chunk.matches[0]
    root_match = root.matches[0]

    sliced = (
        root.chunks,            # 'c'
        first_chunk.chunks,     # 'cc'
        first_chunk.matches,    # 'cm'
        chunk_match.matches,    # 'cmm'
    )
    for container in sliced:
        assert len(container) == 1

    intact = (
        docs,
        first_chunk.chunks[0].matches,
        chunk_match.chunks,
        chunk_match.matches[0].chunks,
        root.matches,
        root_match.chunks,
        root_match.matches,
        root_match.matches[0].chunks,
    )
    for container in intact:
        assert len(container) == DOCUMENTS_PER_LEVEL
def test_slice_ql_on_matches(self):
    """Slice ``[0:2]`` with ``traverse_on='matches'`` and ``depth_range=(0, 2)``.

    The root list is expected to keep all 10 documents, while matches and
    matches of matches are reduced to two elements.
    """
    docs = random_docs_with_matches(10)
    SliceQL(start=0, end=2, traverse_on='matches', depth_range=(0, 2))._traverse_apply(docs)

    # the root list itself is not sliced
    assert len(docs) == 10
    for doc in (docs[0], docs[-1]):
        # level 1
        assert len(doc.matches) == 2
        # level 2
        for match in (doc.matches[0], doc.matches[-1]):
            assert len(match.matches) == 2
def test_ql_constructors_from_proto(source):
    """Rebuild a ``QueryLang`` from a converted protobuf object.

    :param source: callable applied to the protobuf object before it is
        passed to the ``QueryLang`` constructor.
    """
    driver = SliceQL(start=3, end=5, priority=999)
    pb_object = QueryLang(driver).as_pb_object

    rebuilt = QueryLang(source(pb_object))

    assert rebuilt.name == 'SliceQL'
    assert rebuilt.parameters['start'] == 3
    assert rebuilt.parameters['end'] == 5
    assert rebuilt.priority == 999
def test_traverse_apply():
    """Slice ``[0:1]`` at granularity 1 / adjacency 2 on a chunk-less document.

    The first built document has its ``chunks`` field cleared, then the
    driver (recursing on matches) is applied to a one-element list
    containing it.
    """
    root = build_docs()[0]
    root.ClearField('chunks')
    docs = [root]
    driver = SliceQL(
        start=0,
        end=1,
        adjacency_range=(2, 2),
        granularity_range=(1, 1),
        recur_on=["matches"],
    )

    # sanity check: a match with (g=1, a=2) exists below match -> chunk
    leaf = docs[0].matches[0].chunks[0].matches[0]
    assert leaf.granularity == 1
    assert leaf.adjacency == 2

    # NOTE: the original author annotated this call as the step that causes
    # an IndexError.
    driver._traverse_apply(docs)

    assert len(docs[0].matches[0].chunks[0].matches) == 1
def test_ql_priority():
    """Check that ``QueryLang.priority`` and ``_querylang.priority`` stay in sync.

    The priority is changed once through the wrapped ``_querylang`` object
    and once through the wrapper's own ``priority`` attribute; after each
    change both views must report the new value.
    """
    wrapper = QueryLang(SliceQL(start=1, end=4, priority=1))
    assert wrapper.priority == 1

    # write through the wrapped object, read through both
    wrapper._querylang.priority = -1
    assert wrapper._querylang.priority == -1
    assert wrapper.priority == -1

    # write through the wrapper, read through both
    wrapper.priority = -2
    assert wrapper._querylang.priority == -2
    assert wrapper.priority == -2
def test_selection():
    """Slice ``[0:1]`` at granularity 1 / adjacency 2, recursing on matches.

    For the leaves reached via chunk -> match -> match and via
    match -> chunk -> match, the remaining element must have granularity 1
    and adjacency 2, and its container must hold exactly one element.
    """
    docs = build_docs()
    driver = SliceQL(
        start=0,
        end=1,
        granularity_range=(1, 1),
        adjacency_range=(2, 2),
        recur_on=['matches'],
    )
    driver._traverse_apply(docs)

    root = docs[0]
    inspected = (
        root.chunks[0].matches[0].matches,  # chunk -> match -> matches
        root.matches[0].chunks[0].matches,  # match -> chunk -> matches
    )
    for leaves in inspected:
        # the surviving leaf sits at the targeted granularity/adjacency
        assert leaves[0].granularity == 1
        assert leaves[0].adjacency == 2
        assert len(leaves) == 1
def test_adjacency_chunks():
    """
    With these ranges and ``recur_on=["chunks"]`` the driver only runs
    ``_apply_all`` on the root node, so the rest of the document structure is
    expected to stay intact.
    This combination is not meant for practical use; it is kept because it
    demonstrates the behaviour nicely.
    """
    docs = build_docs()
    driver = SliceQL(
        start=0,
        end=1,
        adjacency_range=(0, 1),
        granularity_range=(0, 0),
        recur_on=["chunks"],
    )
    driver._traverse_apply(docs)

    # only the root list is sliced
    assert len(docs) == 1

    root = docs[0]
    first_match = root.matches[0]
    intact = (
        root.chunks,
        root.chunks[0].matches,
        root.matches,
        first_match.chunks,
        first_match.matches,
        first_match.matches[0].chunks,
    )
    for container in intact:
        assert len(container) == DOCUMENTS_PER_LEVEL
def test_ql_constructors_from_driver():
    """Build a ``QueryLang`` from a driver and check all of its views.

    The wrapper and its protobuf object must report the driver's name,
    parameters and priority, and ``as_driver_object`` must give back a
    ``SliceQL`` with the same ``start``, ``end`` and ``_priority``.
    """
    driver = SliceQL(start=3, end=5, priority=999)
    wrapper = QueryLang(driver)
    pb_view = wrapper.as_pb_object

    # the wrapper first, then its protobuf view
    for view in (wrapper, pb_view):
        assert view.name == 'SliceQL'
        assert view.parameters['start'] == 3
        assert view.parameters['end'] == 5
        assert view.priority == 999

    # round trip back to a driver instance
    assert isinstance(wrapper.as_driver_object, SliceQL)
    assert driver.start == wrapper.as_driver_object.start
    assert driver.end == wrapper.as_driver_object.end
    assert driver._priority == wrapper.as_driver_object._priority
def test_read_from_req():
    """Check that a request-level queryset overrides the driver's parameters.

    The flow's ``SliceQL`` is configured with ``start=0, end=5``. Ten
    documents are indexed without a queryset (five expected), with a
    priority-1 ``SliceQL(start=1, end=4)`` queryset (three expected), and
    with the same queryset at priority -1 (five expected).
    """

    def expect_five_docs(req):
        assert len(req.docs) == 5

    def expect_three_docs(req):
        assert len(req.docs) == 3

    override = QueryLang(SliceQL(start=1, end=4, priority=1))
    flow = Flow(callback_on='body').add(uses='- !SliceQL | {start: 0, end: 5}')

    # no queryset: the flow's own slice applies
    with flow:
        flow.index(random_docs(10), output_fn=expect_five_docs)

    # queryset sent with the request
    with flow:
        flow.index(random_docs(10), queryset=override, output_fn=expect_three_docs)

    # queryset sent with the request, but its priority is no larger than
    # the driver's default
    override.priority = -1
    with flow:
        flow.index(random_docs(10), queryset=override, output_fn=expect_five_docs)
def test_slice_ql_on_matches_and_chunks(self):
    """Slice ``[0:2]`` with ``traverse_on=['chunks', 'matches']``, ``depth_range=(0, 2)``.

    Every inspected container starts with 10 items. After the driver runs,
    the root list, first-level chunks/matches and second-level chunks must
    have 2 items; matches of chunks and chunks of matches must still have 10.
    """
    docs = random_docs_with_chunks_and_matches(10)
    driver = SliceQL(
        start=0,
        end=2,
        traverse_on=['chunks', 'matches'],
        depth_range=(0, 2),
    )

    def _assert_all_have(size, containers):
        for container in containers:
            assert len(container) == size

    # --- before slicing ---
    before = [docs]
    for doc in (docs[0], docs[-1]):
        before.extend((doc.chunks, doc.matches))
        before.extend((doc.matches[0].chunks, doc.matches[-1].chunks))
        for chunk in (doc.chunks[0], doc.chunks[-1]):
            before.extend((chunk.chunks, chunk.matches))
    # chunks-of-matches-of-chunks are only inspected for the first document
    for chunk in (docs[0].chunks[0], docs[0].chunks[-1]):
        before.extend((chunk.matches[0].chunks, chunk.matches[-1].chunks))
    _assert_all_have(10, before)

    driver._traverse_apply(docs)

    # --- after slicing ---
    assert len(docs) == 2
    for doc in (docs[0], docs[-1]):
        # level 1 is sliced; chunks of chunks (level 2) are sliced as well
        _assert_all_have(
            2,
            (doc.chunks, doc.matches, doc.chunks[0].chunks, doc.chunks[-1].chunks),
        )
        # matches of chunks, and the chunks below them, keep their size
        for chunk in (doc.chunks[0], doc.chunks[-1]):
            _assert_all_have(
                10,
                (chunk.matches, chunk.matches[0].chunks, chunk.matches[-1].chunks),
            )
        # chunks of matches keep their size
        _assert_all_have(10, (doc.matches[0].chunks, doc.matches[-1].chunks))
def test_querylang_request():
    """Run ``Client.check_input`` on random documents together with a queryset.

    The test has no assertions of its own; it passes as long as the call
    does not raise.
    """
    queryset = QueryLang(SliceQL(start=1, end=4, priority=1))
    documents = random_docs(10)
    Client.check_input(documents, queryset=queryset)