def test_pb_obj2dict():
    """Check that ``Document.get_attrs`` exposes text, tags and chunks.

    A ``DocumentProto`` is filled with a text, a flat tag, a nested tag
    dict and a single chunk; it is then wrapped in ``Document`` and the
    values returned by ``get_attrs`` are compared with what was written.
    """
    # Build the raw protobuf message first.
    proto = jina_pb2.DocumentProto()
    proto.text = 'this is text'
    proto.tags['id'] = 'id in tags'
    proto.tags['inner_dict'] = {'id': 'id in inner_dict'}

    child = proto.chunks.add()
    child.text = 'text in chunk'
    child.tags['id'] = 'id in chunk tags'

    # Wrap the message and pull the three attributes out in one call.
    wrapped = Document(proto)
    attrs = wrapped.get_attrs('text', 'tags', 'chunks')

    assert attrs['text'] == 'this is text'

    tags = attrs['tags']
    assert tags['id'] == 'id in tags'
    assert tags['inner_dict']['id'] == 'id in inner_dict'

    # Chunks come back as an iterable; materialize it to inspect the content.
    chunk_docs = list(attrs['chunks'])
    assert len(chunk_docs) == 1

    only_chunk = chunk_docs[0]
    assert isinstance(only_chunk, Document)
    assert only_chunk.text == 'text in chunk'
    assert only_chunk.tags['id'] == 'id in chunk tags'
def test_match2docranker_batching(ranker):
    """Score a batch of queries with ``ranker`` and verify the resulting order.

    Builds 15 queries with 10 matches each, where match ``j`` carries
    ``tags={'dummy_score': j}``. The test expects ``ranker.score`` to return,
    for every query, one score per match equal to that match's index, and
    then checks that sorting the matches by score in descending order
    reverses them.

    :param ranker: object providing ``score(old_scores, query_metas,
        match_metas)``; supplied by the test setup (presumably a pytest
        fixture -- confirm against the surrounding file).
    """
    num_queries = 15
    num_matches = 10

    queries = DocumentSet([])
    prior_scores = []
    query_meta_batch = []
    match_meta_batch = []

    for q_idx in range(num_queries):
        query = Document(id=f'query-{q_idx}')
        metas_for_query = []
        for m_idx in range(num_matches):
            candidate = Document(
                id=f'match-{q_idx}-{m_idx}', tags={'dummy_score': m_idx}
            )
            query.matches.append(candidate)
            metas_for_query.append(candidate.get_attrs('tags__dummy_score'))
        queries.append(query)
        # Every match starts with a previous score of 0.
        prior_scores.append([0] * num_matches)
        # No query-side meta information is passed to the ranker.
        query_meta_batch.append(None)
        match_meta_batch.append(metas_for_query)

    scored_batch = ranker.score(prior_scores, query_meta_batch, match_meta_batch)
    assert len(scored_batch) == num_queries

    def score_value(doc):
        return doc.score.value

    for q_idx, (query, scores) in enumerate(zip(queries, scored_batch)):
        assert len(scores) == num_matches
        for position, (match, score) in enumerate(zip(query.matches, scores)):
            match.score = NamedScore(value=position)
            assert score == position

        query.matches.sort(key=score_value, reverse=True)

        # After a descending sort, rank 1 holds the highest-indexed match.
        for rank, match in enumerate(query.matches, 1):
            expected_score = num_matches - rank
            assert match.id == f'match-{q_idx}-{expected_score}'
            assert match.score.value == expected_score