def eval_request():
    """Build an index request holding ten doc/groundtruth pairs.

    Every doc and every groundtruth gets its id updated and then receives
    a single chunk. Doc chunks carry the embedding ``[1, 1]`` and
    groundtruth chunks carry ``[2, 2]``.

    :return: a ``Request`` wrapping the populated ``RequestProto``
    """
    proto = jina_pb2.RequestProto()
    for _ in range(10):
        candidate = Document(proto.index.docs.add())
        truth = Document(proto.index.groundtruths.add())

        # Ids are updated before any chunk is attached, doc first.
        for item in (candidate, truth):
            item.update_id()

        candidate_chunk, truth_chunk = candidate.add_chunk(), truth.add_chunk()
        candidate_chunk.embedding = np.array([1, 1])
        truth_chunk.embedding = np.array([2, 2])
    return Request(proto)
def doc_with_multimodal_chunks(embeddings):
    """Create a document with three chunks of different modalities.

    The chunks are tagged ``'visual1'``, ``'visual2'`` and ``'textual'``
    (in that order) and receive ``embeddings[0]``, ``embeddings[1]`` and
    ``embeddings[2]`` respectively.

    :param embeddings: indexable container read at positions 0, 1 and 2
    :return: the parent ``Document`` with the three chunks attached
    """
    modalities = ('visual1', 'visual2', 'textual')

    parent = Document()
    parts = [Document() for _ in modalities]

    for part, modality in zip(parts, modalities):
        part.modality = modality
    for position, part in enumerate(parts):
        part.embedding = embeddings[position]

    # Chunk ids are updated first, then the parent's, and only afterwards
    # are the chunks attached to the parent.
    for part in parts:
        part.update_id()
    parent.update_id()
    for part in parts:
        parent.add_chunk(part)
    return parent
def create_document_to_search():
    """Create a document with id ``'0' * 16`` and five chunks.

    Chunk ids are the digits 1 through 5, each repeated 16 times.

    :return: the populated ``Document``
    """
    # 1-D embedding
    # doc: 0
    #   - chunk: 1
    #   - chunk: 2
    #   - chunk: 3
    #   - chunk: 4
    #   - chunk: 5 - will be missing from KV indexer
    root = Document()
    root.id = '0' * 16
    for number in range(1, 6):
        root.add_chunk().id = str(number) * 16
    return root
def create_document_to_search_with_matches_on_chunks():
    """Create a document with one chunk that carries five matches.

    The document id is ``'0' * 16`` and the chunk id is ``'1' * 16``.
    Match doc ids are the digits 2 through 6, each repeated 16 times, and
    every match has a score value of ``1.``.

    :return: the populated ``Document``
    """
    # 1-D embedding
    # doc: 0
    #   - chunk: 1
    #     - match: 2
    #     - match: 3
    #     - match: 4
    #     - match: 5 - will be missing from KV indexer
    #     - match: 6 - will be missing from KV indexer
    root = Document()
    root.id = '0' * 16
    only_chunk = root.add_chunk()
    only_chunk.id = '1' * 16
    for number in range(2, 7):
        only_chunk.add_match(doc_id=str(number) * 16, score_value=1.)
    return root