def random_queries(num_docs, chunks_per_doc=5):
    """Lazily generate query documents, each populated with a fixed number of chunks.

    Document ``j`` gets ``id = UniqueId(j)``. Its chunks are numbered
    ``num_docs + j * chunks_per_doc + k`` for ``k`` in
    ``range(chunks_per_doc)``, so chunk numbers begin at ``num_docs``.

    :param num_docs: how many documents to yield
    :param chunks_per_doc: how many chunks to add to every document
    :return: a generator yielding one ``jina_pb2.DocumentProto`` per document
    """
    for doc_number in range(num_docs):
        query = jina_pb2.DocumentProto()
        query.id = UniqueId(doc_number)

        # Numbers used by this document's chunks form one contiguous run.
        first_chunk_number = num_docs + doc_number * chunks_per_doc
        for chunk_number in range(first_chunk_number, first_chunk_number + chunks_per_doc):
            chunk = query.chunks.add()
            chunk.id = UniqueId(chunk_number)

        yield query
def input_fn():
    """Build two ``DocumentProto`` messages, each holding an embedding and one chunk.

    The documents receive the embeddings ``e1`` and ``e3``. Their single
    chunks receive ``e2`` / ``e4`` and the ids ``UniqueId(1)`` /
    ``UniqueId(2)``. The ``e*`` values are taken from the surrounding scope.

    :return: a two-element list with the first and second document, in that order
    """

    def _doc_with_chunk(doc_embedding, chunk_embedding, chunk_number):
        # One root message plus a single entry appended to its ``chunks`` field.
        root = DocumentProto()
        NdArray(root.embedding).value = doc_embedding
        chunk = root.chunks.add()
        NdArray(chunk.embedding).value = chunk_embedding
        chunk.id = UniqueId(chunk_number)
        return root

    return [_doc_with_chunk(e1, e2, 1), _doc_with_chunk(e3, e4, 2)]
def input_fn():
    """Build two ``Document`` objects, each holding an embedding and one chunk.

    Every document and every chunk is populated inside its own ``with``
    block. A chunk is attached to its parent after the chunk's block has
    closed and while the parent's block is still open. The embeddings
    ``e1``..``e4`` are taken from the surrounding scope.

    :return: a two-element list with the first and second document, in that order
    """

    def _doc_with_chunk(doc_embedding, chunk_embedding, chunk_number):
        with Document() as parent:
            parent.embedding = doc_embedding
            with Document() as child:
                child.embedding = chunk_embedding
                child.id = UniqueId(chunk_number)
            # Attach only once the chunk's own ``with`` block has finished.
            parent.chunks.add(child)
        return parent

    return [_doc_with_chunk(e1, e2, 1), _doc_with_chunk(e3, e4, 2)]
def input_fn():
    """Build three text-only ``DocumentProto`` messages with ids 1, 2 and 3.

    Each document's ``text`` has the form ``title: ..., body: ...`` and its
    ``id`` is ``UniqueId(n)``, where ``n`` is the document's 1-based position.

    :return: a list of the three documents in id order
    """
    texts = (
        'title: this is mode1 from doc1, body: this is mode2 from doc1',
        'title: this is mode1 from doc2, body: this is mode2 from doc2',
        'title: this is mode1 from doc3, body: this is mode2 from doc3',
    )

    docs = []
    for doc_number, text in enumerate(texts, start=1):
        doc = jina_pb2.DocumentProto()
        doc.text = text
        doc.id = UniqueId(doc_number)
        docs.append(doc)
    return docs
def test_set_get_success(docset, document_factory):
    """Assign one document under two different ids and check it reads back each time.

    :param docset: fixture providing the document set under test
    :param document_factory: fixture whose ``create`` builds the document to store
    """
    docset.build()
    expected_text = 'test 4'
    replacement = document_factory.create(4, expected_text)

    # First key: an id string built directly from ``UniqueId(2)``.
    explicit_key = str(UniqueId(2))
    docset[explicit_key] = replacement
    assert docset[explicit_key].text == expected_text

    # Second key: the id read from the document at position 0.
    first_key = docset[0].id
    docset[first_key] = replacement
    assert docset[first_key].text == expected_text