示例#1
0
def create_chunk_matches_to_score():
    """Build a root document holding two chunks with two scored matches each.

    Values as assigned by this function::

        doc: (id: '1', granularity: 0)
        |- chunk: (id: '10', granularity: doc.granularity + 1)
        |  |- match: (id: '12', parent_id: '1', score.value: 2)
        |  |- match: (id: '13', parent_id: '1', score.value: 3)
        |- chunk: (id: '20', granularity: doc.granularity + 1)
           |- match: (id: '24', parent_id: '2', score.value: 4)
           |- match: (id: '25', parent_id: '2', score.value: 5)

    Every match copies its chunk's granularity, gets ``length`` 4 and a
    ``NamedScore`` whose ``ref_id`` is the id of the chunk it belongs to.

    :return: the populated root ``Document``
    """
    root = Document()
    root.id = '1'
    root.granularity = 0
    matches_per_chunk = 2
    for parent_id in (1, 2):
        chunk_id = 10 * parent_id
        chunk = Document()
        chunk.id = str(chunk_id)
        chunk.granularity = root.granularity + 1
        # Scores of a chunk's matches start at twice the parent index.
        first_score = 2 * parent_id
        for offset in range(matches_per_chunk):
            score_value = first_score + offset
            hit = Document()
            hit.granularity = chunk.granularity
            hit.parent_id = str(parent_id)
            hit.score = NamedScore(value=score_value, ref_id=chunk.id)
            # Match id is the chunk id plus the score value (e.g. 10 + 2 -> '12').
            hit.id = str(chunk_id + score_value)
            hit.length = 4
            chunk.matches.append(hit)
        # The chunk is attached only after all of its matches have been added.
        root.chunks.append(chunk)
    return root
示例#2
0
 def request(field_type):
     """Build a typed 'index' request with 10 docs and 10 groundtruths.

     Each doc and each groundtruth receives one chunk with granularity 1.
     Depending on ``field_type`` ('text', 'buffer' or 'blob') the chunk pair
     is filled with content where the groundtruth value is one element
     longer than the doc value. Any other ``field_type`` leaves the chunks
     without content.

     :param field_type: name of the chunk field to populate
     :return: ``Request(req).as_typed_request('index')``
     """
     total_docs = 10

     def _contents():
         # Fresh (doc value, groundtruth value) pair for the requested field.
         if field_type == 'text':
             return 'aaa', 'aaaa'
         if field_type == 'buffer':
             return b'\x01\x02\x03', b'\x01\x02\x03\x04'
         if field_type == 'blob':
             return np.array([1, 1, 1]), np.array([1, 1, 1, 1])
         return None

     proto = jina_pb2.RequestProto()
     for _ in range(total_docs):
         doc = proto.index.docs.add()
         groundtruth = proto.index.groundtruths.add()
         doc_chunk = Document(doc.chunks.add())
         gt_chunk = Document(groundtruth.chunks.add())
         doc_chunk.granularity = 1
         gt_chunk.granularity = 1
         contents = _contents()
         if contents is not None:
             doc_value, gt_value = contents
             setattr(doc_chunk, field_type, doc_value)
             setattr(gt_chunk, field_type, gt_value)
     return Request(proto).as_typed_request('index')
示例#3
0
def build_docs():
    """Create root documents and recursively grow chunks and matches on them.

    ``DOCUMENTS_PER_LEVEL`` root documents are created with granularity 0 and
    adjacency 0. While a node's granularity is below 2 it receives
    ``DOCUMENTS_PER_LEVEL`` chunks (via ``add_chunk``); while its adjacency is
    below 2 it receives ``DOCUMENTS_PER_LEVEL`` matches (via ``add_match``).
    Every newly added chunk or match is expanded the same way.

    :return: a ``DocumentArray`` wrapping the root documents
    """
    granularity_limit = 2
    adjacency_limit = 2

    def _grow(node, granularity, adjacency):
        # Expand along the chunk direction first, then along the match direction.
        if granularity < granularity_limit:
            for _ in range(DOCUMENTS_PER_LEVEL):
                child = add_chunk(node)
                _grow(child, child.granularity, child.adjacency)
        if adjacency < adjacency_limit:
            for _ in range(DOCUMENTS_PER_LEVEL):
                neighbour = add_match(node)
                _grow(neighbour, neighbour.granularity, neighbour.adjacency)

    roots = []
    for _ in range(DOCUMENTS_PER_LEVEL):
        root = Document()
        root.granularity = 0
        root.adjacency = 0
        roots.append(root)
        _grow(root, 0, 0)
    return DocumentArray(roots)
示例#4
0
def add_match(doc):
    """Create a match for ``doc``, append it to ``doc.matches`` and return it.

    The match takes ``doc``'s granularity and an adjacency one higher than
    ``doc``'s.

    :param doc: document that receives the new match
    :return: the ``Document`` object created here (the one passed to ``append``)
    """
    new_match = Document()
    new_match.adjacency = doc.adjacency + 1
    new_match.granularity = doc.granularity
    doc.matches.append(new_match)
    return new_match
示例#5
0
def add_chunk(doc):
    """Create a chunk for ``doc``, append it to ``doc.chunks`` and return it.

    The chunk takes ``doc``'s adjacency and a granularity one higher than
    ``doc``'s.

    :param doc: document that receives the new chunk
    :return: the ``Document`` object created here (the one passed to ``append``)
    """
    new_chunk = Document()
    new_chunk.adjacency = doc.adjacency
    new_chunk.granularity = doc.granularity + 1
    doc.chunks.append(new_chunk)
    return new_chunk
示例#6
0
def chunk_4(textual_embedding):
    """Return a 'textual' chunk at granularity 1 carrying the given embedding.

    :param textual_embedding: value stored in the chunk's ``embedding`` field
    :return: the configured ``Document``
    """
    textual_chunk = Document()
    textual_chunk.granularity = 1
    textual_chunk.modality = 'textual'
    textual_chunk.embedding = textual_embedding
    return textual_chunk
示例#7
0
def chunk_2(textual_embedding):
    """Return a 'textual' chunk at granularity 0 whose content is the given value.

    Unlike the sibling chunk builders, the value is assigned to ``content``
    rather than ``embedding``.

    :param textual_embedding: value stored in the chunk's ``content`` field
    :return: the configured ``Document``
    """
    textual_chunk = Document()
    textual_chunk.granularity = 0
    textual_chunk.modality = 'textual'
    textual_chunk.content = textual_embedding
    return textual_chunk
示例#8
0
def chunk_1(visual_embedding):
    """Return a 'visual' chunk at granularity 0 carrying the given embedding.

    :param visual_embedding: value stored in the chunk's ``embedding`` field
    :return: the configured ``Document``
    """
    visual_chunk = Document()
    visual_chunk.granularity = 0
    visual_chunk.modality = 'visual'
    visual_chunk.embedding = visual_embedding
    return visual_chunk