示例#1
0
def create_chunk_chunk_matches_to_score():
    """Build a nested fixture: root doc -> chunk -> two sub-chunks with matches.

    Structure assembled below (ids and scores exactly as the loops compute
    them; granularities are the values assigned here)::

        doc: (id: 100, granularity=0)
        |- chunk: (id: 101, granularity=1)
              |- chunks: (id: 10, granularity=2)
              |   |- matches: (id: 12, parent_id: 1, score.value: 2),
              |   |- matches: (id: 13, parent_id: 1, score.value: 3),
              |- chunks: (id: 20, granularity=2)
                  |- matches: (id: 24, parent_id: 2, score.value: 4),
                  |- matches: (id: 25, parent_id: 2, score.value: 5)

    Every match gets ``length = 4`` and a score whose ``ref_id`` is the id of
    the sub-chunk that holds it.

    :return: a new ``Document`` constructed from the root document
    """
    matches_per_sub_chunk = 2

    root = Document()
    root.id = '100'
    root.granularity = 0

    middle = Document()
    middle.id = '101'
    middle.parent_id = root.id
    middle.granularity = root.granularity + 1

    for group in (1, 2):
        group_key = str(group)
        first_score = 2 * group

        sub_chunk = Document()
        sub_chunk.id = str(10 * group)
        sub_chunk.parent_id = group_key
        sub_chunk.granularity = middle.granularity + 1

        for offset in range(matches_per_sub_chunk):
            value = first_score + offset
            hit = Document()
            hit.parent_id = group_key
            hit.score = NamedScore(value=value, ref_id=sub_chunk.id)
            hit.id = str(10 * group + value)
            hit.length = 4
            sub_chunk.matches.append(hit)

        # Attach the sub-chunk only once its matches are in place, keeping
        # the original construction order (append may snapshot the document
        # -- TODO confirm against the Document implementation).
        middle.chunks.append(sub_chunk)

    root.chunks.append(middle)
    return Document(root)
示例#2
0
def create_document_to_score():
    """Build a document with two chunks, each holding two scored matches.

    Structure::

        doc: 1
        |- chunk: 2
        |  |- matches: (id: 4, parent_id: 40, score.value: 4),
        |  |- matches: (id: 5, parent_id: 50, score.value: 5),
        |
        |- chunk: 3
           |- matches: (id: 6, parent_id: 60, score.value: 6),
           |- matches: (id: 7, parent_id: 70, score.value: 7)

    Each match additionally gets ``length`` equal to its numeric id, a
    ``price`` tag read back from its score value, and a ``discount`` tag set
    to ``DISCOUNT_VAL``.

    :return: the root document
    """
    root = Document()
    root.id = '1'

    for chunk_number in (2, 3):
        piece = Document()
        piece.id = str(chunk_number)

        # Match ids are 2 * chunk id and 2 * chunk id + 1.
        for match_number in (2 * chunk_number, 2 * chunk_number + 1):
            hit = Document()
            hit.id = str(match_number)
            hit.parent_id = str(10 * match_number)
            hit.length = match_number
            # score to be used by MaxRanker and MinRanker
            hit.score = NamedScore(value=match_number, ref_id=piece.id)
            hit.tags['price'] = hit.score.value
            hit.tags['discount'] = DISCOUNT_VAL
            piece.matches.append(hit)

        root.chunks.append(piece)

    return root
示例#3
0
 def search(self, docs: 'DocumentArray', parameters: Dict, **kwargs) \
         -> 'DocumentArray':
     """Attach the closest stored documents to every query document as matches.

     The embeddings of ``docs`` and of ``self._docs`` are stacked into two
     arrays, passed through ``_norm`` and ``_ext_A`` / ``_ext_B``, and
     compared with ``_cosine`` (helpers defined elsewhere; presumably a
     normalised cosine distance -- confirm against their definitions).
     ``self._get_sorted_top_k`` then yields, per query, the indices and
     distances of the selected stored documents.

     :param docs: query documents; each receives its matches in place
     :param parameters: request parameters; ``top_k`` (default ``5``) is
         coerced with ``int`` and must be positive
     :param kwargs: accepted for interface compatibility, unused here
     :return: the same ``docs`` object, with matches appended
     :raises AssertionError: if ``top_k`` is not positive
     """
     # np.stack raises an opaque ValueError on an empty sequence; with no
     # queries or nothing indexed there is simply nothing to match.
     if not docs or not self._docs:
         return docs

     a = np.stack(docs.get_attributes('embedding'))
     b = np.stack(self._docs.get_attributes('embedding'))
     q_emb = _ext_A(_norm(a))
     d_emb = _ext_B(_norm(b))
     dists = _cosine(q_emb, d_emb)

     top_k = int(parameters.get('top_k', 5))
     # NOTE(review): kept as an assert so the exception type is unchanged for
     # callers; be aware it is skipped when Python runs with -O.
     assert top_k > 0, 'top_k must be a positive integer'

     idx, dist = self._get_sorted_top_k(dists, top_k)
     for _q, _ids, _dists in zip(docs, idx, dist):
         for _id, _dist in zip(_ids, _dists):
             # Copy the stored document so the index entry is not mutated.
             doc = Document(self._docs[int(_id)], copy=True)
             # Turn the distance into a score: smaller distance, higher score.
             doc.score.value = 1 - _dist
             # The position in the index is recorded as the parent id.
             doc.parent_id = int(_id)
             _q.matches.append(doc)
     return docs
def create_document_to_score_same_depth_level():
    """Build one document whose matches all sit directly on it.

    Structure::

        doc: 1
        |  matches: (id: 2, parent_id: 20, score.value: 30, weight: 3),
        |  matches: (id: 3, parent_id: 20, score.value: 40, weight: 4),
        |  matches: (id: 4, parent_id: 30, score.value: 20, weight: 2),
        |  matches: (id: 5, parent_id: 30, score.value: 10, weight: 1),

    Each match's score uses the root document's id as ``ref_id``.

    :return: the root document
    """
    # One row per match: (match id, parent id, score value, weight).
    match_specs = (
        (2, 20, 30, 3),
        (3, 20, 40, 4),
        (4, 30, 20, 2),
        (5, 30, 10, 1),
    )

    root = Document()
    root.id = 1

    for spec in match_specs:
        ident, owner, score_value, weight_value = spec
        hit = Document()
        hit.id = ident
        hit.parent_id = owner
        hit.weight = weight_value
        hit.score = NamedScore(value=score_value, ref_id=root.id)
        root.matches.append(hit)

    return root