示例#1
0
def create_document_to_score():
    """Build a root document with two chunks, each carrying two scored matches.

    Every match gets ``length`` set to its numeric id, a ``price`` tag copied
    from its score value, and a ``discount`` tag set to ``DISCOUNT_VAL``
    (defined outside this function).
    """
    # Values as assigned below (ids and parent ids are assigned as strings):
    #
    # doc: 1
    # |- chunk: 2
    # |  |- matches: (id: 4, parent_id: 40, score.value: 4),
    # |  |- matches: (id: 5, parent_id: 50, score.value: 5),
    # |
    # |- chunk: 3
    #    |- matches: (id: 6, parent_id: 60, score.value: 6),
    #    |- matches: (id: 7, parent_id: 70, score.value: 7)
    root = Document()
    root.id = '1'
    for chunk_number in (2, 3):
        chunk = Document()
        chunk.id = str(chunk_number)
        for offset in (0, 1):
            match = Document()
            # Match ids are derived from the owning chunk's number: 2 * chunk + offset.
            numeric_id = 2 * chunk_number + offset
            match.id = str(numeric_id)
            match.parent_id = str(10 * numeric_id)
            match.length = numeric_id
            # to be used by MaxRanker and MinRanker
            match.score = NamedScore(value=numeric_id, ref_id=chunk.id)
            match.tags['price'] = match.score.value
            match.tags['discount'] = DISCOUNT_VAL
            chunk.matches.append(match)
        # The chunk is attached only after all of its matches have been added.
        root.chunks.append(chunk)
    return root
示例#2
0
def create_chunk_chunk_matches_to_score():
    """Build a three-level document whose innermost chunks carry scored matches.

    A root document holds a single chunk, which in turn holds two nested
    chunks; each nested chunk receives two matches. The result is returned
    wrapped in a new ``Document`` constructed from the root.
    """
    # Values as assigned below (ids and parent ids are assigned as strings):
    #
    # doc: (id: 100, granularity=0)
    # |- chunk: (id: 101, granularity=1)
    #       |- chunks: (id: 10)
    #       |   |- matches: (id: 12, parent_id: 1, score.value: 2),
    #       |   |- matches: (id: 13, parent_id: 1, score.value: 3),
    #       |- chunks: (id: 20)
    #           |- matches: (id: 24, parent_id: 2, score.value: 4),
    #           |- matches: (id: 25, parent_id: 2, score.value: 5)
    matches_per_leaf = 2

    root = Document()
    root.id = '100'
    root.granularity = 0

    middle = Document()
    middle.id = '101'
    middle.parent_id = root.id
    middle.granularity = root.granularity + 1

    for group in (1, 2):
        group_key = str(group)
        leaf = Document()
        leaf.id = str(group * 10)
        leaf.parent_id = group_key
        leaf.granularity = middle.granularity + 1

        # Score values for a group start at twice the group number.
        first_score = group * 2
        for score_value in range(first_score, first_score + matches_per_leaf):
            match = Document()
            match.parent_id = group_key
            match.score = NamedScore(value=score_value, ref_id=leaf.id)
            match.id = str(10 * group + score_value)
            match.length = 4
            leaf.matches.append(match)
        # Children are attached bottom-up: each document is appended to its
        # parent only once it has been fully populated.
        middle.chunks.append(leaf)
    root.chunks.append(middle)
    return Document(root)
def create_document_to_score_same_depth_level():
    """Build a document whose four scored matches are attached directly to it.

    Each match's score references the root document's id via ``ref_id``.
    """
    # doc: 1
    # |  matches: (id: 2, parent_id: 20, score.value: 30, weight: 3),
    # |  matches: (id: 3, parent_id: 20, score.value: 40, weight: 4),
    # |  matches: (id: 4, parent_id: 30, score.value: 20, weight: 2),
    # |  matches: (id: 5, parent_id: 30, score.value: 10, weight: 1),

    # One row per match: (id, parent_id, score value, weight).
    match_rows = (
        (2, 20, 30, 3),
        (3, 20, 40, 4),
        (4, 30, 20, 2),
        (5, 30, 10, 1),
    )

    root = Document()
    root.id = 1

    for row in match_rows:
        identifier, parent, score_value, match_weight = row
        match = Document()
        match.id = identifier
        match.parent_id = parent
        match.weight = match_weight
        match.score = NamedScore(value=score_value, ref_id=root.id)
        root.matches.append(match)
    return root