Exemplo n.º 1
0
def analyse_operations_sentence(orig_sent, sys_sent, ref_sents, orig_parse,
                                sys_parse, ref_parses):
    """Score the operation labels of a system output against each reference.

    The original sentence is labelled twice: once from its alignment with
    the system output ("auto" labels) and once per reference from its
    alignment with that reference ("silver" labels). Each reference yields
    one per-label F1 array; the element-wise maximum over all references is
    returned.

    Args:
        orig_sent: Original sentence as a whitespace-separated string.
        sys_sent: System output as a whitespace-separated string.
        ref_sents: Iterable of reference sentences (whitespace-separated).
        orig_parse: Parse of the original sentence, passed to ``align``.
        sys_parse: Parse of the system output, passed to ``align``.
        ref_parses: Iterable of reference parses, paired with ``ref_sents``
            through ``zip`` (extra items on the longer side are ignored).

    Returns:
        The result of ``np.amax(..., axis=0)`` over the per-reference score
        arrays, i.e. one value per entry of ``ORIG_OPS_LABELS``.
    """
    # Labels for the original sentence as implied by the system output.
    sys_word_aligns = align(orig_parse, sys_parse)[0]
    auto_annots, _ = annotate_sentence(orig_sent.split(), sys_sent.split(),
                                       sys_word_aligns, orig_parse, sys_parse)
    auto_labels = _from_annots_to_labels(auto_annots, ORIG_OPS_LABELS, 'C')

    per_reference_scores = []
    for reference, reference_parse in zip(ref_sents, ref_parses):
        # Labels for the original sentence as implied by this reference.
        ref_word_aligns = align(orig_parse, reference_parse)[0]
        silver_annots, _ = annotate_sentence(orig_sent.split(),
                                             reference.split(),
                                             ref_word_aligns,
                                             orig_parse, reference_parse)
        silver_labels = _from_annots_to_labels(silver_annots,
                                               ORIG_OPS_LABELS, 'C')

        # NOTE(review): f1_score is presumably sklearn.metrics.f1_score;
        # with average=None it would return one score per label - confirm.
        per_reference_scores.append(
            f1_score(silver_labels, auto_labels,
                     labels=ORIG_OPS_LABELS, average=None))

    # Keep, for every label, the best score achieved against any reference.
    return np.amax(per_reference_scores, axis=0)
Exemplo n.º 2
0
def test_align_str():
    """Check ``aligner.align`` when both sentences are given as raw strings.

    The first element of the result is compared against index pairs (the
    indexes start at 1) and the second against the matching word pairs.
    """
    source_text = "Four men died in an accident."
    target_text = "4 people are dead from a collision."

    expected_indexes = [[7, 8], [2, 2], [3, 4], [1, 1], [6, 7], [5, 6]]
    expected_words = [['.', '.'], ['men', 'people'], ['died', 'dead'],
                      ['Four', '4'], ['accident', 'collision'], ['an', 'a']]

    result = aligner.align(source_text, target_text)

    assert result[0] == expected_indexes
    assert result[1] == expected_words
Exemplo n.º 3
0
def test_align_tokens():
    """Check ``aligner.align`` when both sentences are given as token lists.

    The first element of the result is compared against index pairs (the
    indexes start at 1) and the second against the matching word pairs.
    """
    source_tokens = ['Four', 'men', 'died', 'in', 'an', 'accident', '.']
    target_tokens = ['4', 'people', 'are', 'dead', 'from', 'a', 'collision',
                     '.']

    expected_indexes = [[7, 8], [2, 2], [3, 4], [1, 1], [6, 7], [5, 6]]
    expected_words = [['.', '.'], ['men', 'people'], ['died', 'dead'],
                      ['Four', '4'], ['accident', 'collision'], ['an', 'a']]

    result = aligner.align(source_tokens, target_tokens)

    assert result[0] == expected_indexes
    assert result[1] == expected_words
Exemplo n.º 4
0
def align_scenes_sentences(scenes, synt_parse_sentences):
    """Align every scene with every sentence.

    Args:
        scenes: Scene texts; they are parsed here with
            ``syntactic_parse_texts``.
        synt_parse_sentences: Already-parsed sentences, passed to ``align``
            as they are.

    Returns:
        A list with one entry per scene; each entry is a list with one item
        per sentence holding element ``[1]`` of ``align(scene, sentence)``.
        Per the original author's note these are word pairs of the form
        ``[[word_scene, word_sentence], ...]``.
    """
    parsed_scenes = syntactic_parse_texts(scenes)
    return [
        [align(parsed_scene, parsed_sent)[1]
         for parsed_sent in synt_parse_sentences]
        for parsed_scene in parsed_scenes
    ]
Exemplo n.º 5
0
def align_scenes_sentences(synt_scenes, synt_sents, allow_mutiple_matches):
    """Pick, for each scene, the sentence with the most word alignments.

    Args:
        synt_scenes: Parsed scenes, processed in order.
        synt_sents: Parsed sentences; each is a candidate for every scene.
        allow_mutiple_matches: When falsy, a sentence already chosen for an
            earlier scene is skipped for later scenes. When truthy, the same
            sentence may be chosen for several scenes.

    Returns:
        A list with one entry per scene: element ``[1]`` of
        ``align(scene, sentence)`` for the chosen sentence, or an empty list
        when no candidate produced any alignment.
    """
    scenes_sents_aligns = []
    # A set gives constant-time membership tests in the inner loop.
    matched_sent_nums = set()
    for synt_scene in synt_scenes:
        best_aligns = []
        # Reset per scene so a stale index from an earlier scene can never
        # be recorded as a match for this one.
        best_sent_num = None
        for sent_num, synt_sent in enumerate(synt_sents):
            if not allow_mutiple_matches and sent_num in matched_sent_nums:
                continue
            word_alignments = align(synt_scene, synt_sent)[1]
            # Strict comparison: on ties the earliest sentence wins, and a
            # sentence with no alignments is never selected.
            if len(word_alignments) > len(best_aligns):
                best_aligns = word_alignments
                best_sent_num = sent_num
        scenes_sents_aligns.append(best_aligns)
        if best_sent_num is not None:
            matched_sent_nums.add(best_sent_num)

    return scenes_sents_aligns