示例#1
0
def error_chars(transcription, ground_truth, missing_char="_"):
    """Align a transcription with its ground truth and flag differing positions.

    Both inputs are turned into lists and aligned with
    ``sequence_alignment.levenshtein_alignment``. Every aligned pair
    contributes one item to each of the two output strings; in a differing
    pair, a side that the alignment reports as ``None`` is rendered as
    ``missing_char``.

    Args:
        transcription: Sequence to check (source side of the alignment).
        ground_truth: Reference sequence (target side of the alignment).
        missing_char: Placeholder written where one side of a pair is None.

    Returns:
        A 4-tuple ``(source_transcription, target_transcription,
        errors_source, errors_target)``. The first two are the aligned
        strings. ``errors_source[i]`` is True wherever pair ``i`` differs;
        ``errors_target[i]`` is True only where the target side of pair
        ``i`` was None.
    """
    alignment = sequence_alignment.levenshtein_alignment(
        list(transcription), list(ground_truth))

    # Pieces are collected and joined once at the end instead of growing
    # the strings with ``+=`` on every iteration.
    source_parts = []
    target_parts = []
    errors_source = []
    errors_target = []

    for source_char, target_char in alignment:
        if source_char != target_char:
            target_missing = False

            if source_char is None:
                source_char = missing_char

            if target_char is None:
                target_char = missing_char
                target_missing = True

            # The source side is flagged for every mismatch; the target
            # side only when it had no counterpart at all.
            errors_source.append(True)
            errors_target.append(target_missing)
        else:
            errors_source.append(False)
            errors_target.append(False)

        source_parts.append(source_char)
        target_parts.append(target_char)

    return ("".join(source_parts), "".join(target_parts),
            errors_source, errors_target)
示例#2
0
def console_transcription_errors(transcription,
                                 ground_truth,
                                 color=Color.RED,
                                 missing_char="_"):
    """Render an aligned transcription / ground-truth pair with errors colored.

    Both inputs are turned into lists and aligned with
    ``sequence_alignment.levenshtein_alignment``. For every differing pair
    the source item is passed through ``color.color_console_text``; the
    target item is colored only when the alignment reported it as ``None``.
    A ``None`` side of a differing pair is rendered as ``missing_char``.

    Args:
        transcription: Sequence to check (source side of the alignment).
        ground_truth: Reference sequence (target side of the alignment).
        color: Object providing ``color_console_text(text)``; defaults to
            ``Color.RED``.
        missing_char: Placeholder written where one side of a pair is None.

    Returns:
        A 2-tuple ``(source_transcription, target_transcription)`` holding
        the aligned strings.
    """
    alignment = sequence_alignment.levenshtein_alignment(
        list(transcription), list(ground_truth))

    # Pieces are collected and joined once at the end instead of growing
    # the strings with ``+=`` on every iteration.
    source_parts = []
    target_parts = []

    for source_char, target_char in alignment:
        if source_char != target_char:
            target_missing = target_char is None

            if source_char is None:
                source_char = missing_char

            if target_missing:
                target_char = missing_char

            # Source is colored first, then (optionally) the target, which
            # keeps the original order of calls into ``color``.
            source_char = color.color_console_text(source_char)

            if target_missing:
                target_char = color.color_console_text(target_char)

        source_parts.append(source_char)
        target_parts.append(target_char)

    return "".join(source_parts), "".join(target_parts)
示例#3
0
    def test_inner_replacement_rev(self):
        """Two inner source items against one target item: either split is valid.

        Aligning ``a, x, y, c`` with ``a, b, c`` may pair ``b`` with ``x`` or
        with ``y``; the remaining source item is paired with None.
        """
        a = ['a', 'x', 'y', 'c']
        b = ['a', 'b', 'c']

        acceptable_alignments = [
            [('a', 'a'), ('x', None), ('y', 'b'), ('c', 'c')],
            [('a', 'a'), ('x', 'b'), ('y', None), ('c', 'c')],
        ]

        # assertIn reports the actual alignment on failure, whereas
        # assertTrue(... in ...) would only say "False is not true".
        self.assertIn(levenshtein_alignment(a, b), acceptable_alignments)
示例#4
0
    def from_lists(cls, ref, hyp):
        """Build an instance of ``cls`` from a reference and a hypothesis sequence.

        Computes the Levenshtein distance between ``ref`` and ``hyp``, aligns
        ``hyp`` against ``ref``, and derives from that alignment the
        insertion / deletion / substitution counts, a confusion table and a
        summary of the non-matching suffix.

        Args:
            ref: Reference sequence; its length is passed on to ``cls``.
            hyp: Hypothesis sequence compared against ``ref``.

        Returns:
            ``cls(1, len(ref), nb_errors, nb_subs, nb_inss, nb_dels,
            confusions, end_errors)``. The meaning of the leading ``1`` is
            defined by the constructor, which is not visible here.
        """
        ref_len = len(ref)
        nb_errors = levenshtein_distance(ref, hyp)

        # The alignment is requested hypothesis-first, so every pair is
        # (hyp_sym, ref_sym).
        aligned_pairs = levenshtein_alignment(hyp, ref)
        _, _, insertions, deletions, substitutions = edit_stats_for_alignment(
            aligned_pairs)

        # confusions[ref_sym][hyp_sym] counts how often ref_sym was aligned
        # with hyp_sym.
        confusions = defaultdict(Counter)
        for hyp_sym, ref_sym in aligned_pairs:
            confusions[ref_sym][hyp_sym] += 1

        # get_match_type takes the reference symbol first.
        match_types = [
            get_match_type(ref_sym, hyp_sym)
            for hyp_sym, ref_sym in aligned_pairs
        ]
        trailing_mismatches = get_non_matching_suffix(match_types)
        end_errors = BoundaryErrorsSummary(trailing_mismatches)

        return cls(
            1,
            ref_len,
            nb_errors,
            substitutions,
            insertions,
            deletions,
            confusions,
            end_errors,
        )
示例#5
0
 def test_insertion_only(self):
     """An empty first sequence pairs every item of the second with None."""
     source = []
     target = ['a', 'b', 'c']
     expected = [(None, item) for item in target]
     self.assertEqual(levenshtein_alignment(source, target), expected)
示例#6
0
 def test_deletion_only(self):
     """An empty second sequence pairs every item of the first with None."""
     source = ['a', 'b', 'c']
     target = []
     expected = [(item, None) for item in source]
     self.assertEqual(levenshtein_alignment(source, target), expected)
示例#7
0
 def test_trivial_deletion(self):
     """A trailing extra item in the first sequence is paired with None."""
     source = ['a', 'b']
     target = ['a']
     expected = [
         ('a', 'a'),
         ('b', None),
     ]
     self.assertEqual(levenshtein_alignment(source, target), expected)
示例#8
0
 def test_trivial_insertion(self):
     """A leading extra item in the second sequence is paired with None."""
     source = ['a']
     target = ['b', 'a']
     expected = [
         (None, 'b'),
         ('a', 'a'),
     ]
     self.assertEqual(levenshtein_alignment(source, target), expected)
示例#9
0
 def test_trivial_substitution(self):
     """Two different single items are paired with each other."""
     source = ['a']
     target = ['b']
     expected = [('a', 'b')]
     self.assertEqual(levenshtein_alignment(source, target), expected)
示例#10
0
 def test_trivial_match(self):
     """Two identical single items are paired with each other."""
     source = ['a']
     target = ['a']
     expected = [('a', 'a')]
     self.assertEqual(levenshtein_alignment(source, target), expected)
示例#11
0
 def test_alignment_to_eps_rev(self):
     """A None item inside the second sequence is paired with the item opposite it."""
     source = ['a', 'b', 'c']
     target = ['a', None, 'c']
     expected = [
         ('a', 'a'),
         ('b', None),
         ('c', 'c'),
     ]
     self.assertEqual(levenshtein_alignment(source, target), expected)