def error_chars(transcription, ground_truth, missing_char="_"):
    """Align a transcription with its ground truth and flag the differences.

    Both inputs are split into items with ``list(...)`` and aligned with
    ``sequence_alignment.levenshtein_alignment``.  Every aligned pair yields
    one entry in each returned value.

    Args:
        transcription: Sequence to evaluate (the "source" side of each pair).
        ground_truth: Sequence to compare against (the "target" side).
        missing_char: Placeholder emitted where one side of a pair is
            ``None`` (a gap in the alignment).

    Returns:
        A 4-tuple ``(source_transcription, target_transcription,
        errors_source, errors_target)``:

        * the two strings are the aligned sides with gaps replaced by
          ``missing_char``;
        * ``errors_source`` holds ``True`` for every pair whose sides differ;
        * ``errors_target`` holds ``True`` only for differing pairs whose
          target side was a gap, so plain substitutions are flagged on the
          source side only.
    """
    alignment = sequence_alignment.levenshtein_alignment(
        list(transcription), list(ground_truth))

    # Collect the pieces and join once at the end instead of growing the
    # strings with repeated ``+=``.
    source_parts = []
    target_parts = []
    errors_source = []
    errors_target = []

    for source_char, target_char in alignment:
        source_is_error = False
        target_is_error = False
        if source_char != target_char:
            source_is_error = True
            if source_char is None:
                source_char = missing_char
            if target_char is None:
                target_char = missing_char
                target_is_error = True
        errors_source.append(source_is_error)
        errors_target.append(target_is_error)
        source_parts.append(source_char)
        target_parts.append(target_char)

    return ("".join(source_parts), "".join(target_parts),
            errors_source, errors_target)
def console_transcription_errors(transcription, ground_truth,
                                 color=Color.RED, missing_char="_"):
    """Render a transcription and its ground truth with errors colored.

    Both inputs are split into items with ``list(...)`` and aligned with
    ``sequence_alignment.levenshtein_alignment``.  Wherever the two sides of
    an aligned pair differ, the source side is passed through
    ``color.color_console_text``; the target side is colored only when it
    was a gap (``None``) in the alignment.

    Args:
        transcription: Sequence to evaluate (the "source" side of each pair).
        ground_truth: Sequence to compare against (the "target" side).
        color: Object exposing ``color_console_text(text)``, used to wrap
            the highlighted pieces.
        missing_char: Placeholder emitted where one side of a pair is a gap.

    Returns:
        A 2-tuple ``(source_transcription, target_transcription)`` of the
        rendered strings.
    """
    alignment = sequence_alignment.levenshtein_alignment(
        list(transcription), list(ground_truth))

    # Collect the pieces and join once at the end instead of growing the
    # strings with repeated ``+=``.
    source_parts = []
    target_parts = []

    for source_char, target_char in alignment:
        if source_char != target_char:
            if source_char is None:
                source_char = missing_char
            # The source side of every differing pair is highlighted.
            source_char = color.color_console_text(source_char)
            # The target side is highlighted only where it is a gap.
            if target_char is None:
                target_char = color.color_console_text(missing_char)
        source_parts.append(source_char)
        target_parts.append(target_char)

    return "".join(source_parts), "".join(target_parts)
def test_inner_replacement_rev(self):
    """Two inner symbols aligned against one: either pairing is accepted.

    The single 'b' of the second sequence may be paired with 'x' or with
    'y'; the other one is then paired with ``None``.
    """
    a = ['a', 'x', 'y', 'c']
    b = ['a', 'b', 'c']
    acceptable_alignments = [
        [('a', 'a'), ('x', None), ('y', 'b'), ('c', 'c')],
        [('a', 'a'), ('x', 'b'), ('y', None), ('c', 'c')],
    ]
    # assertIn shows the actual alignment on failure, whereas
    # assertTrue(... in ...) would only report "False is not true".
    self.assertIn(levenshtein_alignment(a, b), acceptable_alignments)
def from_lists(cls, ref, hyp):
    """Build an instance from a reference and a hypothesis sequence.

    The hypothesis is aligned against the reference with
    ``levenshtein_alignment(hyp, ref)``, so every alignment pair is
    ``(hypothesis symbol, reference symbol)``.  From that alignment the
    method derives insertion/deletion/substitution counts, a confusion
    table indexed as ``confusions[ref_symbol][hyp_symbol]``, and a summary
    of the non-matching suffix of the alignment.

    Args:
        ref: Reference sequence.
        hyp: Hypothesis sequence.

    Returns:
        ``cls(1, len(ref), distance, substitutions, insertions, deletions,
        confusions, end_errors)``.
    """
    reference_length = len(ref)
    distance = levenshtein_distance(ref, hyp)
    pairs = levenshtein_alignment(hyp, ref)

    # Only the last three of the five returned statistics are used here.
    _first, _second, insertions, deletions, substitutions = \
        edit_stats_for_alignment(pairs)

    confusions = defaultdict(Counter)
    for hyp_symbol, ref_symbol in pairs:
        confusions[ref_symbol][hyp_symbol] += 1

    # get_match_type receives the reference symbol first, then the
    # hypothesis symbol -- the reverse of the order inside each pair.
    match_types = [
        get_match_type(ref_symbol, hyp_symbol)
        for hyp_symbol, ref_symbol in pairs
    ]
    end_errors = BoundaryErrorsSummary(get_non_matching_suffix(match_types))

    # NOTE(review): the meaning of the leading literal 1 is not visible
    # from here -- presumably a count of sequence pairs; confirm against
    # the constructor.
    return cls(
        1,
        reference_length,
        distance,
        substitutions,
        insertions,
        deletions,
        confusions,
        end_errors,
    )
def test_insertion_only(self):
    """An empty first sequence pairs None with every second-sequence item."""
    source = []
    target = ['a', 'b', 'c']
    expected = [
        (None, 'a'),
        (None, 'b'),
        (None, 'c'),
    ]
    actual = levenshtein_alignment(source, target)
    self.assertEqual(actual, expected)
def test_deletion_only(self):
    """An empty second sequence pairs every first-sequence item with None."""
    source = ['a', 'b', 'c']
    target = []
    expected = [
        ('a', None),
        ('b', None),
        ('c', None),
    ]
    actual = levenshtein_alignment(source, target)
    self.assertEqual(actual, expected)
def test_trivial_deletion(self):
    """A trailing extra item in the first sequence is paired with None."""
    source = ['a', 'b']
    target = ['a']
    expected = [('a', 'a'), ('b', None)]
    actual = levenshtein_alignment(source, target)
    self.assertEqual(actual, expected)
def test_trivial_insertion(self):
    """A leading extra item in the second sequence is paired with None."""
    source = ['a']
    target = ['b', 'a']
    expected = [(None, 'b'), ('a', 'a')]
    actual = levenshtein_alignment(source, target)
    self.assertEqual(actual, expected)
def test_trivial_substitution(self):
    """Two different single items are aligned as one substituted pair."""
    source = ['a']
    target = ['b']
    expected = [('a', 'b')]
    actual = levenshtein_alignment(source, target)
    self.assertEqual(actual, expected)
def test_trivial_match(self):
    """Two identical single items are aligned as one matching pair."""
    source = ['a']
    target = ['a']
    expected = [('a', 'a')]
    actual = levenshtein_alignment(source, target)
    self.assertEqual(actual, expected)
def test_alignment_to_eps_rev(self):
    """A None item inside the second sequence is aligned like any other.

    The second sequence already contains ``None`` in its middle position;
    the expected alignment pairs it with the middle item of the first.
    """
    source = ['a', 'b', 'c']
    target = ['a', None, 'c']
    expected = [
        ('a', 'a'),
        ('b', None),
        ('c', 'c'),
    ]
    actual = levenshtein_alignment(source, target)
    self.assertEqual(actual, expected)