示例#1
0
 def test_5(self):
     """Compare 'AVERY' against 'GARVEY': distance 3 plus the full alignment.

     The expected alignment is assembled entry by entry, passed through
     ``merge_none_tokens()``, printed next to the actual alignment, and
     finally compared for equality.
     """
     outcome = self.calculator.get_distance('AVERY', 'GARVEY')
     self.assertEqual(outcome.distance, 3)

     # (reference token, output tokens attached to it), in reference order.
     pairs = (
         ("A", ["G", "A", "R"]),
         ("V", ["V"]),
         ("E", ["E"]),
         ("R", []),
         ("Y", ["Y"]),
     )
     expected = AlignmentResult()
     for ref, outs in pairs:
         expected.add_token(ref_token=ref,
                            output_tokens=outs,
                            add_to_left=False)
     expected.merge_none_tokens()

     print(outcome.alignment_result)
     print(expected)
     self.assertEqual(outcome.alignment_result, expected)
示例#2
0
 def test_4(self):
     """Compare 'b' against 'batman': distance 5 and a one-entry alignment.

     The single reference token "b" is expected to carry all six output
     characters. Both alignments are printed before the equality check.
     """
     outcome = self.calculator.get_distance('b', 'batman')
     self.assertEqual(outcome.distance, 5)

     expected = AlignmentResult()
     # list("batman") == ["b", "a", "t", "m", "a", "n"]
     expected.add_token(ref_token="b",
                        output_tokens=list("batman"),
                        add_to_left=False)

     print(outcome.alignment_result)
     print(expected)
     self.assertEqual(outcome.alignment_result, expected)
示例#3
0
 def test_2nd(self):
     """Compare 'AV' against 'Abc': distance 2 and a two-entry alignment.

     Expected entries: "A" carries ["A", "b"] and "V" carries ["c"].
     Both alignments are printed before the equality check.
     """
     outcome = self.calculator.get_distance('AV', 'Abc')
     self.assertEqual(outcome.distance, 2)

     # (reference token, output tokens attached to it), in reference order.
     pairs = (
         ("A", ["A", "b"]),
         ("V", ["c"]),
     )
     expected = AlignmentResult()
     for ref, outs in pairs:
         expected.add_token(ref_token=ref,
                            output_tokens=outs,
                            add_to_left=False)

     print(outcome.alignment_result)
     print(expected)
     self.assertEqual(outcome.alignment_result, expected)
 def test_4(self):
     """Compare 'b' against 'batman': distance 5 and a one-entry alignment.

     In addition to the equality check, the four values returned by
     ``calculate_three_kinds_of_distance()`` on the expected alignment are
     printed (they are not asserted).
     """
     outcome = self.calculator.get_distance('b', 'batman')
     self.assertEqual(outcome.distance, 5)

     expected = AlignmentResult()
     # list("batman") == ["b", "a", "t", "m", "a", "n"]
     expected.add_token(ref_token="b",
                        output_tokens=list("batman"),
                        add_to_left=False)

     # Unpack into exactly four names so a differently-shaped return fails here.
     total, subs, ins, dels = expected.calculate_three_kinds_of_distance()
     print(total, subs, ins, dels)

     print(outcome.alignment_result)
     print(expected)
     self.assertEqual(outcome.alignment_result, expected)
 def test_2nd(self):
     """Compare 'AV' against 'Abc': distance 2 and a two-entry alignment.

     Expected entries: "A" carries ["A", "b"] and "V" carries ["c"]. The
     four values returned by ``calculate_three_kinds_of_distance()`` on the
     expected alignment are printed (not asserted) before the comparison.
     """
     outcome = self.calculator.get_distance('AV', 'Abc')
     self.assertEqual(outcome.distance, 2)

     # (reference token, output tokens attached to it), in reference order.
     pairs = (
         ("A", ["A", "b"]),
         ("V", ["c"]),
     )
     expected = AlignmentResult()
     for ref, outs in pairs:
         expected.add_token(ref_token=ref,
                            output_tokens=outs,
                            add_to_left=False)

     # Unpack into exactly four names so a differently-shaped return fails here.
     total, subs, ins, dels = expected.calculate_three_kinds_of_distance()
     print(total, subs, ins, dels)

     print(outcome.alignment_result)
     print(expected)
     self.assertEqual(outcome.alignment_result, expected)
示例#6
0
 def test_first(self):
     """Compare 'abc' against 'dfg': distance 3, one-to-one alignment.

     Each reference character is expected to be paired with the output
     character at the same position (a-d, b-f, c-g). Both alignments are
     printed before the equality check.
     """
     outcome = self.calculator.get_distance('abc', 'dfg')
     self.assertEqual(outcome.distance, 3)

     expected = AlignmentResult()
     for ref_char, out_char in zip("abc", "dfg"):
         expected.add_token(ref_token=ref_char,
                            output_tokens=[out_char],
                            add_to_left=False)

     print(outcome.alignment_result)
     print(expected)
     self.assertEqual(outcome.alignment_result, expected)
    def test_first(self):
        """Compare 'abc' against 'dfg': distance 3, one-to-one alignment.

        Each reference character is expected to be paired with the output
        character at the same position (a-d, b-f, c-g). The four values
        returned by ``calculate_three_kinds_of_distance()`` on the expected
        alignment are printed (not asserted) before the comparison.
        """
        outcome = self.calculator.get_distance('abc', 'dfg')
        self.assertEqual(outcome.distance, 3)

        expected = AlignmentResult()
        for ref_char, out_char in zip("abc", "dfg"):
            expected.add_token(ref_token=ref_char,
                               output_tokens=[out_char],
                               add_to_left=False)

        # Unpack into exactly four names so a differently-shaped return
        # fails here rather than silently printing something else.
        total, subs, ins, dels = expected.calculate_three_kinds_of_distance()
        print(total, subs, ins, dels)

        print(outcome.alignment_result)
        print(expected)
        self.assertEqual(outcome.alignment_result, expected)
示例#8
0
                tmp_result = x
    if tmp_result is None:
        return None
    calculator2 = UKKLevenshteinDistanceCalculator(tokenizer=WordTokenizer(),
                                                   get_alignment_result=True)
    update_result = calculator2.get_distance(tmp_result,
                                             output_string).alignment_result
    return update_result


# Module-level scratch setup: create an inflect engine and hand-build an
# AlignmentResult from a series of add_token() calls.
# NOTE(review): `p` is not referenced anywhere in the visible lines.
p = inflect.engine()

alignment_result = AlignmentResult()

# Single-character reference tokens; "g" and "2" get an empty output list.
alignment_result.add_token(ref_token="w",
                           output_tokens=["w"],
                           add_to_left=False)
alignment_result.add_token(ref_token="5",
                           output_tokens=["e"],
                           add_to_left=False)
alignment_result.add_token(ref_token="r",
                           output_tokens=["r"],
                           add_to_left=False)
alignment_result.add_token(ref_token="g", output_tokens=[], add_to_left=False)
alignment_result.add_token(ref_token="2", output_tokens=[], add_to_left=False)
# Positional call with add_to_left omitted, so add_token's own default is
# used here (unlike every other call in this run, which passes False).
alignment_result.add_token("1", ["one"])

# Digit-string reference tokens paired with spelled-out word tokens
# (plus two extra tokens 'a', 'c' on the "21" entry).
alignment_result.add_token("21", ["twenty-one", 'a', 'c'], add_to_left=False)

alignment_result.add_token("312", ["three", "one", "two"], add_to_left=False)
alignment_result.add_token(ref_token="e",
示例#9
0
 def test_9(self):
     """Compare 'happyeveryday' against 'happybirthday': distance 5.

     Both strings have 13 characters, and the expected alignment pairs each
     reference character with the single output character at the same
     position. Both alignments are printed before the equality check.
     """
     reference = 'happyeveryday'
     hypothesis = 'happybirthday'
     outcome = self.calculator.get_distance(reference, hypothesis)
     self.assertEqual(outcome.distance, 5)

     expected = AlignmentResult()
     for ref_char, out_char in zip(reference, hypothesis):
         expected.add_token(ref_token=ref_char,
                            output_tokens=[out_char],
                            add_to_left=False)

     print(outcome.alignment_result)
     print(expected)
     self.assertEqual(outcome.alignment_result, expected)
示例#10
0
 def test_9(self):
     """Compare 'helloa a a ?' against 'HHHHHHHoooooo': distance 13.

     The expected alignment gives the first reference character two output
     tokens and every other reference character exactly one. Both
     alignments are printed before the equality check.
     """
     outcome = self.calculator.get_distance('helloa a a ?', 'HHHHHHHoooooo')
     self.assertEqual(outcome.distance, 13)

     # (reference token, output tokens attached to it), in reference order.
     pairs = (
         ("h", ["H", "H"]),
         ("e", ["H"]),
         ("l", ["H"]),
         ("l", ["H"]),
         ("o", ["H"]),
         ("a", ["H"]),
         (" ", ["o"]),
         ("a", ["o"]),
         (" ", ["o"]),
         ("a", ["o"]),
         (" ", ["o"]),
         ("?", ["o"]),
     )
     expected = AlignmentResult()
     for ref, outs in pairs:
         expected.add_token(ref_token=ref,
                            output_tokens=outs,
                            add_to_left=False)

     print(outcome.alignment_result)
     print(expected)
     self.assertEqual(outcome.alignment_result, expected)
示例#11
0
 def test_8(self):
     """Compare 'jijizhazha' against 'hahahaaaa???': distance 10.

     In the expected alignment the leading "j" has no output tokens and the
     final "a" carries the trailing "?" characters. Both alignments are
     printed before the equality check.
     """
     outcome = self.calculator.get_distance('jijizhazha', 'hahahaaaa???')
     self.assertEqual(outcome.distance, 10)

     # (reference token, output tokens attached to it), in reference order.
     pairs = (
         ("j", []),
         ("i", ["h"]),
         ("j", ["a"]),
         ("i", ["h"]),
         ("z", ["a"]),
         ("h", ["h"]),
         ("a", ["a"]),
         ("z", ["a"]),
         ("h", ["a"]),
         ("a", ["a", "?", "?", "?"]),
     )
     expected = AlignmentResult()
     for ref, outs in pairs:
         expected.add_token(ref_token=ref,
                            output_tokens=outs,
                            add_to_left=False)

     print(outcome.alignment_result)
     print(expected)
     self.assertEqual(outcome.alignment_result, expected)
示例#12
0
 def test_7(self):
     """Compare 'werewolf' against 'were  wolf': distance 2.

     The two extra spaces in the output are expected to hang off the second
     "e" of the reference. The expected alignment is passed through
     ``merge_none_tokens()`` and printed alongside the actual one before the
     equality check.
     """
     outcome = self.calculator.get_distance('werewolf', 'were  wolf')
     self.assertEqual(outcome.distance, 2)

     # (reference token, output tokens attached to it), in reference order.
     pairs = (
         ("w", ["w"]),
         ("e", ["e"]),
         ("r", ["r"]),
         ("e", ["e", " ", " "]),
         ("w", ["w"]),
         ("o", ["o"]),
         ("l", ["l"]),
         ("f", ["f"]),
     )
     expected = AlignmentResult()
     for ref, outs in pairs:
         expected.add_token(ref_token=ref,
                            output_tokens=outs,
                            add_to_left=False)
     expected.merge_none_tokens()

     print(outcome.alignment_result)
     print(expected)
     self.assertEqual(outcome.alignment_result, expected)
示例#13
0
 def test_6(self):
     """Compare 'ernest' against 'nester': distance 4.

     In the expected alignment the leading "e" and "r" have no output tokens
     and the final "t" carries the trailing "e", "r". The expected alignment
     is passed through ``merge_none_tokens()`` and printed alongside the
     actual one before the equality check.
     """
     outcome = self.calculator.get_distance('ernest', 'nester')
     self.assertEqual(outcome.distance, 4)

     # (reference token, output tokens attached to it), in reference order.
     pairs = (
         ("e", []),
         ("r", []),
         ("n", ["n"]),
         ("e", ["e"]),
         ("s", ["s"]),
         ("t", ["t", "e", "r"]),
     )
     expected = AlignmentResult()
     for ref, outs in pairs:
         expected.add_token(ref_token=ref,
                            output_tokens=outs,
                            add_to_left=False)
     expected.merge_none_tokens()

     print(outcome.alignment_result)
     print(expected)
     self.assertEqual(outcome.alignment_result, expected)
示例#14
0
import inflect
p = inflect.engine()
from transcription_compare.levenshtein_distance_calculator import UKKLevenshteinDistanceCalculator
from transcription_compare.tokenizer import CharacterTokenizer, WordTokenizer
from transcription_compare.utils import SimpleReferenceCombinationGenerator

from transcription_compare.results import AlignmentResult
# Hand-built AlignmentResult used as module-level sample data.
alignment_result = AlignmentResult()
# Entries added with ref_token=None, each carrying one output token.
# NOTE(review): presumably these are output tokens with no reference
# counterpart — confirm against AlignmentResult.add_token.
alignment_result.add_token(ref_token=None,
                           output_tokens=["1"],
                           add_to_left=False)
alignment_result.add_token(ref_token=None,
                           output_tokens=["2"],
                           add_to_left=False)
alignment_result.add_token(ref_token=None,
                           output_tokens=["3"],
                           add_to_left=False)
# A real reference token ("1") between the None entries.
alignment_result.add_token(ref_token="1",
                           output_tokens=["4"],
                           add_to_left=False)
alignment_result.add_token(ref_token=None,
                           output_tokens=["5"],
                           add_to_left=False)
# Word-level entries: one reference word with three output words, then one
# reference word with a single different output word.
alignment_result.add_token(ref_token="ha",
                           output_tokens=["in", "and", "some"],
                           add_to_left=False)
alignment_result.add_token(ref_token="someday",
                           output_tokens=["days"],
                           add_to_left=False)
alignment_result.add_token(ref_token="one",
                           output_tokens=["1"],
 def test_10(self):
     """Compare 'happyeveryday' against 'happybirthday': distance 5.

     Both strings have 13 characters, and the expected alignment pairs each
     reference character with the single output character at the same
     position. The four values returned by
     ``calculate_three_kinds_of_distance()`` on the expected alignment are
     printed (not asserted) before the comparison.
     """
     reference = 'happyeveryday'
     hypothesis = 'happybirthday'
     outcome = self.calculator.get_distance(reference, hypothesis)
     self.assertEqual(outcome.distance, 5)

     expected = AlignmentResult()
     for ref_char, out_char in zip(reference, hypothesis):
         expected.add_token(ref_token=ref_char,
                            output_tokens=[out_char],
                            add_to_left=False)

     # Unpack into exactly four names so a differently-shaped return fails here.
     total, subs, ins, dels = expected.calculate_three_kinds_of_distance()
     print(total, subs, ins, dels)

     print(outcome.alignment_result)
     print(expected)
     self.assertEqual(outcome.alignment_result, expected)
 def test_9(self):
     """Compare 'helloa a a ?' against 'HHHHHHHoooooo': distance 13.

     The actual alignment and distance are printed up front, before any
     assertion runs. The expected alignment gives the first reference
     character two output tokens and every other one exactly one; the four
     values from ``calculate_three_kinds_of_distance()`` on it are printed
     (not asserted) before the final comparison.
     """
     outcome = self.calculator.get_distance('helloa a a ?', 'HHHHHHHoooooo')
     print(outcome.alignment_result)
     print(outcome.distance)
     self.assertEqual(outcome.distance, 13)

     # (reference token, output tokens attached to it), in reference order.
     pairs = (
         ("h", ["H", "H"]),
         ("e", ["H"]),
         ("l", ["H"]),
         ("l", ["H"]),
         ("o", ["H"]),
         ("a", ["H"]),
         (" ", ["o"]),
         ("a", ["o"]),
         (" ", ["o"]),
         ("a", ["o"]),
         (" ", ["o"]),
         ("?", ["o"]),
     )
     expected = AlignmentResult()
     for ref, outs in pairs:
         expected.add_token(ref_token=ref,
                            output_tokens=outs,
                            add_to_left=False)

     # Unpack into exactly four names so a differently-shaped return fails here.
     total, subs, ins, dels = expected.calculate_three_kinds_of_distance()
     print(total, subs, ins, dels)

     print(outcome.alignment_result)
     print(expected)
     self.assertEqual(outcome.alignment_result, expected)