def test_character_detection(self):
    """Check that each subtitle is attributed to the matching speech's character.

    Two subtitles are aligned against two slightly different script
    speeches via ``self._sut``; the result must contain both subtitles,
    labelled "CHAR0" and "CHAR1" in order.
    """
    subtitles = [
        Subtitle(0, 0, text) for text in ("Hello sir", "my name is micha")
    ]
    script_entities = [
        ScriptEntity(character=name, text=line, type="speech")
        for name, line in (
            ("CHAR0", "Hallo Mr"),
            ("CHAR1", "my name is michi"),
        )
    ]

    alignment = self._sut(script_entities, subtitles, logger=self._logger)
    # Printed only as a debugging aid when reading test output.
    nw.pretty_print_grid(alignment)

    aligned = alignment.subtitles
    assert len(aligned) == 2
    assert aligned[0].character == "CHAR0"
    assert aligned[1].character == "CHAR1"
def test_character_difference_same_length(self):
    """Check speaker assignment for equal-length texts that differ in characters.

    The subtitle and the speech are both ten characters long but diverge
    after the common prefix "mille"; ``self._sut_nw`` is expected to still
    attribute the subtitle to "CHAR0".
    """
    s1 = "millegeeee"
    s2 = "mille asde"
    subtitles = mock_subtitles([s1])
    speeches = mock_speeches([("CHAR0", s2)])

    nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
    # Printed only as a debugging aid when reading test output.
    nw.pretty_print_grid(nw_alignment)

    # Compare by value: `is` against a str literal tests object identity,
    # which depends on string interning and raises a SyntaxWarning on
    # Python 3.8+.
    assert nw_alignment.subtitles[0].character == "CHAR0"
def test_bio_string(self):
    """Smoke-test ``self._sut`` on a short sequence-like string pair.

    Aligns "GCATGCU" against "GATTACA" and prints the grid; there are no
    assertions, so the test only fails if the call raises.
    """
    subtitle = Subtitle(0, 0, "GCATGCU")
    speech = ScriptEntity(character="CHAR0", text="GATTACA", type="speech")

    alignment = self._sut([speech], [subtitle], logger=self._logger)
    nw.pretty_print_grid(alignment)
def test_unseparated_words_2(self):
    """Compare the two aligners on a place name split into separate words.

    The subtitle reads "Millage Ville, Georgia." while the script has
    "Milledgeville, Georgia.". ``self._sut_dtw`` with the binary distance
    is expected to leave the subtitle unassigned, whereas ``self._sut_nw``
    is expected to attribute it to "CHAR0".
    """
    s1 = "Millage Ville, Georgia."
    s2 = "Milledgeville, Georgia."
    subtitles = mock_subtitles([s1])
    speeches = mock_speeches([("CHAR0", s2)])

    dtw_alignment = self._sut_dtw(
        speeches,
        subtitles,
        self._logger,
        distance_function=dtw.binary_distance,
    )
    dtw.pretty_print_grid(dtw_alignment)
    assert dtw_alignment.subtitles[0].character is None

    seperator()

    # The same subtitle and speech objects are deliberately reused here,
    # exactly as in the original test.
    nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
    nw.pretty_print_grid(nw_alignment)
    # Compare by value: `is` against a str literal tests object identity,
    # which depends on string interning and raises a SyntaxWarning on
    # Python 3.8+.
    assert nw_alignment.subtitles[0].character == "CHAR0"
def test_character_difference_repeat(self):
    """Compare the two aligners when the speech repeats its final character.

    The subtitle is "three" and the speech is "threeeee".
    ``self._sut_dtw`` with the Levenshtein distance is expected to leave
    the subtitle unassigned, whereas ``self._sut_nw`` is expected to
    attribute it to "CHAR0".
    """
    s1 = "three"
    s2 = "threeeee"

    subtitles = mock_subtitles([s1])
    speeches = mock_speeches([("CHAR0", s2)])
    dtw_alignment = self._sut_dtw(
        speeches,
        subtitles,
        self._logger,
        distance_function=dtw.levenstein_distance,
    )
    dtw.pretty_print_grid(dtw_alignment)
    assert dtw_alignment.subtitles[0].character is None

    seperator()

    # Fresh fixtures for the second aligner, as in the original test.
    subtitles = mock_subtitles([s1])
    speeches = mock_speeches([("CHAR0", s2)])
    nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
    nw.pretty_print_grid(nw_alignment)
    # Compare by value: `is` against a str literal tests object identity,
    # which depends on string interning and raises a SyntaxWarning on
    # Python 3.8+.
    assert nw_alignment.subtitles[0].character == "CHAR0"
def test_gap_penalty_alignment_adaptive(self):
    """Check that the adaptive gap penalty yields one contiguous gap.

    "GAAAAAAT" is aligned against "GAAT" using
    ``Weighting(1, -1, AdaptiveGapPenalty(-5, -1))``. The shorter string,
    read back from the horizontal index of the alignment, must come out
    as "GAA____T".
    """
    s1 = "GAAAAAAT"
    s2 = "GAAT"
    # nw.nw is fed (character, None) pairs; the second slot is unused here.
    s1_index = [(symbol, None) for symbol in s1]
    s2_index = [(symbol, None) for symbol in s2]

    weighting = Weighting(1, -1, AdaptiveGapPenalty(-5, -1))
    grid, traceback = nw.nw(s1_index, s2_index, weighting)
    alignment = nw.calculate_backtrace(grid, traceback, s1_index, s2_index)
    nw.pretty_print_grid(alignment)

    s1_string = "".join(char for char, _ in alignment.vertical_index)
    s2_string = "".join(char for char, _ in alignment.horizontal_index)
    print(s1_string)
    print(s2_string)

    assert s2_string == "GAA____T"
def test_bio_string_2(self):
    """Smoke-test ``self._sut`` with an adaptive gap penalty weighting.

    Aligns "CNJRQCLU" against "CJRQDLN" using
    ``Weighting(1, -1, AdaptiveGapPenalty(-5, -1))`` and prints the grid;
    there are no assertions, so the test only fails if the call raises.
    """
    subtitle = Subtitle(0, 0, "CNJRQCLU")
    speech = ScriptEntity(character="CHAR0", text="CJRQDLN", type="speech")
    weighting = Weighting(1, -1, AdaptiveGapPenalty(-5, -1))

    alignment = self._sut(
        [speech],
        [subtitle],
        weighting,
        logger=self._logger,
    )
    nw.pretty_print_grid(alignment)
def test_unseperated_words_short(self):
    """Check both aligners on "All right" versus "Alright".

    Both ``self._sut_dtw`` (with the Levenshtein distance) and
    ``self._sut_nw`` are expected to attribute the subtitle to "CHAR0".
    """
    s1 = "All right"
    s2 = "Alright"

    subtitles = mock_subtitles([s1])
    speeches = mock_speeches([("CHAR0", s2)])
    dtw_alignment = self._sut_dtw(
        speeches,
        subtitles,
        self._logger,
        distance_function=dtw.levenstein_distance,
    )
    dtw.pretty_print_grid(dtw_alignment)
    # Compare by value: `is` against a str literal tests object identity,
    # which depends on string interning and raises a SyntaxWarning on
    # Python 3.8+.
    assert dtw_alignment.subtitles[0].character == "CHAR0"

    seperator()

    # Fresh fixtures for the second aligner, as in the original test.
    subtitles = mock_subtitles([s1])
    speeches = mock_speeches([("CHAR0", s2)])
    nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
    nw.pretty_print_grid(nw_alignment)
    assert nw_alignment.subtitles[0].character == "CHAR0"
def test_unseperated_words(self):
    """Compare the two aligners on a noisy, repeated "all right" phrase.

    The subtitle is "Hey, All right. All reight." and the speech is
    "Alright...alright". ``self._sut_dtw`` with the binary distance is
    expected to leave the subtitle unassigned, whereas ``self._sut_nw``
    is expected to attribute it to "CHAR0".
    """
    s1 = "Hey, All right. All reight."
    s2 = "Alright...alright"

    subtitles = mock_subtitles([s1])
    speeches = mock_speeches([("CHAR0", s2)])
    dtw_alignment = self._sut_dtw(
        speeches,
        subtitles,
        self._logger,
        distance_function=dtw.binary_distance,
    )
    dtw.pretty_print_grid(dtw_alignment)
    assert dtw_alignment.subtitles[0].character is None

    seperator()

    # Fresh fixtures for the second aligner, as in the original test.
    subtitles = mock_subtitles([s1])
    speeches = mock_speeches([("CHAR0", s2)])
    nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
    nw.pretty_print_grid(nw_alignment)
    # Compare by value: `is` against a str literal tests object identity,
    # which depends on string interning and raises a SyntaxWarning on
    # Python 3.8+.
    assert nw_alignment.subtitles[0].character == "CHAR0"
def test_unseperated_words_long(self):
    """Compare the two aligners on a long sentence with all spaces removed.

    The subtitle is "Developed by Master Wuxi in the Third Dynasty" and
    the speech is the same words run together. ``self._sut_dtw`` with the
    binary distance is expected to leave the subtitle unassigned, whereas
    ``self._sut_nw`` is expected to attribute it to "CHAR0".
    """
    s1 = "Developed by Master Wuxi in the Third Dynasty"
    s2 = "DevelopedbyMasterWuxiInTheThirdDynasty"

    subtitles = mock_subtitles([s1])
    speeches = mock_speeches([("CHAR0", s2)])
    dtw_alignment = self._sut_dtw(
        speeches,
        subtitles,
        self._logger,
        distance_function=dtw.binary_distance,
    )
    dtw.pretty_print_grid(dtw_alignment)
    assert dtw_alignment.subtitles[0].character is None

    seperator()

    # Fresh fixtures for the second aligner, as in the original test.
    subtitles = mock_subtitles([s1])
    speeches = mock_speeches([("CHAR0", s2)])
    nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
    nw.pretty_print_grid(nw_alignment)
    # Compare by value: `is` against a str literal tests object identity,
    # which depends on string interning and raises a SyntaxWarning on
    # Python 3.8+.
    assert nw_alignment.subtitles[0].character == "CHAR0"