def test_1(self):
    """Align three subtitles with two speeches using the binary distance.

    Nothing is asserted; the resulting alignment grid is only printed.
    """
    subtitle_lines = (
        "hello my name is micha",
        "bla my bla is micha",
        "he is up",
    )
    subtitles = [Subtitle(0, 0, line) for line in subtitle_lines]

    speech_specs = (
        {
            "character": "CHAR0",
            "text": "hello my name is michi",
            "type": "speech"
        },
        {
            "character": "CHAR1",
            "text": "who he is",
            "type": "speech"
        },
    )
    script_entities = [ScriptEntity(**spec) for spec in speech_specs]

    result = self._sut(
        script_entities,
        subtitles,
        self._logger,
        dtw_merger.binary_distance,
        verbose=True,
    )
    dtw_merger.pretty_print_grid(result)
def test_character_detection(self):
    """Check that each of two subtitles is labelled with the expected character.

    The first subtitle is expected to end up as CHAR0 and the second as CHAR1.
    """
    subtitles = [
        Subtitle(0, 0, line) for line in ("Hello sir", "my name is micha")
    ]

    speech_specs = (
        {
            "character": "CHAR0",
            "text": "Hallo Mr",
            "type": "speech"
        },
        {
            "character": "CHAR1",
            "text": "my name is michi",
            "type": "speech"
        },
    )
    script_entities = [ScriptEntity(**spec) for spec in speech_specs]

    alignment = self._sut(script_entities, subtitles, logger=self._logger)
    nw.pretty_print_grid(alignment)

    merged = alignment.subtitles
    assert len(merged) == 2
    # The length check above guarantees zip visits both merged subtitles.
    for merged_subtitle, expected in zip(merged, ("CHAR0", "CHAR1")):
        assert merged_subtitle.character == expected
def run(url):
    """Build the list of script entities for the script found at *url*.

    The text and its encoding come from ``load``; the text is then passed
    through ``analyze_content`` and ``clean_script``, and every resulting
    dict is converted with ``ScriptEntity.from_dict``.
    """
    script_text, encoding = load(url)
    cleaned = clean_script(analyze_content(script_text, encoding))
    return [ScriptEntity.from_dict(entry) for entry in cleaned]
def test_2(self):
    """The subtitle "It's cute?" is expected to be attributed to CHAR0."""
    subtitles = [Subtitle(0, 0, "It's cute?")]

    speeches = (
        ("CHAR0", "I sort of like it. I mean, it's cute."),
        ("CHAR1", "Cute?"),
    )
    script_entities = [
        ScriptEntity(character, text) for character, text in speeches
    ]

    result = self._sut(
        script_entities,
        subtitles,
        self._logger,
        dtw.levenstein_distance,
        verbose=True,
    )
    dtw.pretty_print_grid(result)

    first_subtitle = result.subtitles[0]
    assert first_subtitle.character == "CHAR0"
def characters(ctx, subtitles_path, movie_script, algorithm, partial):
    """Merges movie scripts and subtitles."""
    # NOTE(review): the docstring above is left untouched because it may be
    # surfaced as command help text - confirm before rewording it.
    project = ctx.obj[PROJECT_KEY]
    logger = ctx.obj[LOGGER_KEY]
    if partial is None:
        partial = 1

    # Subtitles: an explicit path wins, then subtitles already stored in the
    # context, and finally the project's subtitles file.
    if subtitles_path:
        raw_subtitles = path_utils.load_json(subtitles_path)
        subtitles = Subtitle.from_dicts(raw_subtitles)
    elif SUBTITLES_KEY in ctx.obj:
        subtitles = ctx.obj[SUBTITLES_KEY]
    else:
        raw_subtitles = project.read(Project.File.subtitles)
        subtitles = Subtitle.from_dicts(raw_subtitles)

    # Script: same precedence (explicit file, context, project file).
    if movie_script is not None:
        raw_script = path_utils.load_json(movie_script)
        speeches = ScriptEntity.from_dicts(raw_script)
    elif SCRIPT_KEY in ctx.obj:
        speeches = ctx.obj[SCRIPT_KEY]
    else:
        raw_script = project.read(Project.File.script)
        raw_script = script_parser.clean_script(
            raw_script)  # remove when no longer required
        speeches = ScriptEntity.from_dicts(raw_script)

    # Only entities of type "speech" take part in the alignment.
    speeches = list(filter(lambda entity: entity.type == "speech", speeches))

    if algorithm == CharactersAlgorithm.dtw.value:
        merger = dtw_merger
    elif algorithm == CharactersAlgorithm.nw.value:
        merger = needleman_wunsch
    else:
        # Any other algorithm value falls back to the DTW merger.
        merger = dtw_merger
    alignment = merger.run(speeches, subtitles, partial, logger,
                           ctx.obj[VERBOSE_KEY])

    merged_subtitles = alignment.subtitles
    ctx.obj[SUBTITLES_KEY] = merged_subtitles

    serialised = objects_as_dict(merged_subtitles)
    if serialised:
        # Nothing is written when the serialised result is empty.
        project.write(serialised, Project.File.merged_subtitles)
    logger.write()
def test_1(self):
    """The subtitle "You're playing music?" is expected to go to CHAR1.

    CHAR1's line ("You guys are playing music?") is the expected match,
    not CHAR0's longer line.
    """
    subtitles = [Subtitle(0, 0, "You're playing music?")]

    speeches = (
        (
            "CHAR0",
            "Yeah, they really want you... they really want you... they really do."
        ),
        ("CHAR1", "You guys are playing music?"),
    )
    script_entities = [
        ScriptEntity(character, text) for character, text in speeches
    ]

    result = self._sut(
        script_entities,
        subtitles,
        self._logger,
        dtw.levenstein_distance,
        verbose=True,
    )
    dtw.pretty_print_grid(result)

    first_subtitle = result.subtitles[0]
    assert first_subtitle.character == "CHAR1"
def export(ctx, subtitles, movie_script):
    """Export merge script and subtitles. (Testing)"""
    # NOTE(review): the docstring above is left untouched because it may be
    # surfaced as command help text - confirm before rewording it.
    project = ctx.obj[PROJECT_KEY]

    # `subtitles` and `movie_script` are only used as truthy switches here;
    # the data itself is always read from the project files.
    if subtitles:
        merged = Subtitle.from_dicts(
            project.read(Project.File.merged_subtitles))
        export_subtitles(project, merged)

    if movie_script:
        script = ScriptEntity.from_dicts(project.read(Project.File.script))
        export_script(project, script)
def screenplay(ctx, url):
    """Parses movie scripts."""
    # NOTE(review): the docstring above is left untouched because it may be
    # surfaced as command help text - confirm before rewording it.
    project = ctx.obj[PROJECT_KEY]

    if not url:
        # No URL given: reuse the script stored in the project.
        stored = project.read(Project.File.script)
        entities = ScriptEntity.from_dicts(stored)
    else:
        entities = script_parser.run(url)
        serialised = objects_as_dict(entities)
        if serialised:
            # Nothing is written when the serialised result is empty.
            project.write(serialised, Project.File.script)

    if ctx.obj[VERBOSE_KEY]:
        pprint(entities)
    ctx.obj[SCRIPT_KEY] = entities
def test_punctuation_removal(self):
    """Align "m." (subtitle) with ".m" (script) and expect one index entry each.

    Presumably this holds because punctuation is stripped before alignment,
    as the test name suggests - the stripping itself is not visible here.
    """
    subtitles = [Subtitle(0, 0, "m.")]
    script_entities = [
        ScriptEntity(**{
            "character": "CHAR0",
            "text": ".m",
            "type": "speech"
        })
    ]

    # Script entities go first and subtitles second, matching the other
    # `logger=` tests in this file; the call previously passed them the other
    # way round.
    # NOTE(review): `_sut`'s signature is not visible here - confirm the order.
    alignment = self._sut(script_entities, subtitles, logger=self._logger)

    assert len(alignment.vertical_index) == 1
    assert len(alignment.horizontal_index) == 1
def test_bio_string(self):
    """Align the strings "GCATGCU" (subtitle) and "GATTACA" (script).

    Nothing is asserted; the resulting alignment grid is only printed.
    """
    subtitles = [Subtitle(0, 0, "GCATGCU")]

    speech_spec = {
        "character": "CHAR0",
        "text": "GATTACA",
        "type": "speech"
    }
    script_entities = [ScriptEntity(**speech_spec)]

    result = self._sut(script_entities, subtitles, logger=self._logger)
    nw.pretty_print_grid(result)
def test_split_words(self):
    """A subtitle that splits a script word in two should still match CHAR0.

    The subtitle reads "Millage Ville, Georgia." while the script has
    "Milledgeville, Georgia".
    """
    subtitles = [Subtitle(0, 0, "Millage Ville, Georgia.")]

    speech_spec = {
        "character": "CHAR0",
        "text": "Milledgeville, Georgia",
        "type": "speech"
    }
    script_entities = [ScriptEntity(**speech_spec)]

    result = self._sut(script_entities, subtitles, logger=self._logger)

    first_subtitle = result.subtitles[0]
    assert first_subtitle.character == "CHAR0"
def test_bio_string_2(self):
    """Align "CNJRQCLU" (subtitle) and "CJRQDLN" (script) with custom weights.

    Passes ``Weighting(1, -1, AdaptiveGapPenalty(-5, -1))`` to the aligner.
    Nothing is asserted; the resulting alignment grid is only printed.
    """
    subtitles = [Subtitle(0, 0, "CNJRQCLU")]

    speech_spec = {
        "character": "CHAR0",
        "text": "CJRQDLN",
        "type": "speech"
    }
    script_entities = [ScriptEntity(**speech_spec)]

    result = self._sut(
        script_entities,
        subtitles,
        Weighting(1, -1, AdaptiveGapPenalty(-5, -1)),
        logger=self._logger,
    )
    nw.pretty_print_grid(result)
def test_3(self):
    """Align one subtitle with one differently worded speech (binary distance).

    Nothing is asserted; the resulting alignment grid is only printed.
    """
    subtitles = [Subtitle(0, 0, "hello what are we doing")]

    speech_spec = {
        "character": "CHAR0",
        "text": "hello what is it we do",
        "type": "speech"
    }
    script_entities = [ScriptEntity(**speech_spec)]

    result = self._sut(
        script_entities,
        subtitles,
        self._logger,
        dtw_merger.binary_distance,
        verbose=True,
    )
    dtw_merger.pretty_print_grid(result)
def test_2(self):
    """Align a subtitle with a speech whose last two words are swapped.

    Uses ``dtw_merger.levenstein_distance``. Nothing is asserted; the
    resulting alignment grid is only printed.
    """
    subtitles = [Subtitle(0, 0, "hello my name is micha")]

    speech_spec = {
        "character": "CHAR0",
        "text": "hello my name micha is",
        "type": "speech"
    }
    script_entities = [ScriptEntity(**speech_spec)]

    result = self._sut(
        script_entities,
        subtitles,
        self._logger,
        dtw_merger.levenstein_distance,
        verbose=True,
    )
    dtw_merger.pretty_print_grid(result)
def mock_speeches(char_text_list):
    """Turn ``(character, text)`` pairs into a list of ``ScriptEntity`` objects.

    Each pair is passed positionally as ``ScriptEntity(character, text)``;
    the order of the input is preserved.
    """
    speeches = []
    for character, text in char_text_list:
        speeches.append(ScriptEntity(character, text))
    return speeches
def convert_to_script_entities(script):
    """Convert script dicts to ``ScriptEntity`` objects, keeping only speeches.

    Every dict is converted with ``ScriptEntity.from_dict`` first; afterwards
    only the entities whose ``type`` equals ``"speech"`` are returned, in
    their original order.
    """
    entities = []
    for script_dict in script:
        entities.append(ScriptEntity.from_dict(script_dict))
    return [entity for entity in entities if entity.type == "speech"]