def test_read_from_file(self): os.write( self.script_description, """1 00:00:17,500 --> 00:00:18,870 Yeah, really! 2 00:00:17,500 --> 00:00:18,870 3 00:00:17,500 --> 00:00:18,870 House number 35 4 00:00:21,250 --> 00:00:22,750 Serves you right.""") parsed = SrtScript.from_file(self.script_path).events self.assertEquals(17.5, parsed[0].start) self.assertEquals(18.87, parsed[0].end) self.assertEquals("Yeah, really!", parsed[0].text) self.assertEquals(17.5, parsed[1].start) self.assertEquals(18.87, parsed[1].end) self.assertEquals("", parsed[1].text) self.assertEquals(17.5, parsed[2].start) self.assertEquals(18.87, parsed[2].end) self.assertEquals("House number\n35", parsed[2].text) self.assertEquals(21.25, parsed[3].start) self.assertEquals(22.75, parsed[3].end) self.assertEquals("Serves you right.", parsed[3].text)
def test_read_from_file(self): os.write(self.script_description, """1 00:00:17,500 --> 00:00:18,870 Yeah, really! 2 00:00:17,500 --> 00:00:18,870 3 00:00:17,500 --> 00:00:18,870 House number 35 4 00:00:21,250 --> 00:00:22,750 Serves you right.""") parsed = SrtScript.from_file(self.script_path).events self.assertEquals(17.5, parsed[0].start) self.assertEquals(18.87, parsed[0].end) self.assertEquals("Yeah, really!", parsed[0].text) self.assertEquals(17.5, parsed[1].start) self.assertEquals(18.87, parsed[1].end) self.assertEquals("", parsed[1].text) self.assertEquals(17.5, parsed[2].start) self.assertEquals(18.87, parsed[2].end) self.assertEquals("House number\n35", parsed[2].text) self.assertEquals(21.25, parsed[3].start) self.assertEquals(22.75, parsed[3].end) self.assertEquals("Serves you right.", parsed[3].text)
def run(args): ignore_chapters = args.chapters_file is not None and args.chapters_file.lower() == 'none' write_plot = plot_enabled and args.plot_path if write_plot: plt.clf() plt.ylabel('Shift, seconds') plt.xlabel('Event index') # first part should do all possible validation and should NOT take significant amount of time check_file_exists(args.source, 'Source') check_file_exists(args.destination, 'Destination') check_file_exists(args.src_timecodes, 'Source timecodes') check_file_exists(args.dst_timecodes, 'Source timecodes') check_file_exists(args.script_file, 'Script') if not ignore_chapters: check_file_exists(args.chapters_file, 'Chapters') if args.src_keyframes not in ('auto', 'make'): check_file_exists(args.src_keyframes, 'Source keyframes') if args.dst_keyframes not in ('auto', 'make'): check_file_exists(args.dst_keyframes, 'Destination keyframes') if (args.src_timecodes and args.src_fps) or (args.dst_timecodes and args.dst_fps): raise SushiError('Both fps and timecodes file cannot be specified at the same time') src_demuxer = Demuxer(args.source) dst_demuxer = Demuxer(args.destination) if src_demuxer.is_wav and not args.script_file: raise SushiError("Script file isn't specified") if (args.src_keyframes and not args.dst_keyframes) or (args.dst_keyframes and not args.src_keyframes): raise SushiError('Either none or both of src and dst keyframes should be provided') create_directory_if_not_exists(args.temp_dir) # selecting source audio if src_demuxer.is_wav: src_audio_path = args.source else: src_audio_path = format_full_path(args.temp_dir, args.source, '.sushi.wav') src_demuxer.set_audio(stream_idx=args.src_audio_idx, output_path=src_audio_path, sample_rate=args.sample_rate) # selecting destination audio if dst_demuxer.is_wav: dst_audio_path = args.destination else: dst_audio_path = format_full_path(args.temp_dir, args.destination, '.sushi.wav') dst_demuxer.set_audio(stream_idx=args.dst_audio_idx, output_path=dst_audio_path, sample_rate=args.sample_rate) # selecting source subtitles if args.script_file: src_script_path = args.script_file else: stype = src_demuxer.get_subs_type(args.src_script_idx) src_script_path = format_full_path(args.temp_dir, args.source, '.sushi'+ stype) src_demuxer.set_script(stream_idx=args.src_script_idx, output_path=src_script_path) script_extension = get_extension(src_script_path) if script_extension not in ('.ass', '.srt'): raise SushiError('Unknown script type') # selection destination subtitles if args.output_script: dst_script_path = args.output_script dst_script_extension = get_extension(args.output_script) if dst_script_extension != script_extension: raise SushiError("Source and destination script file types don't match ({0} vs {1})" .format(script_extension, dst_script_extension)) else: dst_script_path = format_full_path(args.temp_dir, args.destination, '.sushi' + script_extension) # selecting chapters if args.grouping and not ignore_chapters: if args.chapters_file: if get_extension(args.chapters_file) == '.xml': chapter_times = chapters.get_xml_start_times(args.chapters_file) else: chapter_times = chapters.get_ogm_start_times(args.chapters_file) elif not src_demuxer.is_wav: chapter_times = src_demuxer.chapters output_path = format_full_path(args.temp_dir, src_demuxer.path, ".sushi.chapters.txt") src_demuxer.set_chapters(output_path) else: chapter_times = [] else: chapter_times = [] # selecting keyframes and timecodes if args.src_keyframes: def select_keyframes(file_arg, demuxer): auto_file = format_full_path(args.temp_dir, demuxer.path, '.sushi.keyframes.txt') if file_arg in ('auto', 'make'): if file_arg == 'make' or not os.path.exists(auto_file): if not demuxer.has_video: raise SushiError("Cannot make keyframes for {0} because it doesn't have any video!" .format(demuxer.path)) demuxer.set_keyframes(output_path=auto_file) return auto_file else: return file_arg def select_timecodes(external_file, fps_arg, demuxer): if external_file: return external_file elif fps_arg: return None elif demuxer.has_video: path = format_full_path(args.temp_dir, demuxer.path, '.sushi.timecodes.txt') demuxer.set_timecodes(output_path=path) return path else: raise SushiError('Fps, timecodes or video files must be provided if keyframes are used') src_keyframes_file = select_keyframes(args.src_keyframes, src_demuxer) dst_keyframes_file = select_keyframes(args.dst_keyframes, dst_demuxer) src_timecodes_file = select_timecodes(args.src_timecodes, args.src_fps, src_demuxer) dst_timecodes_file = select_timecodes(args.dst_timecodes, args.dst_fps, dst_demuxer) # after this point nothing should fail so it's safe to start slow operations # like running the actual demuxing src_demuxer.demux() dst_demuxer.demux() try: if args.src_keyframes: src_timecodes = Timecodes.cfr(args.src_fps) if args.src_fps else Timecodes.from_file(src_timecodes_file) src_keytimes = [src_timecodes.get_frame_time(f) for f in keyframes.parse_keyframes(src_keyframes_file)] dst_timecodes = Timecodes.cfr(args.dst_fps) if args.dst_fps else Timecodes.from_file(dst_timecodes_file) dst_keytimes = [dst_timecodes.get_frame_time(f) for f in keyframes.parse_keyframes(dst_keyframes_file)] script = AssScript.from_file(src_script_path) if script_extension == '.ass' else SrtScript.from_file(src_script_path) script.sort_by_time() src_stream = WavStream(src_audio_path, sample_rate=args.sample_rate, sample_type=args.sample_type) dst_stream = WavStream(dst_audio_path, sample_rate=args.sample_rate, sample_type=args.sample_type) search_groups = prepare_search_groups(script.events, source_duration=src_stream.duration_seconds, chapter_times=chapter_times, max_ts_duration=args.max_ts_duration, max_ts_distance=args.max_ts_distance) calculate_shifts(src_stream, dst_stream, search_groups, normal_window=args.window, max_window=args.max_window, rewind_thresh=args.rewind_thresh if args.grouping else 0) events = script.events if write_plot: plt.plot([x.shift for x in events], label='From audio') if args.grouping: if not ignore_chapters and chapter_times: groups = groups_from_chapters(events, chapter_times) for g in groups: fix_near_borders(g) smooth_events([x for x in g if not x.linked], args.smooth_radius) groups = split_broken_groups(groups) else: fix_near_borders(events) smooth_events([x for x in events if not x.linked], args.smooth_radius) groups = detect_groups(events) if write_plot: plt.plot([x.shift for x in events], label='Borders fixed') for g in groups: start_shift = g[0].shift end_shift = g[-1].shift avg_shift = average_shifts(g) logging.info(u'Group (start: {0}, end: {1}, lines: {2}), ' u'shifts (start: {3}, end: {4}, average: {5})' .format(format_time(g[0].start), format_time(g[-1].end), len(g), start_shift, end_shift, avg_shift)) if args.src_keyframes: for e in (x for x in events if x.linked): e.resolve_link() for g in groups: snap_groups_to_keyframes(g, chapter_times, args.max_ts_duration, args.max_ts_distance, src_keytimes, dst_keytimes, src_timecodes, dst_timecodes, args.max_kf_distance, args.kf_mode) else: fix_near_borders(events) if write_plot: plt.plot([x.shift for x in events], label='Borders fixed') if args.src_keyframes: for e in (x for x in events if x.linked): e.resolve_link() snap_groups_to_keyframes(events, chapter_times, args.max_ts_duration, args.max_ts_distance, src_keytimes, dst_keytimes, src_timecodes, dst_timecodes, args.max_kf_distance, args.kf_mode) for event in events: event.apply_shift() script.save_to_file(dst_script_path) if write_plot: plt.plot([x.shift + (x._start_shift + x._end_shift)/2.0 for x in events], label='After correction') plt.legend(fontsize=5, frameon=False, fancybox=False) plt.savefig(args.plot_path, dpi=300) finally: if args.cleanup: src_demuxer.cleanup() dst_demuxer.cleanup()
def run(args): ignore_chapters = args.chapters_file is not None and args.chapters_file.lower( ) == 'none' write_plot = plot_enabled and args.plot_path if write_plot: plt.clf() plt.ylabel('Shift, seconds') plt.xlabel('Event index') # first part should do all possible validation and should NOT take significant amount of time check_file_exists(args.source, 'Source') check_file_exists(args.destination, 'Destination') check_file_exists(args.src_timecodes, 'Source timecodes') check_file_exists(args.dst_timecodes, 'Source timecodes') check_file_exists(args.script_file, 'Script') if not ignore_chapters: check_file_exists(args.chapters_file, 'Chapters') if args.src_keyframes not in ('auto', 'make'): check_file_exists(args.src_keyframes, 'Source keyframes') if args.dst_keyframes not in ('auto', 'make'): check_file_exists(args.dst_keyframes, 'Destination keyframes') if (args.src_timecodes and args.src_fps) or (args.dst_timecodes and args.dst_fps): raise SushiError( 'Both fps and timecodes file cannot be specified at the same time') src_demuxer = Demuxer(args.source) dst_demuxer = Demuxer(args.destination) if src_demuxer.is_wav and not args.script_file: raise SushiError("Script file isn't specified") if (args.src_keyframes and not args.dst_keyframes) or (args.dst_keyframes and not args.src_keyframes): raise SushiError( 'Either none or both of src and dst keyframes should be provided') create_directory_if_not_exists(args.temp_dir) # selecting source audio if src_demuxer.is_wav: src_audio_path = args.source else: src_audio_path = format_full_path(args.temp_dir, args.source, '.sushi.wav') src_demuxer.set_audio(stream_idx=args.src_audio_idx, output_path=src_audio_path, sample_rate=args.sample_rate) # selecting destination audio if dst_demuxer.is_wav: dst_audio_path = args.destination else: dst_audio_path = format_full_path(args.temp_dir, args.destination, '.sushi.wav') dst_demuxer.set_audio(stream_idx=args.dst_audio_idx, output_path=dst_audio_path, sample_rate=args.sample_rate) # selecting source subtitles if args.script_file: src_script_path = args.script_file else: stype = src_demuxer.get_subs_type(args.src_script_idx) src_script_path = format_full_path(args.temp_dir, args.source, '.sushi' + stype) src_demuxer.set_script(stream_idx=args.src_script_idx, output_path=src_script_path) script_extension = get_extension(src_script_path) if script_extension not in ('.ass', '.srt'): raise SushiError('Unknown script type') # selection destination subtitles if args.output_script: dst_script_path = args.output_script dst_script_extension = get_extension(args.output_script) if dst_script_extension != script_extension: raise SushiError( "Source and destination script file types don't match ({0} vs {1})" .format(script_extension, dst_script_extension)) else: dst_script_path = format_full_path(args.temp_dir, args.destination, '.sushi' + script_extension) # selecting chapters if args.grouping and not ignore_chapters: if args.chapters_file: if get_extension(args.chapters_file) == '.xml': chapter_times = chapters.get_xml_start_times( args.chapters_file) else: chapter_times = chapters.get_ogm_start_times( args.chapters_file) elif not src_demuxer.is_wav: chapter_times = src_demuxer.chapters output_path = format_full_path(args.temp_dir, src_demuxer.path, ".sushi.chapters.txt") src_demuxer.set_chapters(output_path) else: chapter_times = [] else: chapter_times = [] # selecting keyframes and timecodes if args.src_keyframes: def select_keyframes(file_arg, demuxer): auto_file = format_full_path(args.temp_dir, demuxer.path, '.sushi.keyframes.txt') if file_arg in ('auto', 'make'): if file_arg == 'make' or not os.path.exists(auto_file): if not demuxer.has_video: raise SushiError( "Cannot make keyframes for {0} because it doesn't have any video!" .format(demuxer.path)) demuxer.set_keyframes(output_path=auto_file) return auto_file else: return file_arg def select_timecodes(external_file, fps_arg, demuxer): if external_file: return external_file elif fps_arg: return None elif demuxer.has_video: path = format_full_path(args.temp_dir, demuxer.path, '.sushi.timecodes.txt') demuxer.set_timecodes(output_path=path) return path else: raise SushiError( 'Fps, timecodes or video files must be provided if keyframes are used' ) src_keyframes_file = select_keyframes(args.src_keyframes, src_demuxer) dst_keyframes_file = select_keyframes(args.dst_keyframes, dst_demuxer) src_timecodes_file = select_timecodes(args.src_timecodes, args.src_fps, src_demuxer) dst_timecodes_file = select_timecodes(args.dst_timecodes, args.dst_fps, dst_demuxer) # after this point nothing should fail so it's safe to start slow operations # like running the actual demuxing src_demuxer.demux() dst_demuxer.demux() try: if args.src_keyframes: src_timecodes = Timecodes.cfr( args.src_fps) if args.src_fps else Timecodes.from_file( src_timecodes_file) src_keytimes = [ src_timecodes.get_frame_time(f) for f in parse_keyframes(src_keyframes_file) ] dst_timecodes = Timecodes.cfr( args.dst_fps) if args.dst_fps else Timecodes.from_file( dst_timecodes_file) dst_keytimes = [ dst_timecodes.get_frame_time(f) for f in parse_keyframes(dst_keyframes_file) ] script = AssScript.from_file( src_script_path ) if script_extension == '.ass' else SrtScript.from_file( src_script_path) script.sort_by_time() src_stream = WavStream(src_audio_path, sample_rate=args.sample_rate, sample_type=args.sample_type) dst_stream = WavStream(dst_audio_path, sample_rate=args.sample_rate, sample_type=args.sample_type) calculate_shifts( src_stream, dst_stream, script.events, chapter_times=chapter_times, window=args.window, max_window=args.max_window, rewind_thresh=args.rewind_thresh if args.grouping else 0, max_ts_duration=args.max_ts_duration, max_ts_distance=args.max_ts_distance) events = script.events if write_plot: plt.plot([x.shift for x in events], label='From audio') if args.grouping: if not ignore_chapters and chapter_times: groups = groups_from_chapters(events, chapter_times) for g in groups: fix_near_borders(g) smooth_events([x for x in g if not x.linked], args.smooth_radius) groups = split_broken_groups(groups, args.min_group_size) else: fix_near_borders(events) smooth_events([x for x in events if not x.linked], args.smooth_radius) groups = detect_groups(events, args.min_group_size) if write_plot: plt.plot([x.shift for x in events], label='Borders fixed') for g in groups: start_shift = g[0].shift end_shift = g[-1].shift avg_shift = average_shifts(g) logging.info( u'Group (start: {0}, end: {1}, lines: {2}), ' u'shifts (start: {3}, end: {4}, average: {5})'.format( format_time(g[0].start), format_time(g[-1].end), len(g), start_shift, end_shift, avg_shift)) if args.src_keyframes: for e in (x for x in events if x.linked): e.resolve_link() for g in groups: snap_groups_to_keyframes( g, chapter_times, args.max_ts_duration, args.max_ts_distance, src_keytimes, dst_keytimes, src_timecodes, dst_timecodes, args.max_kf_distance, args.kf_mode) if args.write_avs: write_shift_avs(dst_script_path + '.avs', groups, src_audio_path, dst_audio_path) else: fix_near_borders(events) if write_plot: plt.plot([x.shift for x in events], label='Borders fixed') if args.src_keyframes: for e in (x for x in events if x.linked): e.resolve_link() snap_groups_to_keyframes(events, chapter_times, args.max_ts_duration, args.max_ts_distance, src_keytimes, dst_keytimes, src_timecodes, dst_timecodes, args.max_kf_distance, args.kf_mode) for event in events: event.apply_shift() script.save_to_file(dst_script_path) if write_plot: plt.plot([ x.shift + (x._start_shift + x._end_shift) / 2.0 for x in events ], label='After correction') plt.legend(fontsize=5, frameon=False, fancybox=False) plt.savefig(args.plot_path, dpi=300) finally: if args.cleanup: src_demuxer.cleanup() dst_demuxer.cleanup()