示例#1
0
    def test_read_from_file(self):
        os.write(
            self.script_description, """1
00:00:17,500 --> 00:00:18,870
Yeah, really!

2
00:00:17,500 --> 00:00:18,870


3
00:00:17,500 --> 00:00:18,870
House number
35

4
00:00:21,250 --> 00:00:22,750
Serves you right.""")
        parsed = SrtScript.from_file(self.script_path).events
        self.assertEquals(17.5, parsed[0].start)
        self.assertEquals(18.87, parsed[0].end)
        self.assertEquals("Yeah, really!", parsed[0].text)
        self.assertEquals(17.5, parsed[1].start)
        self.assertEquals(18.87, parsed[1].end)
        self.assertEquals("", parsed[1].text)
        self.assertEquals(17.5, parsed[2].start)
        self.assertEquals(18.87, parsed[2].end)
        self.assertEquals("House number\n35", parsed[2].text)
        self.assertEquals(21.25, parsed[3].start)
        self.assertEquals(22.75, parsed[3].end)
        self.assertEquals("Serves you right.", parsed[3].text)
示例#2
0
    def test_read_from_file(self):
        os.write(self.script_description, """1
00:00:17,500 --> 00:00:18,870
Yeah, really!

2
00:00:17,500 --> 00:00:18,870


3
00:00:17,500 --> 00:00:18,870
House number
35

4
00:00:21,250 --> 00:00:22,750
Serves you right.""")
        parsed = SrtScript.from_file(self.script_path).events
        self.assertEquals(17.5, parsed[0].start)
        self.assertEquals(18.87, parsed[0].end)
        self.assertEquals("Yeah, really!", parsed[0].text)
        self.assertEquals(17.5, parsed[1].start)
        self.assertEquals(18.87, parsed[1].end)
        self.assertEquals("", parsed[1].text)
        self.assertEquals(17.5, parsed[2].start)
        self.assertEquals(18.87, parsed[2].end)
        self.assertEquals("House number\n35", parsed[2].text)
        self.assertEquals(21.25, parsed[3].start)
        self.assertEquals(22.75, parsed[3].end)
        self.assertEquals("Serves you right.", parsed[3].text)
示例#3
0
文件: sushi.py 项目: tp7/Sushi
def run(args):
    """Compute per-event shifts between two audio tracks and write a shifted script.

    The function works in two phases. The first validates the options and
    decides which files are used or need to be extracted (audio, script,
    chapters, keyframes, timecodes); the second runs the demuxers, calculates
    shifts for the script events, optionally groups/smooths/snaps them, applies
    the shifts and saves the script.

    Args:
        args: options object. Attributes read here include source,
            destination, script_file, output_script, temp_dir, chapters_file,
            src_/dst_keyframes, src_/dst_timecodes, src_/dst_fps,
            src_/dst_audio_idx, src_script_idx, sample_rate, sample_type,
            grouping, window, max_window, rewind_thresh, smooth_radius,
            max_ts_duration, max_ts_distance, max_kf_distance, kf_mode,
            plot_path and cleanup.

    Raises:
        SushiError: if fps and timecodes are both given for one side, the
            script is missing for a WAV source, only one side has keyframes,
            the script type is not .ass/.srt, the output script extension
            differs from the source one, or keyframes are requested for a
            file that cannot provide them. Helper calls such as
            check_file_exists presumably raise it as well -- verify.
    """
    # chapters_file equal to 'none' (any case) switches chapter handling off
    ignore_chapters = args.chapters_file is not None and args.chapters_file.lower() == 'none'
    write_plot = plot_enabled and args.plot_path
    if write_plot:
        plt.clf()
        plt.ylabel('Shift, seconds')
        plt.xlabel('Event index')

    # first part should do all possible validation and should NOT take significant amount of time
    check_file_exists(args.source, 'Source')
    check_file_exists(args.destination, 'Destination')
    check_file_exists(args.src_timecodes, 'Source timecodes')
    # the label must name the destination file, otherwise a failed check blames the wrong option
    check_file_exists(args.dst_timecodes, 'Destination timecodes')
    check_file_exists(args.script_file, 'Script')

    if not ignore_chapters:
        check_file_exists(args.chapters_file, 'Chapters')
    # 'auto' and 'make' are keywords, not paths, so there is no file to check for them
    if args.src_keyframes not in ('auto', 'make'):
        check_file_exists(args.src_keyframes, 'Source keyframes')
    if args.dst_keyframes not in ('auto', 'make'):
        check_file_exists(args.dst_keyframes, 'Destination keyframes')

    if (args.src_timecodes and args.src_fps) or (args.dst_timecodes and args.dst_fps):
        raise SushiError('Both fps and timecodes file cannot be specified at the same time')

    src_demuxer = Demuxer(args.source)
    dst_demuxer = Demuxer(args.destination)

    if src_demuxer.is_wav and not args.script_file:
        raise SushiError("Script file isn't specified")

    # keyframes must be given for both sides or for neither
    if bool(args.src_keyframes) != bool(args.dst_keyframes):
        raise SushiError('Either none or both of src and dst keyframes should be provided')

    create_directory_if_not_exists(args.temp_dir)

    # selecting source audio
    if src_demuxer.is_wav:
        src_audio_path = args.source
    else:
        src_audio_path = format_full_path(args.temp_dir, args.source, '.sushi.wav')
        src_demuxer.set_audio(stream_idx=args.src_audio_idx, output_path=src_audio_path, sample_rate=args.sample_rate)

    # selecting destination audio
    if dst_demuxer.is_wav:
        dst_audio_path = args.destination
    else:
        dst_audio_path = format_full_path(args.temp_dir, args.destination, '.sushi.wav')
        dst_demuxer.set_audio(stream_idx=args.dst_audio_idx, output_path=dst_audio_path, sample_rate=args.sample_rate)

    # selecting source subtitles
    if args.script_file:
        src_script_path = args.script_file
    else:
        stype = src_demuxer.get_subs_type(args.src_script_idx)
        src_script_path = format_full_path(args.temp_dir, args.source, '.sushi' + stype)
        src_demuxer.set_script(stream_idx=args.src_script_idx, output_path=src_script_path)

    script_extension = get_extension(src_script_path)
    if script_extension not in ('.ass', '.srt'):
        raise SushiError('Unknown script type')

    # selecting destination subtitles
    if args.output_script:
        dst_script_path = args.output_script
        dst_script_extension = get_extension(args.output_script)
        if dst_script_extension != script_extension:
            raise SushiError("Source and destination script file types don't match ({0} vs {1})"
                             .format(script_extension, dst_script_extension))
    else:
        dst_script_path = format_full_path(args.temp_dir, args.destination, '.sushi' + script_extension)

    # selecting chapters: only used when grouping is on and chapters are not disabled
    if args.grouping and not ignore_chapters:
        if args.chapters_file:
            if get_extension(args.chapters_file) == '.xml':
                chapter_times = chapters.get_xml_start_times(args.chapters_file)
            else:
                chapter_times = chapters.get_ogm_start_times(args.chapters_file)
        elif not src_demuxer.is_wav:
            chapter_times = src_demuxer.chapters
            output_path = format_full_path(args.temp_dir, src_demuxer.path, ".sushi.chapters.txt")
            src_demuxer.set_chapters(output_path)
        else:
            chapter_times = []
    else:
        chapter_times = []

    # selecting keyframes and timecodes
    if args.src_keyframes:
        def select_keyframes(file_arg, demuxer):
            # Return the keyframes file to use; for 'auto'/'make' schedule its
            # creation unless ('auto' only) it already exists on disk.
            auto_file = format_full_path(args.temp_dir, demuxer.path, '.sushi.keyframes.txt')
            if file_arg in ('auto', 'make'):
                if file_arg == 'make' or not os.path.exists(auto_file):
                    if not demuxer.has_video:
                        raise SushiError("Cannot make keyframes for {0} because it doesn't have any video!"
                                         .format(demuxer.path))
                    demuxer.set_keyframes(output_path=auto_file)
                return auto_file
            else:
                return file_arg

        def select_timecodes(external_file, fps_arg, demuxer):
            # Return the timecodes file to use, or None when a fixed fps was
            # given (Timecodes.cfr is used for that side further down).
            if external_file:
                return external_file
            elif fps_arg:
                return None
            elif demuxer.has_video:
                path = format_full_path(args.temp_dir, demuxer.path, '.sushi.timecodes.txt')
                demuxer.set_timecodes(output_path=path)
                return path
            else:
                raise SushiError('Fps, timecodes or video files must be provided if keyframes are used')

        src_keyframes_file = select_keyframes(args.src_keyframes, src_demuxer)
        dst_keyframes_file = select_keyframes(args.dst_keyframes, dst_demuxer)
        src_timecodes_file = select_timecodes(args.src_timecodes, args.src_fps, src_demuxer)
        dst_timecodes_file = select_timecodes(args.dst_timecodes, args.dst_fps, dst_demuxer)

    # after this point nothing should fail so it's safe to start slow operations
    # like running the actual demuxing
    src_demuxer.demux()
    dst_demuxer.demux()

    try:
        if args.src_keyframes:
            src_timecodes = Timecodes.cfr(args.src_fps) if args.src_fps else Timecodes.from_file(src_timecodes_file)
            src_keytimes = [src_timecodes.get_frame_time(f) for f in keyframes.parse_keyframes(src_keyframes_file)]

            dst_timecodes = Timecodes.cfr(args.dst_fps) if args.dst_fps else Timecodes.from_file(dst_timecodes_file)
            dst_keytimes = [dst_timecodes.get_frame_time(f) for f in keyframes.parse_keyframes(dst_keyframes_file)]

        script = AssScript.from_file(src_script_path) if script_extension == '.ass' else SrtScript.from_file(src_script_path)
        script.sort_by_time()

        src_stream = WavStream(src_audio_path, sample_rate=args.sample_rate, sample_type=args.sample_type)
        dst_stream = WavStream(dst_audio_path, sample_rate=args.sample_rate, sample_type=args.sample_type)

        search_groups = prepare_search_groups(script.events,
                                              source_duration=src_stream.duration_seconds,
                                              chapter_times=chapter_times,
                                              max_ts_duration=args.max_ts_duration,
                                              max_ts_distance=args.max_ts_distance)

        # rewind threshold is only passed through when grouping is enabled
        calculate_shifts(src_stream, dst_stream, search_groups,
                         normal_window=args.window,
                         max_window=args.max_window,
                         rewind_thresh=args.rewind_thresh if args.grouping else 0)

        events = script.events

        if write_plot:
            plt.plot([x.shift for x in events], label='From audio')

        if args.grouping:
            if not ignore_chapters and chapter_times:
                groups = groups_from_chapters(events, chapter_times)
                for g in groups:
                    fix_near_borders(g)
                    smooth_events([x for x in g if not x.linked], args.smooth_radius)
                groups = split_broken_groups(groups)
            else:
                fix_near_borders(events)
                smooth_events([x for x in events if not x.linked], args.smooth_radius)
                groups = detect_groups(events)

            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            for g in groups:
                start_shift = g[0].shift
                end_shift = g[-1].shift
                avg_shift = average_shifts(g)
                logging.info(u'Group (start: {0}, end: {1}, lines: {2}), '
                             u'shifts (start: {3}, end: {4}, average: {5})'
                             .format(format_time(g[0].start), format_time(g[-1].end), len(g), start_shift, end_shift,
                                     avg_shift))

            if args.src_keyframes:
                # linked events are resolved before the groups are snapped to keyframes
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                for g in groups:
                    snap_groups_to_keyframes(g, chapter_times, args.max_ts_duration, args.max_ts_distance, src_keytimes,
                                             dst_keytimes, src_timecodes, dst_timecodes, args.max_kf_distance, args.kf_mode)
        else:
            fix_near_borders(events)
            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            if args.src_keyframes:
                # without grouping all events are snapped as one sequence
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                snap_groups_to_keyframes(events, chapter_times, args.max_ts_duration, args.max_ts_distance, src_keytimes,
                                         dst_keytimes, src_timecodes, dst_timecodes, args.max_kf_distance, args.kf_mode)

        for event in events:
            event.apply_shift()

        script.save_to_file(dst_script_path)

        if write_plot:
            plt.plot([x.shift + (x._start_shift + x._end_shift)/2.0 for x in events], label='After correction')
            plt.legend(fontsize=5, frameon=False, fancybox=False)
            plt.savefig(args.plot_path, dpi=300)

    finally:
        # runs whether or not processing succeeded, but only when cleanup was requested
        if args.cleanup:
            src_demuxer.cleanup()
            dst_demuxer.cleanup()
示例#4
0
def run(args):
    """Compute per-event shifts between two audio tracks and write a shifted script.

    The function works in two phases. The first validates the options and
    decides which files are used or need to be extracted (audio, script,
    chapters, keyframes, timecodes); the second runs the demuxers, calculates
    shifts for the script events, optionally groups/smooths/snaps them, applies
    the shifts and saves the script.

    Args:
        args: options object. Attributes read here include source,
            destination, script_file, output_script, temp_dir, chapters_file,
            src_/dst_keyframes, src_/dst_timecodes, src_/dst_fps,
            src_/dst_audio_idx, src_script_idx, sample_rate, sample_type,
            grouping, window, max_window, rewind_thresh, smooth_radius,
            min_group_size, max_ts_duration, max_ts_distance,
            max_kf_distance, kf_mode, write_avs, plot_path and cleanup.

    Raises:
        SushiError: if fps and timecodes are both given for one side, the
            script is missing for a WAV source, only one side has keyframes,
            the script type is not .ass/.srt, the output script extension
            differs from the source one, or keyframes are requested for a
            file that cannot provide them. Helper calls such as
            check_file_exists presumably raise it as well -- verify.
    """
    # chapters_file equal to 'none' (any case) switches chapter handling off
    ignore_chapters = (args.chapters_file is not None
                       and args.chapters_file.lower() == 'none')
    write_plot = plot_enabled and args.plot_path
    if write_plot:
        plt.clf()
        plt.ylabel('Shift, seconds')
        plt.xlabel('Event index')

    # first part should do all possible validation and should NOT take significant amount of time
    check_file_exists(args.source, 'Source')
    check_file_exists(args.destination, 'Destination')
    check_file_exists(args.src_timecodes, 'Source timecodes')
    # the label must name the destination file, otherwise a failed check blames the wrong option
    check_file_exists(args.dst_timecodes, 'Destination timecodes')
    check_file_exists(args.script_file, 'Script')

    if not ignore_chapters:
        check_file_exists(args.chapters_file, 'Chapters')
    # 'auto' and 'make' are keywords, not paths, so there is no file to check for them
    if args.src_keyframes not in ('auto', 'make'):
        check_file_exists(args.src_keyframes, 'Source keyframes')
    if args.dst_keyframes not in ('auto', 'make'):
        check_file_exists(args.dst_keyframes, 'Destination keyframes')

    if (args.src_timecodes and args.src_fps) or (args.dst_timecodes
                                                 and args.dst_fps):
        raise SushiError(
            'Both fps and timecodes file cannot be specified at the same time')

    src_demuxer = Demuxer(args.source)
    dst_demuxer = Demuxer(args.destination)

    if src_demuxer.is_wav and not args.script_file:
        raise SushiError("Script file isn't specified")

    # keyframes must be given for both sides or for neither
    if bool(args.src_keyframes) != bool(args.dst_keyframes):
        raise SushiError(
            'Either none or both of src and dst keyframes should be provided')

    create_directory_if_not_exists(args.temp_dir)

    # selecting source audio
    if src_demuxer.is_wav:
        src_audio_path = args.source
    else:
        src_audio_path = format_full_path(args.temp_dir, args.source,
                                          '.sushi.wav')
        src_demuxer.set_audio(stream_idx=args.src_audio_idx,
                              output_path=src_audio_path,
                              sample_rate=args.sample_rate)

    # selecting destination audio
    if dst_demuxer.is_wav:
        dst_audio_path = args.destination
    else:
        dst_audio_path = format_full_path(args.temp_dir, args.destination,
                                          '.sushi.wav')
        dst_demuxer.set_audio(stream_idx=args.dst_audio_idx,
                              output_path=dst_audio_path,
                              sample_rate=args.sample_rate)

    # selecting source subtitles
    if args.script_file:
        src_script_path = args.script_file
    else:
        stype = src_demuxer.get_subs_type(args.src_script_idx)
        src_script_path = format_full_path(args.temp_dir, args.source,
                                           '.sushi' + stype)
        src_demuxer.set_script(stream_idx=args.src_script_idx,
                               output_path=src_script_path)

    script_extension = get_extension(src_script_path)
    if script_extension not in ('.ass', '.srt'):
        raise SushiError('Unknown script type')

    # selecting destination subtitles
    if args.output_script:
        dst_script_path = args.output_script
        dst_script_extension = get_extension(args.output_script)
        if dst_script_extension != script_extension:
            raise SushiError(
                "Source and destination script file types don't match ({0} vs {1})"
                .format(script_extension, dst_script_extension))
    else:
        dst_script_path = format_full_path(args.temp_dir, args.destination,
                                           '.sushi' + script_extension)

    # selecting chapters: only used when grouping is on and chapters are not disabled
    if args.grouping and not ignore_chapters:
        if args.chapters_file:
            if get_extension(args.chapters_file) == '.xml':
                chapter_times = chapters.get_xml_start_times(
                    args.chapters_file)
            else:
                chapter_times = chapters.get_ogm_start_times(
                    args.chapters_file)
        elif not src_demuxer.is_wav:
            chapter_times = src_demuxer.chapters
            output_path = format_full_path(args.temp_dir, src_demuxer.path,
                                           ".sushi.chapters.txt")
            src_demuxer.set_chapters(output_path)
        else:
            chapter_times = []
    else:
        chapter_times = []

    # selecting keyframes and timecodes
    if args.src_keyframes:

        def select_keyframes(file_arg, demuxer):
            # Return the keyframes file to use; for 'auto'/'make' schedule its
            # creation unless ('auto' only) it already exists on disk.
            auto_file = format_full_path(args.temp_dir, demuxer.path,
                                         '.sushi.keyframes.txt')
            if file_arg in ('auto', 'make'):
                if file_arg == 'make' or not os.path.exists(auto_file):
                    if not demuxer.has_video:
                        raise SushiError(
                            "Cannot make keyframes for {0} because it doesn't have any video!"
                            .format(demuxer.path))
                    demuxer.set_keyframes(output_path=auto_file)
                return auto_file
            else:
                return file_arg

        def select_timecodes(external_file, fps_arg, demuxer):
            # Return the timecodes file to use, or None when a fixed fps was
            # given (Timecodes.cfr is used for that side further down).
            if external_file:
                return external_file
            elif fps_arg:
                return None
            elif demuxer.has_video:
                path = format_full_path(args.temp_dir, demuxer.path,
                                        '.sushi.timecodes.txt')
                demuxer.set_timecodes(output_path=path)
                return path
            else:
                raise SushiError(
                    'Fps, timecodes or video files must be provided if keyframes are used'
                )

        src_keyframes_file = select_keyframes(args.src_keyframes, src_demuxer)
        dst_keyframes_file = select_keyframes(args.dst_keyframes, dst_demuxer)
        src_timecodes_file = select_timecodes(args.src_timecodes, args.src_fps,
                                              src_demuxer)
        dst_timecodes_file = select_timecodes(args.dst_timecodes, args.dst_fps,
                                              dst_demuxer)

    # after this point nothing should fail so it's safe to start slow operations
    # like running the actual demuxing
    src_demuxer.demux()
    dst_demuxer.demux()

    try:
        if args.src_keyframes:
            src_timecodes = Timecodes.cfr(
                args.src_fps) if args.src_fps else Timecodes.from_file(
                    src_timecodes_file)
            src_keytimes = [
                src_timecodes.get_frame_time(f)
                for f in parse_keyframes(src_keyframes_file)
            ]

            dst_timecodes = Timecodes.cfr(
                args.dst_fps) if args.dst_fps else Timecodes.from_file(
                    dst_timecodes_file)
            dst_keytimes = [
                dst_timecodes.get_frame_time(f)
                for f in parse_keyframes(dst_keyframes_file)
            ]

        script = AssScript.from_file(
            src_script_path
        ) if script_extension == '.ass' else SrtScript.from_file(
            src_script_path)
        script.sort_by_time()

        src_stream = WavStream(src_audio_path,
                               sample_rate=args.sample_rate,
                               sample_type=args.sample_type)
        dst_stream = WavStream(dst_audio_path,
                               sample_rate=args.sample_rate,
                               sample_type=args.sample_type)

        # rewind threshold is only passed through when grouping is enabled
        calculate_shifts(
            src_stream,
            dst_stream,
            script.events,
            chapter_times=chapter_times,
            window=args.window,
            max_window=args.max_window,
            rewind_thresh=args.rewind_thresh if args.grouping else 0,
            max_ts_duration=args.max_ts_duration,
            max_ts_distance=args.max_ts_distance)

        events = script.events

        if write_plot:
            plt.plot([x.shift for x in events], label='From audio')

        if args.grouping:
            if not ignore_chapters and chapter_times:
                groups = groups_from_chapters(events, chapter_times)
                for g in groups:
                    fix_near_borders(g)
                    smooth_events([x for x in g if not x.linked],
                                  args.smooth_radius)
                groups = split_broken_groups(groups, args.min_group_size)
            else:
                fix_near_borders(events)
                smooth_events([x for x in events if not x.linked],
                              args.smooth_radius)
                groups = detect_groups(events, args.min_group_size)

            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            for g in groups:
                start_shift = g[0].shift
                end_shift = g[-1].shift
                avg_shift = average_shifts(g)
                logging.info(
                    u'Group (start: {0}, end: {1}, lines: {2}), '
                    u'shifts (start: {3}, end: {4}, average: {5})'.format(
                        format_time(g[0].start), format_time(g[-1].end),
                        len(g), start_shift, end_shift, avg_shift))

            if args.src_keyframes:
                # linked events are resolved before the groups are snapped to keyframes
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                for g in groups:
                    snap_groups_to_keyframes(
                        g, chapter_times, args.max_ts_duration,
                        args.max_ts_distance, src_keytimes, dst_keytimes,
                        src_timecodes, dst_timecodes, args.max_kf_distance,
                        args.kf_mode)

            # the .avs file is written next to the output script, grouping mode only
            if args.write_avs:
                write_shift_avs(dst_script_path + '.avs', groups,
                                src_audio_path, dst_audio_path)
        else:
            fix_near_borders(events)
            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            if args.src_keyframes:
                # without grouping all events are snapped as one sequence
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                snap_groups_to_keyframes(events, chapter_times,
                                         args.max_ts_duration,
                                         args.max_ts_distance, src_keytimes,
                                         dst_keytimes, src_timecodes,
                                         dst_timecodes, args.max_kf_distance,
                                         args.kf_mode)

        for event in events:
            event.apply_shift()

        script.save_to_file(dst_script_path)

        if write_plot:
            plt.plot([
                x.shift + (x._start_shift + x._end_shift) / 2.0 for x in events
            ],
                     label='After correction')
            plt.legend(fontsize=5, frameon=False, fancybox=False)
            plt.savefig(args.plot_path, dpi=300)

    finally:
        # runs whether or not processing succeeded, but only when cleanup was requested
        if args.cleanup:
            src_demuxer.cleanup()
            dst_demuxer.cleanup()