예제 #1
0
    def test_dfxp2srt(self):
        dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
            <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
            <body>
                <div xml:lang="en">
                    <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
                    <p begin="1" end="2">第二行<br/>♪♪</p>
                    <p begin="2" end="3"><span>Third<br/>Line</span></p>
                </div>
            </body>
            </tt>'''
        srt_data = '''1
00:00:00,000 --> 00:00:01,000
The following line contains Chinese characters and special symbols

2
00:00:01,000 --> 00:00:02,000
第二行
♪♪

3
00:00:02,000 --> 00:00:03,000
Third
Line

'''
        self.assertEqual(dfxp2srt(dfxp_data), srt_data)
예제 #2
0
    def test_dfxp2srt(self):
        dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
            <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
            <body>
                <div xml:lang="en">
                    <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
                    <p begin="1" end="2">第二行<br/>♪♪</p>
                    <p begin="2" dur="1"><span>Third<br/>Line</span></p>
                    <p begin="3" end="-1">Lines with invalid timestamps are ignored</p>
                    <p begin="-1" end="-1">Ignore, two</p>
                    <p begin="3" dur="-1">Ignored, three</p>
                </div>
            </body>
            </tt>'''
        srt_data = '''1
00:00:00,000 --> 00:00:01,000
The following line contains Chinese characters and special symbols

2
00:00:01,000 --> 00:00:02,000
第二行
♪♪

3
00:00:02,000 --> 00:00:03,000
Third
Line

'''
        self.assertEqual(dfxp2srt(dfxp_data), srt_data)

        dfxp_data_no_default_namespace = '''<?xml version="1.0" encoding="UTF-8"?>
            <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
            <body>
                <div xml:lang="en">
                    <p begin="0" end="1">The first line</p>
                </div>
            </body>
            </tt>'''
        srt_data = '''1
00:00:00,000 --> 00:00:01,000
The first line

'''
        self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
예제 #3
0
    def test_dfxp2srt(self):
        dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
            <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
            <body>
                <div xml:lang="en">
                    <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
                    <p begin="1" end="2">第二行<br/>♪♪</p>
                    <p begin="2" dur="1"><span>Third<br/>Line</span></p>
                    <p begin="3" end="-1">Lines with invalid timestamps are ignored</p>
                    <p begin="-1" end="-1">Ignore, two</p>
                    <p begin="3" dur="-1">Ignored, three</p>
                </div>
            </body>
            </tt>'''
        srt_data = '''1
00:00:00,000 --> 00:00:01,000
The following line contains Chinese characters and special symbols

2
00:00:01,000 --> 00:00:02,000
第二行
♪♪

3
00:00:02,000 --> 00:00:03,000
Third
Line

'''
        self.assertEqual(dfxp2srt(dfxp_data), srt_data)

        dfxp_data_no_default_namespace = '''<?xml version="1.0" encoding="UTF-8"?>
            <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
            <body>
                <div xml:lang="en">
                    <p begin="0" end="1">The first line</p>
                </div>
            </body>
            </tt>'''
        srt_data = '''1
00:00:00,000 --> 00:00:01,000
The first line

'''
        self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
예제 #4
0
    def run(self, info):
        """Convert the requested subtitle files to ``self.format``.

        Reads ``info['requested_subtitles']`` (a mapping of language to a
        dict with at least an ``'ext'`` key) and ``info['filepath']``.  For
        every language whose extension differs from the target, the subtitle
        file is converted through ``self.run_ffmpeg`` and the mapping entry
        is replaced by ``{'ext': ..., 'data': ...}`` holding the converted
        text.  dfxp/ttml/tt input is first turned into an intermediate
        ``.srt`` file with ``dfxp2srt``.

        Returns:
            A ``(files, info)`` tuple.  ``files`` lists the source subtitle
            paths that were converted, plus the intermediate ``.srt`` path
            whenever it was only a stepping stone to another format.
        """
        requested = info.get('requested_subtitles')
        video_path = info['filepath']
        target_ext = self.format
        # The value handed to ffmpeg's -f option is 'webvtt' for 'vtt' files.
        ffmpeg_format = 'webvtt' if target_ext == 'vtt' else target_ext

        if requested is None:
            self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
            return [], info

        self._downloader.to_screen('[ffmpeg] Converting subtitles')
        converted_sources = []
        for lang, sub in requested.items():
            source_ext = sub['ext']
            if source_ext == target_ext:
                self._downloader.to_screen(
                    '[ffmpeg] Subtitle file for %s is already in the requested format' % target_ext)
                continue

            source_path = subtitles_filename(video_path, lang, source_ext)
            converted_sources.append(source_path)
            target_path = subtitles_filename(video_path, lang, target_ext)

            if source_ext in ('dfxp', 'ttml', 'tt'):
                self._downloader.report_warning(
                    'You have requested to convert dfxp (TTML) subtitles into another format, '
                    'which results in style information loss')

                srt_path = subtitles_filename(video_path, lang, 'srt')
                with open(source_path, 'rb') as dfxp_stream:
                    srt_text = dfxp2srt(dfxp_stream.read())
                with io.open(srt_path, 'wt', encoding='utf-8') as srt_stream:
                    srt_stream.write(srt_text)

                # From here on the intermediate srt file is the ffmpeg input.
                source_path = srt_path
                requested[lang] = {'ext': 'srt', 'data': srt_text}

                if target_ext == 'srt':
                    # The intermediate file already is the final result.
                    continue
                converted_sources.append(srt_path)

            self.run_ffmpeg(source_path, target_path, ['-f', ffmpeg_format])

            with io.open(target_path, 'rt', encoding='utf-8') as converted_stream:
                requested[lang] = {
                    'ext': target_ext,
                    'data': converted_stream.read(),
                }

        return converted_sources, info
예제 #5
0
    def __init__(self, handler):
        """Collect metadata for the handler's URL and download every entry.

        All work happens in the constructor: it builds ``self.ydl_opts``,
        probes the URL inside a temporary directory (info JSON and
        thumbnails only, no media) to find out whether it is a playlist, and
        then processes each entry: subtitles that cannot be converted are
        filtered out, the thumbnail is converted, the info JSON is rewritten
        and the media is downloaded into a locked ``<title>.part`` directory
        before the finished file is moved into the download directory.

        Fatal problems (download folder cannot be created or locked, the
        ffmpeg executable is missing, the finished file cannot be moved) are
        reported through ``handler.on_error`` and end the process with
        ``sys.exit(1)``.

        Args:
            handler: Object supplying the job parameters (``get_url``,
                ``get_download_dir``, ``get_mode``, ``get_resolution``,
                ``get_prefer_mpeg``, ``on_playlist_request``) and receiving
                the ``on_progress_start``, ``on_progress_end`` and
                ``on_error`` notifications.

        Raises:
            RuntimeError: If ``self._on_info_dict_json`` is still set after
                ``_load_video`` returned, i.e. no info_dict was received.
        """
        self._handler = handler
        # Callback receiving the info_dict; see ydl_opts['forcejson'] below
        self._on_info_dict_json = None
        self._allow_authentication_request = True
        self._skip_authentication = False
        self._skipped_count = 0
        self.ydl_opts = {
            'logger': self,
            'logtostderr': True,
            'no_color': True,
            'progress_hooks': [self._on_progress],
            'fixup': 'detect_or_warn',
            'ignoreerrors': True,  # handled via logger error callback
            'retries': 10,
            'fragment_retries': 10,
            'writesubtitles': True,
            'allsubtitles': True,
            'subtitlesformat': 'vtt/best',
            'keepvideo': True,
            'postprocessors': [
                {'key': 'FFmpegMetadata'},
                {'key': 'FFmpegSubtitlesConvertor', 'format': 'vtt'},
                {'key': 'FFmpegEmbedSubtitle'},
                {'key': 'XAttrMetadata'}]}
        url = self._handler.get_url()
        download_dir = os.path.abspath(self._handler.get_download_dir())
        # Everything below, including the downloads, runs while the temporary
        # directory exists because the info JSON files stored in it are read
        # again for every entry.
        with tempfile.TemporaryDirectory() as temp_dir:
            self.ydl_opts['cookiefile'] = os.path.join(temp_dir, 'cookies')
            # Collect info without downloading videos
            testplaylist_dir = os.path.join(temp_dir, 'testplaylist')
            noplaylist_dir = os.path.join(temp_dir, 'noplaylist')
            fullplaylist_dir = os.path.join(temp_dir, 'fullplaylist')
            for path in [testplaylist_dir, noplaylist_dir, fullplaylist_dir]:
                os.mkdir(path)
            # Options that only apply to the probing phase; they are removed
            # again before the actual download starts.
            self.ydl_opts['writeinfojson'] = True
            self.ydl_opts['writethumbnail'] = True
            self.ydl_opts['skip_download'] = True
            self.ydl_opts['playlistend'] = 2
            self.ydl_opts['outtmpl'] = '%(autonumber)s.%(ext)s'
            # Test playlist
            # _load_playlist yields a sized collection of entries plus a
            # count of skipped entries (see the unpacking and len() below).
            info_testplaylist, skipped_testplaylist = self._load_playlist(
                testplaylist_dir, url)
            self.ydl_opts['noplaylist'] = True
            # Probing again with 'noplaylist' is only needed when the first
            # probe saw more than one entry.
            if len(info_testplaylist) + skipped_testplaylist > 1:
                info_noplaylist, skipped_noplaylist = self._load_playlist(
                    noplaylist_dir, url)
            else:
                info_noplaylist = info_testplaylist
                skipped_noplaylist = skipped_testplaylist
            del self.ydl_opts['noplaylist']
            del self.ydl_opts['playlistend']
            # The URL yields more entries as a playlist than with
            # 'noplaylist', so let the handler decide which one is wanted.
            if (len(info_testplaylist) + skipped_testplaylist >
                    len(info_noplaylist) + skipped_noplaylist):
                self.ydl_opts['noplaylist'] = (
                    not self._handler.on_playlist_request())
                if not self.ydl_opts['noplaylist']:
                    info_playlist, _ = self._load_playlist(
                        fullplaylist_dir, url)
                else:
                    info_playlist = info_noplaylist
            elif len(info_testplaylist) + skipped_testplaylist > 1:
                # More than one entry either way: load the list again now
                # that the 'playlistend' limit has been removed.
                info_playlist, _ = self._load_playlist(fullplaylist_dir, url)
            else:
                info_playlist = info_testplaylist
            # Download videos
            self._allow_authentication_request = False
            del self.ydl_opts['writeinfojson']
            del self.ydl_opts['writethumbnail']
            del self.ydl_opts['skip_download']
            # Include id and format_id in outtmpl to prevent youtube-dl
            # from continuing wrong file
            self.ydl_opts['outtmpl'] = '%(id)s.%(format_id)s.%(ext)s'
            # Output info_dict as JSON handled via logger debug callback
            self.ydl_opts['forcejson'] = True
            mode = self._handler.get_mode()
            if mode == 'audio':
                resolution = MAX_RESOLUTION
                prefer_mpeg = False
                self.ydl_opts['format'] = 'bestaudio/best'
                # Put audio extraction and thumbnail embedding in front of
                # the postprocessors configured above.
                self.ydl_opts['postprocessors'].insert(0, {
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': 'mp3',
                    'preferredquality': '192'})
                self.ydl_opts['postprocessors'].insert(1, {
                    'key': 'EmbedThumbnail',
                    'already_have_thumbnail': True})
            else:
                resolution = self._handler.get_resolution()
                prefer_mpeg = self._handler.get_prefer_mpeg()
            try:
                os.makedirs(download_dir, exist_ok=True)
            except OSError as e:
                traceback.print_exc(file=sys.stderr)
                sys.stderr.flush()
                self._handler.on_error(
                    'ERROR: Failed to create download folder: %s' % e)
                sys.exit(1)
            # Each entry is (path of the info JSON, list of thumbnail paths,
            # list of (subtitle path, language, extension) tuples).
            for i, (info_path, thumbnail_paths, subtitles) in enumerate(
                    info_playlist):
                with open(info_path) as f:
                    info = json.load(f)
                title = info.get('title') or info.get('id') or 'video'
                output_title = self._get_output_title(title)
                # Test subtitles
                # youtube-dl fails for subtitles that it can't convert or
                # are unsupported by ffmpeg
                supported_subtitles = []
                for sub_path, sub_lang, sub_ext in subtitles:
                    print('[youtube_dl_slave] Testing subtitle (%r, %r)' %
                          (sub_lang, sub_ext), file=sys.stderr, flush=True)
                    if sub_ext in ['dfxp', 'ttml', 'tt']:
                        # Try to use youtube-dl's internal dfxp2srt converter
                        with open(sub_path, 'rb') as f:
                            sub_data = f.read()
                        try:
                            sub_data = dfxp2srt(sub_data)
                        except Exception:
                            # Any converter failure just drops this subtitle
                            traceback.print_exc(file=sys.stderr)
                            sys.stderr.flush()
                            continue
                        ff_sub_path = sub_path + '-converted.srt'
                        with open(ff_sub_path, 'w', encoding='utf-8') as f:
                            f.write(sub_data)
                    else:
                        ff_sub_path = sub_path
                    # Try to read and convert subtitles with ffmpeg
                    # The converted output goes to stdout, which is
                    # discarded; only the exit status matters here.
                    try:
                        subprocess.run(
                            [FFMPEG_EXE, '-i', os.path.abspath(ff_sub_path),
                             '-f', 'webvtt', '-'],
                            check=True, stdin=subprocess.DEVNULL,
                            stdout=subprocess.DEVNULL)
                    except FileNotFoundError:
                        traceback.print_exc(file=sys.stderr)
                        sys.stderr.flush()
                        self._handler.on_error(
                            'ERROR: %r not found' % FFMPEG_EXE)
                        sys.exit(1)
                    except subprocess.CalledProcessError:
                        # ffmpeg rejected the file: skip this subtitle
                        traceback.print_exc(file=sys.stderr)
                        sys.stderr.flush()
                        continue
                    supported_subtitles.append((sub_lang, sub_ext))
                # Choose supported subtitles
                # Languages without any supported subtitle are left out.
                new_info_subtitles = {}
                for sub_lang, subs in (info.get('subtitles') or {}).items():
                    new_subs = []
                    for sub in subs or []:
                        if (sub_lang, sub.get('ext')) in supported_subtitles:
                            new_subs.append(sub)
                    if new_subs:
                        new_info_subtitles[sub_lang] = new_subs
                info['subtitles'] = new_info_subtitles
                # Only the first thumbnail is used; '' means "no thumbnail"
                thumbnail_path = thumbnail_paths[0] if thumbnail_paths else ''
                if thumbnail_path:
                    # Convert thumbnail to JPEG and limit resolution
                    print('[youtube_dl_slave] Converting thumbnail',
                          file=sys.stderr, flush=True)
                    new_thumbnail_path = thumbnail_path + '-converted.jpg'
                    try:
                        subprocess.run(
                            [FFMPEG_EXE, '-i', os.path.abspath(thumbnail_path),
                             '-vf', ('scale=\'min({0},iw):min({0},ih):'
                                     'force_original_aspect_ratio=decrease\''
                                     ).format(MAX_THUMBNAIL_RESOLUTION),
                             os.path.abspath(new_thumbnail_path)],
                            check=True, stdin=subprocess.DEVNULL,
                            stdout=subprocess.DEVNULL)
                    except FileNotFoundError:
                        traceback.print_exc(file=sys.stderr)
                        sys.stderr.flush()
                        self._handler.on_error(
                            'ERROR: %r not found' % FFMPEG_EXE)
                        sys.exit(1)
                    except subprocess.CalledProcessError:
                        # Conversion failed: continue without a thumbnail
                        traceback.print_exc(file=sys.stderr)
                        sys.stderr.flush()
                        new_thumbnail_path = ''
                    # No longer needed
                    os.remove(thumbnail_path)
                    thumbnail_path = new_thumbnail_path
                self._handler.on_progress_start(i, len(info_playlist), title,
                                                thumbnail_path)
                # Point every thumbnail entry at the converted file (or ''
                # when there is none).
                for thumbnail in info.get('thumbnails') or []:
                    thumbnail['filename'] = thumbnail_path
                # Remove description, because long comments cause problems when
                # displayed in Nautilus and other applications.
                with contextlib.suppress(KeyError):
                    del info['description']
                # NOTE(review): the return value is ignored, so sort_formats
                # presumably reorders the list in place -- confirm.
                sort_formats(info.get('formats') or [], resolution,
                             prefer_mpeg)
                # Write the modified info back to the file that is handed to
                # _load_video below.
                with open(info_path, 'w') as f:
                    json.dump(info, f)
                # Check if we already got the file
                existing_filename = self._find_existing_download(
                    download_dir, output_title, mode)
                if existing_filename is not None:
                    self._handler.on_progress_end(existing_filename)
                    continue
                # Download into separate directory because youtube-dl generates
                # many temporary files
                temp_download_dir = os.path.join(
                    download_dir, output_title + '.part')
                # Lock download directory to prevent other processes from
                # writing to the same files
                # The ExitStack lets the lock be acquired inside its own
                # try/except while it is released by the 'with' further down.
                temp_download_dir_cm = contextlib.ExitStack()
                try:
                    temp_download_dir_cm.enter_context(
                        self._create_and_lock_dir(temp_download_dir))
                except OSError as e:
                    traceback.print_exc(file=sys.stderr)
                    sys.stderr.flush()
                    self._handler.on_error(
                        'ERROR: Failed to lock download folder: %s' % e)
                    sys.exit(1)
                with temp_download_dir_cm:
                    # Check if the file got downloaded in the meantime
                    existing_filename = self._find_existing_download(
                        download_dir, output_title, mode)
                    if existing_filename is not None:
                        filename = existing_filename
                    else:
                        info_dict = None

                        # See ydl_opts['forcejson']
                        def on_info_dict_json(info_dict_):
                            nonlocal info_dict
                            info_dict = info_dict_
                        self._on_info_dict_json = on_info_dict_json
                        self._load_video(temp_download_dir, info_path)
                        # NOTE(review): presumably the logger callback resets
                        # self._on_info_dict_json once it has delivered the
                        # info_dict, so a value still set here means nothing
                        # arrived -- confirm against the callback.
                        if self._on_info_dict_json:
                            raise RuntimeError('info_dict not received')
                        # Find the temporary filename
                        temp_filename_root, temp_filename_ext = (
                            os.path.splitext(info_dict['_filename']))
                        if mode == 'audio':
                            # Matches 'preferredcodec' of FFmpegExtractAudio
                            temp_filename_ext = '.mp3'
                        else:
                            # youtube-dl changes extension for incompatible
                            # formats to .mkv
                            for ext in [temp_filename_ext, '.mkv']:
                                if os.path.exists(temp_filename_root + ext):
                                    temp_filename_ext = ext
                                    break
                        temp_filename = temp_filename_root + temp_filename_ext
                        filename = output_title + temp_filename_ext
                        # Move finished download from download to target dir
                        # NOTE(review): the error message below misspells
                        # "Failed" as "Falied"; it is runtime text and is
                        # left unchanged here.
                        try:
                            os.replace(
                                os.path.join(temp_download_dir, temp_filename),
                                os.path.join(download_dir, filename))
                        except OSError as e:
                            traceback.print_exc(file=sys.stderr)
                            sys.stderr.flush()
                            self._handler.on_error((
                                'ERROR: Falied to move finished download to '
                                'download folder: %s') % e)
                            sys.exit(1)
                    # Delete download directory
                    with contextlib.suppress(OSError):
                        shutil.rmtree(temp_download_dir)
                self._handler.on_progress_end(filename)