示例#1
0
    def test_template(self):
        # Download test for one extractor test case.
        #
        # NOTE: `test_case` and `tname` are not parameters; they are free
        # variables resolved from an enclosing scope that is not visible in
        # this excerpt.
        #
        # Flow: resolve the extractor(s), decide whether to skip, run the
        # extraction (retrying on network errors), then compare the result
        # dict, the downloaded file and the written .info.json against the
        # expectations in the test case. Output files are removed both before
        # and after the run.
        ie = yt_dlp.extractor.get_info_extractor(test_case['name'])()
        # Additional extractors this test relies on (optional 'add_ie' key)
        other_ies = [
            get_info_extractor(ie_key)()
            for ie_key in test_case.get('add_ie', [])
        ]
        # Any key starting with 'playlist' marks the test as a playlist test
        is_playlist = any(k.startswith('playlist') for k in test_case)
        # Explicit per-entry sub-cases if given; otherwise a playlist test has
        # no sub-cases and a single-video test is its own only sub-case.
        test_cases = test_case.get('playlist',
                                   [] if is_playlist else [test_case])

        def print_skipping(reason):
            # Report on stdout why this test is not being run
            print('Skipping %s: %s' % (test_case['name'], reason))

        if not ie.working():
            print_skipping('IE marked as not _WORKING')
            return

        # Every sub-case needs 'id' and 'ext' in its info_dict; they are
        # required to determine the output filename that is checked later.
        for tc in test_cases:
            info_dict = tc.get('info_dict', {})
            if not (info_dict.get('id') and info_dict.get('ext')):
                raise Exception(
                    'Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?'
                )

        # The definition check above runs before this skip, so a skipped test
        # with a malformed definition still raises.
        if 'skip' in test_case:
            print_skipping(test_case['skip'])
            return
        for other_ie in other_ies:
            if not other_ie.working():
                print_skipping('test depends on %sIE, marked as not WORKING' %
                               other_ie.ie_key())
                return

        params = get_params(test_case.get('params', {}))
        # Prefix the output template with the test name
        params['outtmpl'] = tname + '_' + params['outtmpl']
        # Playlist test without explicit entries: only the playlist itself is
        # checked, so default to flat extraction without downloading.
        if is_playlist and 'playlist' not in test_case:
            params.setdefault('extract_flat', 'in_playlist')
            params.setdefault('playlistend',
                              test_case.get('playlist_mincount'))
            params.setdefault('skip_download', True)

        ydl = YoutubeDL(params, auto_init=False)
        ydl.add_default_info_extractors()
        # Filenames for which a progress hook reported status 'finished'
        finished_hook_called = set()

        def _hook(status):
            # Progress hook: remember every file reported as finished
            if status['status'] == 'finished':
                finished_hook_called.add(status['filename'])

        ydl.add_progress_hook(_hook)
        expect_warnings(ydl, test_case.get('expected_warnings', []))

        def get_tc_filename(tc):
            # Output filename ydl derives from the sub-case's expected info_dict
            return ydl.prepare_filename(tc.get('info_dict', {}))

        # Stays None if extraction never succeeds; checked in `finally` below
        res_dict = None

        def try_rm_tcs_files(tcs=None):
            # Remove the output file, its '.part' file and its '.info.json'
            # for each given sub-case (defaults to all sub-cases of this test).
            if tcs is None:
                tcs = test_cases
            for tc in tcs:
                tc_filename = get_tc_filename(tc)
                try_rm(tc_filename)
                try_rm(tc_filename + '.part')
                try_rm(os.path.splitext(tc_filename)[0] + '.info.json')

        # Start from a clean state
        try_rm_tcs_files()
        try:
            try_num = 1
            # Retry loop: left via `break` on success, `raise` on a
            # non-network error, or `return` once RETRIES attempts failed.
            while True:
                try:
                    # We're not using .download here since that is just a shim
                    # for outside error handling, and returns the exit code
                    # instead of the result dict.
                    res_dict = ydl.extract_info(
                        test_case['url'],
                        force_generic_extractor=params.get(
                            'force_generic_extractor', False))
                except (DownloadError, ExtractorError) as err:
                    # Check if the exception is not a network related one
                    # NOTE(review): compat_HTTPError is not one of the types
                    # in the tuple below, so (unless it aliases one of them)
                    # the first operand is already true for it and the 503
                    # clause appears redundant as written - confirm intent.
                    if not err.exc_info[0] in (
                            compat_urllib_error.URLError, socket.timeout,
                            UnavailableVideoError,
                            compat_http_client.BadStatusLine) or (
                                err.exc_info[0] == compat_HTTPError
                                and err.exc_info[1].code == 503):
                        raise

                    # Network error on the last allowed attempt: give up
                    # with a warning instead of failing the test.
                    if try_num == RETRIES:
                        report_warning(
                            '%s failed due to network errors, skipping...' %
                            tname)
                        return

                    print(
                        'Retrying: {0} failed tries\n\n##########\n\n'.format(
                            try_num))

                    try_num += 1
                else:
                    break

            # Playlist-level checks on the extracted result
            if is_playlist:
                self.assertTrue(
                    res_dict['_type'] in ['playlist', 'multi_video'])
                self.assertTrue('entries' in res_dict)
                expect_info_dict(self, res_dict,
                                 test_case.get('info_dict', {}))

            if 'playlist_mincount' in test_case:
                assertGreaterEqual(
                    self, len(res_dict['entries']),
                    test_case['playlist_mincount'],
                    'Expected at least %d in playlist %s, but got only %d' %
                    (test_case['playlist_mincount'], test_case['url'],
                     len(res_dict['entries'])))
            if 'playlist_count' in test_case:
                self.assertEqual(
                    len(res_dict['entries']), test_case['playlist_count'],
                    'Expected %d entries in playlist %s, but got %d.' % (
                        test_case['playlist_count'],
                        test_case['url'],
                        len(res_dict['entries']),
                    ))
            if 'playlist_duration_sum' in test_case:
                got_duration = sum(e['duration'] for e in res_dict['entries'])
                self.assertEqual(test_case['playlist_duration_sum'],
                                 got_duration)

            # Generalize both playlists and single videos to unified format for
            # simplicity
            if 'entries' not in res_dict:
                res_dict['entries'] = [res_dict]

            # Sub-cases are matched to result entries by position
            for tc_num, tc in enumerate(test_cases):
                tc_res_dict = res_dict['entries'][tc_num]
                # First, check test cases' data against extracted data alone
                expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
                # Now, check downloaded file consistency
                tc_filename = get_tc_filename(tc)
                # Reads the raw test-case params here, not the merged `params`
                if not test_case.get('params', {}).get('skip_download', False):
                    self.assertTrue(os.path.exists(tc_filename),
                                    msg='Missing file ' + tc_filename)
                    self.assertTrue(tc_filename in finished_hook_called)
                    # An explicit file_minsize of None disables the size check
                    expected_minsize = tc.get('file_minsize', 10000)
                    if expected_minsize is not None:
                        # With the 'test' param set, require at least 10000
                        if params.get('test'):
                            expected_minsize = max(expected_minsize, 10000)
                        got_fsize = os.path.getsize(tc_filename)
                        assertGreaterEqual(
                            self, got_fsize, expected_minsize,
                            'Expected %s to be at least %s, but it\'s only %s '
                            % (tc_filename, format_bytes(expected_minsize),
                               format_bytes(got_fsize)))
                    if 'md5' in tc:
                        md5_for_file = _file_md5(tc_filename)
                        self.assertEqual(tc['md5'], md5_for_file)
                # Finally, check test cases' data again but this time against
                # extracted data from info JSON file written during processing
                info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
                self.assertTrue(os.path.exists(info_json_fn),
                                'Missing info file %s' % info_json_fn)
                with io.open(info_json_fn, encoding='utf-8') as infof:
                    info_dict = json.load(infof)
                expect_info_dict(self, info_dict, tc.get('info_dict', {}))
        finally:
            # Always clean up, whether the test passed, failed or returned early
            try_rm_tcs_files()
            if is_playlist and res_dict is not None and res_dict.get(
                    'entries'):
                # Remove all other files that may have been extracted if the
                # extractor returns full results even with extract_flat
                res_tcs = [{'info_dict': e} for e in res_dict['entries']]
                try_rm_tcs_files(res_tcs)
    def urlretrieve(self, url: str, filename: str, context: ssl.SSLContext, reporthook=None, cookies_path=None):
        """
        Download ``url`` into ``filename`` and return ``(filename, headers)``.

        Adapted from CPython's ``urllib.request.urlretrieve``
        (https://github.com/python/cpython/blob/
        21bee0bd71e1ad270274499f9f58194ebb52e236/Lib/urllib/request.py#L229)
        so that an SSL context can be handed to ``urlopen``.

        If ``cookies_path`` is given and the file exists, the cookies stored
        there are attached to the request. ``reporthook``, when set, is called
        as ``reporthook(block_number, block_size, total_size)`` once before
        the first read and again after every block written; ``total_size`` is
        -1 when the response has no Content-Length header.

        For a ``file://`` URL without ``filename`` nothing is copied and the
        normalised local path is returned together with the headers.

        Raises RuntimeError if ``filename`` is empty for a non-file URL,
        ConnectionError if reading from the response fails with a socket
        error, and ContentTooShortError if fewer bytes arrived than the
        Content-Length header announced.
        """
        started_at = time.time()
        parsed_url = urlparse.urlparse(url)

        request = urllib.request.Request(url=url, headers=RequestHelper.stdHeader)
        if cookies_path is not None:
            jar = MozillaCookieJar(cookies_path)
            # Cookies are only attached when the cookie file actually exists
            if os.path.isfile(cookies_path):
                jar.load(ignore_discard=True, ignore_expires=True)
                jar.add_cookie_header(request)

        with contextlib.closing(urllib.request.urlopen(request, context=context, timeout=60)) as response:
            headers = response.info()

            if not filename:
                # Local files need no copy unless a target was requested
                if parsed_url.scheme == 'file':
                    return os.path.normpath(parsed_url.path), headers
                raise RuntimeError('No filename specified!')

            with open(filename, 'wb') as target:
                result = filename, headers

                bytes_received = 0
                # Read 8 KiB per iteration
                chunk_size = 1024 * 8
                chunk_count = 0
                # -1 signals an unknown total size
                expected_size = int(headers.get('Content-Length', -1))

                if reporthook:
                    reporthook(chunk_count, chunk_size, expected_size)

                while True:
                    try:
                        chunk = response.read(chunk_size)
                    except (socket.timeout, socket.error) as error:
                        raise ConnectionError("Connection error: %s" % str(error)) from None

                    if not chunk:
                        break

                    bytes_received += len(chunk)
                    target.write(chunk)
                    chunk_count += 1
                    if reporthook:
                        reporthook(chunk_count, chunk_size, expected_size)

        # Both the response and the target file are closed at this point
        if expected_size >= 0 and bytes_received < expected_size:
            raise ContentTooShortError(
                'retrieval incomplete: got only %i out of %i bytes' % (bytes_received, expected_size), result
            )

        finished_at = time.time()
        logging.debug(
            'T%s - Download of %s finished in %s',
            self.thread_id,
            format_bytes(bytes_received),
            self.format_seconds(finished_at - started_at),
        )

        return result
 def format_speed(speed):
     """Return ``speed`` as a right-aligned, 10-character wide '<size>/s' string.

     An unknown speed (``None``) is rendered as the placeholder '---b/s'.
     """
     text = '---b/s' if speed is None else '%s/s' % format_bytes(speed)
     return '%10s' % text
def format_size(bytes):
    """Return the size as '<human readable> (<exact count> bytes)'."""
    human_readable = format_bytes(bytes)
    return '%s (%d bytes)' % (human_readable, bytes)
    def _get_status_message(self) -> str:
        """
        Builds the multi-line progress report for all download threads.
        Each thread contributes one line with its id, the progress of its
        current file in percent and the URL (or external downloader) it is
        working on. A final line shows the overall progress: downloaded
        size versus total size, finished files and the current speed.

        Besides building the text this also folds any pending
        'extra_totalsize' of a thread into the total to download and
        updates the timestamp / byte counter used for the next speed
        calculation.
        @return: A status message string
        """

        # Terminal width, used to keep every line within a single row
        terminal = shutil.get_terminal_size()

        # Move the cursor up one row per thread and back to column 0, so the
        # previous report gets overwritten
        cursor_reset = f'\033[{len(self.threads)}A\r'

        thread_lines = []
        downloaded_sum = 0
        for worker in self.threads:
            tid = worker.thread_id
            report = self.thread_report[tid]

            percent = report['percentage']
            label = report['current_url']
            if report['external_dl'] is not None:
                label = 'ExtDL: ' + report['external_dl']

            if not worker.is_alive():
                percent, label = 100, 'Finished!'

            # Shorten labels that would not fit next to the line prefix
            if len(label) + 13 > terminal.columns:
                label = label[0 : terminal.columns - 15] + '..'

            thread_lines.append('\033[KT%2i: %3i%% - %s\n' % (tid, percent, label))

            downloaded_sum += report['total']

            # Size discovered later is added to the total exactly once;
            # -1 marks it as already accounted for
            pending_extra = report['extra_totalsize']
            if pending_extra is not None and pending_extra != -1:
                self.total_to_download += pending_extra
                report['extra_totalsize'] = -1

        if self.total_to_download != 0:
            overall_percent = int(downloaded_sum * 100 / self.total_to_download)
        else:
            overall_percent = 100

        # Overall line: percentage, downloaded size / total size
        summary = 'Total: %3s%% %12s/%12s' % (
            overall_percent,
            format_bytes(downloaded_sum),
            format_bytes(self.total_to_download),
        )
        summary += ' | Files: %5s/%5s' % (len(self.report['success']), self.total_files)

        # Speed is derived from the bytes gained since the previous report
        bytes_since_last = downloaded_sum - self.last_threads_total_downloaded
        speed = self.calc_speed(self.last_status_timestamp, time.time(), bytes_since_last)
        summary += ' | ' + self.format_speed(speed)

        # Slicing is a no-op when the line already fits into the terminal
        summary = '\033[K' + summary[0 : terminal.columns]

        self.last_status_timestamp = time.time()
        self.last_threads_total_downloaded = downloaded_sum

        return cursor_reset + ''.join(thread_lines) + summary