Пример #1
0
def get_real_download_url(url, preferred_fmt_ids=None):
    """Resolve a YouTube URL to a direct stream URL.

    Args:
        url: URL to resolve. It is returned unchanged when
            get_youtube_id() returns None for it.
        preferred_fmt_ids: Iterable of format IDs (itags) in order of
            preference. When empty or None, the ID list stored in
            formats_dict[22] (MP4 720p) is used.

    Returns:
        The stream URL of the first preferred format that is available,
        or the URL of the highest-numbered available format when none of
        the preferred formats is offered.

    Raises:
        YouTubeError: If the response carries an error ``reason``, or if
            no usable stream entries can be extracted from it.
    """
    if not preferred_fmt_ids:
        preferred_fmt_ids, _, _ = formats_dict[22]  # MP4 720p

    vid = get_youtube_id(url)
    if vid is None:
        return url

    page = None
    url = 'http://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid

    # Follow Location headers by hand until a response body is obtained
    while page is None:
        req = util.http_request(url, method='GET')
        if 'location' in req.msg:
            url = req.msg['location']
        else:
            page = req.read()

    page = page.decode()

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)

    def find_urls(page):
        """Yield (itag, url) pairs found in the get_video_info response."""
        r4 = re.search('url_encoded_fmt_stream_map=([^&]+)', page)
        if r4 is None:
            error_info = parse_qs(page)
            if 'reason' in error_info:
                error_message = util.remove_html_tags(error_info['reason'][0])
                raise YouTubeError('Cannot download video: %s' % error_message)
            # Neither a stream map nor a reason: yield nothing so the caller
            # raises its own YouTubeError instead of a bare KeyError.
            return

        fmt_url_map = urllib.parse.unquote(r4.group(1))
        for fmt_url_encoded in fmt_url_map.split(','):
            video_info = parse_qs(fmt_url_encoded)
            # Skip malformed entries rather than failing with KeyError
            if 'itag' in video_info and 'url' in video_info:
                yield int(video_info['itag'][0]), video_info['url'][0]

    fmt_id_url_map = sorted(find_urls(page), reverse=True)

    if not fmt_id_url_map:
        raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)

    # Default to the highest fmt_id if we don't find a match below
    _, url = fmt_id_url_map[0]

    fmt_id_url_map = dict(fmt_id_url_map)

    for fmt_id in preferred_fmt_ids:
        fmt_id = int(fmt_id)
        if fmt_id not in fmt_id_url_map:
            continue

        fmt_info = formats_dict.get(fmt_id)
        if fmt_info is not None:
            _, _, description = fmt_info
        else:
            description = 'Unknown'

        logger.info('Found YouTube format: %s (fmt_id=%d)',
                    description, fmt_id)
        url = fmt_id_url_map[fmt_id]
        break

    return url
Пример #2
0
def get_real_download_url(url, preferred_fmt_id=18):
    """Resolve a YouTube URL to a ``get_video`` download URL.

    The watch page for the video is fetched (following Location headers),
    a format ID is chosen and combined with the page's "t" token.

    Args:
        url: URL to resolve. It is returned unchanged when
            get_youtube_id() returns None for it.
        preferred_fmt_id: Format ID at which the search through
            supported_formats starts accepting available formats. It is
            only consulted when neither gpodder.ui.diablo nor
            gpodder.ui.fremantle is set.

    Returns:
        The ``get_video`` URL when a "t" token is found in the page,
        otherwise the watch-page URL that was last requested.
    """
    vid = get_youtube_id(url)
    if vid is None:
        return url

    url = 'http://www.youtube.com/watch?v=' + vid

    # Keep requesting until a response without a Location header arrives
    while True:
        response = util.http_request(url, method='GET')
        if 'location' not in response.msg:
            page = response.read()
            break
        url = response.msg['location']

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)
    fmt_map_match = re.search('.*"fmt_map"\:\s+"([^"]+)".*', page)
    formats_available = []
    if fmt_map_match:
        formats_available = urllib.unquote(fmt_map_match.group(1)).split(',')

    # As a fallback, use fmt_id 18 (seems to be always available); on
    # Fremantle this is also the fixed choice.
    fmt_id = 18
    if gpodder.ui.diablo:
        # Hardcode fmt_id 5 for Maemo (for performance reasons) - we could
        # also use 13 and 17 here, but the quality is very low then. There
        # seems to also be a 6, but I could not find a video with that yet.
        fmt_id = 5
    elif not gpodder.ui.fremantle:
        # Becomes True once the walk through supported_formats has reached
        # the preferred format; from then on any available format is taken.
        reached_preferred = False

        for candidate_id, wanted, description in supported_formats:
            reached_preferred = reached_preferred or candidate_id == preferred_fmt_id

            if wanted in formats_available and reached_preferred:
                log('Found available YouTube format: %s (fmt_id=%d)',
                        description, candidate_id)
                fmt_id = candidate_id
                break

    token_match = re.search('.*"t"\:\s+"([^"]+)".*', page)
    if not token_match:
        return url

    resolved = 'http://www.youtube.com/get_video?video_id=' + vid + '&t=' + token_match.group(1) + '&fmt=%d' % fmt_id
    log('YouTube link resolved: %s => %s', url, resolved)
    return resolved
Пример #3
0
def youtube_resolve_download_url(episode, config):
    """Resolve the direct stream URL for an episode hosted on YouTube.

    Args:
        episode: Object whose ``url`` attribute holds the episode URL.
        config: Configuration object; ``config.plugins.youtube`` is handed
            to get_fmt_ids() to obtain the preferred format IDs. When that
            yields nothing, the ID list in formats_dict[22] (MP4 720p) is
            used.

    Returns:
        None when get_youtube_id() returns None for the episode URL.
        Otherwise the stream URL of the first preferred format that is
        available, or the URL of the highest-numbered available format
        when none of the preferred formats is offered.

    Raises:
        YouTubeError: If the response carries an error ``reason``, or if
            no usable stream entries can be extracted from it.
    """
    url = episode.url
    preferred_fmt_ids = get_fmt_ids(config.plugins.youtube)

    if not preferred_fmt_ids:
        preferred_fmt_ids, _, _ = formats_dict[22]  # MP4 720p

    vid = get_youtube_id(url)
    if vid is None:
        return None

    page = None
    url = 'http://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid

    # Follow Location headers by hand until a response body is obtained
    while page is None:
        req = util.http_request(url, method='GET')
        if 'location' in req.msg:
            url = req.msg['location']
        else:
            page = req.read().decode('utf-8')

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)
    def find_urls(page):
        """Yield (itag, url) pairs found in the get_video_info response."""
        r4 = re.search('.*&url_encoded_fmt_stream_map=([^&]+)&.*', page)
        if r4 is None:
            error_info = urllib.parse.parse_qs(page)
            if 'reason' in error_info:
                error_message = util.remove_html_tags(error_info['reason'][0])
                raise YouTubeError('Cannot download video: %s' % error_message)
            # Neither a stream map nor a reason: yield nothing so the caller
            # raises its own YouTubeError instead of a bare KeyError.
            return

        fmt_url_map = urllib.parse.unquote(r4.group(1))
        for fmt_url_encoded in fmt_url_map.split(','):
            video_info = urllib.parse.parse_qs(fmt_url_encoded)
            # Skip malformed entries rather than failing with KeyError
            if 'itag' in video_info and 'url' in video_info:
                yield (int(video_info['itag'][0]), video_info['url'][0])

    fmt_id_url_map = sorted(find_urls(page), reverse=True)

    if not fmt_id_url_map:
        raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)

    # Default to the highest fmt_id if we don't find a match below
    _, url = fmt_id_url_map[0]

    fmt_id_url_map = dict(fmt_id_url_map)

    for fmt_id in preferred_fmt_ids:
        fmt_id = int(fmt_id)
        if fmt_id not in fmt_id_url_map:
            continue

        fmt_info = formats_dict.get(fmt_id)
        if fmt_info is not None:
            _, _, description = fmt_info
        else:
            description = 'Unknown'

        logger.info('Found YouTube format: %s (fmt_id=%d)', description, fmt_id)
        return fmt_id_url_map[fmt_id]

    # No preferred format is offered: hand back the default chosen above
    # (previously the function fell off the end and returned None here).
    return url
Пример #4
0
def get_real_download_url(url, preferred_fmt_ids=None):
    """Resolve a YouTube URL to a direct stream URL.

    Args:
        url: URL to resolve. It is returned unchanged when
            get_youtube_id() returns None for it.
        preferred_fmt_ids: Iterable of format IDs (itags) in order of
            preference. When empty or None, the ID list stored in
            formats_dict[22] (MP4 720p) is used. When gpodder.ui.harmattan
            is set, the preference is replaced by ``[18]``.

    Returns:
        The stream URL of the first preferred format that is available,
        or the URL of the highest-numbered available format when none of
        the preferred formats is offered.

    Raises:
        YouTubeError: If the response carries an error ``reason``, or if
            no usable stream entries can be extracted from it.
    """
    if not preferred_fmt_ids:
        preferred_fmt_ids, _, _ = formats_dict[22]  # MP4 720p

    vid = get_youtube_id(url)
    if vid is None:
        return url

    page = None
    url = 'http://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid

    # Follow Location headers by hand until a response body is obtained
    while page is None:
        req = util.http_request(url, method='GET')
        if 'location' in req.msg:
            url = req.msg['location']
        else:
            page = req.read()

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)
    def find_urls(page):
        """Yield (itag, url) pairs found in the get_video_info response."""
        r4 = re.search('url_encoded_fmt_stream_map=([^&]+)', page)
        if r4 is None:
            error_info = parse_qs(page)
            if 'reason' in error_info:
                error_message = util.remove_html_tags(error_info['reason'][0])
                raise YouTubeError('Cannot download video: %s' % error_message)
            # Neither a stream map nor a reason: yield nothing so the caller
            # raises its own YouTubeError instead of a bare KeyError.
            return

        fmt_url_map = urllib.unquote(r4.group(1))
        for fmt_url_encoded in fmt_url_map.split(','):
            video_info = parse_qs(fmt_url_encoded)
            # Skip malformed entries rather than failing with KeyError
            if 'itag' in video_info and 'url' in video_info:
                yield int(video_info['itag'][0]), video_info['url'][0]

    fmt_id_url_map = sorted(find_urls(page), reverse=True)

    if not fmt_id_url_map:
        raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)

    # Default to the highest fmt_id if we don't find a match below
    _, url = fmt_id_url_map[0]

    fmt_id_url_map = dict(fmt_id_url_map)

    # This provides good quality video, seems to be always available
    # and is playable fluently in Media Player
    if gpodder.ui.harmattan:
        preferred_fmt_ids = [18]

    for fmt_id in preferred_fmt_ids:
        fmt_id = int(fmt_id)
        if fmt_id not in fmt_id_url_map:
            continue

        fmt_info = formats_dict.get(fmt_id)
        if fmt_info is not None:
            _, _, description = fmt_info
        else:
            description = 'Unknown'

        logger.info('Found YouTube format: %s (fmt_id=%d)',
                description, fmt_id)
        url = fmt_id_url_map[fmt_id]
        break

    return url
Пример #5
0
def get_real_download_url(url, preferred_fmt_id=18):
    """Resolve a YouTube URL to a direct stream URL via the watch page.

    Args:
        url: URL to resolve. It is returned unchanged when
            get_youtube_id() returns None for it.
        preferred_fmt_id: Preferred format ID. With gpodder.ui.diablo set
            it is ignored; with gpodder.ui.fremantle set only the value 5
            is honoured (anything else selects 18); otherwise it marks
            where the search through supported_formats starts accepting
            available formats.

    Returns:
        The stream URL for the chosen format ID, or the URL of the
        highest-numbered available format when the chosen one is missing.

    Raises:
        YouTubeError: If no stream map entries are found in the page.
    """
    vid = get_youtube_id(url)
    if vid is None:
        return url

    url = 'http://www.youtube.com/watch?v=' + vid

    # Keep requesting until a response without a Location header arrives
    while True:
        response = util.http_request(url, method='GET')
        if 'location' not in response.msg:
            page = response.read()
            break
        url = response.msg['location']

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)
    def find_urls(page):
        """Yield (itag, url) pairs from the page's embedded stream map."""
        stream_map = re.search('.*"url_encoded_fmt_stream_map"\:\s+"([^"]+)".*', page)
        if stream_map is None:
            return

        for encoded_entry in stream_map.group(1).split(','):
            # Fields of one entry are separated by a literal "\u0026"
            video_info = dict(
                [urllib.unquote(part) for part in field.split('=', 1)]
                for field in encoded_entry.split('\\u0026'))

            yield int(video_info['itag']), video_info['url']

    candidates = sorted(find_urls(page), reverse=True)
    if not candidates:
        raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)

    # Default to the highest fmt_id if we don't find a match below
    default_url = candidates[0][1]
    url_by_fmt_id = dict(candidates)

    if gpodder.ui.diablo:
        # Hardcode fmt_id 5 for Maemo (for performance reasons) - we could
        # also use 13 and 17 here, but the quality is very low then. There
        # seems to also be a 6, but I could not find a video with that yet.
        fmt_id = 5
    elif gpodder.ui.fremantle:
        # This provides good quality video, seems to be always available
        # and is playable fluently in Media Player
        fmt_id = 5 if preferred_fmt_id == 5 else 18
    else:
        # As a fallback, use fmt_id 18 (seems to be always available)
        fmt_id = 18

        # Becomes True once the walk through supported_formats has reached
        # the preferred format; from then on any available format is taken.
        reached_preferred = False

        for candidate_id, wanted, description in supported_formats:
            reached_preferred = reached_preferred or candidate_id == preferred_fmt_id

            if candidate_id in url_by_fmt_id and reached_preferred:
                log('Found available YouTube format: %s (fmt_id=%d)',
                        description, candidate_id)
                fmt_id = candidate_id
                break

    chosen_url = url_by_fmt_id.get(fmt_id, None)
    return default_url if chosen_url is None else chosen_url
Пример #6
0
        def find_urls(page):
            """Yield ``(itag, [url, duration])`` pairs for one video.

            ``page`` is parsed as a query string. Sources are tried in this
            order: a top-level ``reason`` (error), the JSON in
            ``player_response`` (HLS manifest for live streams, otherwise
            ``streamingData``), and finally the legacy
            ``url_encoded_fmt_stream_map`` field.

            ``duration`` is the entry's ``approxDurationMs`` value when it
            comes from ``streamingData`` and None otherwise.

            Reads the free variable ``allow_partial``, which is not defined
            in this function: live streams are only resolved through their
            HLS manifest when it is truthy.

            Raises:
                YouTubeError: If an error reason is reported, or the video
                    is a live stream that cannot be resolved here.
            """
            # streamingData is preferable to url_encoded_fmt_stream_map
            # streamingData.formats are the same as url_encoded_fmt_stream_map
            # streamingData.adaptiveFormats are audio-only and video-only formats
            x = parse_qs(page)
            error_message = None

            if 'reason' in x:
                error_message = util.remove_html_tags(x['reason'][0])
            elif 'player_response' in x:
                player_response = json.loads(x['player_response'][0])
                playabilityStatus = player_response['playabilityStatus']

                if 'reason' in playabilityStatus:
                    error_message = util.remove_html_tags(
                        playabilityStatus['reason'])
                elif 'liveStreamability' in playabilityStatus \
                        and not playabilityStatus['liveStreamability'].get('liveStreamabilityRenderer', {}).get('displayEndscreen', False):
                    # playabilityStatus.liveStreamability -- video is or was a live stream
                    # playabilityStatus.liveStreamability.liveStreamabilityRenderer.displayEndscreen -- video has ended if present

                    if allow_partial and 'streamingData' in player_response \
                            and 'hlsManifestUrl' in player_response['streamingData']:
                        manifest = None
                        manifest_url = player_response['streamingData'][
                            'hlsManifestUrl']
                        # Follow Location headers by hand until a body arrives
                        while manifest is None:
                            req = util.http_request(manifest_url, method='GET')
                            if 'location' in req.msg:
                                manifest_url = req.msg['location']
                            else:
                                manifest = req.read()

                        itag_re = re.compile(r'/itag/([0-9]+)/')
                        for line in manifest.decode().splitlines():
                            # Blank lines and '#' lines carry no URI; testing
                            # line[0] directly would raise IndexError on a
                            # blank line.
                            if not line or line.startswith('#'):
                                continue
                            itag_match = itag_re.search(line)
                            if itag_match is None:
                                # A URI without an /itag/N/ segment cannot be
                                # mapped to a format ID; skip it.
                                continue
                            yield int(itag_match.group(1)), [line, None]
                        return

                    error_message = 'live stream'
                elif 'streamingData' in player_response:
                    # DRM videos store url inside a cipher key - not supported
                    streaming_data = player_response['streamingData']
                    for key in ('formats', 'adaptiveFormats'):
                        for f in streaming_data.get(key, ()):
                            if 'url' in f:
                                yield int(f['itag']), [
                                    f['url'],
                                    f.get('approxDurationMs')
                                ]
                    return

            if error_message is not None:
                raise YouTubeError('Cannot download video: %s' % error_message)

            r4 = re.search(r'url_encoded_fmt_stream_map=([^&]+)', page)
            if r4 is not None:
                fmt_url_map = urllib.parse.unquote(r4.group(1))
                for fmt_url_encoded in fmt_url_map.split(','):
                    video_info = parse_qs(fmt_url_encoded)
                    yield int(
                        video_info['itag'][0]), [video_info['url'][0], None]
Пример #7
0
def get_real_download_url(url, allow_partial, preferred_fmt_ids=None):
    """Resolve a YouTube URL to a direct stream URL and its duration.

    Args:
        url: URL to resolve. When get_youtube_id() returns None for it,
            ``(url, None)`` is returned without any request being made.
        allow_partial: When truthy, a live stream that has not ended is
            resolved through its HLS manifest instead of being rejected.
        preferred_fmt_ids: Iterable of format IDs (itags) in order of
            preference. Entries containing ``+`` (e.g. ``136+140``) are
            skipped. When empty or None, the ID list stored in
            formats_dict[22] (MP4 720p) is used.

    Returns:
        A ``(url, duration)`` tuple. ``duration`` is the selected entry's
        ``approxDurationMs`` value when it comes from ``streamingData`` and
        None otherwise.

    Raises:
        YouTubeError: If an error reason is reported, the video is a live
            stream that cannot be resolved, no formats are found (with a
            dedicated message when cipher markers are present), or none of
            the preferred formats is available.
    """
    if not preferred_fmt_ids:
        preferred_fmt_ids, _, _ = formats_dict[22]  # MP4 720p

    duration = None

    vid = get_youtube_id(url)
    if vid is None:
        return url, duration

    page = None
    url = 'https://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid

    # Follow Location headers by hand until a response body is obtained
    while page is None:
        req = util.http_request(url, method='GET')
        if 'location' in req.msg:
            url = req.msg['location']
        else:
            page = req.read()

    page = page.decode()

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)

    def find_urls(page):
        """Yield ``(itag, [url, duration])`` pairs found in ``page``."""
        # streamingData is preferable to url_encoded_fmt_stream_map
        # streamingData.formats are the same as url_encoded_fmt_stream_map
        # streamingData.adaptiveFormats are audio-only and video-only formats
        x = parse_qs(page)
        error_message = None

        if 'reason' in x:
            error_message = util.remove_html_tags(x['reason'][0])
        elif 'player_response' in x:
            player_response = json.loads(x['player_response'][0])
            playabilityStatus = player_response['playabilityStatus']

            if 'reason' in playabilityStatus:
                error_message = util.remove_html_tags(
                    playabilityStatus['reason'])
            elif 'liveStreamability' in playabilityStatus \
                    and not playabilityStatus['liveStreamability'].get('liveStreamabilityRenderer', {}).get('displayEndscreen', False):
                # playabilityStatus.liveStreamability -- video is or was a live stream
                # playabilityStatus.liveStreamability.liveStreamabilityRenderer.displayEndscreen -- video has ended if present

                if allow_partial and 'streamingData' in player_response \
                        and 'hlsManifestUrl' in player_response['streamingData']:
                    manifest = None
                    manifest_url = player_response['streamingData'][
                        'hlsManifestUrl']
                    # Follow Location headers by hand until a body arrives
                    while manifest is None:
                        req = util.http_request(manifest_url, method='GET')
                        if 'location' in req.msg:
                            manifest_url = req.msg['location']
                        else:
                            manifest = req.read()

                    itag_re = re.compile(r'/itag/([0-9]+)/')
                    for line in manifest.decode().splitlines():
                        # Blank lines and '#' lines carry no URI; testing
                        # line[0] directly would raise IndexError on a
                        # blank line.
                        if not line or line.startswith('#'):
                            continue
                        itag_match = itag_re.search(line)
                        if itag_match is None:
                            # A URI without an /itag/N/ segment cannot be
                            # mapped to a format ID; skip it.
                            continue
                        yield int(itag_match.group(1)), [line, None]
                    return

                error_message = 'live stream'
            elif 'streamingData' in player_response:
                # DRM videos store url inside a cipher key - not supported
                streaming_data = player_response['streamingData']
                for key in ('formats', 'adaptiveFormats'):
                    for f in streaming_data.get(key, ()):
                        if 'url' in f:
                            yield int(f['itag']), [
                                f['url'],
                                f.get('approxDurationMs')
                            ]
                return

        if error_message is not None:
            raise YouTubeError('Cannot download video: %s' % error_message)

        r4 = re.search(r'url_encoded_fmt_stream_map=([^&]+)', page)
        if r4 is not None:
            fmt_url_map = urllib.parse.unquote(r4.group(1))
            for fmt_url_encoded in fmt_url_map.split(','):
                video_info = parse_qs(fmt_url_encoded)
                yield int(
                    video_info['itag'][0]), [video_info['url'][0], None]

    fmt_id_url_map = sorted(find_urls(page), reverse=True)

    if not fmt_id_url_map:
        # URL-encoded '"cipher":' / '"signatureCipher":' keys in the raw page
        drm = re.search(r'%22(cipher|signatureCipher)%22%3A', page)
        if drm is not None:
            raise YouTubeError(
                'Unsupported DRM content found for video ID "%s"' % vid)
        raise YouTubeError('No formats found for video ID "%s"' % vid)

    fmt_id_url_map = dict(fmt_id_url_map)

    for fmt_id in preferred_fmt_ids:
        if '+' in str(fmt_id):
            # skip formats that contain a + (136+140)
            continue
        fmt_id = int(fmt_id)
        if fmt_id not in fmt_id_url_map:
            continue

        fmt_info = formats_dict.get(fmt_id) or hls_formats_dict.get(fmt_id)
        if fmt_info is not None:
            _, _, description = fmt_info
        else:
            description = 'Unknown'

        logger.info('Found YouTube format: %s (fmt_id=%d)',
                    description, fmt_id)
        url, duration = fmt_id_url_map[fmt_id]
        break
    else:
        # The loop ended without a break: no preferred format is offered
        raise YouTubeError('No preferred formats found for video ID "%s"' %
                           vid)

    return url, duration
Пример #8
0
def get_real_download_url(url, preferred_fmt_id=None):
    """Resolve a YouTube URL to a direct stream URL.

    Args:
        url: URL to resolve. It is returned unchanged when
            get_youtube_id() returns None for it.
        preferred_fmt_id: Preferred format ID; None selects 18. With
            gpodder.ui.harmattan set only the value 5 is honoured
            (anything else selects 18); otherwise it marks where the
            search through supported_formats starts accepting available
            formats.

    Returns:
        The stream URL for the chosen format ID, or the URL of the
        highest-numbered available format when the chosen one is missing.

    Raises:
        YouTubeError: If the response carries an error ``reason``, or if
            no usable stream entries can be extracted from it.
    """
    # Default fmt_id when none preferred
    if preferred_fmt_id is None:
        preferred_fmt_id = 18

    vid = get_youtube_id(url)
    if vid is None:
        return url

    page = None
    url = 'http://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid

    # Follow Location headers by hand until a response body is obtained
    while page is None:
        req = util.http_request(url, method='GET')
        if 'location' in req.msg:
            url = req.msg['location']
        else:
            page = req.read()

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)
    def find_urls(page):
        """Yield (itag, url) pairs found in the get_video_info response."""
        r4 = re.search('.*&url_encoded_fmt_stream_map=([^&]+)&.*', page)
        if r4 is None:
            error_info = parse_qs(page)
            if 'reason' in error_info:
                error_message = util.remove_html_tags(error_info['reason'][0])
                raise YouTubeError('Cannot download video: %s' % error_message)
            # Neither a stream map nor a reason: yield nothing so the caller
            # raises its own YouTubeError instead of a bare KeyError.
            return

        fmt_url_map = urllib.unquote(r4.group(1))
        for fmt_url_encoded in fmt_url_map.split(','):
            video_info = parse_qs(fmt_url_encoded)
            # Skip malformed entries rather than failing with KeyError
            if 'itag' not in video_info or 'url' not in video_info:
                continue

            stream_url = video_info['url'][0]
            # Append the signature only when the entry provides one; an
            # entry without 'sig' previously raised KeyError.
            if 'sig' in video_info:
                stream_url = stream_url + "&signature=" + video_info['sig'][0]
            yield int(video_info['itag'][0]), stream_url

    fmt_id_url_map = sorted(find_urls(page), reverse=True)
    if not fmt_id_url_map:
        raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)

    # Default to the highest fmt_id if we don't find a match below
    _, default_url = fmt_id_url_map[0]

    fmt_id_url_map = dict(fmt_id_url_map)

    if gpodder.ui.harmattan:
        # This provides good quality video, seems to be always available
        # and is playable fluently in Media Player
        if preferred_fmt_id == 5:
            fmt_id = 5
        else:
            fmt_id = 18
    else:
        # As a fallback, use fmt_id 18 (seems to be always available)
        fmt_id = 18

        # This will be set to True if the search below has already "seen"
        # our preferred format, but has not yet found a suitable available
        # format for the given video.
        seen_preferred = False

        for candidate_id, wanted, description in supported_formats:
            # If we see our preferred format, accept formats below
            if candidate_id == preferred_fmt_id:
                seen_preferred = True

            # If the format is available and preferred (or lower),
            # use the given format for our fmt_id
            if candidate_id in fmt_id_url_map and seen_preferred:
                logger.info('Found YouTube format: %s (fmt_id=%d)',
                        description, candidate_id)
                fmt_id = candidate_id
                break

    url = fmt_id_url_map.get(fmt_id, None)
    if url is None:
        url = default_url

    return url