Python YouTubeResource.get_resource_info示例，pressurecooker.youtube.YouTubeResource.get_resource_info Python示例

示例#1

0

显示文件

    def _get_youtube_info(self, use_proxy=True, use_cache=True, options=None):
        """
        Return the YouTube info for `self.url`, backed by a JSON cache file.

        Args:
            use_proxy (bool): forwarded to `YouTubeResource` as `useproxy`.
            use_cache (bool): when True and `self.cache_path` exists, the info
                is loaded from that file instead of being requested again.
            options (dict): forwarded to `get_resource_info`.

        Returns:
            The info loaded from the cache or returned by `get_resource_info`
            (also written to `self.cache_path` when truthy), or None when the
            resource could not be created or the extraction raised.
        """
        youtube_info = None
        # 1. Try to get from cache if allowed:
        if os.path.exists(self.cache_path) and use_cache:
            LOGGER.info("==> [%s] Retrieving cached information...", self.__str__())
            with open(self.cache_path) as cache_file:
                youtube_info = json.load(cache_file)
        # 2. Fetch info from youtube_dl (also when the cached value was empty)
        if not youtube_info:
            LOGGER.info("==> [%s] Requesting info from youtube...", self.__str__())
            os.makedirs(self.cache_dir, exist_ok=True)
            try:
                youtube_resource = YouTubeResource(self.url, useproxy=use_proxy)
            except youtube_dl.utils.ExtractorError as e:
                if "unavailable" in str(e):
                    LOGGER.error("==> [%s] Resource unavailable for URL: %s", self.__str__(), self.url)
                else:
                    # Any other extractor failure also leaves us without a
                    # resource; report it instead of failing later on an
                    # unbound local variable.
                    LOGGER.error("==> [%s] Failed to get YouTube info: %s", self.__str__(), e)
                return None

            if youtube_resource:
                try:
                    youtube_info = youtube_resource.get_resource_info(options)
                    if youtube_info:
                        # Save YouTube info to JSON cache file
                        with open(self.cache_path, 'w') as cache_file:
                            json.dump(youtube_info,
                                      cache_file,
                                      indent=4,
                                      ensure_ascii=False,
                                      sort_keys=True)
                    else:
                        LOGGER.error("==> [%s] Failed to extract YouTube info", self.__str__())
                except Exception as e:
                    LOGGER.error("==> [%s] Failed to get YouTube info: %s", self.__str__(), e)
                    return None
        return youtube_info

示例#2

0

显示文件

文件： utils.py 项目： learningequality/sushi-chef-refugee-response-crisis-advice

    def download_info(self, use_cache=True):
        """
        Load the video info (from the JSON cache or from YouTube) and copy it
        onto this object.

        The info is cached as `<youtube_id>.json` inside YOUTUBE_CACHE_DIR.
        On success `self.uid`, `self.title`, `self.description` and either
        `self.license` or `self.license_common` are set.

        Args:
            use_cache (bool): when True, reuse an existing cache file.

        Returns:
            bool: True when the info was obtained and applied, False otherwise.
        """
        match = YOUTUBE_ID_REGEX.match(self.url)
        if not match:
            LOGGER.error('==> URL ' + self.url +
                         ' does not match YOUTUBE_ID_REGEX')
            return False
        youtube_id = match.group('youtube_id')
        os.makedirs(YOUTUBE_CACHE_DIR, exist_ok=True)
        vinfo_json_path = os.path.join(YOUTUBE_CACHE_DIR, youtube_id + '.json')

        # First try to get from cache:
        vinfo = None
        if os.path.exists(vinfo_json_path) and use_cache:
            with open(vinfo_json_path) as cache_file:
                vinfo = json.load(cache_file)
            LOGGER.info("Retrieving cached video information...")

        # else get using youtube_dl:
        if not vinfo:
            LOGGER.info("Downloading %s from youtube...", self.url)
            try:
                video = YouTubeResource(self.url)
            except youtube_dl.utils.ExtractorError as e:
                if "unavailable" in str(e):
                    LOGGER.error("Video not found at URL: %s", self.url)
                else:
                    LOGGER.error("Failed to get video info: %s", e)
                # Either way there is no resource to query.
                return False

            if not video:
                return False

            try:
                vinfo = video.get_resource_info()
                with open(vinfo_json_path, 'w') as cache_file:
                    json.dump(vinfo,
                              cache_file,
                              indent=4,
                              ensure_ascii=False,
                              sort_keys=True)
            except Exception as e:
                LOGGER.error("Failed to get video info: %s", e)
                return False

            if not vinfo:
                # Nothing was extracted, so there is nothing to apply below.
                return False

        # Reached for both cached and freshly downloaded info.
        self.uid = vinfo[
            'id']  # video must have id because required to set youtube_id later
        self.title = vinfo.get('title', '')
        self.description = vinfo.get('description', '')
        video_license = vinfo.get('license')
        if not video_license:
            self.license = "Licensed not available"
        elif "Creative Commons" in video_license:
            self.license_common = True
        else:
            self.license = video_license
        return True

示例#3

0

显示文件

文件： arvind.py 项目： learningequality/sushi-chef-arvind-gupta-toys

    def download_info(self):
        """
        Load the video info (from the JSON cache or from YouTube) and copy it
        onto this object.

        The info is cached as `<youtube_id>.json` inside YOUTUBE_CACHE_DIR.
        On success `self.uid`, `self.title`, `self.description` and either
        `self.license` or `self.license_common` are set.

        Returns:
            bool: True when the info was obtained and applied, False otherwise.
        """
        match = YOUTUBE_ID_REGEX.match(self.url)
        if not match:
            print('==> URL ' + self.url + ' does not match YOUTUBE_ID_REGEX')
            return False
        youtube_id = match.group('youtube_id')
        os.makedirs(YOUTUBE_CACHE_DIR, exist_ok=True)
        vinfo_json_path = os.path.join(YOUTUBE_CACHE_DIR, youtube_id+'.json')

        # First try to get from cache:
        vinfo = None
        if os.path.exists(vinfo_json_path):
            with open(vinfo_json_path) as cache_file:
                vinfo = json.load(cache_file)
            if not vinfo:
                # the json data for "Video unavailable" is `null` so can skip them
                return False
            print("Using cached video info for youtube_id", youtube_id)

        # else get using YouTubeResource
        if not vinfo:
            print("Downloading {} from youtube...".format(self.url))
            try:
                video = YouTubeResource(self.url)
            except youtube_dl.utils.ExtractorError as e:
                if "unavailable" in str(e):
                    print("Video not found at URL: {}".format(self.url))
                else:
                    print(e)
                # Either way there is no resource to query.
                return False

            if not video:
                return False

            try:
                vinfo = video.get_resource_info()
                # Cache whatever was returned (including `null`) so the cache
                # check above can short-circuit on the next run.
                with open(vinfo_json_path, 'w') as cache_file:
                    json.dump(vinfo, cache_file, indent=4, ensure_ascii=False, sort_keys=True)
            except Exception as e:
                print(e)
                return False

            if not vinfo:
                # Nothing was extracted, so there is nothing to apply below.
                return False

        self.uid = vinfo['id']  # video must have id because required to set youtube_id later
        self.title = vinfo.get('title', '')
        self.description = vinfo.get('description', '')
        video_license = vinfo.get('license')
        if not video_license:
            self.license = "Licensed not available"
        elif "Creative Commons" in video_license:
            self.license_common = True
        else:
            self.license = video_license

        return True

示例#4

0

显示文件

文件： utils.py 项目： learningequality/sushi-chef-refugee-response-crisis-advice

    def get_playlist_info(self):
        """
        Get playlist info from either local json cache or URL.

        When the info is fetched from the URL, videos whose `id` repeats an
        earlier entry are dropped from `children` (the first occurrence is
        kept) before the info is written to `self.playlist_info_json_path`.

        Returns:
            The playlist info, or None when fetching it raised.
        """
        os.makedirs(YOUTUBE_CACHE_DIR, exist_ok=True)

        playlist_info = None
        if os.path.exists(self.playlist_info_json_path) and self.use_cache:
            LOGGER.info(
                "[Playlist %s] Retrieving cached playlist information...",
                self.playlist_id)
            with open(self.playlist_info_json_path) as cache_file:
                playlist_info = json.load(cache_file)

        if not playlist_info:
            playlist_url = YOUTUBE_PLAYLIST_URL_FORMAT.format(self.playlist_id)
            playlist_resource = YouTubeResource(playlist_url)

            if playlist_resource:
                try:
                    playlist_info = playlist_resource.get_resource_info(
                        dict(ignoreerrors=True, skip_download=True))

                    # Remove duplicates by building a new list: removing items
                    # from a list while iterating over it skips the element
                    # that follows each removal.
                    videos = playlist_info.get('children')
                    seen_ids = set()
                    unique_videos = []
                    for video in videos:
                        if video['id'] not in seen_ids:
                            seen_ids.add(video['id'])
                            unique_videos.append(video)
                    # Update in place so the same list object stays attached
                    # to playlist_info.
                    videos[:] = unique_videos

                    with open(self.playlist_info_json_path, 'w') as cache_file:
                        json.dump(playlist_info,
                                  cache_file,
                                  indent=4,
                                  ensure_ascii=False,
                                  sort_keys=False)
                    LOGGER.info("[Playlist %s] Successfully get playlist info",
                                self.playlist_id)
                    return playlist_info
                except Exception as e:
                    LOGGER.error(
                        "[Playlist %s] Failed to get playlist info: %s",
                        self.playlist_id, e)
                    return None
        return playlist_info

示例#5

0

显示文件

def download_from_web(web_url,
                      download_settings,
                      file_format=file_formats.MP4,
                      ext="",
                      download_ext=""):
    """
    Download `web_url` using YoutubeDL using `download_settings` options.
    Args:
        web_url (str): URL of the video or subtitles to download
        download_settings (dict): options to pass onto YoutubeDL (not modified)
        file_format (str): one of "mp4" or "vtt"
        ext (str): extensions to use as part of `outtmpl` given to YoutubeDL
        download_ext (str): extensions to append to `outtmpl` after downloading
    This function operates differently when downloading videos and subtitles.
    For videos we set the `outtmpl` to the actual filename that will be downloaded,
    and the function must be called with ext = ".mp4" and download_ext="".
    For subtitles we set the `outtmpl` to extension-less string, and YoutubeDL
    automatically appends the language code and vtt extension, so the function
    must be called with ext="" and download_ext=".{youtube_lang}.vtt"
    Raises:
        youtube_dl.utils.DownloadError: if the resource info cannot be obtained
            or the expected file is missing after the download.
    :return: filename derived from hash of file contents {md5hash(file)}.ext
    """
    key = generate_key("DOWNLOADED", web_url, settings=download_settings)
    cache_file = get_cache_filename(key)
    if cache_file:
        return cache_file

    # Work on a copy: the key above is derived from the caller's settings, so
    # writing "outtmpl"/"writethumbnail" into the caller's dict would change
    # the key computed the next time the same dict is passed in.
    download_settings = dict(download_settings)

    # Get hash of web_url to act as temporary storage name
    url_hash = hashlib.md5()
    url_hash.update(web_url.encode('utf-8'))
    tempfilename = "{}{ext}".format(url_hash.hexdigest(), ext=ext)
    outtmpl_path = os.path.join(tempfile.gettempdir(), tempfilename)
    download_settings["outtmpl"] = outtmpl_path
    destination_path = outtmpl_path + download_ext  # file dest. after download

    # Delete files in case previously downloaded
    if os.path.exists(outtmpl_path):
        os.remove(outtmpl_path)
    if os.path.exists(destination_path):
        os.remove(destination_path)

    # Download the web_url which can be either a video or subtitles
    if not config.USEPROXY:
        # Connect to YouTube directly
        with youtube_dl.YoutubeDL(download_settings) as ydl:
            ydl.download([web_url])
            if not os.path.exists(destination_path):
                raise youtube_dl.utils.DownloadError('Failed to download ' +
                                                     web_url)
    else:
        # Connect to YouTube via an HTTP proxy
        yt_resource = YouTubeResource(web_url,
                                      useproxy=True,
                                      options=download_settings)
        result1 = yt_resource.get_resource_info()
        if result1 is None:
            raise youtube_dl.utils.DownloadError('Failed to get resource info')
        download_settings[
            "writethumbnail"] = False  # overwrite default behaviour
        if file_format == file_formats.VTT:
            # We need to use the proxy when downloading subtitles
            result2 = yt_resource.download(options=download_settings,
                                           useproxy=True)
        else:
            # For video files we can skip the proxy for faster download speed
            result2 = yt_resource.download(options=download_settings)
        if result2 is None or not os.path.exists(destination_path):
            raise youtube_dl.utils.DownloadError(
                'Failed to download resource ' + web_url)

    # Write file to local storage, named after the hash of its contents
    filename = "{}.{}".format(get_hash(destination_path), file_format)
    with open(destination_path,
              "rb") as dlf, open(config.get_storage_path(filename),
                                 'wb') as destf:
        shutil.copyfileobj(dlf, destf)

    FILECACHE.set(key, bytes(filename, "utf-8"))
    return filename