Exemplo n.º 1
0
    def convert_missing_downloads(max_videos, dryrun=False,
        missing_on_s3=False, language_channels=None):
        """Download from YouTube and use Zencoder to start converting any
        missing downloadable content into its appropriate downloadable format.

        Arguments:
            max_videos: stop after starting this many conversions.
            dryrun: if True, log the work that would be done but do not copy
                legacy content, download, or submit Zencoder jobs.
            missing_on_s3: if True, convert formats missing from the S3
                converted bucket, ignoring the API's download_urls.  If False
                (the default), convert formats missing from the API's
                download_urls that are not already converted on S3.
            language_channels: optional list of language channels whose
                videos should also be converted; ['all'] selects every
                channel known to lang_utils.
        """
        videos_converted = 0
        converted_formats = None

        if missing_on_s3:
            # With this option, videos that are missing in the S3 converted
            # bucket are converted. The API's download_urls is ignored.
            logger.info("Searching for videos that are missing from S3")
            formats_to_convert = s3.list_missing_converted_formats()
            legacy_mp4_videos = s3.list_legacy_mp4_videos()
        else:
            # With this option (the default), videos that are missing in the
            # API's download_urls are converted, if they do not already exist
            # on S3.  Videos that are missing from S3, but present in the API's
            # download_urls, are ignored.
            logger.info(
                "Searching for videos that are missing from API download_urls")
            formats_to_convert = api.list_missing_video_content()
            converted_formats = s3.list_converted_formats()

        if language_channels:
            if language_channels[0] == 'all':
                channel_ids_set = lang_utils.video_ids_set()
            else:
                channel_ids_set = lang_utils.video_ids_set(language_channels)
            # Use converted_formats if already downloaded above, otherwise get
            # it now.
            converted_formats = converted_formats or s3.list_converted_formats()
            for vid_id in channel_ids_set:
                if vid_id not in converted_formats:
                    # Copy the constant: the loop below may mutate the stored
                    # set (remove "mp4"), which must never alter the
                    # module-level DOWNLOADABLE_FORMATS.
                    formats_to_convert[vid_id] = set(DOWNLOADABLE_FORMATS)

        # .items() iterates identically under Python 2 and 3 here.
        for youtube_id, missing_formats in formats_to_convert.items():
            if videos_converted >= max_videos:
                logger.info("Stopping: max videos reached")
                break

            if "_DUP_" in youtube_id:
                logger.info(
                    ("Skipping video {0} as it has invalid DUP in youtube ID"
                     .format(youtube_id)))
                continue

            if missing_on_s3:
                # We already know the formats are missing from S3.  Work on a
                # copy so removing "mp4" below does not mutate the set still
                # referenced by formats_to_convert.
                formats_to_create = set(missing_formats)
                if (youtube_id in legacy_mp4_videos and
                        "mp4" in formats_to_create):
                    if dryrun:
                        logger.info(
                            "Skipping copy of legacy content due to dryrun")
                    else:
                        s3.copy_legacy_content_to_new_location(youtube_id)
                    formats_to_create.remove("mp4")
            else:
                # Don't recreate any formats that are already waiting on s3
                # but are, for any reason, not known by the API.
                already_converted_still_unpublished = (
                    converted_formats[youtube_id] & missing_formats)
                if already_converted_still_unpublished:
                    logger.info(
                        "Video %s missing formats %s in API but they are "
                        "already converted; use publish step" %
                        (youtube_id,
                        ",".join(already_converted_still_unpublished)))
                formats_to_create = (
                    missing_formats - already_converted_still_unpublished)

            if not formats_to_create:
                continue

            logger.info("Starting conversion of %s into formats %s" %
                        (youtube_id, ",".join(formats_to_create)))

            if dryrun:
                logger.info(
                    "Skipping downloading and sending job to zencoder due to "
                    "dryrun")
            else:
                s3_source_url = s3.get_or_create_unconverted_source_url(
                    youtube_id)
                # Raise explicitly: a bare assert is stripped under `python -O`
                # and this must never submit a job without a source URL.
                if not s3_source_url:
                    raise RuntimeError(
                        "No unconverted source URL for %s" % youtube_id)

                zencode.start_converting(
                    youtube_id, s3_source_url, formats_to_create)

            videos_converted += 1
Exemplo n.º 2
0
    def publish_converted_videos(max_videos, dryrun=False, use_archive=True):
        """Publish converted downloadable content to the API.

        For each video the API reports as missing downloadable formats, any
        formats already converted on S3 are (optionally) uploaded to
        archive.org and then registered with the KA API via
        update_download_available.

        Arguments:
            max_videos: stop after this many publish attempts.
            dryrun: if True, log what would be published without uploading or
                updating the API.
            use_archive: if True, upload converted content to archive.org
                first; if False, assume the API points directly at S3.
        """
        logger.info(
            "Searching for downloadable content that needs to be "
            "published")

        publish_attempts = 0
        converted_formats = s3.list_converted_formats()

        # .items() iterates identically under Python 2 and 3 here.
        for youtube_id, missing_formats in (api.list_missing_video_content()
                .items()):
            if publish_attempts >= max_videos:
                logger.info("Stopping: max videos reached")
                break

            converted_missing_formats = (
                converted_formats[youtube_id] & missing_formats)

            unconverted_formats = missing_formats - converted_missing_formats
            if unconverted_formats:
                logger.info(
                    "Video %s missing formats %s which are still "
                    "unconverted, can't be published" %
                    (youtube_id, ",".join(unconverted_formats)))

            # If no converted content waiting, just continue to next video
            if not converted_missing_formats:
                continue

            if dryrun:
                logger.info(
                    "Skipping publish for video {0} formats {1} due to dryrun"
                    .format(youtube_id, ", ".join(converted_missing_formats)))
                # Count dryrun attempts too; previously the counter only
                # advanced in the non-dryrun branch, so max_videos never
                # stopped a dryrun.
                publish_attempts += 1
            else:
                if use_archive:
                    if s3.upload_converted_to_archive(
                            youtube_id, converted_missing_formats):
                        logger.info("Successfully uploaded to archive.org")
                    else:
                        logger.error(
                            "Unable to upload video {0} to archive.org"
                            .format(youtube_id))
                        continue
                else:
                    logger.info("Skipping upload to archive.org; assuming API "
                                "points directly to S3 instead.")

                current_format_downloads = (api.video_metadata(youtube_id)[
                    "download_urls"] or {})
                current_formats = set(current_format_downloads.keys())
                new_formats = current_formats | converted_missing_formats

                if "mp4" in new_formats:
                    # PNG thumbnails are generated as part of the MP4
                    # conversion process.  If mp4 has been uploaded to
                    # archive.org, png is guaranteed to be there as well.
                    new_formats.add("png")

                if api.update_download_available(youtube_id, new_formats):
                    logger.info(
                        "Updated KA download_available, set to {0} for video "
                        "{1}".format(", ".join(new_formats), youtube_id))
                else:
                    logger.error(
                        "Unable to update KA download_available to {0} for "
                        "youtube id {1}".format(", ".join(new_formats),
                                                youtube_id))

                publish_attempts += 1