def main(): args = get_arguments() print( "Will encode videos using settings from the following " "configuration groups: %s" % ", ".join(args.config_groups)) print "Fetching video YouTube IDs under topic %s" % args.topic_slug youtube_ids = get_youtube_ids(args.topic_slug) for youtube_id in youtube_ids: # We use the -converted bucket as the source bucket (rather than # -unconverted) because files in the latter get transferred to Glacier # storage, which can't be accessed immediately. source_url = "gcs://ka-youtube-converted/%s.mp4/%s.mp4" % (youtube_id, youtube_id) print "Converting YouTube video %s on Zencoder (source url: %s)" % ( youtube_id, source_url) if not args.dry_run: zencode.start_converting(youtube_id, source_url, args.config_groups, base_url=args.base_url) print print "See %s running jobs at https://app.zencoder.com/jobs" % len( youtube_ids)
def main(): args = get_arguments() print ("Will encode videos using settings from the following " "configuration groups: %s" % ", ".join(args.config_groups)) print "Fetching video YouTube IDs under topic %s" % args.topic_slug youtube_ids = get_youtube_ids(args.topic_slug) for youtube_id in youtube_ids: # We use the -converted bucket as the source bucket (rather than # -unconverted) because files in the latter get transferred to Glacier # storage, which can't be accessed immediately. source_url = "s3://KA-youtube-converted/%s.mp4/%s.mp4" % ( youtube_id, youtube_id) print "Converting YouTube video %s on Zencoder (source url: %s)" % ( youtube_id, source_url) if not args.dry_run: zencode.start_converting( youtube_id, source_url, args.config_groups, base_url=args.base_url) print print "See %s running jobs at https://app.zencoder.com/jobs" % len( youtube_ids)
def convert_missing_downloads(max_videos, dryrun=False): """Download from YouTube and use Zencoder to start converting any missing downloadable content into its appropriate downloadable format. """ videos_converted = 0 error_ids = [] # With this option, videos that are missing in the gcs converted # bucket are converted. The API's download_urls is ignored. logger.info("Searching for videos that are missing from gcs") formats_to_convert = gcs.list_missing_converted_formats() legacy_mp4_videos = gcs.list_legacy_mp4_videos() for youtube_id, missing_formats in formats_to_convert.iteritems(): if videos_converted >= max_videos: logger.info("Stopping: max videos reached") break if "_DUP_" in youtube_id: logger.info( ("Skipping video {0} as it has invalid DUP in youtube ID". format(youtube_id))) continue # We already know the formats are missing from gcs. formats_to_create = missing_formats if (youtube_id in legacy_mp4_videos and "mp4" in formats_to_create): if dryrun: logger.info( "Skipping copy of legacy content due to dryrun") else: gcs.copy_legacy_content_to_new_location(youtube_id) formats_to_create.remove("mp4") if len(formats_to_create) == 0: continue logger.info("Starting conversion of %s into formats %s" % (youtube_id, ",".join(formats_to_create))) if dryrun: logger.info( "Skipping downloading and sending job to zencoder due to " "dryrun") videos_converted += 1 else: gcs_source_url = gcs.get_or_create_unconverted_source_url( youtube_id) if not gcs_source_url: logger.warning( "No gcs source URL created for %s; skipping" % youtube_id) error_ids.append(youtube_id) continue try: zencode.start_converting(youtube_id, gcs_source_url, formats_to_create) videos_converted += 1 except Exception, why: logger.error('Skipping youtube_id "%s": %s' % (youtube_id, why)) error_ids.append(youtube_id)
def convert_missing_downloads(max_videos, dryrun=False): """Download from YouTube and use Zencoder to start converting any missing downloadable content into its appropriate downloadable format. """ videos_converted = 0 error_ids = [] # With this option, videos that are missing in the gcs converted # bucket are converted. The API's download_urls is ignored. logger.info("Searching for videos that are missing from gcs") formats_to_convert = gcs.list_missing_converted_formats() legacy_mp4_videos = gcs.list_legacy_mp4_videos() for youtube_id, missing_formats in formats_to_convert.iteritems(): if videos_converted >= max_videos: logger.info("Stopping: max videos reached") break if "_DUP_" in youtube_id: logger.info( ("Skipping video {0} as it has invalid DUP in youtube ID" .format(youtube_id))) continue # We already know the formats are missing from gcs. formats_to_create = missing_formats if (youtube_id in legacy_mp4_videos and "mp4" in formats_to_create): if dryrun: logger.info( "Skipping copy of legacy content due to dryrun") else: gcs.copy_legacy_content_to_new_location(youtube_id) formats_to_create.remove("mp4") if len(formats_to_create) == 0: continue logger.info("Starting conversion of %s into formats %s" % (youtube_id, ",".join(formats_to_create))) if dryrun: logger.info( "Skipping downloading and sending job to zencoder due to " "dryrun") videos_converted += 1 else: gcs_source_url = gcs.get_or_create_unconverted_source_url( youtube_id) if not gcs_source_url: logger.warning("No gcs source URL created for %s; skipping" % youtube_id) error_ids.append(youtube_id) continue try: zencode.start_converting(youtube_id, gcs_source_url, formats_to_create) videos_converted += 1 except Exception, why: logger.error('Skipping youtube_id "%s": %s' % (youtube_id, why)) error_ids.append(youtube_id)
def convert_missing_downloads(max_videos, dryrun=False, missing_on_s3=False,
                              language_channels=None):
    """Download from YouTube and use Zencoder to start converting any
    missing downloadable content into its appropriate downloadable format.

    Arguments:
        max_videos: upper bound on the number of videos converted this run.
        dryrun: when True, only log the actions that would be taken.
        missing_on_s3: when True, convert whatever is missing from the S3
            converted bucket, ignoring the API's download_urls.  When False
            (the default), convert what the API reports as missing, unless
            it already exists on S3.
        language_channels: optional list of channel names whose videos
            should also be converted ('all' as the first element means
            every channel).
    """
    videos_converted = 0
    converted_formats = None
    if missing_on_s3:
        # Videos missing in the S3 converted bucket are converted; the
        # API's download_urls is ignored.
        logger.info("Searching for videos that are missing from S3")
        formats_to_convert = s3.list_missing_converted_formats()
        legacy_mp4_videos = s3.list_legacy_mp4_videos()
    else:
        # Videos missing in the API's download_urls are converted, if they
        # do not already exist on S3.  Videos missing from S3 but present
        # in download_urls are ignored.
        logger.info(
            "Searching for videos that are missing from API download_urls")
        formats_to_convert = api.list_missing_video_content()
        converted_formats = s3.list_converted_formats()

    if language_channels:
        if language_channels[0] == 'all':
            channel_ids_set = lang_utils.video_ids_set()
        else:
            channel_ids_set = lang_utils.video_ids_set(language_channels)
        # Use converted_formats if already fetched above, otherwise get it
        # now.
        converted_formats = converted_formats or s3.list_converted_formats()
        for vid_id in channel_ids_set:
            if vid_id not in converted_formats:
                formats_to_convert[vid_id] = DOWNLOADABLE_FORMATS

    for youtube_id, missing_formats in formats_to_convert.iteritems():
        if videos_converted >= max_videos:
            logger.info("Stopping: max videos reached")
            break
        if "_DUP_" in youtube_id:
            logger.info(
                ("Skipping video {0} as it has invalid DUP in youtube ID"
                 .format(youtube_id)))
            continue

        if missing_on_s3:
            # We already know the formats are missing from S3.  Copy the
            # container so .remove("mp4") below does not mutate the value
            # stored in formats_to_convert (the original aliased it).
            formats_to_create = set(missing_formats)
            if (youtube_id in legacy_mp4_videos
                    and "mp4" in formats_to_create):
                if dryrun:
                    logger.info(
                        "Skipping copy of legacy content due to dryrun")
                else:
                    s3.copy_legacy_content_to_new_location(youtube_id)
                    formats_to_create.remove("mp4")
        else:
            # Don't recreate any formats that are already waiting on s3
            # but are, for any reason, not known by the API.
            # BUG FIX: ids injected via the language_channels branch above
            # are, by construction, absent from converted_formats, so the
            # original converted_formats[youtube_id] raised KeyError for
            # exactly those videos.  Default to an empty set instead.
            already_converted_still_unpublished = (
                converted_formats.get(youtube_id, set()) & missing_formats)
            if already_converted_still_unpublished:
                logger.info(
                    "Video %s missing formats %s in API but they are "
                    "already converted; use publish step"
                    % (youtube_id,
                       ",".join(already_converted_still_unpublished)))
            formats_to_create = (
                missing_formats - already_converted_still_unpublished)

        if not formats_to_create:
            continue

        logger.info("Starting conversion of %s into formats %s"
                    % (youtube_id, ",".join(formats_to_create)))
        if dryrun:
            logger.info(
                "Skipping downloading and sending job to zencoder due to "
                "dryrun")
        else:
            s3_source_url = s3.get_or_create_unconverted_source_url(
                youtube_id)
            if not s3_source_url:
                # Was assert(s3_source_url): asserts are stripped under
                # "python -O"; warn and skip instead, matching the gcs
                # variant of this function.
                logger.warning("No s3 source URL created for %s; skipping"
                               % youtube_id)
                continue
            zencode.start_converting(
                youtube_id, s3_source_url, formats_to_create)
        # Count dry-run conversions too so max_videos still bounds the run.
        videos_converted += 1