def handle(self, *args, **options):
    """Reconcile VideoFile models against the .mp4 files on disk.

    Deletes broken models, creates models for orphan files, marks files on
    disk as complete (via per-object save, so signals/cache hooks fire), and
    finally regenerates cached pages for every touched video when caching is
    enabled and --auto-cache was passed.
    """
    caching_enabled = (settings.CACHE_TIME != 0)
    touched_video_ids = []

    # Delete VideoFile objects that are not marked as in progress, but are
    # neither 0% nor 100% done; they're broken.
    video_files_to_delete = VideoFile.objects.filter(download_in_progress=False, percent_complete__gt=0, percent_complete__lt=100)
    youtube_ids_to_delete = [d["youtube_id"] for d in video_files_to_delete.values("youtube_id")]
    # BUG FIX: capture the count BEFORE .delete(); evaluating the queryset
    # afterwards re-runs the query and always yields 0, so the message below
    # could never be printed.
    num_deleted = len(youtube_ids_to_delete)
    video_files_to_delete.delete()
    # BUG FIX: fall back to the youtube_id itself when i18n has no mapping,
    # consistent with the other touched_video_ids additions below (otherwise
    # None entries could be appended).
    touched_video_ids += [i18n.get_video_id(yid) or yid for yid in youtube_ids_to_delete]
    if num_deleted:
        self.stdout.write("Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n" % num_deleted)

    files = glob.glob(settings.CONTENT_ROOT + "*.mp4")
    videos_marked_at_all = set([video.youtube_id for video in VideoFile.objects.all()])
    videos_flagged_for_download = set([video.youtube_id for video in VideoFile.objects.filter(flagged_for_download=True)])
    # Basename without extension, normalizing Windows path separators.
    # (Removed two unused sets that were computed here but never read.)
    videos_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in files])

    # Files that exist, but are not in the DB, should be assumed to be good
    # videos, and just needing to be added to the DB.  Add them as unstarted,
    # so that these files also trigger the update code below (and trigger
    # cache invalidation).
    video_ids_needing_model_creation = list(videos_in_filesystem - videos_marked_at_all)
    count = len(video_ids_needing_model_creation)
    if count:
        # OK to do bulk_create; cache invalidation triggered via save download
        VideoFile.objects.bulk_create([VideoFile(youtube_id=id, percent_complete=0, download_in_progress=False) for id in video_ids_needing_model_creation])
        self.stdout.write("Created %d VideoFile models (to mark them as complete, since the files exist)\n" % count)
        touched_video_ids += [i18n.get_video_id(yid) or yid for yid in video_ids_needing_model_creation]

    # Files that exist, are in the DB, but have percent_complete=0,
    # download_in_progress=False.  These should be individually saved to be
    # 100% complete, to trigger their availability (and cache invalidation).
    count = 0
    for chunk in break_into_chunks(videos_in_filesystem):
        video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
        count += video_files_needing_model_update.count()
        for videofile in video_files_needing_model_update:
            videofile.percent_complete = 100
            videofile.flagged_for_download = False
            videofile.save()  # per-object save (not .update()) so save hooks fire
    if count:
        self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

    # VideoFile objects say they're available, but don't actually exist
    # (and aren't queued for download): delete them.
    count = 0
    videos_needing_model_deletion_chunked = break_into_chunks(videos_marked_at_all - videos_in_filesystem - videos_flagged_for_download)
    for chunk in videos_needing_model_deletion_chunked:
        video_files_needing_model_deletion = VideoFile.objects.filter(youtube_id__in=chunk)
        count += video_files_needing_model_deletion.count()
        video_files_needing_model_deletion.delete()
        touched_video_ids += [i18n.get_video_id(yid) or yid for yid in chunk]
    if count:
        self.stdout.write("Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n" % count)

    if options["auto_cache"] and caching_enabled and touched_video_ids:
        caching.regenerate_all_pages_related_to_videos(video_ids=list(set(touched_video_ids)))
def start_video_download(request):
    """API endpoint for launching the videodownload job.

    Creates VideoFile rows for ids not yet in the DB, flags all requested ids
    for (re)download, then kicks off the background job.
    """
    youtube_ids = OrderedSet(simplejson.loads(request.raw_post_data or "{}").get("youtube_ids", []))

    # One query per video (slow): find the ids with no VideoFile row yet.
    video_files_to_create = [id for id in youtube_ids if not get_object_or_None(VideoFile, youtube_id=id)]
    # (Removed unused local `video_files_to_update`; it was computed but never read.)

    VideoFile.objects.bulk_create([VideoFile(youtube_id=id, flagged_for_download=True) for id in video_files_to_create])

    # One bulk UPDATE per chunk; skip files already fully downloaded or in progress.
    for chunk in break_into_chunks(youtube_ids):
        video_files_needing_model_update = VideoFile.objects.filter(download_in_progress=False, youtube_id__in=chunk).exclude(percent_complete=100)
        video_files_needing_model_update.update(percent_complete=0, cancel_download=False, flagged_for_download=True)

    force_job("videodownload", "Download Videos")
    return JsonResponse({})
def start_subtitle_download(request):
    """API endpoint for launching the subtitledownload job for one language.

    Fetches the list of available srt files from the central server, flags
    matching VideoFile models for subtitle download, and starts the job.
    """
    # BUG FIX: parse the request body once instead of twice.
    data = simplejson.loads(request.raw_post_data or "{}")
    update_set = data.get("update_set", "existing")
    language = data.get("language", "")

    # Set subtitle language
    Settings.set("subtitle_language", language)

    # Get the json file with all srts
    request_url = "http://%s/static/data/subtitles/languages/%s_available_srts.json" % (settings.CENTRAL_SERVER_HOST, language)
    try:
        r = requests.get(request_url)
        r.raise_for_status()  # no-op on 2xx, otherwise raises HTTPError
        # NOTE(review): `r.json` as a property only works on old requests
        # (<1.0); on modern requests this subscripts the bound method and
        # fails — confirm the pinned requests version.
        available_srts = set((r.json)["srt_files"])
    except ConnectionError:
        return JsonResponse({"error": "The central server is currently offline."}, status=500)
    except HTTPError:
        return JsonResponse({"error": "No subtitles available on central server for language code: %s; aborting." % language}, status=500)

    if update_set == "existing":
        # Only videos that don't yet have subtitles.
        videofiles = VideoFile.objects.filter(subtitles_downloaded=False, subtitle_download_in_progress=False)
    else:
        videofiles = VideoFile.objects.filter(subtitle_download_in_progress=False)

    queue_count = 0
    for chunk in break_into_chunks(available_srts):
        queue_count += videofiles.filter(youtube_id__in=chunk).update(flagged_for_subtitle_download=True, subtitles_downloaded=False)

    if queue_count == 0:
        return JsonResponse({"info": "There aren't any subtitles available in this language for your currently downloaded videos."}, status=200)

    force_job("subtitledownload", "Download Subtitles")
    return JsonResponse({})
def start_video_download(request):
    """API endpoint for launching the videodownload job."""
    payload = simplejson.loads(request.raw_post_data or "{}")
    youtube_ids = OrderedSet(payload.get("youtube_ids", []))

    # One query per video (slow): ids that have no VideoFile row yet.
    missing_ids = [id for id in youtube_ids if not get_object_or_None(VideoFile, youtube_id=id)]

    # OK to do bulk_create; cache invalidation triggered via save download
    grouped_by_language = divide_videos_by_language(missing_ids)
    for lang_code, lang_youtube_ids in grouped_by_language.iteritems():
        new_rows = [VideoFile(youtube_id=id, flagged_for_download=True, language=lang_code) for id in lang_youtube_ids]
        VideoFile.objects.bulk_create(new_rows)

    # OK to update all, since we're not setting all props above.
    # One query per chunk.
    for chunk in break_into_chunks(youtube_ids):
        pending = VideoFile.objects.filter(download_in_progress=False, youtube_id__in=chunk)
        pending = pending.exclude(percent_complete=100)
        pending.update(percent_complete=0, cancel_download=False, flagged_for_download=True)

    force_job("videodownload", _("Download Videos"), locale=request.language)
    return JsonResponse({})
def start_video_download(request):
    """API endpoint for launching the videodownload job."""
    youtube_ids = OrderedSet(simplejson.loads(request.raw_post_data or "{}").get("youtube_ids", []))

    # One query per video (slow)
    video_files_to_create = [id for id in youtube_ids if not get_object_or_None(VideoFile, youtube_id=id)]
    # (Removed unused local `video_files_to_update`; computed but never read.)

    # OK to do bulk_create; cache invalidation triggered via save download
    VideoFile.objects.bulk_create([VideoFile(youtube_id=id, flagged_for_download=True) for id in video_files_to_create])

    # One query per chunk
    for chunk in break_into_chunks(youtube_ids):
        video_files_needing_model_update = VideoFile.objects.filter(download_in_progress=False, youtube_id__in=chunk).exclude(percent_complete=100)
        video_files_needing_model_update.update(percent_complete=0, cancel_download=False, flagged_for_download=True)

    force_job("videodownload", _("Download Videos"))
    return JsonResponse({})
def handle(self, *args, **options):
    """Reconcile VideoFile models with the .mp4/.srt files on disk."""
    # Delete VideoFile objects that are not marked as in progress, but are
    # neither 0% nor 100% done; they're broken.
    VideoFile.objects.filter(download_in_progress=False, percent_complete__gt=0, percent_complete__lt=100).delete()

    files = glob.glob(settings.VIDEO_PATH + "*.mp4")
    subtitle_files = glob.glob(settings.VIDEO_PATH + "*.srt")
    videos_marked_at_all = set([video.youtube_id for video in VideoFile.objects.all()])
    # (Removed two unused sets — in-progress and unstarted ids — that were
    # computed but never read.)
    # Basename without extension, normalizing Windows path separators.
    videos_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in files])
    videos_in_filesystem_chunked = break_into_chunks(videos_in_filesystem)
    subtitles_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in subtitle_files])
    subtitles_in_filesystem_chunked = break_into_chunks(subtitles_in_filesystem)

    # Files on disk whose models are at 0%: mark them complete.
    count = 0
    for chunk in videos_in_filesystem_chunked:
        video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
        count += video_files_needing_model_update.count()
        video_files_needing_model_update.update(percent_complete=100, flagged_for_download=False)
    if count:
        self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

    # Files on disk with no model at all: create complete models.
    video_ids_needing_model_creation = list(videos_in_filesystem - videos_marked_at_all)
    count = len(video_ids_needing_model_creation)
    if count:
        VideoFile.objects.bulk_create([VideoFile(youtube_id=youtube_id, percent_complete=100) for youtube_id in video_ids_needing_model_creation])
        self.stdout.write("Created %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

    # Models whose video files are gone: delete them.
    count = 0
    videos_needing_model_deletion_chunked = break_into_chunks(videos_marked_at_all - videos_in_filesystem)
    for chunk in videos_needing_model_deletion_chunked:
        video_files_needing_model_deletion = VideoFile.objects.filter(youtube_id__in=chunk)
        count += video_files_needing_model_deletion.count()
        video_files_needing_model_deletion.delete()
    if count:
        self.stdout.write("Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n" % count)

    # Subtitle files on disk: flag their models as having subtitles.
    count = 0
    for chunk in subtitles_in_filesystem_chunked:
        video_files_needing_model_update = VideoFile.objects.filter(subtitle_download_in_progress=False, subtitles_downloaded=False, youtube_id__in=chunk)
        count += video_files_needing_model_update.count()
        video_files_needing_model_update.update(subtitles_downloaded=True)
    if count:
        self.stdout.write("Updated %d VideoFile models (marked them as having subtitles)\n" % count)
def delete_objects_for_missing_videos(youtube_ids_in_filesystem, videos_marked_at_all):
    """Delete VideoFile models whose files no longer exist on disk.

    Models flagged for download are spared (their files are expected soon).
    Returns the list of deleted video ids.
    """
    # VideoFile objects say they're available, but that don't actually exist.
    deleted_video_ids = []
    videos_flagged_for_download = set([video.youtube_id for video in VideoFile.objects.filter(flagged_for_download=True)])
    videos_needing_model_deletion_chunked = break_into_chunks(videos_marked_at_all - youtube_ids_in_filesystem - videos_flagged_for_download)
    for chunk in videos_needing_model_deletion_chunked:
        video_files_needing_model_deletion = VideoFile.objects.filter(youtube_id__in=chunk)
        # BUG FIX: collect ids BEFORE .delete(); iterating the queryset after
        # deletion re-runs the query and yields no rows, so the function
        # always returned an empty list.
        deleted_video_ids += [video_file.video_id for video_file in video_files_needing_model_deletion]
        video_files_needing_model_deletion.delete()
    if deleted_video_ids:
        # NOTE(review): `self` is not bound in this function's scope; this
        # line raises NameError if reached — confirm whether this should be
        # a command method or use a module-level logger.
        self.stdout.write("Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n" % len(deleted_video_ids))
    return deleted_video_ids
def update_objects_to_be_complete(youtube_ids_in_filesystem):
    """Mark VideoFile models as 100% complete for files present on disk.

    Invalidates all caches when anything changed. Returns the list of
    updated video ids.
    """
    # Files that exist, are in the DB, but have percent_complete=0,
    # download_in_progress=False.
    updated_video_ids = []
    for chunk in break_into_chunks(youtube_ids_in_filesystem):
        video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
        # BUG FIX: collect ids BEFORE .update(); afterwards the queryset
        # re-runs its query (percent_complete=0) and matches nothing, so
        # updated_video_ids was always empty and the cache was never
        # invalidated.
        updated_video_ids += [i18n.get_video_id(video_file.youtube_id) for video_file in video_files_needing_model_update]
        video_files_needing_model_update.update(percent_complete=100, flagged_for_download=False)
    if updated_video_ids:
        caching.invalidate_all_caches()
        # NOTE(review): `self` is not bound in this function's scope; this
        # line raises NameError if reached — confirm intended output channel.
        self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % len(updated_video_ids))
    return updated_video_ids
def start_video_download(request):
    """API endpoint for launching the videodownload job."""
    youtube_ids = OrderedSet(simplejson.loads(request.raw_post_data or "{}").get("youtube_ids", []))

    # One query per video (slow): ids lacking a VideoFile row.
    video_files_to_create = [id for id in youtube_ids if not get_object_or_None(VideoFile, youtube_id=id)]
    # (Removed unused local `video_files_to_update`: computed but never read.)

    VideoFile.objects.bulk_create([VideoFile(youtube_id=id, flagged_for_download=True) for id in video_files_to_create])

    # One bulk UPDATE per chunk; files at 100% or in progress are left alone.
    for chunk in break_into_chunks(youtube_ids):
        video_files_needing_model_update = VideoFile.objects.filter(download_in_progress=False, youtube_id__in=chunk).exclude(percent_complete=100)
        video_files_needing_model_update.update(percent_complete=0, cancel_download=False, flagged_for_download=True)

    force_job("videodownload", "Download Videos")
    return JsonResponse({})
def start_video_download(request):
    """API endpoint for launching the videodownload job."""
    youtube_ids = OrderedSet(simplejson.loads(request.raw_post_data or "{}").get("youtube_ids", []))

    # One query per video (slow)
    video_files_to_create = [id for id in youtube_ids if not get_object_or_None(VideoFile, youtube_id=id)]
    # (Removed unused local `video_files_to_update`; it was never read.)

    # OK to do bulk_create; cache invalidation triggered via save download
    VideoFile.objects.bulk_create([VideoFile(youtube_id=id, flagged_for_download=True) for id in video_files_to_create])

    # One query per chunk
    for chunk in break_into_chunks(youtube_ids):
        video_files_needing_model_update = VideoFile.objects.filter(download_in_progress=False, youtube_id__in=chunk).exclude(percent_complete=100)
        video_files_needing_model_update.update(percent_complete=0, cancel_download=False, flagged_for_download=True)

    force_job("videodownload", _("Download Videos"))
    return JsonResponse({})
def start_video_download(request):
    """API endpoint for launching the videodownload job."""
    body = simplejson.loads(request.raw_post_data or "{}")
    youtube_ids = OrderedSet(body.get("youtube_ids", []))

    # One query per video (slow): requested ids with no VideoFile yet.
    ids_without_models = [id for id in youtube_ids if not get_object_or_None(VideoFile, youtube_id=id)]

    # OK to do bulk_create; cache invalidation triggered via save download
    for lang_code, lang_youtube_ids in divide_videos_by_language(ids_without_models).iteritems():
        rows = [VideoFile(youtube_id=id, flagged_for_download=True, language=lang_code) for id in lang_youtube_ids]
        VideoFile.objects.bulk_create(rows)

    # OK to update all, since we're not setting all props above.
    # One query per chunk.
    for chunk in break_into_chunks(youtube_ids):
        stale = VideoFile.objects.filter(download_in_progress=False, youtube_id__in=chunk).exclude(percent_complete=100)
        stale.update(percent_complete=0, cancel_download=False, flagged_for_download=True)

    force_job("videodownload", _("Download Videos"), locale=request.language)
    return JsonResponse({})
def update_objects_to_be_complete(youtube_ids_in_filesystem):
    """Mark 0%-complete VideoFile models as done when their files exist.

    Invalidates all caches if anything changed; returns the updated ids.
    """
    # Files that exist, are in the DB, but have percent_complete=0,
    # download_in_progress=False.
    updated_video_ids = []
    for chunk in break_into_chunks(youtube_ids_in_filesystem):
        video_files_needing_model_update = VideoFile.objects.filter(
            percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
        # BUG FIX: materialize ids BEFORE .update(); afterwards the queryset
        # re-evaluates its filter (percent_complete=0) and matches nothing,
        # leaving updated_video_ids permanently empty.
        updated_video_ids += [
            i18n.get_video_id(video_file.youtube_id)
            for video_file in video_files_needing_model_update
        ]
        video_files_needing_model_update.update(
            percent_complete=100, flagged_for_download=False)
    if updated_video_ids:
        caching.invalidate_all_caches()
        # NOTE(review): `self` is unbound here; this raises NameError if
        # reached — confirm this should be a command method or a logger.
        self.stdout.write(
            "Updated %d VideoFile models (to mark them as complete, since the files exist)\n"
            % len(updated_video_ids))
    return updated_video_ids
def delete_objects_for_missing_videos(youtube_ids_in_filesystem, videos_marked_at_all):
    """Delete VideoFile models whose files are missing from disk.

    Models flagged for download are kept. Returns the deleted video ids.
    """
    # VideoFile objects say they're available, but that don't actually exist.
    deleted_video_ids = []
    videos_flagged_for_download = set([
        video.youtube_id
        for video in VideoFile.objects.filter(flagged_for_download=True)
    ])
    videos_needing_model_deletion_chunked = break_into_chunks(
        videos_marked_at_all - youtube_ids_in_filesystem -
        videos_flagged_for_download)
    for chunk in videos_needing_model_deletion_chunked:
        video_files_needing_model_deletion = VideoFile.objects.filter(
            youtube_id__in=chunk)
        # BUG FIX: collect ids BEFORE .delete(); iterating afterwards
        # re-runs the query against deleted rows and yields nothing, so the
        # function always returned an empty list.
        deleted_video_ids += [
            video_file.video_id
            for video_file in video_files_needing_model_deletion
        ]
        video_files_needing_model_deletion.delete()
    if deleted_video_ids:
        # NOTE(review): `self` is unbound in this function; raises NameError
        # if reached — confirm intended output channel.
        self.stdout.write(
            "Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n"
            % len(deleted_video_ids))
    return deleted_video_ids
def handle(self, *args, **options):
    """Reconcile VideoFile models with the .mp4/.srt files under CONTENT_ROOT."""
    # Delete VideoFile objects that are not marked as in progress, but are
    # neither 0% nor 100% done; they're broken.
    VideoFile.objects.filter(download_in_progress=False, percent_complete__gt=0, percent_complete__lt=100).delete()

    files = glob.glob(settings.CONTENT_ROOT + "*.mp4")
    subtitle_files = glob.glob(settings.CONTENT_ROOT + "*.srt")
    videos_marked_at_all = set([video.youtube_id for video in VideoFile.objects.all()])
    # (Removed two unused sets — in-progress and unstarted ids — computed but
    # never read.)
    # Basename without extension, normalizing Windows path separators.
    videos_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in files])
    videos_in_filesystem_chunked = break_into_chunks(videos_in_filesystem)
    videos_flagged_for_download = set([video.youtube_id for video in VideoFile.objects.filter(flagged_for_download=True)])
    subtitles_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in subtitle_files])
    subtitles_in_filesystem_chunked = break_into_chunks(subtitles_in_filesystem)

    # Files on disk whose models are at 0%: mark them complete.
    count = 0
    for chunk in videos_in_filesystem_chunked:
        video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
        count += video_files_needing_model_update.count()
        video_files_needing_model_update.update(percent_complete=100, flagged_for_download=False)
    if count:
        self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

    # Files on disk with no model at all: create complete models.
    video_ids_needing_model_creation = list(videos_in_filesystem - videos_marked_at_all)
    count = len(video_ids_needing_model_creation)
    if count:
        VideoFile.objects.bulk_create([VideoFile(youtube_id=youtube_id, percent_complete=100) for youtube_id in video_ids_needing_model_creation])
        self.stdout.write("Created %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

    # Models whose files are gone (and not queued for download): delete.
    count = 0
    videos_needing_model_deletion_chunked = break_into_chunks(videos_marked_at_all - videos_in_filesystem - videos_flagged_for_download)
    for chunk in videos_needing_model_deletion_chunked:
        video_files_needing_model_deletion = VideoFile.objects.filter(youtube_id__in=chunk)
        count += video_files_needing_model_deletion.count()
        video_files_needing_model_deletion.delete()
    if count:
        self.stdout.write("Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n" % count)

    # Subtitle files on disk: flag their models as having subtitles.
    count = 0
    for chunk in subtitles_in_filesystem_chunked:
        video_files_needing_model_update = VideoFile.objects.filter(subtitle_download_in_progress=False, subtitles_downloaded=False, youtube_id__in=chunk)
        count += video_files_needing_model_update.count()
        video_files_needing_model_update.update(subtitles_downloaded=True)
    if count:
        self.stdout.write("Updated %d VideoFile models (marked them as having subtitles)\n" % count)
def handle(self, *args, **options):
    """Reconcile VideoFile models with the .mp4/.srt files on disk.

    Invalidates cached pages per touched video as it goes, and (with
    --auto-cache and caching enabled) regenerates pages for every touched
    video at the end.
    """
    caching_enabled = (settings.CACHE_TIME != 0)
    touched_video_ids = []

    # Delete VideoFile objects that are not marked as in progress, but are
    # neither 0% nor 100% done; they're broken.
    video_files_to_delete = VideoFile.objects.filter(download_in_progress=False, percent_complete__gt=0, percent_complete__lt=100)
    youtube_ids_to_delete = [d["youtube_id"] for d in video_files_to_delete.values("youtube_id")]
    # BUG FIX: count BEFORE .delete(); evaluating the queryset afterwards
    # always yields 0 rows, so the message below could never be printed.
    num_deleted = len(youtube_ids_to_delete)
    video_files_to_delete.delete()
    if caching_enabled:
        for youtube_id in youtube_ids_to_delete:
            caching.invalidate_all_pages_related_to_video(video_id=youtube_id)
            touched_video_ids.append(youtube_id)
    if num_deleted:
        self.stdout.write("Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n" % num_deleted)

    files = glob.glob(settings.CONTENT_ROOT + "*.mp4")
    subtitle_files = glob.glob(settings.CONTENT_ROOT + "*.srt")
    videos_marked_at_all = set([video.youtube_id for video in VideoFile.objects.all()])
    # (Removed two unused sets — in-progress and unstarted ids — that were
    # computed but never read.)
    videos_flagged_for_download = set([video.youtube_id for video in VideoFile.objects.filter(flagged_for_download=True)])
    # Basename without extension, normalizing Windows path separators.
    videos_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in files])
    videos_in_filesystem_chunked = break_into_chunks(videos_in_filesystem)
    subtitles_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in subtitle_files])
    subtitles_in_filesystem_chunked = break_into_chunks(subtitles_in_filesystem)

    # Files on disk whose models are at 0%: mark them complete.
    count = 0
    for chunk in videos_in_filesystem_chunked:
        video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
        # BUG FIX: materialize ids BEFORE .update(); afterwards the queryset
        # re-runs its filter (percent_complete=0) and matches nothing, so the
        # per-video cache invalidation below never fired.
        updated_youtube_ids = [vf.youtube_id for vf in video_files_needing_model_update]
        count += len(updated_youtube_ids)
        video_files_needing_model_update.update(percent_complete=100, flagged_for_download=False)
        if caching_enabled:
            for youtube_id in updated_youtube_ids:
                caching.invalidate_all_pages_related_to_video(video_id=youtube_id)
                touched_video_ids.append(youtube_id)
    if count:
        self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

    # Files on disk with no model at all: create complete models.
    video_ids_needing_model_creation = list(videos_in_filesystem - videos_marked_at_all)
    count = len(video_ids_needing_model_creation)
    if count:
        VideoFile.objects.bulk_create([VideoFile(youtube_id=youtube_id, percent_complete=100) for youtube_id in video_ids_needing_model_creation])
        if caching_enabled:
            for vid in video_ids_needing_model_creation:
                caching.invalidate_all_pages_related_to_video(video_id=vid)
                touched_video_ids.append(vid)
        self.stdout.write("Created %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

    # Models whose files are gone (and not queued for download): delete.
    count = 0
    videos_needing_model_deletion_chunked = break_into_chunks(videos_marked_at_all - videos_in_filesystem - videos_flagged_for_download)
    for chunk in videos_needing_model_deletion_chunked:
        video_files_needing_model_deletion = VideoFile.objects.filter(youtube_id__in=chunk)
        count += video_files_needing_model_deletion.count()
        video_files_needing_model_deletion.delete()
        if caching_enabled:
            for video_id in chunk:
                caching.invalidate_all_pages_related_to_video(video_id=video_id)
                touched_video_ids.append(video_id)
    if count:
        self.stdout.write("Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n" % count)

    # Subtitle files on disk: flag their models as having subtitles.
    count = 0
    for chunk in subtitles_in_filesystem_chunked:
        video_files_needing_model_update = VideoFile.objects.filter(subtitle_download_in_progress=False, subtitles_downloaded=False, youtube_id__in=chunk)
        # BUG FIX: same materialize-before-update issue as above
        # (subtitles_downloaded=False no longer matches after the update).
        updated_youtube_ids = [vf.youtube_id for vf in video_files_needing_model_update]
        count += len(updated_youtube_ids)
        video_files_needing_model_update.update(subtitles_downloaded=True)
        if caching_enabled:
            for youtube_id in updated_youtube_ids:
                caching.invalidate_all_pages_related_to_video(video_id=youtube_id)
                touched_video_ids.append(youtube_id)
    if count:
        self.stdout.write("Updated %d VideoFile models (marked them as having subtitles)\n" % count)

    if options["auto_cache"] and caching_enabled and touched_video_ids:
        caching.regenerate_all_pages_related_to_videos(video_ids=touched_video_ids)
def handle(self, *args, **options):
    """Reconcile VideoFile models with the .mp4/.srt files on disk.

    Per-video cache invalidation happens inline; with --auto-cache (and
    caching enabled) the touched videos' pages are regenerated at the end.
    """
    caching_enabled = (settings.CACHE_TIME != 0)
    touched_video_ids = []

    # Delete VideoFile objects that are not marked as in progress, but are
    # neither 0% nor 100% done; they're broken.
    video_files_to_delete = VideoFile.objects.filter(download_in_progress=False, percent_complete__gt=0, percent_complete__lt=100)
    youtube_ids_to_delete = [d["youtube_id"] for d in video_files_to_delete.values("youtube_id")]
    # BUG FIX: take the count BEFORE .delete(); re-evaluating the queryset
    # afterwards always gives 0, silencing the message below.
    num_deleted = len(youtube_ids_to_delete)
    video_files_to_delete.delete()
    if caching_enabled:
        for youtube_id in youtube_ids_to_delete:
            caching.invalidate_all_pages_related_to_video(video_id=youtube_id)
            touched_video_ids.append(youtube_id)
    if num_deleted:
        self.stdout.write("Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n" % num_deleted)

    files = glob.glob(settings.CONTENT_ROOT + "*.mp4")
    subtitle_files = glob.glob(settings.CONTENT_ROOT + "*.srt")
    videos_marked_at_all = set([video.youtube_id for video in VideoFile.objects.all()])
    # (Dropped the unused in-progress/unstarted sets: computed, never read.)
    videos_flagged_for_download = set([video.youtube_id for video in VideoFile.objects.filter(flagged_for_download=True)])
    # youtube id = basename without extension (normalize Windows separators).
    videos_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in files])
    videos_in_filesystem_chunked = break_into_chunks(videos_in_filesystem)
    subtitles_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in subtitle_files])
    subtitles_in_filesystem_chunked = break_into_chunks(subtitles_in_filesystem)

    # Files on disk whose models are at 0%: mark complete.
    count = 0
    for chunk in videos_in_filesystem_chunked:
        video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
        # BUG FIX: snapshot ids BEFORE .update(); afterwards the queryset's
        # filter (percent_complete=0) matches nothing, so the invalidation
        # loop below never ran.
        updated_youtube_ids = [vf.youtube_id for vf in video_files_needing_model_update]
        count += len(updated_youtube_ids)
        video_files_needing_model_update.update(percent_complete=100, flagged_for_download=False)
        if caching_enabled:
            for youtube_id in updated_youtube_ids:
                caching.invalidate_all_pages_related_to_video(video_id=youtube_id)
                touched_video_ids.append(youtube_id)
    if count:
        self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

    # Files on disk with no model: create complete models.
    video_ids_needing_model_creation = list(videos_in_filesystem - videos_marked_at_all)
    count = len(video_ids_needing_model_creation)
    if count:
        VideoFile.objects.bulk_create([VideoFile(youtube_id=youtube_id, percent_complete=100) for youtube_id in video_ids_needing_model_creation])
        if caching_enabled:
            for vid in video_ids_needing_model_creation:
                caching.invalidate_all_pages_related_to_video(video_id=vid)
                touched_video_ids.append(vid)
        self.stdout.write("Created %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

    # Models whose files are gone (and not flagged for download): delete.
    count = 0
    videos_needing_model_deletion_chunked = break_into_chunks(videos_marked_at_all - videos_in_filesystem - videos_flagged_for_download)
    for chunk in videos_needing_model_deletion_chunked:
        video_files_needing_model_deletion = VideoFile.objects.filter(youtube_id__in=chunk)
        count += video_files_needing_model_deletion.count()
        video_files_needing_model_deletion.delete()
        if caching_enabled:
            for video_id in chunk:
                caching.invalidate_all_pages_related_to_video(video_id=video_id)
                touched_video_ids.append(video_id)
    if count:
        self.stdout.write("Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n" % count)

    # Subtitle files on disk: flag their models as subtitled.
    count = 0
    for chunk in subtitles_in_filesystem_chunked:
        video_files_needing_model_update = VideoFile.objects.filter(subtitle_download_in_progress=False, subtitles_downloaded=False, youtube_id__in=chunk)
        # BUG FIX: same snapshot-before-update issue as above
        # (subtitles_downloaded=False stops matching after the update).
        updated_youtube_ids = [vf.youtube_id for vf in video_files_needing_model_update]
        count += len(updated_youtube_ids)
        video_files_needing_model_update.update(subtitles_downloaded=True)
        if caching_enabled:
            for youtube_id in updated_youtube_ids:
                caching.invalidate_all_pages_related_to_video(video_id=youtube_id)
                touched_video_ids.append(youtube_id)
    if count:
        self.stdout.write("Updated %d VideoFile models (marked them as having subtitles)\n" % count)

    if options["auto_cache"] and caching_enabled and touched_video_ids:
        caching.regenerate_all_pages_related_to_videos(video_ids=touched_video_ids)
def handle(self, *args, **options):
    """Reconcile VideoFile models against the .mp4 files on disk.

    Broken models are deleted, orphan files get fresh (unstarted) models so
    the per-object save path below marks them complete (triggering cache
    invalidation), and models with no backing file are removed. With
    --auto-cache and caching enabled, pages for all touched videos are
    regenerated.
    """
    caching_enabled = (settings.CACHE_TIME != 0)
    touched_video_ids = []

    # Delete VideoFile objects that are not marked as in progress, but are
    # neither 0% nor 100% done; they're broken.
    video_files_to_delete = VideoFile.objects.filter(download_in_progress=False, percent_complete__gt=0, percent_complete__lt=100)
    youtube_ids_to_delete = [d["youtube_id"] for d in video_files_to_delete.values("youtube_id")]
    # BUG FIX: count BEFORE .delete(); evaluating the queryset after deletion
    # always yields 0, so the message below could never print.
    num_deleted = len(youtube_ids_to_delete)
    video_files_to_delete.delete()
    # BUG FIX: add the `or yid` fallback used everywhere else in this method,
    # so unmapped ids don't append None.
    touched_video_ids += [i18n.get_video_id(yid) or yid for yid in youtube_ids_to_delete]
    if num_deleted:
        self.stdout.write("Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n" % num_deleted)

    files = glob.glob(settings.CONTENT_ROOT + "*.mp4")
    videos_marked_at_all = set([video.youtube_id for video in VideoFile.objects.all()])
    # (Dropped the unused in-progress/unstarted sets: computed, never read.)
    videos_flagged_for_download = set([video.youtube_id for video in VideoFile.objects.filter(flagged_for_download=True)])
    # youtube id = basename without extension (normalize Windows separators).
    videos_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in files])

    # Files that exist, but are not in the DB, should be assumed to be good
    # videos, and just needing to be added to the DB.  Add them as unstarted,
    # so that these files also trigger the update code below (and trigger
    # cache invalidation).
    video_ids_needing_model_creation = list(videos_in_filesystem - videos_marked_at_all)
    count = len(video_ids_needing_model_creation)
    if count:
        # OK to do bulk_create; cache invalidation triggered via save download
        VideoFile.objects.bulk_create([VideoFile(youtube_id=id, percent_complete=0, download_in_progress=False) for id in video_ids_needing_model_creation])
        self.stdout.write("Created %d VideoFile models (to mark them as complete, since the files exist)\n" % count)
        touched_video_ids += [i18n.get_video_id(yid) or yid for yid in video_ids_needing_model_creation]

    # Files that exist, are in the DB, but have percent_complete=0,
    # download_in_progress=False.  These should be individually saved to be
    # 100% complete, to trigger their availability (and cache invalidation).
    count = 0
    for chunk in break_into_chunks(videos_in_filesystem):
        video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
        count += video_files_needing_model_update.count()
        for videofile in video_files_needing_model_update:
            videofile.percent_complete = 100
            videofile.flagged_for_download = False
            videofile.save()  # per-object save (not .update()) so save hooks fire
    if count:
        self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

    # VideoFile objects say they're available, but don't actually exist
    # (and aren't queued for download): delete them.
    count = 0
    videos_needing_model_deletion_chunked = break_into_chunks(videos_marked_at_all - videos_in_filesystem - videos_flagged_for_download)
    for chunk in videos_needing_model_deletion_chunked:
        video_files_needing_model_deletion = VideoFile.objects.filter(youtube_id__in=chunk)
        count += video_files_needing_model_deletion.count()
        video_files_needing_model_deletion.delete()
        touched_video_ids += [i18n.get_video_id(yid) or yid for yid in chunk]
    if count:
        self.stdout.write("Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n" % count)

    if options["auto_cache"] and caching_enabled and touched_video_ids:
        caching.regenerate_all_pages_related_to_videos(video_ids=list(set(touched_video_ids)))