def handle(self, *args, **options):
    """Reconcile VideoFile models with the .mp4 files actually on disk.

    Deletes half-downloaded (broken) VideoFile rows, creates/completes rows
    for files present on disk, deletes rows whose files are missing, and
    optionally regenerates cached pages for every touched video.
    """
    caching_enabled = (settings.CACHE_TIME != 0)
    touched_video_ids = []

    # delete VideoFile objects that are not marked as in progress, but are neither 0% nor 100% done; they're broken
    video_files_to_delete = VideoFile.objects.filter(download_in_progress=False, percent_complete__gt=0, percent_complete__lt=100)
    youtube_ids_to_delete = [d["youtube_id"] for d in video_files_to_delete.values("youtube_id")]
    video_files_to_delete.delete()
    # BUG FIX: fall back to yid when i18n.get_video_id returns None, matching
    # every other touched_video_ids update in this method (avoids None entries).
    touched_video_ids += [i18n.get_video_id(yid) or yid for yid in youtube_ids_to_delete]
    # BUG FIX: len() on the queryset after .delete() re-queries and is always 0,
    # so the message never printed; count via the ids captured before deletion.
    if len(youtube_ids_to_delete):
        self.stdout.write("Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n" % len(youtube_ids_to_delete))

    files = glob.glob(settings.CONTENT_ROOT + "*.mp4")
    videos_marked_at_all = set([video.youtube_id for video in VideoFile.objects.all()])
    videos_flagged_for_download = set([video.youtube_id for video in VideoFile.objects.filter(flagged_for_download=True)])
    # Normalize Windows separators, then strip directory and extension to get the youtube_id.
    videos_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in files])

    # Files that exist, but are not in the DB, should be assumed to be good videos,
    # and just needing to be added to the DB.  Add them to the DB in this way,
    # so that these files also trigger the update code below (and trigger cache invalidation)
    video_ids_needing_model_creation = list(videos_in_filesystem - videos_marked_at_all)
    count = len(video_ids_needing_model_creation)
    if count:
        # OK to do bulk_create; cache invalidation triggered via save download
        VideoFile.objects.bulk_create([VideoFile(youtube_id=id, percent_complete=0, download_in_progress=False) for id in video_ids_needing_model_creation])
        self.stdout.write("Created %d VideoFile models (to mark them as complete, since the files exist)\n" % len(video_ids_needing_model_creation))
        touched_video_ids += [i18n.get_video_id(yid) or yid for yid in video_ids_needing_model_creation]

    # Files that exist, are in the DB, but have percent_complete=0, download_in_progress=False
    # These should be individually saved to be 100% complete, to trigger their availability (and cache invalidation)
    count = 0
    for chunk in break_into_chunks(videos_in_filesystem):
        video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
        count += video_files_needing_model_update.count()
        for videofile in video_files_needing_model_update:
            videofile.percent_complete = 100
            videofile.flagged_for_download = False
            videofile.save()  # individual saves (not bulk update) so save-signals fire
    if count:
        self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

    # VideoFile objects say they're available, but that don't actually exist.
    count = 0
    videos_needing_model_deletion_chunked = break_into_chunks(videos_marked_at_all - videos_in_filesystem - videos_flagged_for_download)
    for chunk in videos_needing_model_deletion_chunked:
        video_files_needing_model_deletion = VideoFile.objects.filter(youtube_id__in=chunk)
        count += video_files_needing_model_deletion.count()
        video_files_needing_model_deletion.delete()
        touched_video_ids += [i18n.get_video_id(yid) or yid for yid in chunk]
    if count:
        self.stdout.write("Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n" % count)

    if options["auto_cache"] and caching_enabled and touched_video_ids:
        caching.regenerate_all_pages_related_to_videos(video_ids=list(set(touched_video_ids)))
def save(self, update_userlog=True, *args, **kwargs):
    """Persist this VideoLog, deriving video_id, completion state, and user activity.

    Args:
        update_userlog: when True (and the save is not an import), records a
            "login" activity for the owning user.
        *args, **kwargs: forwarded to the superclass save; the "imported" kwarg
            is read here to skip validation/derivation for synced/imported data.
            NOTE(review): "imported" is passed through to super().save() — the
            base class presumably accepts it; confirm against the parent model.
    """
    # To deal with backwards compatibility,
    # check video_id, whether imported or not.
    if not self.video_id:
        # Internal (non-imported) code paths are expected to set video_id themselves.
        assert kwargs.get("imported", False), "video_id better be set by internal code."
        assert self.youtube_id, "If not video_id, you better have set youtube_id!"
        self.video_id = i18n.get_video_id(self.youtube_id) or self.youtube_id  # for unknown videos, default to the youtube_id

    if not kwargs.get("imported", False):
        self.full_clean()

        # Compute learner status: the video counts as complete once enough
        # points have accumulated; stamp the first transition to complete.
        already_complete = self.complete
        self.complete = (self.points >= VideoLog.POINTS_PER_VIDEO)
        if not already_complete and self.complete:
            self.completion_timestamp = datetime.now()

        # Tell logins that they are still active (ignoring validation failures).
        # TODO(bcipolli): Could log video information in the future.
        if update_userlog:
            try:
                UserLog.update_user_activity(self.user, activity_type="login", update_datetime=(self.completion_timestamp or datetime.now()), language=self.language)
            except ValidationError as e:
                # Best-effort: a userlog failure must not block saving the video log.
                logging.error("Failed to update userlog during video: %s" % e)

    super(VideoLog, self).save(*args, **kwargs)
def my_handler2(sender, **kwargs):
    """ Listen in to see when available videos become unavailable. """
    instance = kwargs["instance"]
    # Only fully-downloaded (100%) videos were ever exposed, so only their
    # deletion requires invalidating cached pages.
    if instance.percent_complete == 100:
        logging.debug("Invalidating cache on delete for %s" % instance)
        invalidate_all_pages_related_to_video(video_id=i18n.get_video_id(instance.youtube_id))
def my_handler2(sender, **kwargs):
    """ Listen in to see when available videos become unavailable. """
    deleted_file = kwargs["instance"]
    was_available = (deleted_file.percent_complete == 100)
    # Partially-downloaded videos were never shown, so nothing to invalidate.
    if not was_available:
        return
    logging.debug("Invalidating cache on delete for %s" % deleted_file)
    invalidate_all_pages_related_to_video(
        video_id=i18n.get_video_id(deleted_file.youtube_id))
def my_handler1(sender, **kwargs):
    """ Listen in to see when videos become available. """
    instance = kwargs["instance"]
    # Can only do full check in Django 1.5+, but shouldn't matter--we should only save with
    # percent_complete == 100 once.
    just_now_available = instance.percent_complete == 100  #and "percent_complete" in kwargs["updated_fields"]
    if just_now_available:
        # This event should only happen once, so don't bother checking if
        # this is the field that changed.
        logging.debug("Invalidating cache on save for %s" % instance)
        invalidate_all_pages_related_to_video(video_id=i18n.get_video_id(instance.youtube_id))
def my_handler1(sender, **kwargs):
    """ Listen in to see when videos become available. """
    saved_file = kwargs["instance"]
    # Can only do full check in Django 1.5+, but shouldn't matter--we should only save with
    # percent_complete == 100 once.
    if saved_file.percent_complete != 100:  #and "percent_complete" in kwargs["updated_fields"]
        return
    # This event should only happen once, so don't bother checking if
    # this is the field that changed.
    logging.debug("Invalidating cache on save for %s" % saved_file)
    invalidate_all_pages_related_to_video(
        video_id=i18n.get_video_id(saved_file.youtube_id))
def forwards(self, orm):
    """Backfill video_id on every VideoLog from its youtube_id."""
    # Setting the video ID
    for vlog in VideoLog.objects.all():
        vlog.video_id = i18n.get_video_id(vlog.youtube_id) or vlog.youtube_id
        # BUG FIX: without save() the assignment was never persisted, making
        # this data migration a no-op.
        vlog.save()
def handle(self, *args, **options):
    """Download every video flagged for download, one at a time, tracking progress.

    Loops until no flagged, un-attempted videos remain; failed downloads are
    skipped on subsequent iterations rather than retried in this run.
    """
    self.video = None
    handled_youtube_ids = []  # stored to deal with caching
    failed_youtube_ids = []  # stored to avoid requerying failures.
    set_process_priority.lowest(logging=settings.LOG)
    try:
        while True:  # loop until the method is aborted
            # Grab any video that hasn't been tried yet
            videos = VideoFile.objects \
                .filter(flagged_for_download=True, download_in_progress=False) \
                .exclude(youtube_id__in=failed_youtube_ids)
            video_count = videos.count()
            if video_count == 0:
                self.stdout.write("Nothing to download; exiting.\n")
                break

            # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
            # Update the video logging
            video = videos[0]
            # ROBUSTNESS FIX: record the claimed video; self.video was only ever
            # set to None here, yet self.video.youtube_id is dereferenced after
            # the loop (presumably the download callback also sets it — this
            # guarantees it is never None there).
            self.video = video
            video.download_in_progress = True
            video.percent_complete = 0
            video.save()
            self.stdout.write("Downloading video '%s'...\n" % video.youtube_id)

            # Update the progress logging
            self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
            if not self.started():
                self.start(stage_name=video.youtube_id)

            # Initiate the download process
            try:
                download_video(video.youtube_id, callback=partial(self.download_progress_callback, video))
                handled_youtube_ids.append(video.youtube_id)
                self.stdout.write("Download is complete!\n")
            except Exception as e:
                # On error, report the error, mark the video as not downloaded,
                # and allow the loop to try other videos.
                self.stderr.write("Error in downloading %s: %s\n" % (video.youtube_id, e))
                video.download_in_progress = False
                video.flagged_for_download = not isinstance(e, URLNotFound)  # URLNotFound means, we won't try again
                video.save()
                # Rather than getting stuck on one video, continue to the next video.
                failed_youtube_ids.append(video.youtube_id)
                continue

        # This can take a long time, without any further update, so ... best to avoid.
        if options["auto_cache"] and caching.caching_is_enabled() and handled_youtube_ids:
            self.update_stage(stage_name=self.video.youtube_id, stage_percent=0, notes=_("Generating all pages related to videos."))
            caching.regenerate_all_pages_related_to_videos(video_ids=list(set([i18n.get_video_id(yid) or yid for yid in handled_youtube_ids])))

        # Update
        self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
            "num_handled_videos": len(handled_youtube_ids),
            "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
        })
    except Exception as e:
        sys.stderr.write("Error: %s\n" % e)
        self.cancel(notes=_("Error: %s") % e)
def forwards(self, orm):
    """Backfill video_id on every VideoLog from its youtube_id."""
    # Setting the video ID
    for vlog in VideoLog.objects.all():
        vlog.video_id = i18n.get_video_id(vlog.youtube_id) or vlog.youtube_id
        # BUG FIX: the computed video_id was never persisted — save each row.
        vlog.save()
def handle(self, *args, **options):
    """Reconcile VideoFile models with the .mp4 files actually on disk.

    Deletes half-downloaded (broken) VideoFile rows, creates/completes rows
    for files present on disk, deletes rows whose files are missing, and
    optionally regenerates cached pages for every touched video.
    """
    caching_enabled = (settings.CACHE_TIME != 0)
    touched_video_ids = []

    # delete VideoFile objects that are not marked as in progress, but are neither 0% nor 100% done; they're broken
    video_files_to_delete = VideoFile.objects.filter(
        download_in_progress=False,
        percent_complete__gt=0,
        percent_complete__lt=100)
    youtube_ids_to_delete = [
        d["youtube_id"] for d in video_files_to_delete.values("youtube_id")
    ]
    video_files_to_delete.delete()
    # BUG FIX: fall back to yid when i18n.get_video_id returns None, matching
    # every other touched_video_ids update in this method (avoids None entries).
    touched_video_ids += [
        i18n.get_video_id(yid) or yid for yid in youtube_ids_to_delete
    ]
    # BUG FIX: len() on the queryset after .delete() re-queries and is always 0,
    # so the message never printed; count via the ids captured before deletion.
    if len(youtube_ids_to_delete):
        self.stdout.write(
            "Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n"
            % len(youtube_ids_to_delete))

    files = glob.glob(settings.CONTENT_ROOT + "*.mp4")
    videos_marked_at_all = set(
        [video.youtube_id for video in VideoFile.objects.all()])
    videos_flagged_for_download = set([
        video.youtube_id
        for video in VideoFile.objects.filter(flagged_for_download=True)
    ])
    # Normalize Windows separators, then strip directory and extension to get the youtube_id.
    videos_in_filesystem = set([
        path.replace("\\", "/").split("/")[-1].split(".")[0] for path in files
    ])

    # Files that exist, but are not in the DB, should be assumed to be good videos,
    # and just needing to be added to the DB.  Add them to the DB in this way,
    # so that these files also trigger the update code below (and trigger cache invalidation)
    video_ids_needing_model_creation = list(videos_in_filesystem - videos_marked_at_all)
    count = len(video_ids_needing_model_creation)
    if count:
        # OK to do bulk_create; cache invalidation triggered via save download
        VideoFile.objects.bulk_create([
            VideoFile(youtube_id=id, percent_complete=0, download_in_progress=False)
            for id in video_ids_needing_model_creation
        ])
        self.stdout.write(
            "Created %d VideoFile models (to mark them as complete, since the files exist)\n"
            % len(video_ids_needing_model_creation))
        touched_video_ids += [
            i18n.get_video_id(yid) or yid
            for yid in video_ids_needing_model_creation
        ]

    # Files that exist, are in the DB, but have percent_complete=0, download_in_progress=False
    # These should be individually saved to be 100% complete, to trigger their availability (and cache invalidation)
    count = 0
    for chunk in break_into_chunks(videos_in_filesystem):
        video_files_needing_model_update = VideoFile.objects.filter(
            percent_complete=0,
            download_in_progress=False,
            youtube_id__in=chunk)
        count += video_files_needing_model_update.count()
        for videofile in video_files_needing_model_update:
            videofile.percent_complete = 100
            videofile.flagged_for_download = False
            videofile.save()  # individual saves (not bulk update) so save-signals fire
    if count:
        self.stdout.write(
            "Updated %d VideoFile models (to mark them as complete, since the files exist)\n"
            % count)

    # VideoFile objects say they're available, but that don't actually exist.
    count = 0
    videos_needing_model_deletion_chunked = break_into_chunks(
        videos_marked_at_all - videos_in_filesystem - videos_flagged_for_download)
    for chunk in videos_needing_model_deletion_chunked:
        video_files_needing_model_deletion = VideoFile.objects.filter(youtube_id__in=chunk)
        count += video_files_needing_model_deletion.count()
        video_files_needing_model_deletion.delete()
        touched_video_ids += [i18n.get_video_id(yid) or yid for yid in chunk]
    if count:
        self.stdout.write(
            "Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n"
            % count)

    if options["auto_cache"] and caching_enabled and touched_video_ids:
        caching.regenerate_all_pages_related_to_videos(
            video_ids=list(set(touched_video_ids)))
def get_video_by_youtube_id(youtube_id):
    """Return the cached Video node for the given youtube_id, or None."""
    # TODO(bcipolli): will need to change for dubbed videos
    matches = get_node_cache("Video").get(i18n.get_video_id(youtube_id=youtube_id), [None])
    return matches[0]
def handle(self, *args, **options):
    """Download every video flagged for download, one at a time, tracking progress.

    Loops until no flagged, un-attempted videos remain; failed downloads are
    skipped on subsequent iterations rather than retried in this run.
    """
    self.video = None
    handled_youtube_ids = []  # stored to deal with caching
    failed_youtube_ids = []  # stored to avoid requerying failures.
    set_process_priority.lowest(logging=settings.LOG)
    try:
        while True:  # loop until the method is aborted
            # Grab any video that hasn't been tried yet
            videos = VideoFile.objects \
                .filter(flagged_for_download=True, download_in_progress=False) \
                .exclude(youtube_id__in=failed_youtube_ids)
            video_count = videos.count()
            if video_count == 0:
                self.stdout.write("Nothing to download; exiting.\n")
                break

            # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
            # Update the video logging
            video = videos[0]
            # ROBUSTNESS FIX: record the claimed video; self.video was only ever
            # set to None here, yet self.video.youtube_id is dereferenced after
            # the loop (presumably the download callback also sets it — this
            # guarantees it is never None there).
            self.video = video
            video.download_in_progress = True
            video.percent_complete = 0
            video.save()
            self.stdout.write("Downloading video '%s'...\n" % video.youtube_id)

            # Update the progress logging
            self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
            if not self.started():
                self.start(stage_name=video.youtube_id)

            # Initiate the download process
            try:
                download_video(video.youtube_id, callback=partial(self.download_progress_callback, video))
                handled_youtube_ids.append(video.youtube_id)
                self.stdout.write("Download is complete!\n")
            except Exception as e:
                # On error, report the error, mark the video as not downloaded,
                # and allow the loop to try other videos.
                self.stderr.write("Error in downloading %s: %s\n" % (video.youtube_id, e))
                video.download_in_progress = False
                video.flagged_for_download = not isinstance(e, URLNotFound)  # URLNotFound means, we won't try again
                video.save()
                # Rather than getting stuck on one video, continue to the next video.
                failed_youtube_ids.append(video.youtube_id)
                continue

        # This can take a long time, without any further update, so ... best to avoid.
        if options["auto_cache"] and caching.caching_is_enabled() and handled_youtube_ids:
            self.update_stage(stage_name=self.video.youtube_id, stage_percent=0, notes=_("Generating all pages related to videos."))
            caching.regenerate_all_pages_related_to_videos(video_ids=list(set([i18n.get_video_id(yid) or yid for yid in handled_youtube_ids])))

        # Update
        self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
            "num_handled_videos": len(handled_youtube_ids),
            "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
        })
    except Exception as e:
        sys.stderr.write("Error: %s\n" % e)
        self.cancel(notes=_("Error: %s") % e)
class TestVideoLogs(KALiteTestCase):
    """Tests for creating, updating, and merging VideoLog records."""

    ORIGINAL_POINTS = 37
    ORIGINAL_SECONDS_WATCHED = 3
    NEW_POINTS = 22
    NEW_SECONDS_WATCHED = 5
    YOUTUBE_ID = "aNqG4ChKShI"
    VIDEO_ID = i18n.get_video_id(YOUTUBE_ID) or "dummy"

    def setUp(self):
        super(TestVideoLogs, self).setUp()
        # create a facility and user that can be referred to in models across tests
        self.facility = Facility(name="Test Facility")
        self.facility.save()
        self.user = FacilityUser(username="******", facility=self.facility)
        self.user.set_password("dumber")
        self.user.save()

        # create an initial VideoLog instance so we have something to collide with later
        self.original_videolog = VideoLog(video_id=self.VIDEO_ID, youtube_id=self.YOUTUBE_ID, user=self.user)
        self.original_videolog.points = self.ORIGINAL_POINTS
        self.original_videolog.total_seconds_watched = self.ORIGINAL_SECONDS_WATCHED
        self.original_videolog.save()

        # get a new reference to the existing VideoLog
        videolog = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog was created correctly
        self.assertEqual(videolog.points, self.ORIGINAL_POINTS, "The VideoLog's points have already changed.")
        self.assertEqual(videolog.total_seconds_watched, self.ORIGINAL_SECONDS_WATCHED, "The VideoLog's total seconds watched have already changed.")

    def test_videolog_update(self):
        # get a new reference to the existing VideoLog
        videolog = VideoLog.objects.get(id=self.original_videolog.id)

        # update the VideoLog
        videolog.points = self.NEW_POINTS
        videolog.total_seconds_watched = self.NEW_SECONDS_WATCHED
        videolog.save()

        # get a new reference to the existing VideoLog
        videolog2 = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog was updated
        self.assertEqual(videolog2.points, self.NEW_POINTS, "The VideoLog's points were not updated.")
        self.assertEqual(videolog2.total_seconds_watched, self.NEW_SECONDS_WATCHED, "The VideoLog's total seconds watched were not updated.")

    @unittest.skip("Auto-merging is not yet automatic, so skip this")
    def test_videolog_collision(self):
        # create a new video log with the same youtube_id and user, but different points/total seconds watched
        videolog = VideoLog(video_id=self.VIDEO_ID, youtube_id=self.YOUTUBE_ID, user=self.user)
        videolog.points = self.NEW_POINTS
        videolog.total_seconds_watched = self.NEW_SECONDS_WATCHED

        # try saving the new VideoLog: this is where the collision will happen, hopefully leading to a merge
        videolog.save()

        # get a new reference to the existing VideoLog
        videolog2 = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog has been properly merged
        self.assertEqual(videolog.points, max(self.ORIGINAL_POINTS, self.NEW_POINTS), "The VideoLog's points were not properly merged.")
        # BUG FIX: ORIGINAL_ATTEMPTS is not defined on this class (it would raise
        # AttributeError when the skip is lifted); the merged value is compared
        # against the original seconds watched.
        self.assertEqual(videolog.total_seconds_watched, max(self.ORIGINAL_SECONDS_WATCHED, self.NEW_SECONDS_WATCHED), "The VideoLog's total seconds watched have already changed.")
class TestSaveVideoLog(KALiteTestCase):
    """API tests for saving VideoLogs through the client endpoint."""

    ORIGINAL_POINTS = 84
    ORIGINAL_SECONDS_WATCHED = 32
    NEW_POINTS = 32
    NEW_SECONDS_WATCHED = 15
    YOUTUBE_ID = "aNqG4ChKShI"
    VIDEO_ID = i18n.get_video_id(YOUTUBE_ID) or "dummy"
    YOUTUBE_ID2 = "b22tMEc6Kko"
    VIDEO_ID2 = i18n.get_video_id(YOUTUBE_ID2) or "dummy2"
    USERNAME = "******"
    PASSWORD = "******"

    def setUp(self):
        super(TestSaveVideoLog, self).setUp()
        # create a facility and user that can be referred to in models across tests
        self.facility = Facility(name="Test Facility")
        self.facility.save()
        self.user = FacilityUser(username=self.USERNAME, facility=self.facility)
        self.user.set_password(self.PASSWORD)
        self.user.save()

        # create an initial VideoLog instance so we have something to update later
        self.original_videolog = VideoLog(video_id=self.VIDEO_ID, youtube_id=self.YOUTUBE_ID, user=self.user)
        self.original_videolog.points = self.ORIGINAL_POINTS
        self.original_videolog.total_seconds_watched = self.ORIGINAL_SECONDS_WATCHED
        self.original_videolog.save()

    def test_new_videolog(self):
        # make sure the target video log does not already exist
        # BUG FIX: this test saves a log for VIDEO_ID2/YOUTUBE_ID2, so both the
        # pre-check and the post-save fetch must use VIDEO_ID2; checking VIDEO_ID
        # would match the record created in setUp and read the wrong log.
        videologs = VideoLog.objects.filter(video_id=self.VIDEO_ID2, user__username=self.USERNAME)
        self.assertEqual(videologs.count(), 0, "The target video log to be newly created already exists")

        c = KALiteClient()

        # login
        success = c.login(username=self.USERNAME, password=self.PASSWORD, facility=self.facility.id)
        self.assertTrue(success, "Was not able to login as the test user")

        # save a new video log
        result = c.save_video_log(
            video_id=self.VIDEO_ID2,
            youtube_id=self.YOUTUBE_ID2,
            total_seconds_watched=self.ORIGINAL_SECONDS_WATCHED,
            points=self.NEW_POINTS,
        )
        self.assertEqual(result.status_code, 200, "An error (%d) was thrown while saving the video log." % result.status_code)

        # get a reference to the newly created VideoLog
        videolog = VideoLog.objects.get(video_id=self.VIDEO_ID2, user__username=self.USERNAME)

        # make sure the VideoLog was properly created
        self.assertEqual(videolog.points, self.NEW_POINTS, "The VideoLog's points were not saved correctly.")
        self.assertEqual(videolog.total_seconds_watched, self.ORIGINAL_SECONDS_WATCHED, "The VideoLog's seconds watched was not saved correctly.")

    def test_update_videolog(self):
        # get a new reference to the existing VideoLog
        videolog = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog hasn't already been changed
        self.assertEqual(videolog.points, self.ORIGINAL_POINTS, "The VideoLog's points have already changed.")
        self.assertEqual(videolog.total_seconds_watched, self.ORIGINAL_SECONDS_WATCHED, "The VideoLog's seconds watched already changed.")

        c = KALiteClient()

        # login
        success = c.login(username=self.USERNAME, password=self.PASSWORD, facility=self.facility.id)
        self.assertTrue(success, "Was not able to login as the test user")

        # save a new record onto the video log, with a correct answer (increasing the points and streak)
        result = c.save_video_log(
            video_id=self.VIDEO_ID,
            youtube_id=self.YOUTUBE_ID,
            total_seconds_watched=self.ORIGINAL_SECONDS_WATCHED + self.NEW_SECONDS_WATCHED,
            points=self.ORIGINAL_POINTS + self.NEW_POINTS,
        )
        self.assertEqual(result.status_code, 200, "An error (%d) was thrown while saving the video log." % result.status_code)

        # get a reference to the updated VideoLog
        videolog = VideoLog.objects.get(video_id=self.VIDEO_ID, user__username=self.USERNAME)

        # make sure the VideoLog was properly updated
        self.assertEqual(videolog.points, self.ORIGINAL_POINTS + self.NEW_POINTS, "The VideoLog's points were not updated correctly.")
        self.assertEqual(videolog.total_seconds_watched, self.ORIGINAL_SECONDS_WATCHED + self.NEW_SECONDS_WATCHED, "The VideoLog's seconds watched was not updated correctly.")