def delete_videos(request):
    """
    API endpoint for deleting videos.

    The request body is expected to be a JSON object (an empty body is
    treated as ``{}``) with two optional keys:

    * ``paths`` -- iterable of paths handed to ``get_download_youtube_ids``
      (defaults to an empty list; duplicates are dropped, order is kept).
    * ``lang``  -- language code (defaults to ``"en"``).

    Returns a ``JsonResponseMessageSuccess`` whose message reports how many
    ids ``delete_downloaded_files`` returned a truthy value for.
    """
    # Parse the body once instead of once per key.
    payload = json.loads(request.body or "{}")
    paths = OrderedSet(payload.get("paths", []))
    lang = payload.get("lang", "en")

    # NOTE(review): the result is iterated and also asked for ``.keys()``
    # below, so it is presumably a dict keyed by youtube id -- confirm.
    youtube_ids = get_download_youtube_ids(paths, language=lang, downloaded=True)

    num_deleted = 0
    for youtube_id in youtube_ids:  # avoid shadowing the builtin ``id``
        # Delete the file on disk
        if delete_downloaded_files(youtube_id):
            num_deleted += 1

    # Re-annotate every requested id, not only the ones counted as deleted.
    annotate_content_models_by_youtube_id(youtube_ids=youtube_ids.keys(), language=lang)

    return JsonResponseMessageSuccess(
        _("Deleted %(num_videos)s video(s) successfully.") % {"num_videos": num_deleted})
def setUp(self):
    """
    Prepare the fixture: remove any downloaded files for the real video,
    re-annotate the content database for it, and check that the content
    item is then reported as not available.
    """
    UpdatesTestCase.setUp(self)

    youtube_id = self.real_video.youtube_id
    delete_downloaded_files(youtube_id)
    annotate_content_models_by_youtube_id(youtube_ids=[youtube_id])

    # The content item must not be flagged as available before a test runs.
    content_item = get_content_item(content_id=self.real_video.id)
    self.assertFalse(content_item['available'])
def test_simple_download(self):
    """
    Tests that a real, existing video can be downloaded
    """
    youtube_id = self.real_video.youtube_id

    # Fetch a video that really exists.
    download_video(youtube_id)

    # The file should now be present at its local path.
    local_path = get_video_local_path(youtube_id)
    self.assertTrue(os.path.exists(local_path))

    # Annotate the database now that the download has happened ...
    annotate_content_models_by_youtube_id(youtube_ids=[youtube_id])

    # ... and verify the content item is flagged as available.
    content_item = get_content_item(content_id=self.real_video.id)
    logger.error(content_item)
    self.assertTrue(content_item['available'])

    # An unrelated check, kept here because it needs no database setup:
    # a bogus path is expected to yield the supplied default size.
    bogus_size = get_local_video_size("/bogus/path", default=123)
    self.assertEqual(bogus_size, 123)
def delete_videos(request):
    """
    API endpoint for deleting videos.

    The request body is expected to be a JSON object (an empty body is
    treated as ``{}``) with two optional keys:

    * ``paths`` -- iterable of paths handed to ``get_download_youtube_ids``
      (defaults to an empty list; duplicates are dropped, order is kept).
    * ``lang``  -- language code (defaults to ``"en"``).

    Returns a ``JsonResponseMessageSuccess`` whose message reports how many
    ids ``delete_downloaded_files`` returned a truthy value for.
    """
    # Parse the body once instead of once per key.
    payload = json.loads(request.body or "{}")
    paths = OrderedSet(payload.get("paths", []))
    lang = payload.get("lang", "en")

    # NOTE(review): the result is iterated and also asked for ``.keys()``
    # below, so it is presumably a dict keyed by youtube id -- confirm.
    youtube_ids = get_download_youtube_ids(paths, language=lang, downloaded=True)

    num_deleted = 0
    for youtube_id in youtube_ids:  # avoid shadowing the builtin ``id``
        # Delete the file on disk
        if delete_downloaded_files(youtube_id):
            num_deleted += 1

    # Re-annotate every requested id, not only the ones counted as deleted.
    annotate_content_models_by_youtube_id(youtube_ids=youtube_ids.keys(), language=lang)

    return JsonResponseMessageSuccess(_("Deleted %(num_videos)s video(s) successfully.") % {"num_videos": num_deleted})
def test_simple_download(self):
    """
    Tests that a real, existing video can be downloaded
    """
    youtube_id = self.real_video.youtube_id

    # Fetch a video that really exists.
    download_video(youtube_id)

    # The file should now be present at its local path.
    local_path = get_video_local_path(youtube_id)
    self.assertTrue(os.path.exists(local_path))

    # Annotate the database now that the download has happened ...
    annotate_content_models_by_youtube_id(youtube_ids=[youtube_id])

    # ... and verify the content item is flagged as available.
    content_item = get_content_item(content_id=self.real_video.id)
    logger.error(content_item)
    self.assertTrue(content_item['available'])

    # An unrelated check, kept here because it needs no database setup:
    # a bogus path is expected to yield the supplied default size.
    bogus_size = get_local_video_size("/bogus/path", default=123)
    self.assertEqual(bogus_size, 123)
def setUp(self):
    """
    Prepare the fixture: remove any downloaded files for the real video,
    re-annotate the content database for it, and check that the content
    item is then reported as not available.
    """
    UpdatesTestCase.setUp(self)

    youtube_id = self.real_video.youtube_id
    delete_downloaded_files(youtube_id)
    annotate_content_models_by_youtube_id(youtube_ids=[youtube_id])

    # The content item must not be flagged as available before a test runs.
    content_item = get_content_item(content_id=self.real_video.id)
    self.assertFalse(content_item['available'])
def handle(self, *args, **options):
    """
    Work through the video download queue until it is empty.

    For each queued video the command:

    1. marks it as in progress and updates the stage bookkeeping,
    2. downloads it with ``download_video`` unless ``<youtube_id>.mp4``
       already exists under ``settings.CONTENT_ROOT``; on ``URLNotFound``
       it falls back to ``scrape_video`` (youtube-dl),
    3. on success removes it from the queue and re-annotates the content
       models for that id.

    A failing video is recorded in the failed list, removed from the queue
    and the loop moves on.  ``DownloadCancelled`` clears the whole queue.
    Any exception escaping the loop cancels the job and is re-raised.

    ``options`` must contain ``"auto_cache"`` (it is converted with ``int``
    and added to the number of stages).
    """
    self.setup(options)
    self.video = {}

    handled_youtube_ids = []  # stored to deal with caching
    failed_youtube_ids = []  # stored to avoid requerying failures.

    set_process_priority.lowest(logging=settings.LOG)

    def youtube_dl_cb(stats, progress_callback, *args, **kwargs):
        """Translate a youtube-dl progress dict into a percentage callback."""
        status = stats['status']
        if status == "finished":
            percent = 100.
        elif status == "downloading":
            # The total size may be absent, None or 0 while it is still
            # unknown; report 0% rather than crashing the download.
            total_bytes = stats.get('total_bytes')
            if total_bytes:
                percent = 100. * stats['downloaded_bytes'] / total_bytes
            else:
                percent = 0.
        else:
            percent = 0.
        progress_callback(percent=percent)

    try:
        while True:  # loop until the method is aborted
            # Grab any video that hasn't been tried yet
            video_queue = VideoQueue()
            video_count = video_queue.count()
            if video_count == 0:
                self.stdout.write(_("Nothing to download; exiting.") + "\n")
                break

            # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
            # Update the video logging
            video = video_queue.next()
            youtube_id = video.get("youtube_id")

            video["download_in_progress"] = True
            video["percent_complete"] = 0
            self.stdout.write((_("Downloading video '%(youtube_id)s'...") + "\n") % {"youtube_id": youtube_id})

            # Update the progress logging
            self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
            if not self.started():
                self.start(stage_name=youtube_id)

            # Initiate the download process
            try:
                progress_callback = partial(self.download_progress_callback, video)

                # Don't try to download a file that already exists in the content dir - just say it was successful
                # and call it a day!
                if not os.path.exists(os.path.join(settings.CONTENT_ROOT, "{id}.mp4".format(id=youtube_id))):
                    try:
                        # Download via urllib
                        download_video(youtube_id, callback=progress_callback)

                    except URLNotFound:
                        # Video was not found on amazon cloud service,
                        #   either due to a KA mistake, or due to the fact
                        #   that it's a dubbed video.
                        #
                        # We can use youtube-dl to get that video!!
                        logging.debug(_("Retrieving youtube video %(youtube_id)s via youtube-dl") % {"youtube_id": youtube_id})
                        scrape_video(youtube_id, quiet=not settings.DEBUG, callback=partial(youtube_dl_cb, progress_callback=progress_callback))

                    except IOError as e:
                        # Only IOErrors raised by download_video land here;
                        # errors from the youtube-dl fallback above go to the
                        # generic handler further down.
                        logging.exception(e)
                        failed_youtube_ids.append(youtube_id)
                        video_queue.remove_file(youtube_id)
                        time.sleep(10)
                        continue

                # If we got here, we downloaded ... somehow :)
                handled_youtube_ids.append(youtube_id)
                video_queue.remove_file(youtube_id)
                self.stdout.write(_("Download is complete!") + "\n")

                annotate_content_models_by_youtube_id(youtube_ids=[youtube_id], language=video.get("language"))

            except DownloadCancelled:
                # Cancellation event: empty the queue, so the next pass of
                # the loop finds nothing to download and exits.
                video_queue.clear()
                failed_youtube_ids.append(youtube_id)

            except Exception as e:
                # On error, report the error, mark the video as not downloaded,
                # and allow the loop to try other videos.
                msg = _("Error in downloading %(youtube_id)s: %(error_msg)s") % {"youtube_id": youtube_id, "error_msg": unicode(e)}
                self.stderr.write("%s\n" % msg)

                # No retry is attempted here.  The connection-error detection
                # that used to live in this handler was never consulted and
                # could itself raise, so it has been removed.

                # Rather than getting stuck on one video, continue to the next video.
                self.update_stage(stage_status="error", notes=_("%(error_msg)s; continuing to next video.") % {"error_msg": msg})
                failed_youtube_ids.append(youtube_id)
                video_queue.remove_file(youtube_id)
                continue

        # Update
        self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
            "num_handled_videos": len(handled_youtube_ids),
            "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
        })

    except Exception as e:
        self.cancel(stage_status="error", notes=_("Error: %(error_msg)s") % {"error_msg": e})
        raise
def handle(self, *args, **options):
    """
    Work through the video download queue until it is empty.

    For each queued video the command:

    1. marks it as in progress and updates the stage bookkeeping,
    2. downloads it with ``download_video`` unless ``<youtube_id>.mp4``
       already exists under ``settings.CONTENT_ROOT``; on
       ``socket.timeout`` / ``ConnectionError`` it sleeps 30 seconds and
       tries again, with no upper bound on the number of attempts,
    3. on success removes it from the queue and re-annotates the content
       models for that id.

    ``DownloadCancelled`` clears the queue and stops the loop.  Any other
    exception for a single video is logged, the video is dropped from the
    queue, and the loop moves on.  An exception escaping the loop cancels
    the job and is re-raised.

    ``options`` must contain ``"auto_cache"`` (it is converted with ``int``
    and added to the number of stages).
    """
    self.setup(options)
    self.video = {}

    handled_youtube_ids = []  # stored to deal with caching
    failed_youtube_ids = []  # stored to avoid requerying failures.

    set_process_priority.lowest(logging=logger)

    try:
        while True:  # loop until the method is aborted
            # Grab any video that hasn't been tried yet
            video_queue = VideoQueue()
            video_count = video_queue.count()
            if video_count == 0:
                self.stdout.write(_("Nothing to download; exiting.") + "\n")
                break

            # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
            # Update the video logging
            video = video_queue.next()
            youtube_id = video.get("youtube_id")

            video["download_in_progress"] = True
            video["percent_complete"] = 0
            self.stdout.write((_("Downloading video '%(youtube_id)s'...") + "\n") % {"youtube_id": youtube_id})

            # Update the progress logging
            self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
            if not self.started():
                self.start(stage_name=youtube_id)

            # Initiate the download process
            try:
                progress_callback = partial(self.download_progress_callback, video)

                # Don't try to download a file that already exists in the content dir - just say it was successful
                # and call it a day!
                if not os.path.exists(os.path.join(settings.CONTENT_ROOT, "{id}.mp4".format(id=youtube_id))):
                    retries = 0
                    while True:
                        try:
                            download_video(youtube_id, callback=progress_callback)
                            break
                        except (socket.timeout, ConnectionError):
                            retries += 1
                            msg = _(
                                "Pausing download for '{title}', failed {failcnt} times, sleeping for 30s, retry number {retries}"
                            ).format(
                                title=video.get("title"),
                                failcnt=DOWNLOAD_MAX_RETRIES,
                                retries=retries,
                            )
                            try:
                                self.update_stage(
                                    stage_name=youtube_id,
                                    stage_percent=0.,
                                    notes=msg
                                )
                            except AssertionError:
                                # Raised by update_stage when the video
                                # download job has ended
                                raise DownloadCancelled()
                            logger.info(msg)
                            time.sleep(30)

                # If we got here, we downloaded ... somehow :)
                handled_youtube_ids.append(youtube_id)

                # Remove the item from the queue
                video_queue.remove_file(youtube_id)
                self.stdout.write(_("Download is complete!") + "\n")

                annotate_content_models_by_youtube_id(youtube_ids=[youtube_id], language=video.get("language"))

            except DownloadCancelled:
                video_queue.clear()
                failed_youtube_ids.append(youtube_id)
                break

            except Exception as e:
                # ``Exception`` already covers HTTPError, so it is not
                # listed separately.
                # Rather than getting stuck on one video,
                # completely remove this item from the queue
                failed_youtube_ids.append(youtube_id)
                video_queue.remove_file(youtube_id)
                logger.exception(e)

                # Do not rely on the truthiness of the response object: an
                # HTTP response carrying an error status may evaluate as
                # false, which would hide the status code from the user.
                response = getattr(e, "response", None)
                status_code = getattr(response, "status_code", None)
                if status_code is not None:
                    reason = _(
                        "Got non-OK HTTP status: {status}"
                    ).format(
                        status=status_code
                    )
                else:
                    reason = _(
                        "Unhandled request exception: "
                        "{exception}"
                    ).format(
                        exception=str(e),
                    )
                msg = _(
                    "Skipping '{title}', reason: {reason}"
                ).format(
                    title=video.get('title'),
                    reason=reason,
                )
                # Inform the user of this problem
                self.update_stage(
                    stage_name=youtube_id,
                    stage_percent=0.,
                    notes=msg
                )
                logger.info(msg)
                continue

        # Update
        self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
            "num_handled_videos": len(handled_youtube_ids),
            "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
        })

    except Exception as e:
        logger.exception(e)
        self.cancel(stage_status="error", notes=_("Error: %(error_msg)s") % {"error_msg": e})
        raise