def update(self, feedback):
    orig_video = feedback.video()

    if orig_video is None or type(orig_video).__name__ != "Video":
        return False

    readable_id = orig_video.readable_id
    query = Video.all()
    query.filter('readable_id =', readable_id)

    # The database currently contains multiple Video objects for a
    # particular video. Some are old. Some are due to a YouTube sync
    # where the youtube urls changed and our code was producing youtube_ids
    # that ended with '_player'. This hack gets the most recent valid
    # Video object.
    video = None
    key_id = 0
    for v in query:
        if v.key().id() > key_id and not v.youtube_id.endswith('_player'):
            video = v
            key_id = v.key().id()
    # End of hack

    if video is not None and video.key() != orig_video.key():
        logging.info("Retargeting Feedback %s from Video %s to Video %s",
                     feedback.key().id(),
                     orig_video.key().id(),
                     video.key().id())
        feedback.targets[0] = video.key()
        return True
    else:
        return False
def get(self, readable_id=""): # This method displays a video in the context of a particular topic. # To do that we first need to find the appropriate topic. If we aren't # given the topic title in a query param, we need to find a topic that # the video is a part of. That requires finding the video, given it # readable_id or, to support old URLs, it's youtube_id. video = None video_id = self.request.get('v') topic_id = self.request_string('topic', default="") readable_id = urllib.unquote(readable_id) # remove any trailing dashes (see issue 1140) readable_id = re.sub('-+$', '', readable_id) # If either the readable_id or topic title is missing, # redirect to the canonical URL that contains them if video_id: # Support for old links query = Video.all() query.filter('youtube_id =', video_id) video = query.get() if not video: raise MissingVideoException( "Missing video w/ youtube id '%s'" % video_id) readable_id = video.readable_id topic = video.first_topic() if not topic: raise MissingVideoException( "No topic has video w/ youtube id '%s'" % video_id) ViewVideo.show_video(self, readable_id, topic_id, True)
def test_derive_key_name_from_video(self):
    self._set_responses_xrange(BATCH_SIZE)
    _task_handler('UUID')
    videos = Video.all().fetch(BATCH_SIZE)
    for v in videos:
        key = VideoSubtitles.get_key_name('en', v.youtube_id)
        subs = VideoSubtitles.get_by_key_name(key)
        self.assertIsNotNone(subs)
def update_video(self, video_youtube_id):
    v = Video.all().filter('youtube_id =', video_youtube_id).get()
    if v is not None:
        if v.key() not in self.videos:
            self.videos.append(v.key())
    else:
        logging.info("Youtube ID %s not in datastore" % video_youtube_id)
def test_process_next_batch_on_nonempty_cursor(self):
    offset = 3

    # These videos should be skipped; they'll DownloadError
    for i in xrange(0, offset):
        Video(youtube_id=str(i)).put()

    # These videos should be downloaded
    self._set_responses_xrange(offset, BATCH_SIZE + offset)

    # Advance a cursor past the first `offset` videos
    query = Video.all()
    query.fetch(offset)
    cursor = query.cursor()

    _task_handler('UUID', cursor=cursor)
    self.assertEqual(VideoSubtitles.all().count(), BATCH_SIZE)
def youtube_get_video_data_dict(youtube_id):
    yt_service = gdata.youtube.service.YouTubeService()

    # Now that we run these queries from the App Engine servers, we need to
    # explicitly specify our developer_key to avoid being lumped together
    # with the rest of GAE and throttled by YouTube's "Too many requests"
    # quota.
    yt_service.developer_key = "AI39si6ctKTnSR_Vx7o7GpkpeSZAKa6xjbZz6WySzTvKVYRDAO7NHBVwofphk82oP-OSUwIZd0pOJyNuWK8bbOlqzJc9OFozrQ"
    yt_service.client_id = "n/a"

    logging.info("trying to get info for youtube_id: %s" % youtube_id)
    try:
        video = yt_service.GetYouTubeVideoEntry(video_id=youtube_id)
    except Exception:
        video = None

    if video:
        video_data = {"youtube_id": youtube_id,
                      "title": video.media.title.text.decode('utf-8'),
                      "url": video.media.player.url.decode('utf-8'),
                      "duration": int(video.media.duration.seconds)}

        if video.statistics:
            video_data["views"] = int(video.statistics.view_count)

        video_data["description"] = (video.media.description.text or '').decode('utf-8')
        video_data["keywords"] = (video.media.keywords.text or '').decode('utf-8')

        potential_id = re.sub('[^a-z0-9]', '-', video_data["title"].lower())
        potential_id = re.sub('-+$', '', potential_id)  # remove any trailing dashes (see issue 1140)
        potential_id = re.sub('^-+', '', potential_id)  # remove any leading dashes (see issue 1526)

        number_to_add = 0
        current_id = potential_id
        while True:
            query = Video.all()
            query.filter('readable_id =', current_id)
            if query.get() is None:
                # The id is unique, so use it and break out
                video_data["readable_id"] = current_id
                break
            else:
                # The id is not unique, so try again with a numeric suffix
                number_to_add += 1
                current_id = '%s-%d' % (potential_id, number_to_add)

        return video_data

    return None
def youtube_get_video_data_dict(youtube_id):
    yt_service = third_party.gdata.youtube.service.YouTubeService()

    # Now that we run these queries from the App Engine servers, we need to
    # explicitly specify our developer_key to avoid being lumped together
    # with the rest of GAE and throttled by YouTube's "Too many requests"
    # quota.
    yt_service.developer_key = "AI39si6ctKTnSR_Vx7o7GpkpeSZAKa6xjbZz6WySzTvKVYRDAO7NHBVwofphk82oP-OSUwIZd0pOJyNuWK8bbOlqzJc9OFozrQ"
    yt_service.client_id = "n/a"

    logging.info("trying to get info for youtube_id: %s" % youtube_id)
    try:
        video = yt_service.GetYouTubeVideoEntry(video_id=youtube_id)
    except Exception:
        video = None

    if video:
        video_data = {"youtube_id": youtube_id,
                      "title": video.media.title.text.decode('utf-8'),
                      "url": video.media.player.url.decode('utf-8'),
                      "duration": int(video.media.duration.seconds)}

        if video.statistics:
            video_data["views"] = int(video.statistics.view_count)

        video_data["description"] = (video.media.description.text or '').decode('utf-8')
        video_data["keywords"] = (video.media.keywords.text or '').decode('utf-8')

        potential_id = re.sub('[^a-z0-9]', '-', video_data["title"].lower())
        potential_id = re.sub('-+$', '', potential_id)  # remove any trailing dashes (see issue 1140)
        potential_id = re.sub('^-+', '', potential_id)  # remove any leading dashes (see issue 1526)

        number_to_add = 0
        current_id = potential_id
        while True:
            query = Video.all()
            query.filter('readable_id =', current_id)
            if query.get() is None:
                # The id is unique, so use it and break out
                video_data["readable_id"] = current_id
                break
            else:
                # The id is not unique, so try again with a numeric suffix
                number_to_add += 1
                current_id = '%s-%d' % (potential_id, number_to_add)

        return video_data

    return None
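# Both variants above derive the readable_id the same way: slugify the
# title, then probe the datastore until the slug is unique. A standalone
# sketch of that loop; is_taken is a hypothetical callback standing in for
# the Video.all().filter('readable_id =', ...) probe, so the logic can be
# exercised without App Engine.
import re

def make_readable_id(title, is_taken):
    slug = re.sub('[^a-z0-9]', '-', title.lower())
    slug = re.sub('-+$', '', slug)  # trailing dashes (see issue 1140)
    slug = re.sub('^-+', '', slug)  # leading dashes (see issue 1526)

    candidate, n = slug, 0
    while is_taken(candidate):
        n += 1
        candidate = '%s-%d' % (slug, n)
    return candidate

# e.g. if 'exponent-rules' is taken, "Exponent Rules!" -> 'exponent-rules-1'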
def updateVideoStats(self):
    yt_service = third_party.gdata.youtube.service.YouTubeService()

    # Now that we run these queries from the App Engine servers, we need to
    # explicitly specify our developer_key to avoid being lumped together
    # with the rest of GAE and throttled by YouTube's "Too many requests"
    # quota.
    yt_service.developer_key = "AI39si6ctKTnSR_Vx7o7GpkpeSZAKa6xjbZz6WySzTvKVYRDAO7NHBVwofphk82oP-OSUwIZd0pOJyNuWK8bbOlqzJc9OFozrQ"
    yt_service.client_id = "n/a"

    videos_to_put = set()

    # Fetch everything now, otherwise the query times out later while we're
    # busy making YouTube requests. Theoretically we could also change this
    # code to use the Mapper class:
    # http://code.google.com/appengine/articles/deferred.html
    for i, video in enumerate(Video.all().fetch(100000)):
        entry = None
        youtube_id = video.youtube_id

        # Truncate the youtube_id at 11 characters to handle the _DUP_X
        # suffixes; handling the _DUPs makes it easier to detect content
        # problems when duration == 0.
        if re.search(r"_DUP_\d*$", youtube_id):
            youtube_id = youtube_id[0:11]

        try:
            entry = yt_service.GetYouTubeVideoEntry(video_id=youtube_id)
        except Exception, e:
            logging.info("Error trying to get %s: %s" % (youtube_id, e))

        if entry:
            count = int(entry.statistics.view_count)
            if count != video.views:
                logging.info("%i: Updating %s from %i to %i views"
                             % (i, video.title, video.views, count))
                video.views = count
                videos_to_put.add(video)

            duration = int(entry.media.duration.seconds)
            if duration != video.duration:
                video.duration = duration
                videos_to_put.add(video)
def updateVideoStats(self):
    yt_service = gdata.youtube.service.YouTubeService()

    # Now that we run these queries from the App Engine servers, we need to
    # explicitly specify our developer_key to avoid being lumped together
    # with the rest of GAE and throttled by YouTube's "Too many requests"
    # quota.
    yt_service.developer_key = "AI39si6ctKTnSR_Vx7o7GpkpeSZAKa6xjbZz6WySzTvKVYRDAO7NHBVwofphk82oP-OSUwIZd0pOJyNuWK8bbOlqzJc9OFozrQ"
    yt_service.client_id = "n/a"

    videos_to_put = set()

    # Fetch everything now, otherwise the query times out later while we're
    # busy making YouTube requests. Theoretically we could also change this
    # code to use the Mapper class:
    # http://code.google.com/appengine/articles/deferred.html
    for i, video in enumerate(Video.all().fetch(100000)):
        entry = None
        youtube_id = video.youtube_id

        # Truncate the youtube_id at 11 characters to handle the _DUP_X
        # suffixes; handling the _DUPs makes it easier to detect content
        # problems when duration == 0.
        if re.search(r"_DUP_\d*$", youtube_id):
            youtube_id = youtube_id[0:11]

        try:
            entry = yt_service.GetYouTubeVideoEntry(video_id=youtube_id)
        except Exception, e:
            logging.info("Error trying to get %s: %s" % (youtube_id, e))

        if entry:
            count = int(entry.statistics.view_count)
            if count != video.views:
                logging.info("%i: Updating %s from %i to %i views"
                             % (i, video.title, video.views, count))
                video.views = count
                videos_to_put.add(video)

            duration = int(entry.media.duration.seconds)
            if duration != video.duration:
                video.duration = duration
                videos_to_put.add(video)
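# Both updateVideoStats variants collect modified Video entities in
# videos_to_put but are truncated before the entities are saved. A minimal
# sketch, not from the source, of how such a set is typically persisted:
# db.put() accepts a batch, and the datastore caps each call at 500
# entities, so write in chunks.
from google.appengine.ext import db

def put_in_batches(entities, batch_size=500):
    # Hypothetical helper; `entities` would be videos_to_put from above.
    entities = list(entities)
    for start in xrange(0, len(entities), batch_size):
        db.put(entities[start:start + batch_size])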
def _task_handler(uid, task_id=0, cursor=None, report=None):
    """Task chain for fetching subtitles from the Universal Subtitles API.

    It processes Video models in batches of BATCH_SIZE by fetching the
    English subtitles via an HTTP API call. This job runs regularly, so
    fetch failures are fixed from run to run.

    Fetch failures are logged and suppressed as the task marches on.
    Errors include URL fetch timeouts, subtitles put failures, and
    response decoding failures.

    HTTP redirects indicate that the code needs updating to a new API
    endpoint. They are detected and reported separately.
    """
    query = Video.all()
    query.with_cursor(cursor)
    videos = query.fetch(BATCH_SIZE)

    if report is None:
        report = dict(REPORT_TEMPLATE)
        VideoSubtitlesFetchReport(key_name=uid, **report).put()

    report = download_subtitles(videos, report)

    # Generate a report if there is nothing left to process
    if len(videos) < BATCH_SIZE:
        deferred.defer(_task_report_handler, uid, report,
                       _name='%s_report' % uid, _queue=TASK_QUEUE)
    else:
        next_id = task_id + 1
        cursor = query.cursor()
        deferred.defer(_task_handler, uid, next_id, cursor, report,
                       _name='%s_%s' % (uid, next_id), _queue=TASK_QUEUE,
                       _countdown=DEFER_SECONDS)
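# Each run of _task_handler defers the next task with the cursor it stopped
# at, so the chain only needs to be started once. A minimal sketch of a
# kickoff, assuming a uuid-based name; only _task_handler, TASK_QUEUE, and
# the deferred API come from the snippet above.
import uuid

from google.appengine.ext import deferred

def start_subtitles_fetch():
    # A unique id names the report record and every task in the chain,
    # so concurrent or repeated runs can't collide on task names.
    uid = uuid.uuid4().hex
    deferred.defer(_task_handler, uid,
                   _name='%s_0' % uid, _queue=TASK_QUEUE)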
def get(self):
    from exercises import attempt_problem

    login_user = UserData.current()
    exercises_list = [exercise for exercise in Exercise.all()]
    videos_list = [video for video in Video.all()]

    user_count = self.request_int('users', 5)
    for user_id in xrange(0, user_count):
        # Create a new user
        first_name = random.choice(CreateRandomGoalData.first_names)
        last_name = random.choice(CreateRandomGoalData.last_names)
        nickname = "%s %s" % (first_name, last_name)
        email = 'test_%i@automatedrandomdata' % user_id
        user = users.User(email)

        logging.info("Creating user %s: (%i/%i)"
                     % (nickname, user_id + 1, user_count))

        user_data = UserData.get_or_insert(
            key_name="test_user_%i" % user_id,
            user=user,
            current_user=user,
            user_id=str(user_id),
            moderator=False,
            last_login=datetime.now(),
            proficient_exercises=[],
            suggested_exercises=[],
            need_to_reassess=True,
            points=0,
            coaches=[login_user.user_email],
            user_email=email,
            user_nickname=nickname,
        )
        user_data.put()

        # Delete user exercise & video progress.
        # NOTE: this span was redacted in the source; the two deletes and
        # the goal-loop header below are reconstructed from context.
        query = UserExercise.all()
        query.filter('user =', user)
        db.delete(query)

        query = VideoLog.all()
        query.filter('user =', user)
        db.delete(query)

        # Create a few random goals for the user
        for goal_idx in xrange(1, random.randint(2, 5)):
            obj_descriptors = []

            for objective in xrange(1, random.randint(2, 4)):
                obj_descriptors.append({
                    'type': 'GoalObjectiveExerciseProficiency',
                    'exercise': random.choice(exercises_list)})

            for objective in xrange(1, random.randint(2, 4)):
                obj_descriptors.append({
                    'type': 'GoalObjectiveWatchVideo',
                    'video': random.choice(videos_list)})

            title = first_name + "'s Goal #" + str(goal_idx)
            logging.info("Creating goal " + title)

            objectives = GoalObjective.from_descriptors(obj_descriptors,
                                                        user_data)
            goal = Goal(parent=user_data, title=title, objectives=objectives)
            user_data.save_goal(goal)

            for objective in obj_descriptors:
                if objective['type'] == 'GoalObjectiveExerciseProficiency':
                    user_exercise = user_data.get_or_insert_exercise(
                        objective['exercise'])

                    chooser = random.randint(1, 120)
                    if chooser >= 60:
                        continue
                    elif chooser > 15:
                        count = 1
                        hints = 0
                    elif chooser < 7:
                        count = 20
                        hints = 0
                    else:
                        count = 25
                        hints = 1

                    logging.info(
                        "Starting exercise: %s (%i problems, %i hints)"
                        % (objective['exercise'].name, count, hints * count))

                    for i in xrange(1, count):
                        attempt_problem(
                            user_data, user_exercise, i, 1, 'TEST', 'TEST',
                            'TEST', True, hints, 0, False, False, "TEST",
                            '0.0.0.0')

                elif objective['type'] == 'GoalObjectiveWatchVideo':
                    seconds = random.randint(1, 1200)
                    logging.info("Watching %i seconds of video %s"
                                 % (seconds, objective['video'].title))
                    VideoLog.add_entry(user_data, objective['video'],
                                       seconds, 0, detect_cheat=False)

    self.response.out.write('OK')
def get(self):
    self.response.out.write('<html>')
    videos = Video.all()
    for video in videos:
        self.response.out.write('<P>Title: ' + video.title)
def get(self):
    from exercises import attempt_problem

    login_user = UserData.current()
    exercises_list = [exercise for exercise in Exercise.all()]
    videos_list = [video for video in Video.all()]

    user_count = self.request_int('users', 5)
    for user_id in xrange(0, user_count):
        # Create a new user
        first_name = random.choice(CreateRandomGoalData.first_names)
        last_name = random.choice(CreateRandomGoalData.last_names)
        nickname = "%s %s" % (first_name, last_name)
        email = 'test_%i@automatedrandomdata' % user_id
        user = users.User(email)

        logging.info("Creating user %s: (%i/%i)"
                     % (nickname, user_id + 1, user_count))

        user_data = UserData.get_or_insert(
            key_name="test_user_%i" % user_id,
            user=user,
            current_user=user,
            user_id=str(user_id),
            moderator=False,
            last_login=datetime.now(),
            proficient_exercises=[],
            suggested_exercises=[],
            need_to_reassess=True,
            points=0,
            coaches=[login_user.user_email],
            user_email=email,
            user_nickname=nickname,
        )
        user_data.put()

        # Delete user exercise & video progress.
        # NOTE: this span was redacted in the source; the two deletes and
        # the goal-loop header below are reconstructed from context.
        query = UserExercise.all()
        query.filter('user =', user)
        db.delete(query)

        query = VideoLog.all()
        query.filter('user =', user)
        db.delete(query)

        # Create a few random goals for the user
        for goal_idx in xrange(1, random.randint(2, 5)):
            obj_descriptors = []

            for objective in xrange(1, random.randint(2, 4)):
                obj_descriptors.append({
                    'type': 'GoalObjectiveExerciseProficiency',
                    'exercise': random.choice(exercises_list)
                })

            for objective in xrange(1, random.randint(2, 4)):
                obj_descriptors.append({
                    'type': 'GoalObjectiveWatchVideo',
                    'video': random.choice(videos_list)
                })

            title = first_name + "'s Goal #" + str(goal_idx)
            logging.info("Creating goal " + title)

            objectives = GoalObjective.from_descriptors(
                obj_descriptors, user_data)
            goal = Goal(parent=user_data, title=title, objectives=objectives)
            user_data.save_goal(goal)

            for objective in obj_descriptors:
                if objective['type'] == 'GoalObjectiveExerciseProficiency':
                    user_exercise = user_data.get_or_insert_exercise(
                        objective['exercise'])

                    chooser = random.randint(1, 120)
                    if chooser >= 60:
                        continue
                    elif chooser > 15:
                        count = 1
                        hints = 0
                    elif chooser < 7:
                        count = 20
                        hints = 0
                    else:
                        count = 25
                        hints = 1

                    logging.info(
                        "Starting exercise: %s (%i problems, %i hints)"
                        % (objective['exercise'].name, count, hints * count))

                    for i in xrange(1, count):
                        attempt_problem(user_data, user_exercise, i, 1,
                                        'TEST', 'TEST', 'TEST', True, hints,
                                        0, False, False, "TEST", '0.0.0.0')

                elif objective['type'] == 'GoalObjectiveWatchVideo':
                    seconds = random.randint(1, 1200)
                    logging.info("Watching %i seconds of video %s"
                                 % (seconds, objective['video'].title))
                    VideoLog.add_entry(user_data, objective['video'],
                                       seconds, 0, detect_cheat=False)

    self.response.out.write('OK')
def get(self):
    return db.delete(Video.all(keys_only=True))
def _task_handler(uid, task_id=0, cursor=None, report=None):
    """Task chain for fetching subtitles from the Universal Subtitles API.

    It processes Video models in batches of BATCH_SIZE by fetching the
    English subtitles via an HTTP API call. This job runs regularly, so
    fetch failures are fixed from run to run.

    Fetch failures are logged and suppressed as the task marches on.
    Errors include URL fetch timeouts, subtitles put failures, and
    response decoding failures.

    HTTP redirects indicate that the code needs updating to a new API
    endpoint. They are detected and reported separately.
    """
    query = Video.all()
    query.with_cursor(cursor)
    videos = query.fetch(BATCH_SIZE)

    if report is None:
        report = dict(REPORT_TEMPLATE)
        VideoSubtitlesFetchReport(key_name=uid, **report).put()

    # Asynchronously fetch. We'll rate-limit by fetching BATCH_SIZE
    # subtitles at each DEFER_SECONDS interval.
    rpcs = []
    for video in videos:
        url = UNISUBS_URL % urllib.quote(YOUTUBE_URL % video.youtube_id)
        rpc = urlfetch.create_rpc(deadline=TIMEOUT_SECONDS)
        urlfetch.make_fetch_call(rpc, url)
        rpcs.append((video.youtube_id, rpc))
        report['fetches'] += 1

    # Process the asynchronous fetches
    for youtube_id, rpc in rpcs:
        lang = 'en'
        key_name = VideoSubtitles.get_key_name(lang, youtube_id)
        try:
            resp = rpc.get_result()
            if resp.status_code != 200:
                raise RuntimeError('status code: %s' % resp.status_code)

            if resp.final_url:
                logging.warn('%s redirect to %s' % (key_name, resp.final_url))
                report['redirects'] += 1

            json = resp.content.decode('utf-8')

            # Only update stale records
            current = VideoSubtitles.get_by_key_name(key_name)
            if not current or current.json != json:
                new = VideoSubtitles(key_name=key_name,
                                     youtube_id=youtube_id,
                                     language=lang,
                                     json=json)
                new.put()
                report['writes'] += 1
            else:
                logging.info('%s content already up-to-date' % key_name)
        except Exception, e:
            logging.error('%s subtitles fetch failed: %s' % (key_name, e))
            report['errors'] += 1