def exercise_dashboard(request):
    slug = request.GET.get("topic")
    if not slug:
        title = _("Your Knowledge Map")
    elif slug in topic_tools.get_node_cache("Topic"):
        title = _(topic_tools.get_node_cache("Topic")[slug][0]["title"])
    else:
        raise Http404
    context = {
        "title": title,
    }
    return context
def update_all_distributed_callback(request):
    """Save video and exercise logs uploaded for a facility user."""

    if request.method != "POST":
        raise PermissionDenied("Only POST allowed to this URL endpoint.")

    videos = json.loads(request.POST["video_logs"])
    exercises = json.loads(request.POST["exercise_logs"])
    user = FacilityUser.objects.get(id=request.POST["user_id"])
    node_cache = get_node_cache()

    # Save videos
    n_videos_uploaded = 0
    for video in videos:
        video_id = video['video_id']
        youtube_id = video['youtube_id']

        # Only save video logs for videos that we recognize.
        if video_id not in node_cache["Video"]:
            logging.warn("Skipping unknown video %s" % video_id)
            continue

        try:
            (vl, _) = VideoLog.get_or_initialize(user=user, video_id=video_id, youtube_id=youtube_id)
            for key, val in video.iteritems():
                setattr(vl, key, val)
            logging.debug("Saving video log for %s: %s" % (video_id, vl))
            vl.save()
            n_videos_uploaded += 1
        except KeyError:
            logging.error("Could not save video log for data with missing values: %s" % video)
        except Exception as e:
            error_message = _("Unexpected error importing videos: %(err_msg)s") % {"err_msg": e}
            return JsonResponseMessageError(error_message)

    # Save exercises
    n_exercises_uploaded = 0
    for exercise in exercises:
        # Only save exercise logs for exercises that we recognize.
        if exercise['exercise_id'] not in node_cache['Exercise']:
            logging.warn("Skipping unknown exercise %s" % exercise['exercise_id'])
            continue

        try:
            (el, _) = ExerciseLog.get_or_initialize(user=user, exercise_id=exercise["exercise_id"])
            for key, val in exercise.iteritems():
                setattr(el, key, val)
            logging.debug("Saving exercise log for %s: %s" % (exercise['exercise_id'], el))
            el.save()
            n_exercises_uploaded += 1
        except KeyError:
            logging.error("Could not save exercise log for data with missing values: %s" % exercise)
        except Exception as e:
            error_message = _("Unexpected error importing exercises: %(err_msg)s") % {"err_msg": e}
            return JsonResponseMessageError(error_message)

    return JsonResponseMessageSuccess(_("Uploaded %(num_exercises)d exercises and %(num_videos)d videos") % {
        "num_exercises": n_exercises_uploaded,
        "num_videos": n_videos_uploaded,
    })
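For context, a rough sketch of the POST body this endpoint expects. The field names are drawn from the log fields used elsewhere in this code; the ids and numbers are placeholders, not real data.

# Illustrative payload only -- ids and values below are invented.
example_post = {
    "user_id": "facility-user-id",
    "video_logs": json.dumps([
        {"video_id": "addition-intro", "youtube_id": "enYT12345",
         "total_seconds_watched": 120, "points": 100, "complete": False},
    ]),
    "exercise_logs": json.dumps([
        {"exercise_id": "addition_1", "attempts": 4, "streak_progress": 40,
         "points": 30, "complete": False, "struggling": False},
    ]),
}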
def show_cache(self, force=False):
    """Go through each cacheable page, and show which are cached and which are NOT"""
    for node_type in ['Topic', 'Video', 'Exercise']:
        self.stdout.write("Cached %ss:\n" % node_type)
        for narr in topic_tools.get_node_cache(node_type).values():
            for n in narr:
                if caching.has_cache_key(path=n["path"]):
                    self.stdout.write("\t%s\n" % n["path"])
def test_topic_availability(self):
    for node_list in get_node_cache("Topic").values():
        for topic in node_list:
            if "Exercise" in topic["contains"]:
                self.assertTrue(topic["available"], "Make sure all topics containing exercises are shown as available.")
            if topic["children"] and len(topic["contains"]) == 1 and "Video" in topic["contains"]:
                any_on_disk = bool(sum([v["on_disk"] for v in topic["children"]]))
                self.assertEqual(topic["available"], any_on_disk, "Make sure topic availability matches video availability when only videos are available.")
def setUp(self):
    """
    Create a student, log the student in, and go to the exercise page.
    """
    super(StudentExerciseTest, self).setUp()
    self.student = self.create_student(facility_name=self.facility_name)
    self.browser_login_student(self.student_username, self.student_password, facility_name=self.facility_name)

    self.browse_to(self.live_server_url + get_node_cache("Exercise")[self.EXERCISE_SLUG][0]["path"])
    self.browser_check_django_message(num_messages=0)  # make sure no messages
def search(request, topics):  # we don't use the topics variable, but this setup will refresh the node cache
    # Inputs
    query = request.GET.get('query')
    category = request.GET.get('category')
    max_results_per_category = int(request.GET.get('max_results', 25))  # GET values arrive as strings; compare as int

    # Outputs
    query_error = None
    possible_matches = {}
    hit_max = {}

    if query is None:
        query_error = _("Error: query not specified.")

    # elif len(query) < 3:
    #     query_error = _("Error: query too short.")

    else:
        query = query.lower()
        # search for topic, video or exercise with matching title
        nodes = []
        for node_type, node_dict in topic_tools.get_node_cache().iteritems():
            if category and node_type != category:
                # Skip categories that don't match (if specified)
                continue

            possible_matches[node_type] = []  # make dict only for non-skipped categories
            for nodearr in node_dict.values():
                node = nodearr[0]
                title = _(node['title']).lower()  # this could be done once and stored.
                if title == query:
                    # Redirect to an exact match
                    return HttpResponseRedirect(node['path'])
                elif len(possible_matches[node_type]) < max_results_per_category and query in title:
                    # For efficiency, don't do substring matches when we've got lots of results
                    possible_matches[node_type].append(node)

            hit_max[node_type] = len(possible_matches[node_type]) == max_results_per_category

    return {
        'title': _("Search results for '%(query)s'") % {"query": (query if query else "")},
        'query_error': query_error,
        'results': possible_matches,
        'hit_max': hit_max,
        'query': query,
        'max_results': max_results_per_category,
        'category': category,
    }
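For reference, a minimal sketch of the context dict this view returns for a substring match. The keys come from the return statement above; the node values are invented for illustration.

# Hypothetical illustration only -- node titles and paths are made up.
example_context = {
    'title': "Search results for 'fraction'",
    'query_error': None,
    'results': {
        'Topic': [],
        'Video': [{'title': "Introduction to fractions", 'path': "/math/arithmetic/fractions/v/intro"}],
        'Exercise': [],
    },
    'hit_max': {'Topic': False, 'Video': False, 'Exercise': False},
    'query': 'fraction',
    'max_results': 25,
    'category': None,
}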
def handle(self, *args, **options):
    if not options["lang_code"]:
        raise CommandError("You must specify a language code.")

    lang_code = lcode_to_ietf(options["lang_code"])
    if lang_code not in AVAILABLE_EXERCISE_LANGUAGE_CODES:
        logging.info("No exercises available for language %s" % lang_code)

    else:
        # Get list of exercises
        exercise_ids = options["exercise_ids"].split(",") if options["exercise_ids"] else None
        exercise_ids = exercise_ids or ([ex["id"] for ex in get_topic_exercises(topic_id=options["topic_id"])] if options["topic_id"] else None)
        exercise_ids = exercise_ids or get_node_cache("Exercise").keys()

        # Download the exercises
        for exercise_id in exercise_ids:
            scrape_exercise(exercise_id=exercise_id, lang_code=lang_code, force=options["force"])

    logging.info("Process complete.")
def handle(self, *args, **options):
    if settings.CENTRAL_SERVER:
        raise CommandError("This must only be run on the distributed server.")

    if not options["lang_code"]:
        raise CommandError("You must specify a language code.")

    # ensure_dir(settings.CONTENT_ROOT)

    # Get list of videos
    lang_code = lcode_to_ietf(options["lang_code"])
    video_map = get_dubbed_video_map(lang_code) or {}
    video_ids = options["video_ids"].split(",") if options["video_ids"] else None
    video_ids = video_ids or ([vid["id"] for vid in get_topic_videos(topic_id=options["topic_id"])] if options["topic_id"] else None)
    video_ids = video_ids or video_map.keys()

    # Download the videos
    for video_id in video_ids:
        if video_id in video_map:
            youtube_id = video_map[video_id]

        elif video_id in video_map.values():
            # Perhaps they sent in a youtube ID?  We can handle that!
            youtube_id = video_id

        else:
            logging.error("No mapping for video_id=%s; skipping" % video_id)
            continue

        try:
            scrape_video(youtube_id=youtube_id, format=options["format"], force=options["force"])
            # scrape_thumbnail(youtube_id=youtube_id)
            logging.info("Access video %s at %s" % (youtube_id, get_node_cache("Video")[video_id][0]["path"]))
        except Exception as e:
            logging.error("Failed to download video %s: %s" % (youtube_id, e))

    logging.info("Process complete.")
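A minimal invocation sketch, assuming this handle() belongs to a management command registered as "videodownload" (the command name and the example video IDs are assumptions; the option keys are the ones read above):

# Sketch only: the command name and video IDs are assumed for illustration.
from django.core.management import call_command

# Fetch two specific videos as MP4 for Spanish, re-downloading even if already present.
call_command("videodownload",
             lang_code="es",
             video_ids="addition-intro,subtraction-intro",
             topic_id=None,
             format="mp4",
             force=True)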
def _setup(self, num_logs=50, **kwargs):
    super(OneHundredRandomLogUpdates, self)._setup(**kwargs)

    node_cache = get_node_cache()
    try:
        self.user = FacilityUser.objects.get(username=self.username)
    except:
        # take username from ExerciseLog
        all_exercises = ExerciseLog.objects.all()
        self.user = FacilityUser.objects.get(id=all_exercises[0].user_id)
        print self.username, " not in FacilityUsers, using ", self.user
    self.num_logs = num_logs

    # give the platform a chance to cache the logs
    ExerciseLog.objects.filter(user=self.user).delete()
    for x in range(num_logs):
        while True:
            ex_idx = int(self.random.random() * len(node_cache["Exercise"].keys()))
            ex_id = node_cache["Exercise"].keys()[ex_idx]
            if not ExerciseLog.objects.filter(user=self.user, exercise_id=ex_id):
                break
        ex = ExerciseLog(user=self.user, exercise_id=ex_id)
        ex.save()
    self.exercise_list = ExerciseLog.objects.filter(user=self.user)
    self.exercise_count = self.exercise_list.count()

    VideoLog.objects.filter(user=self.user).delete()
    for x in range(num_logs):
        while True:
            vid_idx = int(self.random.random() * len(node_cache["Video"].keys()))
            vid_id = node_cache["Video"].keys()[vid_idx]
            if not VideoLog.objects.filter(user=self.user, video_id=vid_id):
                break
        vid = VideoLog(user=self.user, video_id=vid_id)
        vid.save()
    self.video_list = VideoLog.objects.filter(user=self.user)
    self.video_count = self.video_list.count()
class StudentExerciseTest(KALiteDistributedWithFacilityBrowserTestCase):
    """
    Test exercises.
    """
    student_username = '******'
    student_password = '******'
    EXERCISE_SLUG = 'addition_1'
    MIN_POINTS = get_node_cache("Exercise")[EXERCISE_SLUG][0]["basepoints"]
    MAX_POINTS = 2 * MIN_POINTS

    def setUp(self):
        """
        Create a student, log the student in, and go to the exercise page.
        """
        super(StudentExerciseTest, self).setUp()
        self.student = self.create_student(facility_name=self.facility_name)
        self.browser_login_student(self.student_username, self.student_password, facility_name=self.facility_name)

        self.browse_to(self.live_server_url + get_node_cache("Exercise")[self.EXERCISE_SLUG][0]["path"])
        self.browser_check_django_message(num_messages=0)  # make sure no messages

    def browser_get_current_points(self):
        """
        Check the total points a student has accumulated, from an exercise page.
        """
        return self.browser.find_element_by_css_selector('#totalpoints').text

    def browser_submit_answer(self, answer):
        """
        From an exercise page, insert an answer into the text box and submit.
        """
        self.browser.find_element_by_id('solutionarea').find_element_by_css_selector('input[type=text]').click()
        self.browser_send_keys(unicode(answer))
        self.browser_send_keys(Keys.RETURN)

        # Convert points to a number, when appropriate
        time.sleep(0.25)
        points = self.browser_get_current_points()
        return float(points) if isnumeric(points) else points

    @unittest.skipIf(settings.RUNNING_IN_TRAVIS, "I CAN'T TAKE THIS ANYMORE!")
    def test_question_correct_points_are_added(self):
        """
        Answer an exercise correctly
        """
        numbers = self.browser.find_elements_by_class_name('mn')
        answer = sum(int(num.text) for num in numbers)
        points = self.browser_submit_answer(answer)
        self.assertTrue(self.MIN_POINTS <= points <= self.MAX_POINTS,
                        "point update is wrong: %s. Should be %s <= points <= %s" % (points, self.MIN_POINTS, self.MAX_POINTS))
        self.browser_check_django_message(num_messages=0)  # make sure no messages

        elog = ExerciseLog.objects.get(exercise_id=self.EXERCISE_SLUG, user=self.student)
        self.assertEqual(elog.streak_progress, 10, "Streak progress should be 10%")
        self.assertFalse(elog.struggling, "Student is not struggling.")
        self.assertEqual(elog.attempts, 1, "Student should have 1 attempt.")
        self.assertFalse(elog.complete, "Student should not have completed the exercise.")
        self.assertEqual(elog.attempts_before_completion, None, "Student should not have a value for attempts_before_completion.")

    @unittest.skipIf(settings.RUNNING_IN_TRAVIS, "I CAN'T TAKE THIS ANYMORE!")
    def test_question_incorrect_no_points_are_added(self):
        """
        Answer an exercise incorrectly.
        """
        points = self.browser_submit_answer('this is a wrong answer')
        self.assertEqual(points, "", "points text should be empty")
        self.browser_check_django_message(num_messages=0)  # make sure no messages

        elog = ExerciseLog.objects.get(exercise_id=self.EXERCISE_SLUG, user=self.student)
        self.assertEqual(elog.streak_progress, 0, "Streak progress should be 0%")
        self.assertFalse(elog.struggling, "Student is not struggling.")
        self.assertEqual(elog.attempts, 1, "Student should have 1 attempt.")
        self.assertFalse(elog.complete, "Student should not have completed the exercise.")
        self.assertEqual(elog.attempts_before_completion, None, "Student should not have a value for attempts_before_completion.")

    @unittest.skipIf(settings.RUNNING_IN_TRAVIS, "I CAN'T TAKE THIS ANYMORE!")
    def test_exercise_mastery(self):
        """
        Answer an exercise 10 times correctly; verify mastery message
        """
        points = 0
        nanswers = 10
        for ai in range(1, 1 + nanswers):
            numbers = self.browser.find_elements_by_class_name('mn')
            answer = sum(int(num.text) for num in numbers)
            expected_min_points = points + self.MIN_POINTS
            expected_max_points = points + self.MAX_POINTS
            points = self.browser_submit_answer(answer)

            self.assertGreaterEqual(points, expected_min_points, "Too few points were given: %s < %s" % (points, expected_min_points))
            self.assertLessEqual(points, expected_max_points, "Too many points were given: %s > %s" % (points, expected_max_points))

            if ai < nanswers:
                self.browser_check_django_message(num_messages=0)  # make sure no messages
            else:
                self.browser_check_django_message(message_type="success", contains="You have mastered this exercise!")
            self.browser_send_keys(Keys.RETURN)  # move on to next question.

        # Now test the models
        elog = ExerciseLog.objects.get(exercise_id=self.EXERCISE_SLUG, user=self.student)
        self.assertEqual(elog.streak_progress, 100, "Streak progress should be 100%")
        self.assertFalse(elog.struggling, "Student is not struggling.")
        self.assertEqual(elog.attempts, nanswers, "Student should have 10 attempts.")
        self.assertTrue(elog.complete, "Student should have completed the exercise.")
        self.assertEqual(elog.attempts_before_completion, nanswers, "Student should have 10 attempts for completion.")
def api_data(request, xaxis="", yaxis=""):
    """Request contains information about what data are requested (who, what, and how).

    Response should be a JSON object
    * data contains the data, structured by user and then datatype
    * the rest of the data is metadata, useful for displaying detailed info about data.
    """

    # Get the request form
    try:
        form = get_data_form(request, xaxis=xaxis, yaxis=yaxis)  # (data=request.REQUEST)
    except Exception as e:
        # In investigating #1509: we can catch SQL errors here and communicate clearer error
        #   messages with the user here.  For now, we have no such error to catch, so just
        #   pass the errors on to the user (via the @api_handle_error_with_json decorator).
        raise e

    # Query out the data: who?
    if form.data.get("user"):
        facility = []
        groups = []
        users = [get_object_or_404(FacilityUser, id=form.data.get("user"))]
    elif form.data.get("group"):
        facility = []
        groups = [get_object_or_404(FacilityGroup, id=form.data.get("group"))]
        users = FacilityUser.objects.filter(group=form.data.get("group"), is_teacher=False).order_by("last_name", "first_name")
    elif form.data.get("facility"):
        facility = get_object_or_404(Facility, id=form.data.get("facility"))
        groups = FacilityGroup.objects.filter(facility__in=[form.data.get("facility")])
        users = FacilityUser.objects.filter(facility__in=[form.data.get("facility")], is_teacher=False).order_by("last_name", "first_name")
    else:
        return HttpResponseNotFound(_("Did not specify facility, group, nor user."))

    # Query out the data: where?
    if not form.data.get("topic_path"):
        return HttpResponseNotFound(_("Must specify a topic path"))

    # Query out the data: what?
    computed_data = compute_data(data_types=[form.data.get("xaxis"), form.data.get("yaxis")], who=users, where=form.data.get("topic_path"))

    # Quickly add back in exercise meta-data (could potentially be used in future for other data too!)
    ex_nodes = get_node_cache()["Exercise"]
    exercises = []
    for e in computed_data["exercises"]:
        exercises.append({
            "slug": e,
            "full_name": ex_nodes[e][0]["display_name"],
            "url": ex_nodes[e][0]["path"],
        })

    json_data = {
        "data": computed_data["data"],
        "exercises": exercises,
        "videos": computed_data["videos"],
        "users": dict(zip([u.id for u in users],
                          ["%s, %s" % (u.last_name, u.first_name) for u in users])),
        "groups": dict(zip([g.id for g in groups],
                           dict(zip(["id", "name"], [(g.id, g.name) for g in groups])),
                           )),
        "facility": None if not facility else {
            "name": facility.name,
            "id": facility.id,
        }
    }

    if "facility_user" in request.session:
        try:
            # Log a "begin" and end here
            user = request.session["facility_user"]
            UserLog.begin_user_activity(user, activity_type="coachreport")
            UserLog.update_user_activity(user, activity_type="login")  # to track active login time for teachers
            UserLog.end_user_activity(user, activity_type="coachreport")
        except ValidationError as e:
            # Never report this error; don't want this logging to block other functionality.
            logging.error("Failed to update Teacher userlog activity login: %s" % e)

    # Now we have data, stream it back with a handler for date-times
    return JsonResponse(json_data)
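For orientation, a rough sketch of the intended shape of the JSON payload assembled above. The keys come from json_data; the ids, names, and numbers are placeholders, not real values.

# Illustrative shape only -- every id, name, and number is a placeholder.
example_payload = {
    "data": {"user-id-1": {"pct_mastery": 42.0}},          # per-user, per-datatype values
    "exercises": [{"slug": "addition_1", "full_name": "Addition 1", "url": "/exercise/addition_1/"}],
    "videos": ["addition-intro"],
    "users": {"user-id-1": "Doe, Jane"},                   # "last, first" display names
    "groups": {},                                           # group metadata keyed by group id
    "facility": {"name": "Main Facility", "id": "facility-id-1"},
}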
def compute_data(data_types, who, where):
    """
    Compute the data in "data_types" for each user in "who", for the topics selected by "where"

    who: list of users
    where: topic_path
    data_types can include:
        pct_mastery
        effort
        attempts
    """

    # None indicates that the data hasn't been queried yet.
    #   We'll query it on demand, for efficiency
    topics = None
    exercises = None
    videos = None

    # Initialize an empty dictionary of data, video logs, exercise logs, for each user
    data = OrderedDict(zip([w.id for w in who], [dict() for i in range(len(who))]))  # maintain the order of the users
    vid_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))
    ex_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))
    if UserLog.is_enabled():
        activity_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))

    # Set up queries (but don't run them), so we have really easy aliases.
    #   Only do them if they haven't been done yet (tell this by passing in a value to the lambda function)
    # Topics: topics.
    # Exercises: names (ids for ExerciseLog objects)
    # Videos: video_id (ids for VideoLog objects)

    # This lambda partial creates a function to return all items with a particular path from the NODE_CACHE.
    search_fun_single_path = partial(lambda t, p: t["path"].startswith(p), p=tuple(where))
    # This lambda partial creates a function to return all items with paths matching a list of paths from NODE_CACHE.
    search_fun_multi_path = partial(lambda ts, p: any([t["path"].startswith(p) for t in ts]), p=tuple(where))
    # Functions that use the functions defined above to return topics, exercises, and videos based on paths.
    query_topics = partial(lambda t, sf: t if t is not None else [t[0]["id"] for t in filter(sf, get_node_cache('Topic').values())], sf=search_fun_single_path)
    query_exercises = partial(lambda e, sf: e if e is not None else [ex[0]["id"] for ex in filter(sf, get_node_cache('Exercise').values())], sf=search_fun_multi_path)
    query_videos = partial(lambda v, sf: v if v is not None else [vid[0]["id"] for vid in filter(sf, get_node_cache('Video').values())], sf=search_fun_multi_path)

    # No users, don't bother.
    if len(who) > 0:
        # Query out all exercises, videos, exercise logs, and video logs before looping to limit requests.
        # This means we could pull data for n-dimensional coach report displays with the same number of requests!
        # Note: User activity is polled inside the loop, to prevent possible slowdown for exercise and video reports.
        exercises = query_exercises(exercises)
        videos = query_videos(videos)
        if exercises:
            ex_logs = query_logs(data.keys(), exercises, "exercise", ex_logs)
        if videos:
            vid_logs = query_logs(data.keys(), videos, "video", vid_logs)

        for data_type in (data_types if not hasattr(data_types, "lower") else [data_types]):  # convert list from string, if necessary
            if data_type in data[data.keys()[0]]:  # if the first user has it, then all do; no need to calc again.
                continue

            #
            # These are summary stats: you only get one per user
            #
            if data_type == "pct_mastery":
                # Efficient query out, spread out to dict
                for user in data.keys():
                    data[user][data_type] = 0 if not ex_logs[user] else 100. * sum([el['complete'] for el in ex_logs[user]]) / float(len(exercises))

            elif data_type == "effort":
                if "ex:attempts" in data[data.keys()[0]] and "vid:total_seconds_watched" in data[data.keys()[0]]:
                    # exercises and videos would be initialized already
                    for user in data.keys():
                        avg_attempts = 0 if len(exercises) == 0 else sum(data[user]["ex:attempts"].values()) / float(len(exercises))
                        avg_video_points = 0 if len(videos) == 0 else sum(data[user]["vid:total_seconds_watched"].values()) / float(len(videos))
                        data[user][data_type] = 100. * (0.5 * avg_attempts / 10. + 0.5 * avg_video_points / 750.)
                else:
                    data_types += ["ex:attempts", "vid:total_seconds_watched", "effort"]

            #
            # These are detail stats: you get many per user
            #

            # Just querying out data directly: Video
            elif data_type.startswith("vid:") and data_type[4:] in [f.name for f in VideoLog._meta.fields]:
                for user in data.keys():
                    data[user][data_type] = OrderedDict([(v['video_id'], v[data_type[4:]]) for v in vid_logs[user]])

            # Just querying out data directly: Exercise
            elif data_type.startswith("ex:") and data_type[3:] in [f.name for f in ExerciseLog._meta.fields]:
                for user in data.keys():
                    data[user][data_type] = OrderedDict([(el['exercise_id'], el[data_type[3:]]) for el in ex_logs[user]])

            # User Log Queries (the original condition here was redacted; reconstructed by analogy with the
            #   "usersum:" branch below)
            elif data_type.startswith("user:") and data_type[5:] in [f.name for f in UserLog._meta.fields] and UserLog.is_enabled():
                activity_logs = query_logs(data.keys(), "", "activity", activity_logs)
                for user in data.keys():
                    data[user][data_type] = [log[data_type[5:]] for log in activity_logs[user]]

            # User Summary Queries
            elif data_type.startswith("usersum:") and data_type[8:] in [f.name for f in UserLogSummary._meta.fields] and UserLog.is_enabled():
                activity_logs = query_logs(data.keys(), "", "summaryactivity", activity_logs)
                for user in data.keys():
                    data[user][data_type] = sum([log[data_type[8:]] for log in activity_logs[user]])

            # Unknown requested quantity
            else:
                raise Exception("Unknown type: '%s' not in %s" % (data_type, str([f.name for f in ExerciseLog._meta.fields])))

    # Returning empty list instead of None allows javascript on client
    #   side to read 'length' property without error.
    exercises = exercises or []
    videos = videos or []

    return {
        "data": data,
        "topics": topics,
        "exercises": exercises,
        "videos": videos,
    }
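A minimal sketch of calling compute_data() with the summary and detail types described in its docstring. The queryset filter and the topic path are illustrative assumptions; api_data() above passes the form's topic_path string in the same way.

# Sketch only: "who" is any iterable of FacilityUser objects, "where" the topic path used for prefix matching.
students = FacilityUser.objects.filter(is_teacher=False)
report = compute_data(
    data_types=["pct_mastery", "ex:attempts", "vid:total_seconds_watched"],
    who=list(students),
    where="/math/arithmetic/",  # hypothetical topic_path
)
# report["data"] maps each user id to an OrderedDict of the requested quantities;
# report["exercises"] and report["videos"] list the node ids that were considered.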
def _get_user_usage_data(users, groups=None, period_start=None, period_end=None, group_id=None):
    """
    Returns facility user data, within the given date range.
    """

    groups = groups or set([user.group for user in users])

    # compute period start and end
    # Now compute stats, based on queried data
    num_exercises = len(get_node_cache('Exercise'))
    user_data = OrderedDict()
    group_data = OrderedDict()

    # Make queries efficiently
    exercise_logs = ExerciseLog.objects.filter(user__in=users, complete=True)
    video_logs = VideoLog.objects.filter(user__in=users)
    login_logs = UserLogSummary.objects.filter(user__in=users)

    # filter results
    if period_start:
        exercise_logs = exercise_logs.filter(completion_timestamp__gte=period_start)
        video_logs = video_logs.filter(completion_timestamp__gte=period_start)
        login_logs = login_logs.filter(start_datetime__gte=period_start)
    if period_end:
        exercise_logs = exercise_logs.filter(completion_timestamp__lte=period_end)
        video_logs = video_logs.filter(completion_timestamp__lte=period_end)
        login_logs = login_logs.filter(end_datetime__lte=period_end)

    # Force results in a single query
    exercise_logs = list(exercise_logs.values("exercise_id", "user__pk"))
    video_logs = list(video_logs.values("video_id", "user__pk"))
    login_logs = list(login_logs.values("activity_type", "total_seconds", "user__pk"))

    for user in users:
        user_data[user.pk] = OrderedDict()
        user_data[user.pk]["id"] = user.pk
        user_data[user.pk]["first_name"] = user.first_name
        user_data[user.pk]["last_name"] = user.last_name
        user_data[user.pk]["username"] = user.username
        user_data[user.pk]["group"] = user.group

        user_data[user.pk]["total_report_views"] = 0  # report_stats["count__sum"] or 0
        user_data[user.pk]["total_logins"] = 0  # login_stats["count__sum"] or 0
        user_data[user.pk]["total_hours"] = 0  # (login_stats["total_seconds__sum"] or 0)/3600.

        user_data[user.pk]["total_exercises"] = 0
        user_data[user.pk]["pct_mastery"] = 0.
        user_data[user.pk]["exercises_mastered"] = []

        user_data[user.pk]["total_videos"] = 0
        user_data[user.pk]["videos_watched"] = []

    for elog in exercise_logs:
        user_data[elog["user__pk"]]["total_exercises"] += 1
        user_data[elog["user__pk"]]["pct_mastery"] += 1. / num_exercises
        user_data[elog["user__pk"]]["exercises_mastered"].append(elog["exercise_id"])

    for vlog in video_logs:
        user_data[vlog["user__pk"]]["total_videos"] += 1
        user_data[vlog["user__pk"]]["videos_watched"].append(vlog["video_id"])

    for llog in login_logs:
        if llog["activity_type"] == UserLog.get_activity_int("coachreport"):
            user_data[llog["user__pk"]]["total_report_views"] += 1
        elif llog["activity_type"] == UserLog.get_activity_int("login"):
            user_data[llog["user__pk"]]["total_hours"] += (llog["total_seconds"]) / 3600.
            user_data[llog["user__pk"]]["total_logins"] += 1

    for group in list(groups) + [None] * (group_id == None or _(group_id) == _("Ungrouped")):  # None for ungrouped, if no group_id passed.
        group_pk = getattr(group, "pk", None)
        group_name = getattr(group, "name", _("Ungrouped"))
        group_data[group_pk] = {
            "id": group_pk,
            "name": group_name,
            "total_logins": 0,
            "total_hours": 0,
            "total_users": 0,
            "total_videos": 0,
            "total_exercises": 0,
            "pct_mastery": 0,
        }

    # Add group data.  Allow a fake group "Ungrouped"
    for user in users:
        group_pk = getattr(user.group, "pk", None)
        group_data[group_pk]["total_users"] += 1
        group_data[group_pk]["total_logins"] += user_data[user.pk]["total_logins"]
        group_data[group_pk]["total_hours"] += user_data[user.pk]["total_hours"]
        group_data[group_pk]["total_videos"] += user_data[user.pk]["total_videos"]
        group_data[group_pk]["total_exercises"] += user_data[user.pk]["total_exercises"]
        total_mastery_so_far = (group_data[group_pk]["pct_mastery"] * (group_data[group_pk]["total_users"] - 1) + user_data[user.pk]["pct_mastery"])
        group_data[group_pk]["pct_mastery"] = total_mastery_so_far / group_data[group_pk]["total_users"]

    if len(group_data) == 1 and group_data.has_key(None):
        if not group_data[None]["total_users"]:
            del group_data[None]

    return (user_data, group_data)
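A small usage sketch, assuming callers pass a FacilityUser queryset and optional datetime bounds; the queryset filter and date range below are example values only.

# Sketch only: the user filter and date range are illustrative.
import datetime

users = FacilityUser.objects.filter(is_teacher=False)
user_data, group_data = _get_user_usage_data(
    users,
    period_start=datetime.datetime(2014, 1, 1),
    period_end=datetime.datetime(2014, 6, 30),
)
# user_data:  OrderedDict keyed by user pk (logins, hours, exercises, pct_mastery, ...)
# group_data: OrderedDict keyed by group pk, plus a None key for the "Ungrouped" bucket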
def student_view_context(request, xaxis="pct_mastery", yaxis="ex:attempts"):
    """
    Context done separately, to be importable for similar pages.
    """
    user = get_user_from_request(request=request)
    if not user:
        raise Http404("User not found.")

    node_cache = get_node_cache()
    topic_ids = get_knowledgemap_topics()
    topic_ids += [ch["id"] for node in get_topic_tree()["children"] for ch in node["children"] if node["id"] != "math"]
    topics = [node_cache["Topic"][id][0] for id in topic_ids]

    user_id = user.id
    exercise_logs = list(ExerciseLog.objects.filter(user=user).values(
        "exercise_id", "complete", "points", "attempts", "streak_progress", "struggling", "completion_timestamp"))
    video_logs = list(VideoLog.objects.filter(user=user).values(
        "video_id", "complete", "total_seconds_watched", "points", "completion_timestamp"))

    exercise_sparklines = dict()
    stats = dict()
    topic_exercises = dict()
    topic_videos = dict()
    exercises_by_topic = dict()
    videos_by_topic = dict()

    # Categorize every exercise log into a "midlevel" exercise
    for elog in exercise_logs:
        if not elog["exercise_id"] in node_cache["Exercise"]:
            # Sometimes KA updates their topic tree and eliminates exercises;
            #   we also want to support 3rd party switching of trees arbitrarily.
            logging.debug("Skip unknown exercise log for %s/%s" % (user_id, elog["exercise_id"]))
            continue

        parent_ids = [topic for ex in node_cache["Exercise"][elog["exercise_id"]] for topic in ex["ancestor_ids"]]
        topic = set(parent_ids).intersection(set(topic_ids))
        if not topic:
            logging.error("Could not find a topic for exercise %s (parents=%s)" % (elog["exercise_id"], parent_ids))
            continue
        topic = topic.pop()
        if not topic in topic_exercises:
            topic_exercises[topic] = get_topic_exercises(path=node_cache["Topic"][topic][0]["path"])
        exercises_by_topic[topic] = exercises_by_topic.get(topic, []) + [elog]

    # Categorize every video log into a "midlevel" exercise.
    for vlog in video_logs:
        if not vlog["video_id"] in node_cache["Video"]:
            # Sometimes KA updates their topic tree and eliminates videos;
            #   we also want to support 3rd party switching of trees arbitrarily.
            logging.debug("Skip unknown video log for %s/%s" % (user_id, vlog["video_id"]))
            continue

        parent_ids = [topic for vid in node_cache["Video"][vlog["video_id"]] for topic in vid["ancestor_ids"]]
        topic = set(parent_ids).intersection(set(topic_ids))
        if not topic:
            logging.error("Could not find a topic for video %s (parents=%s)" % (vlog["video_id"], parent_ids))
            continue
        topic = topic.pop()
        if not topic in topic_videos:
            topic_videos[topic] = get_topic_videos(path=node_cache["Topic"][topic][0]["path"])
        videos_by_topic[topic] = videos_by_topic.get(topic, []) + [vlog]

    # Now compute stats
    for id in topic_ids:  # set(topic_exercises.keys()).union(set(topic_videos.keys())):
        n_exercises = len(topic_exercises.get(id, []))
        n_videos = len(topic_videos.get(id, []))

        exercises = exercises_by_topic.get(id, [])
        videos = videos_by_topic.get(id, [])
        n_exercises_touched = len(exercises)
        n_videos_touched = len(videos)

        exercise_sparklines[id] = [el["completion_timestamp"] for el in filter(lambda n: n["complete"], exercises)]

        # total streak currently a pct, but expressed in max 100; convert to
        #   proportion (like other percentages here)
        stats[id] = {
            "ex:pct_mastery": 0 if not n_exercises_touched else sum([el["complete"] for el in exercises]) / float(n_exercises),
            "ex:pct_started": 0 if not n_exercises_touched else n_exercises_touched / float(n_exercises),
            "ex:average_points": 0 if not n_exercises_touched else sum([el["points"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_attempts": 0 if not n_exercises_touched else sum([el["attempts"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_streak": 0 if not n_exercises_touched else sum([el["streak_progress"] for el in exercises]) / float(n_exercises_touched) / 100.,
            "ex:total_struggling": 0 if not n_exercises_touched else sum([el["struggling"] for el in exercises]),
            "ex:last_completed": None if not n_exercises_touched else max_none([el["completion_timestamp"] or None for el in exercises]),
            "vid:pct_started": 0 if not n_videos_touched else n_videos_touched / float(n_videos),
            "vid:pct_completed": 0 if not n_videos_touched else sum([vl["complete"] for vl in videos]) / float(n_videos),
            "vid:total_minutes": 0 if not n_videos_touched else sum([vl["total_seconds_watched"] for vl in videos]) / 60.,
            "vid:average_points": 0. if not n_videos_touched else float(sum([vl["points"] for vl in videos]) / float(n_videos_touched)),
            "vid:last_completed": None if not n_videos_touched else max_none([vl["completion_timestamp"] or None for vl in videos]),
        }

    context = plotting_metadata_context(request)
    return {
        "form": context["form"],
        "groups": context["groups"],
        "facilities": context["facilities"],
        "student": user,
        "topics": topics,
        "exercises": topic_exercises,
        "exercise_logs": exercises_by_topic,
        "video_logs": videos_by_topic,
        "exercise_sparklines": exercise_sparklines,
        "no_data": not exercise_logs and not video_logs,
        "stats": stats,
        "stat_defs": [  # this order determines the order of display
            {"key": "ex:pct_mastery", "title": _("% Mastery"), "type": "pct"},
            {"key": "ex:pct_started", "title": _("% Started"), "type": "pct"},
            {"key": "ex:average_points", "title": _("Average Points"), "type": "float"},
            {"key": "ex:average_attempts", "title": _("Average Attempts"), "type": "float"},
            {"key": "ex:average_streak", "title": _("Average Streak"), "type": "pct"},
            {"key": "ex:total_struggling", "title": _("Struggling"), "type": "int"},
            {"key": "ex:last_completed", "title": _("Last Completed"), "type": "date"},
            {"key": "vid:pct_completed", "title": _("% Completed"), "type": "pct"},
            {"key": "vid:pct_started", "title": _("% Started"), "type": "pct"},
            {"key": "vid:total_minutes", "title": _("Average Minutes Watched"), "type": "float"},
            {"key": "vid:average_points", "title": _("Average Points"), "type": "float"},
            {"key": "vid:last_completed", "title": _("Last Completed"), "type": "date"},
        ],
    }
def test_video_availability(self):
    nvids_local = sum([node_list[0]["on_disk"] for node_list in get_node_cache("Video").values()])
    self.assertEqual(self.n_videos, nvids_local, "# videos actually on disk should match # videos in topic tree")
def create_cache(self, force=False):
    for node_type in ['Topic', 'Video', 'Exercise']:
        self.stdout.write("Caching %ss:\n" % node_type)
        for narr in topic_tools.get_node_cache(node_type).values():
            for n in narr:
                self.create_page_cache(path=n["path"], force=force)
def tabular_view(request, facility, report_type="exercise"):
    """Tabular view also gets data server-side."""

    # Define how students are ordered--used to be as efficient as possible.
    student_ordering = ["last_name", "first_name", "username"]

    # Get a list of topics (sorted) and groups
    topics = [get_node_cache("Topic").get(tid) for tid in get_knowledgemap_topics()]
    (groups, facilities) = get_accessible_objects_from_logged_in_user(request, facility=facility)
    context = plotting_metadata_context(request, facility=facility)
    context.update({
        # For translators: the following two translations are nouns
        "report_types": (_("exercise"), _("video")),
        "request_report_type": report_type,
        "topics": [{"id": t[0]["id"], "title": t[0]["title"]} for t in topics if t],
    })

    # get querystring info
    topic_id = request.GET.get("topic", "")
    # No valid data; just show generic
    if not topic_id or not re.match("^[\w\-]+$", topic_id):
        return context

    group_id = request.GET.get("group", "")
    if group_id:
        # Narrow by group
        users = FacilityUser.objects.filter(group=group_id, is_teacher=False).order_by(*student_ordering)

    elif facility:
        # Narrow by facility
        search_groups = [groups_dict["groups"] for groups_dict in groups if groups_dict["facility"] == facility.id]
        assert len(search_groups) <= 1, "Should only have one or zero matches."

        # Return groups and ungrouped
        search_groups = search_groups[0]  # make sure to include ungrouped students
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None, facility=facility), is_teacher=False).order_by(*student_ordering)

    else:
        # Show all (including ungrouped)
        search_groups = []
        for groups_dict in groups:
            search_groups += groups_dict["groups"]
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None), is_teacher=False).order_by(*student_ordering)

    # We have enough data to render over a group of students
    # Get type-specific information
    if report_type == "exercise":
        # Fill in exercises
        exercises = get_topic_exercises(topic_id=topic_id)
        exercises = sorted(exercises, key=lambda e: (e["h_position"], e["v_position"]))
        context["exercises"] = exercises

        # More code, but much faster
        exercise_names = [ex["name"] for ex in context["exercises"]]
        # Get students
        context["students"] = []
        exlogs = ExerciseLog.objects \
            .filter(user__in=users, exercise_id__in=exercise_names) \
            .order_by(*["user__%s" % field for field in student_ordering]) \
            .values("user__id", "struggling", "complete", "exercise_id")
        exlogs = list(exlogs)  # force the query to be evaluated

        exlog_idx = 0
        for user in users:
            log_table = {}
            while exlog_idx < len(exlogs) and exlogs[exlog_idx]["user__id"] == user.id:
                log_table[exlogs[exlog_idx]["exercise_id"]] = exlogs[exlog_idx]
                exlog_idx += 1

            context["students"].append({  # this could be DRYer
                "first_name": user.first_name,
                "last_name": user.last_name,
                "username": user.username,
                "name": user.get_name(),
                "id": user.id,
                "exercise_logs": log_table,
            })

    elif report_type == "video":
        # Fill in videos
        context["videos"] = get_topic_videos(topic_id=topic_id)

        # More code, but much faster
        video_ids = [vid["id"] for vid in context["videos"]]
        # Get students
        context["students"] = []
        vidlogs = VideoLog.objects \
            .filter(user__in=users, video_id__in=video_ids) \
            .order_by(*["user__%s" % field for field in student_ordering]) \
            .values("user__id", "complete", "video_id", "total_seconds_watched", "points")
        vidlogs = list(vidlogs)  # force the query to be executed now

        vidlog_idx = 0
        for user in users:
            log_table = {}
            while vidlog_idx < len(vidlogs) and vidlogs[vidlog_idx]["user__id"] == user.id:
                log_table[vidlogs[vidlog_idx]["video_id"]] = vidlogs[vidlog_idx]
                vidlog_idx += 1

            context["students"].append({  # this could be DRYer
                "first_name": user.first_name,
                "last_name": user.last_name,
                "username": user.username,
                "name": user.get_name(),
                "id": user.id,
                "video_logs": log_table,
            })

    else:
        raise Http404(_("Unknown report_type: %(report_type)s") % {"report_type": report_type})

    if "facility_user" in request.session:
        try:
            # Log a "begin" and end here
            user = request.session["facility_user"]
            UserLog.begin_user_activity(user, activity_type="coachreport")
            UserLog.update_user_activity(user, activity_type="login")  # to track active login time for teachers
            UserLog.end_user_activity(user, activity_type="coachreport")
        except ValidationError as e:
            # Never report this error; don't want this logging to block other functionality.
            logging.error("Failed to update Teacher userlog activity login: %s" % e)

    return context
def generate_dubbed_video_mappings(csv_data=None):
    # This CSV file is in standard format: separated by ",", quoted by '"'
    logging.info("Parsing csv file.")
    reader = csv.reader(StringIO(csv_data))

    # Build a two-level video map.
    #   First key: language name
    #   Second key: english youtube ID
    #   Value: corresponding youtube ID in the new language.
    video_map = {}

    row_num = -1
    try:
        # Loop through each row in the spreadsheet.
        while (True):
            row_num += 1
            row = reader.next()

            if row_num < 4:
                # Rows 1-4 are header junk.
                continue

            elif row_num == 4:
                # Row 5 is the header row.
                header_row = [v.lower() for v in row]  # lcase all header row values (including language names)
                slug_idx = header_row.index("title id")
                english_idx = header_row.index("english")
                assert slug_idx != -1, "Video slug column header should be found."
                assert english_idx != -1, "English video column header should be found."

            else:
                # Rows 6 and beyond are data.
                assert len(row) == len(header_row), "Values line length equals headers line length"

                # Grab the slug and english video ID.
                video_slug = row[slug_idx]
                english_video_id = row[english_idx]
                assert english_video_id, "English Video ID should not be empty"
                assert video_slug, "Slug should not be empty"

                # English video is the first video ID column,
                # and following columns (until the end) are other languages.
                # Loop through those columns and, if a video exists,
                # add it to the dictionary.
                for idx in range(english_idx, len(row)):
                    if not row[idx]:  # make sure there's a dubbed video
                        continue

                    lang = header_row[idx]
                    if lang not in video_map:  # add the first level if it doesn't exist
                        video_map[lang] = {}

                    dubbed_youtube_id = row[idx]
                    if english_video_id == dubbed_youtube_id and lang != "english":
                        logging.error("Removing entry for (%s, %s): dubbed and english youtube ID are the same." % (lang, english_video_id))
                    #elif dubbed_youtube_id in video_map[lang].values():
                        # Talked to Bilal, and this is actually supposed to be OK.  Would throw us for a loop!
                        #   For now, just keep one.
                        #for key in video_map[lang].keys():
                        #    if video_map[lang][key] == dubbed_youtube_id:
                        #        del video_map[lang][key]
                        #        break
                        #logging.error("Removing entry for (%s, %s): the same dubbed video ID is used in two places, and we can only keep one in our current system." % (lang, english_video_id))
                    else:
                        video_map[lang][english_video_id] = row[idx]  # add the corresponding video id for the video, in this language.

    except StopIteration:
        # The loop ends when the CSV file hits the end and throws a StopIteration
        pass

    # Now, validate the mappings with our topic data
    known_videos = get_node_cache("Video").keys()
    missing_videos = set(known_videos) - set(video_map["english"].keys())
    extra_videos = set(video_map["english"].keys()) - set(known_videos)
    if missing_videos:
        logging.warn("There are %d known videos not in the list of dubbed videos" % len(missing_videos))
        logging.warn("Adding missing English videos to English dubbed video map")
        for video in missing_videos:
            video_map["english"][video] = video
    if extra_videos:
        logging.warn("There are %d videos in the list of dubbed videos that we have never heard of." % len(extra_videos))

    return video_map
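For reference, the structure returned above is a two-level dict: language name, then English YouTube ID, then the dubbed YouTube ID. A minimal sketch of a lookup against that map follows; the language name and video IDs are placeholders, not real entries.

# Sketch: looking up a dubbed YouTube ID in the two-level map built above.
# "spanish" and "ENGLISH_YT_ID" are placeholder values, not real data.
video_map = generate_dubbed_video_mappings(csv_data=csv_data)
dubbed_id = video_map.get("spanish", {}).get("ENGLISH_YT_ID")
if dubbed_id is None:
    # No dubbed version recorded for this language; fall back to the English video.
    dubbed_id = "ENGLISH_YT_ID"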
def update_all_central_callback(request):
    """
    Callback after authentication.

    Parses out the request token verification.
    Then finishes the request by getting an auth token.
    """
    if "ACCESS_TOKEN" not in request.session:
        finish_auth(request)

    exercises = get_api_resource(request, "/api/v1/user/exercises")
    videos = get_api_resource(request, "/api/v1/user/videos")
    node_cache = get_node_cache()

    # Collate videos
    video_logs = []
    for video in videos:
        # Assume that KA videos are all english-language, not dubbed (for now)
        video_id = youtube_id = video.get('video', {}).get('youtube_id', "")

        # Only save videos with progress
        if not video.get('seconds_watched', None):
            continue

        # Only save video logs for videos that we recognize.
        if video_id not in node_cache["Video"]:
            logging.warn("Skipping unknown video %s" % video_id)
            continue

        try:
            video_logs.append({
                "video_id": video_id,
                "youtube_id": youtube_id,
                "total_seconds_watched": video['seconds_watched'],
                "points": VideoLog.calc_points(video['seconds_watched'], video['duration']),
                "complete": video['completed'],
                "completion_timestamp": convert_ka_date(video['last_watched']) if video['completed'] else None,
            })
            logging.debug("Got video log for %s: %s" % (video_id, video_logs[-1]))
        except KeyError:
            logging.error("Could not save video log for data with missing values: %s" % video)

    # Collate exercises
    exercise_logs = []
    for exercise in exercises:
        # Only save exercises that have any progress.
        if not exercise.get('last_done', None):
            continue

        # Only save exercise logs for exercises that we recognize.
        slug = exercise.get('exercise', "")
        if slug not in node_cache['Exercise']:
            logging.warn("Skipping unknown exercise %s" % slug)
            continue

        try:
            completed = exercise['streak'] >= 10
            basepoints = node_cache['Exercise'][slug][0]['basepoints']
            exercise_logs.append({
                "exercise_id": slug,
                "streak_progress": min(100, 100 * exercise['streak'] / 10),  # duplicates logic elsewhere
                "attempts": exercise['total_done'],
                "points": ExerciseLog.calc_points(basepoints, ncorrect=exercise['streak'], add_randomness=False),  # no randomness when importing from KA
                "complete": completed,
                "attempts_before_completion": exercise['total_done'] if not exercise['practiced'] else None,  # can't figure this out if they practiced after mastery.
                "completion_timestamp": convert_ka_date(exercise['proficient_date']) if completed else None,
            })
            logging.debug("Got exercise log for %s: %s" % (slug, exercise_logs[-1]))
        except KeyError:
            logging.error("Could not save exercise log for data with missing values: %s" % exercise)

    # POST the data back to the distributed server
    try:
        dthandler = lambda obj: obj.isoformat() if isinstance(obj, datetime.datetime) else None
        logging.debug("POST'ing to %s" % request.session["distributed_callback_url"])
        response = requests.post(
            request.session["distributed_callback_url"],
            cookies={"csrftoken": request.session["distributed_csrf_token"]},
            data={
                "csrfmiddlewaretoken": request.session["distributed_csrf_token"],
                "video_logs": json.dumps(video_logs, default=dthandler),
                "exercise_logs": json.dumps(exercise_logs, default=dthandler),
                "user_id": request.session["distributed_user_id"],
            }
        )
        logging.debug("Response (%d): %s" % (response.status_code, response.content))
    except requests.exceptions.ConnectionError as e:
        return HttpResponseRedirect(set_query_params(request.session["distributed_redirect_url"], {
            "message_type": "error",
            "message": _("Could not connect to your KA Lite installation to share Khan Academy data."),
            "message_id": "id_khanload",
        }))
    except Exception as e:
        return HttpResponseRedirect(set_query_params(request.session["distributed_redirect_url"], {
            "message_type": "error",
            "message": _("Failure to send data to your KA Lite installation: %s") % e,
            "message_id": "id_khanload",
        }))

    try:
        json_response = json.loads(response.content)
        if not isinstance(json_response, dict) or len(json_response) != 1:
            # Could not validate that the message is a single key-value pair
            raise Exception(_("Unexpected response format from your KA Lite installation."))
        message_type = json_response.keys()[0]
        message = json_response.values()[0]
    except ValueError as e:
        message_type = "error"
        message = unicode(e)
    except Exception as e:
        message_type = "error"
        message = _("Loading json object: %s") % e

    # If something broke on the distributed server, we are SCREWED.
    #   For now, just show the error to users.
    #
    # Ultimately, we have a message we would like to share with the distributed server.
    # if response.status_code != 200:
    #     return HttpResponseServerError(response.content)

    return HttpResponseRedirect(set_query_params(request.session["distributed_redirect_url"], {
        "message_type": message_type,
        "message": message,
        "message_id": "id_khanload",
    }))
class CachingTest(KALiteTestCase):
    video_cache = get_node_cache("Video")

    @unittest.skipIf(True, "Failing test that I'm tired of debugging.")  # TODO(bcipolli): re-enable when we need to be able to auto-cache
    @unittest.skipIf(settings.CACHE_TIME == 0, "Test only relevant when caching is enabled")
    def test_cache_invalidation(self):
        """Create the cache item, then invalidate it and show that it is deleted."""

        # Get a random video id
        n_videos = len(self.video_cache)
        video_id = self.video_cache.keys()[10]  # random.choice(self.video_cache.keys())
        logging.debug("Testing on video_id = %s" % video_id)
        video_path = self.video_cache[video_id][0]['path']

        # Clean the cache for this item
        caching.expire_page(path=video_path, failure_ok=True)

        # Create the cache item, and check it
        self.assertFalse(caching.has_cache_key(path=video_path), "expect: no cache key after expiring the page")
        caching.regenerate_all_pages_related_to_videos(video_ids=[video_id])
        self.assertTrue(caching.has_cache_key(path=video_path), "expect: Cache key exists after Django Client get")

        # Invalidate the cache item, and check it
        caching.invalidate_all_caches()  # test the convenience function
        self.assertTrue(not caching.has_cache_key(path=video_path), "expect: no cache key after expiring the page")

    @unittest.skipIf(settings.CACHE_TIME == 0, "Test only relevant when caching is enabled")
    def test_cache_across_clients(self):
        """Show that caching is accessible across all clients
        (i.e. that different clients don't generate different cache keys)."""

        # Get a random video id
        n_videos = len(self.video_cache)
        video_id = random.choice(self.video_cache.keys())
        logging.debug("Testing on video_id = %s" % video_id)
        video_path = self.video_cache[video_id][0]['path']

        # Clean the cache for this item
        caching.expire_page(path=video_path, failure_ok=True)
        self.assertTrue(not caching.has_cache_key(path=video_path), "expect: No cache key after expiring the page")

        # Set up the cache with the Django test client
        Client().get(video_path)
        self.assertTrue(caching.has_cache_key(path=video_path), "expect: Cache key exists after Django Client get")
        caching.expire_page(path=video_path)  # clean cache
        self.assertTrue(not caching.has_cache_key(path=video_path), "expect: No cache key after expiring the page")

        # Get the same cache key when getting with urllib, and make sure the cache is created again
        urllib.urlopen(self.live_server_url + video_path).close()
        self.assertTrue(caching.has_cache_key(path=video_path), "expect: Cache key exists after urllib get")
        caching.expire_page(path=video_path)  # clean cache
        self.assertTrue(not caching.has_cache_key(path=video_path), "expect: No cache key after expiring the page")

        # Same deal, now using the requests library
        requests.get(self.live_server_url + video_path)
        self.assertTrue(caching.has_cache_key(path=video_path), "expect: Cache key exists after requests get")
        caching.expire_page(path=video_path)  # clean cache
        self.assertTrue(not caching.has_cache_key(path=video_path), "expect: No cache key after expiring the page")