def initialize_content_caches(force=False):
    """Regenerate every in-memory content cache that needs annotation with file availability.

    Iterates all installed language packs and preloads the exercise cache,
    the content cache, and the annotated topic tree for each language.
    """
    installed_codes = i18n.get_installed_language_packs(force=True).keys()
    for code in installed_codes:
        # Exercises first, then content, then the annotated topic tree.
        logging.info("Preloading exercise data for language {lang}.".format(lang=code))
        topic_tools.get_exercise_cache(force=force, language=code)
        logging.info("Preloading content data for language {lang}.".format(lang=code))
        topic_tools.get_content_cache(force=force, annotate=True, language=code)
        logging.info("Preloading topic tree data for language {lang}.".format(lang=code))
        topic_tools.get_topic_tree(force=force, annotate=True, language=code)
def create_some_learner_data():
    """ Just create a lil' bit-o-data of each type, to populate the table. """
    student = CreateStudentMixin.create_student()
    # (name, streak_progress, attempt_count)
    attempt_states = (
        ("not started", 0, 0),
        ("completed", 100, 15),
        ("attempted", 50, 10),
        ("struggling", 30, 25),
    )
    # Sample without replacement so each state gets a *distinct* exercise.
    sampled = random.sample(get_exercise_cache().keys(), len(attempt_states))
    for state_name, progress, attempt_count in attempt_states:
        exercise_id = sampled.pop()
        log, _created = ExerciseLog.objects.get_or_create(exercise_id=exercise_id, user=student)
        if state_name != "not started":
            log.streak_progress = progress
            log.attempts = attempt_count
            for seed in range(attempt_count):
                AttemptLog.objects.get_or_create(
                    exercise_id=exercise_id,
                    user=student,
                    seed=seed,
                    timestamp=datetime.datetime.now(),
                )
            log.latest_activity_timestamp = datetime.datetime.now()
            log.save()
def before_scenario(context, scenario):
    """Run the standard scenario setup, then seed fake progress for @with_progress scenarios."""
    base_before_scenario(context, scenario)
    if "with_progress" not in context.tags:
        return
    learner = FacilityUser.objects.get(
        username=context.user,
        facility=getattr(context, "facility", None))
    # Two random exercises, each with partial progress.
    sampled_exercises = random.sample(get_exercise_cache().keys(), 2)
    for exercise_id in sampled_exercises:
        ExerciseLog(
            exercise_id=exercise_id,
            user=learner,
            streak_progress=50,
            attempts=15,
            latest_activity_timestamp=datetime.datetime.now()).save()
    context.exercises = sampled_exercises
    # Two random videos, each fully pointed and partially watched.
    sampled_videos = random.sample(get_content_cache().keys(), 2)
    for video_id in sampled_videos:
        VideoLog(
            youtube_id=video_id,
            video_id=video_id,
            user=learner,
            total_seconds_watched=100,
            points=600,
            latest_activity_timestamp=datetime.datetime.now()).save()
    context.videos = sampled_videos
def obj_get_list(self, bundle, **kwargs):
    """
    Return the exercise logs de-duplicated by user (one representative log
    per user), accumulating per-user summary stats into ``self.user_info``.

    Fixes vs. previous version:
    - hoists ``get_exercise_cache()`` out of the per-row loop (it was
      re-fetched for every exercise of every user);
    - guards against exercise ids missing from the cache, which previously
      raised ``AttributeError`` on ``None.get("path")``.
    """
    # self.permission_check(bundle.request)
    exercise_logs = self.get_object_list(bundle.request)
    exercise_cache = get_exercise_cache()  # fetch once, not per exercise row
    pre_user = None
    filtered_logs = []
    for e in exercise_logs:
        # Logs appear grouped by user; skip repeats of the same user.
        if e.user == pre_user:
            continue
        pre_user = e.user
        user_logs = exercise_logs.filter(user=e.user)
        attempts = user_logs.aggregate(Sum("attempts"))["attempts__sum"]
        mastered = user_logs.filter(complete=True).count()
        exercises_info = user_logs.values('exercise_id', 'attempts', 'struggling')
        for i in exercises_info:
            # `or {}` guards ids absent from the exercise cache.
            i["exercise_url"] = (exercise_cache.get(i['exercise_id']) or {}).get("path")
        user_dic = {
            "user_name": e.user.get_name(),
            "total_attempts": attempts,
            "mastered": mastered,
            "exercises": list(exercises_info)
        }
        filtered_logs.append(e)
        self.user_info.append(user_dic)
    self.user_info.reverse()
    return filtered_logs
def before_feature(context, feature):
    """Standard feature setup; additionally seed sample logs when the feature is tagged @with_progress."""
    base_before_feature(context, feature)
    if "with_progress" not in feature.tags:
        return
    facility = getattr(context, "facility", None)
    user = FacilityUser.objects.get(username=context.user, facility=facility)
    context.exercises = random.sample(get_exercise_cache().keys(), 2)
    for ex_id in context.exercises:
        exercise_log = ExerciseLog(
            exercise_id=ex_id,
            user=user,
            streak_progress=50,
            attempts=15,
            latest_activity_timestamp=datetime.datetime.now()
        )
        exercise_log.save()
    context.videos = random.sample(get_content_cache().keys(), 2)
    for vid_id in context.videos:
        video_log = VideoLog(
            youtube_id=vid_id,
            video_id=vid_id,
            user=user,
            total_seconds_watched=100,
            points=600,
            latest_activity_timestamp=datetime.datetime.now()
        )
        video_log.save()
def create_some_learner_data():
    """ Just create a lil' bit-o-data of each type, to populate the table. """
    user = CreateStudentMixin.create_student()
    attempt_states = (
        # (name, streak_progress, attempt_count)
        ("not started", 0, 0),
        ("completed", 100, 15),
        ("attempted", 50, 10),
        ("struggling", 30, 25),
    )
    # Important they are *distinct*
    remaining = random.sample(
        get_exercise_cache().keys(), len(attempt_states))
    for name, progress, count in attempt_states:
        current = remaining.pop()
        log, _ = ExerciseLog.objects.get_or_create(exercise_id=current, user=user)
        if name == "not started":
            continue
        log.streak_progress = progress
        log.attempts = count
        for seed in range(count):
            AttemptLog.objects.get_or_create(
                exercise_id=current,
                user=user,
                seed=seed,
                timestamp=datetime.datetime.now())
        log.latest_activity_timestamp = datetime.datetime.now()
        log.save()
def learner_logs(request):
    """
    Return a JSON summary of learner logs (exercise/video/content),
    paginated over learners and optionally restricted to a date window
    and a set of topics.

    Fix: GET parameters always arrive as *strings*; `page`, `limit` and
    `time_window` are now coerced to int so that the pagination arithmetic,
    list slicing and `timedelta()` below don't break (or silently misbehave)
    whenever a client actually supplies these parameters.
    """
    page = int(request.GET.get("page", 1))
    limit = int(request.GET.get("limit", 50))
    # Look back a week by default
    time_window = int(request.GET.get("time_window", 7))
    start_date = request.GET.get("start_date", None)
    end_date = request.GET.get("end_date", None)
    topic_ids = request.GET.getlist("topic_id", [])
    learners = get_learners_from_GET(request)
    pages = int(ceil(len(learners)/float(limit)))
    if page*limit < len(learners):
        learners = learners[(page - 1)*limit: page*limit]
    log_types = request.GET.getlist("log_type", ["exercise", "video", "content"])
    output_logs = []
    output_objects = []
    # Parse explicit dates; otherwise default to [now - time_window days, now].
    end_date = datetime.datetime.strptime(end_date,'%Y/%m/%d') if end_date else datetime.datetime.now()
    start_date = datetime.datetime.strptime(start_date,'%Y/%m/%d') if start_date else end_date - datetime.timedelta(time_window)
    for log_type in log_types:
        LogModel, fields, id_field, obj_ids, objects = return_log_type_details(log_type, topic_ids)
        log_objects = LogModel.objects.filter(user__in=learners, **obj_ids).values(*fields)
        if not topic_ids:
            # No topic filter: prefer logs inside the date window, but fall
            # back to all of the user's logs if the window is empty.
            topic_objects = log_objects.filter(latest_activity_timestamp__gte=start_date, latest_activity_timestamp__lte=end_date)
            if topic_objects.count() == 0:
                topic_objects = log_objects
            # De-duplicate content items by id; fall back to the exercise cache
            # for ids that are not plain content.
            objects = dict([(obj[id_field], get_content_cache().get(obj[id_field], get_exercise_cache().get(obj[id_field]))) for obj in topic_objects]).values()
        output_objects.extend(objects)
        output_logs.extend(log_objects)
    return JsonResponse({
        "logs": output_logs,
        "contents": output_objects,
        # Sometimes 'learners' gets collapsed to a list from the Queryset. This insures against that eventuality.
        "learners": [{
            "first_name": learner.first_name,
            "last_name": learner.last_name,
            "username": learner.username,
            "pk": learner.pk
        } for learner in learners],
        "page": page,
        "pages": pages,
        "limit": limit
    })
def aggregate_learner_logs(request):
    """
    Return aggregate stats (time spent, attempts, mastery, recent events,
    total logged time) for the selected learners over a date window.

    Fixes:
    - `event_limit` and `time_window` arrive from GET as *strings*; they are
      now coerced to int so `timedelta()` and the `output_logs[:event_limit]`
      slice don't raise TypeError when a client supplies them.
    - the streak_progress average is aggregated once instead of twice.
    """
    learners = get_learners_from_GET(request)
    event_limit = int(request.GET.get("event_limit", 10))
    # Look back a week by default
    time_window = int(request.GET.get("time_window", 7))
    topic_ids = request.GET.getlist("topic_id", [])
    log_types = request.GET.getlist("log_type", ["exercise", "video", "content"])
    output_logs = []
    output_dict = {
        "content_time_spent": 0,
        "exercise_attempts": 0,
        "exercise_mastery": None,
    }
    start_date = datetime.datetime.now() - datetime.timedelta(time_window)
    end_date = datetime.datetime.now()
    for log_type in log_types:
        LogModel, fields, id_field, obj_ids, objects = return_log_type_details(log_type, topic_ids)
        log_objects = LogModel.objects.filter(
            user__in=learners,
            latest_activity_timestamp__gte=start_date,
            latest_activity_timestamp__lte=end_date,
            **obj_ids).order_by("-latest_activity_timestamp")
        if log_type == "video":
            output_dict["content_time_spent"] += log_objects.aggregate(Sum("total_seconds_watched"))["total_seconds_watched__sum"] or 0
        elif log_type == "content":
            output_dict["content_time_spent"] += log_objects.aggregate(Sum("time_spent"))["time_spent__sum"] or 0
        elif log_type == "exercise":
            output_dict["exercise_attempts"] = AttemptLog.objects.filter(user__in=learners, timestamp__gte=start_date, timestamp__lte=end_date).count()
            # Aggregate once (previously computed twice).
            streak_avg = log_objects.aggregate(Avg("streak_progress"))["streak_progress__avg"]
            if streak_avg is not None:
                output_dict["exercise_mastery"] = round(streak_avg)
        output_logs.extend(log_objects)
    # Report total time in hours
    output_dict["content_time_spent"] = round(output_dict["content_time_spent"]/3600.0,1)
    # Most recent activity first, across all log types.
    output_logs.sort(key=lambda x: x.latest_activity_timestamp, reverse=True)
    output_dict["learner_events"] = [{
        "learner": log.user.get_name(),
        "complete": log.complete,
        "struggling": getattr(log, "struggling", None),
        "progress": getattr(log, "streak_progress", getattr(log, "progress", None)),
        # Resolve the log to its content item: exercises first, then
        # video/content ids.
        "content": get_exercise_cache().get(getattr(log, "exercise_id", ""), get_content_cache().get(getattr(log, "video_id", getattr(log, "content_id", "")), {})),
    } for log in output_logs[:event_limit]]
    output_dict["total_time_logged"] = UserLogSummary.objects\
        .filter(user__in=learners, last_activity_datetime__gte=start_date, last_activity_datetime__lte=end_date)\
        .aggregate(Sum("total_seconds")).get("total_seconds__sum") or 0
    return JsonResponse(output_dict)
def initialize_content_caches(force=False):
    """ Catch all function to regenerate any content caches in memory that need annotation with file availability """
    for language in i18n.get_installed_language_packs(force=True).keys():
        # Warm each cache in dependency order: exercises, content, topic tree.
        logging.info(
            "Preloading exercise data for language {lang}.".format(lang=language))
        topic_tools.get_exercise_cache(force=force, language=language)
        logging.info(
            "Preloading content data for language {lang}.".format(lang=language))
        topic_tools.get_content_cache(
            force=force, annotate=True, language=language)
        logging.info("Preloading topic tree data for language {lang}.".format(
            lang=language))
        topic_tools.get_topic_tree(
            force=force, annotate=True, language=language)
def get_exercise_prereqs(exercises):
    """Return a list of prerequisites (if applicable) for each specified exercise.

    Unknown exercise ids are skipped instead of raising KeyError, matching
    the defensive variant of this helper used elsewhere in the codebase.

    :param exercises: iterable of exercise ids.
    :return: flat list of prerequisite exercise ids/dicts, as stored in the cache.
    """
    ex_cache = get_exercise_cache()
    prereqs = []
    for exercise in exercises:
        entry = ex_cache.get(exercise)  # was ex_cache[exercise]: KeyError on unknown ids
        prereqs += entry['prerequisites'] if entry else []
    return prereqs
def impl(context):
    """Give every facility user a completed log on the same 10 sampled exercises."""
    sampled_ids = random.sample(get_exercise_cache().keys(), 10)
    for learner in FacilityUser.objects.all():
        for exercise_id in sampled_ids:
            ExerciseLog.objects.get_or_create(
                exercise_id=exercise_id,
                user=learner,
                streak_progress=100,
                attempts=15,
                latest_activity_timestamp=datetime.datetime.now())
def impl(context):
    """Seed a completed exercise log for each user on 10 randomly sampled exercises."""
    chosen = random.sample(get_exercise_cache().keys(), 10)
    users = FacilityUser.objects.all()
    for person in users:
        for ex_id in chosen:
            ExerciseLog.objects.get_or_create(
                exercise_id=ex_id,
                user=person,
                streak_progress=100,
                attempts=15,
                latest_activity_timestamp=datetime.datetime.now()
            )
def create_empty_entry(cls, entity_id, kind, playlist):
    """Build a 'notstarted' progress entry for an entity the user has no log for.

    Fix: previously an unexpected `kind` left `topic_node` unbound
    (UnboundLocalError), and an id missing from the caches returned None
    (TypeError on subscription). Both cases now fall back to empty strings.
    """
    if kind != "Quiz":
        topic_node = None
        if kind == "Video":
            topic_node = get_content_cache().get(entity_id)
        elif kind == "Exercise":
            topic_node = get_exercise_cache().get(entity_id)
        topic_node = topic_node or {}
        title = topic_node.get("title", "")
        path = topic_node.get("path", "")
    else:
        # Quizzes have no cached node; they take the playlist's title.
        title = playlist["title"]
        path = ""
    entry = {"id": entity_id, "kind": kind, "status": "notstarted", "score": 0, "title": title, "path": path}
    return entry
def get_exercise_prereqs(exercise_ids):
    """
    Collect the prerequisites (if applicable) for each specified exercise.

    :param exercise_ids: A list of exercise ids.
    :return: A list of prerequisite exercises (as dicts), if any are known.
    """
    cache = get_exercise_cache()
    prerequisites = []
    for ex_id in exercise_ids:
        entry = cache.get(ex_id)
        if entry:
            # Ids not present in the cache contribute nothing.
            prerequisites.extend(entry['prerequisites'])
    return prerequisites
def _list_all_exercises_with_bad_links(): """This is a standalone helper method used to provide KA with a list of exercises with bad URLs in them.""" url_pattern = r"https?://www\.khanacademy\.org/[\/\w\-]*/./(?P<slug>[\w\-]+)" assessment_items = json.load(open(settings.KHAN_ASSESSMENT_ITEM_JSON_PATH)) for ex in get_exercise_cache().values(): checked_urls = [] displayed_title = False for aidict in ex.get("all_assessment_items", []): ai = assessment_items[aidict["id"]] for match in re.finditer(url_pattern, ai["item_data"], flags=re.IGNORECASE): url = str(match.group(0)) if url in checked_urls: continue checked_urls.append(url) status_code = requests.get(url).status_code if status_code != 200: if not displayed_title: print "EXERCISE: '%s'" % ex["title"], ex["path"] displayed_title = True print "\t", status_code, url
def create_empty_entry(cls, entity_id, kind, playlist):
    """Return a default 'notstarted' entry dict for an entity with no user log.

    Fix: an unrecognized `kind` previously raised UnboundLocalError
    (`topic_node` never assigned), and a cache miss returned None and then
    raised TypeError on `topic_node["title"]`; both now degrade to empty
    title/path.
    """
    if kind != "Quiz":
        topic_node = None
        if kind == "Video":
            topic_node = get_content_cache().get(entity_id)
        elif kind == "Exercise":
            topic_node = get_exercise_cache().get(entity_id)
        topic_node = topic_node or {}
        title = topic_node.get("title", "")
        path = topic_node.get("path", "")
    else:
        title = playlist["title"]
        path = ""
    entry = {
        "id": entity_id,
        "kind": kind,
        "status": "notstarted",
        "score": 0,
        "title": title,
        "path": path,
    }
    return entry
def _list_all_exercises_with_bad_links(): """This is a standalone helper method used to provide KA with a list of exercises with bad URLs in them.""" url_pattern = r"https?://www\.khanacademy\.org/[\/\w\-]*/./(?P<slug>[\w\-]+)" assessment_items = json.load(open(ASSESSMENT_ITEMS_PATH)) for ex in get_exercise_cache().values(): checked_urls = [] displayed_title = False for aidict in ex.get("all_assessment_items", []): ai = assessment_items[aidict["id"]] for match in re.finditer(url_pattern, ai["item_data"], flags=re.IGNORECASE): url = str(match.group(0)) if url in checked_urls: continue checked_urls.append(url) status_code = requests.get(url).status_code if status_code != 200: if not displayed_title: print "EXERCISE: '%s'" % ex["title"], ex["path"] displayed_title = True print "\t", status_code, url
def user_progress_detail(cls, user_id, playlist_id, language=None):
    """ Return a list of video, exercise, and quiz log PlaylistProgressDetail objects associated with a specific user and playlist ID.

    :param user_id: id of the FacilityUser whose progress is reported.
    :param playlist_id: id of the (leafed) topic acting as the playlist.
    :param language: language code for the content/exercise caches; defaults
        to the configured default language.
    """
    if not language:
        language = Settings.get("default_language") or settings.LANGUAGE_CODE
    user = FacilityUser.objects.get(id=user_id)
    # The "playlist" is the leafed topic whose id matches playlist_id.
    # NOTE(review): if no topic matches, `playlist` is None and the
    # `.get("children")` access below will raise — presumably callers only
    # pass valid ids; confirm.
    playlist = next((pl for pl in get_leafed_topics() if pl.get("id") == playlist_id), None)
    pl_video_ids, pl_exercise_ids = cls.get_playlist_entry_ids(playlist)
    # Retrieve video, exercise, and quiz logs that appear in this playlist
    user_vid_logs, user_ex_logs = cls.get_user_logs(user, pl_video_ids, pl_exercise_ids)
    # Format & append quiz the quiz log, if it exists
    # quiz_exists, quiz_log, quiz_pct_score = cls.get_quiz_log(user, (playlist.get("entries") or playlist.get("children")), playlist.get("id"))
    # Finally, sort an ordered list of the playlist entries, with user progress
    # injected where it exists.
    progress_details = list()
    for entity_id in playlist.get("children"):
        entry = {}
        # Look the child up in the content cache first, then the exercise cache.
        leaf_node = get_content_cache(language=language).get(entity_id) or get_exercise_cache(language=language).get(entity_id) or {}
        kind = leaf_node.get("kind")
        status = "notstarted"
        score = 0
        if kind == "Video":
            vid_log = next((vid_log for vid_log in user_vid_logs if vid_log["video_id"] == entity_id), None)
            if vid_log:
                if vid_log.get("complete"):
                    status = "complete"
                elif vid_log.get("total_seconds_watched"):
                    status = "inprogress"
                # Score is the points earned as a percentage of the 750-point max.
                score = int(float(vid_log.get("points")) / float(750) * 100)
        elif kind == "Exercise":
            ex_log = next((ex_log for ex_log in user_ex_logs if ex_log["exercise_id"] == entity_id), None)
            if ex_log:
                # "struggling" takes precedence over "complete"/"inprogress".
                if ex_log.get("struggling"):
                    status = "struggling"
                elif ex_log.get("complete"):
                    status = "complete"
                elif ex_log.get("attempts"):
                    status = "inprogress"
                score = ex_log.get('streak_progress')
        entry = {
            "id": entity_id,
            "kind": kind,
            "status": status,
            "score": score,
            "title": leaf_node["title"],
            "path": leaf_node["path"],
        }
        progress_details.append(cls(**entry))
    return progress_details
def test_detail_view(request, test_id):
    """View details of student performance on specific exams.

    Builds, for the coach report page:
    - ``results_table``: per-student rows of per-exercise scores plus an
      overall score column;
    - ``stats_dict``: per-exercise summary statistics across students;
    - ``test_options``: other tests viewable for the same group/facility.
    """
    facility, group_id, context = coach_nav_context(request, "test")
    # get users in this facility and group
    users = get_user_queryset(request, facility, group_id)
    # Get test object
    test_resource = TestResource()
    test_obj = test_resource._read_test(test_id=test_id)
    # get all of the test logs for this specific test object and generated by these specific users
    if group_id:
        test_logs = TestLog.objects.filter(user__group=group_id, test=test_id)
        # Narrow all by ungroup facility user
        if group_id == control_panel_api_resources.UNGROUPED_KEY:
            # NOTE(review): this branch looks inverted — when `facility` is
            # truthy the filter omits it, and when falsy it filters on
            # facility=facility; also neither filter keeps test=test_id.
            # Confirm against the ungrouped-users behavior before changing.
            if facility:
                test_logs = TestLog.objects.filter(user__group__isnull=True)
            else:
                test_logs = TestLog.objects.filter(facility=facility, user__group__isnull=True)
    else:
        # covers the all groups case
        test_logs = TestLog.objects.filter(user__facility=facility, test=test_id)
    results_table, scores_dict = OrderedDict(), OrderedDict()
    # build this up now to use in summary stats section
    ex_ids = set(literal_eval(test_obj.ids))
    for ex in ex_ids:
        scores_dict[ex] = []
    for s in users:
        s.name = s.get_name()
        # All of this student's attempts made in the context of this test.
        user_attempts = AttemptLog.objects.filter(user=s, context_type='test', context_id=test_id)
        results_table[s] = []
        attempts_count_total, attempts_count_correct_total = 0, 0
        for ex in ex_ids:
            attempts = [attempt for attempt in user_attempts if attempt.exercise_id == ex]
            attempts_count = len(attempts)
            attempts_count_correct = len([attempt for attempt in attempts if attempt.correct])
            attempts_count_total += attempts_count
            attempts_count_correct_total += attempts_count_correct
            if attempts_count:
                # Percentage correct for this exercise, one decimal place.
                score = round(100 * float(attempts_count_correct)/float(attempts_count), 1)
                scores_dict[ex].append(score)
                display_score = "%d%%" % score
            else:
                # No attempts: render an empty cell.
                score = ''
                display_score = ''
            results_table[s].append({
                'display_score': display_score,
                'raw_score': score,
            })
        # Calc overall score
        if attempts_count_total:
            score = round(100 * float(attempts_count_correct_total)/float(attempts_count_total), 1)
            display_score = "%d%%" % score
            fraction_correct = "(%(correct)d/%(attempts)d)" % ({'correct': attempts_count_correct_total, 'attempts': attempts_count_total})
        else:
            score = ''
            display_score = ''
            fraction_correct = ''
        results_table[s].append({
            'display_score': display_score,
            'raw_score': score,
            'title': fraction_correct,
        })
    # This retrieves stats for individual exercises
    stats_dict = OrderedDict()
    for stat in SUMMARY_STATS:
        stats_dict[stat] = []
        for ex in ex_ids:
            scores_list = scores_dict[ex]
            if scores_list:
                stats_dict[stat].append("%d%%" % return_list_stat(scores_list, stat))
            else:
                stats_dict[stat].append('')
    # replace the exercise ids with their full names
    exercises = get_exercise_cache()
    ex_titles = []
    for ex in ex_ids:
        ex_titles.append(exercises[ex]['title'])
    # provide a list of test options to view for this group/facility combo
    if group_id:
        test_logs = TestLog.objects.filter(user__group=group_id)
    else:
        # covers the all/no groups case
        test_logs = TestLog.objects.filter(user__facility=facility)
    test_objects = test_resource._read_tests()
    # Only offer tests that have at least one log for this group/facility.
    unique_test_ids = set([test_log.test for test_log in test_logs])
    test_options = [{'id': obj.test_id, 'url': reverse('test_detail_view', kwargs={'test_id': obj.test_id}), 'title': obj.title} for obj in test_objects if obj.test_id in unique_test_ids]
    context = plotting_metadata_context(request, facility=facility)
    context.update({
        "test_obj": test_obj,
        "ex_cols": ex_titles,
        "results_table": results_table,
        "stats_dict": stats_dict,
        "test_options": test_options,
    })
    return context
def user_progress_detail(cls, user_id, playlist_id):
    """ Return a list of video, exercise, and quiz log PlaylistProgressDetail objects associated with a specific user and playlist ID.

    Playlist entries come either from legacy Playlist objects ("entries")
    or from leafed topics ("children"); both shapes are handled below.
    """
    user = FacilityUser.objects.get(id=user_id)
    # Search legacy Playlists first, then the leafed topic tree.
    playlist = next((pl for pl in [plist.__dict__ for plist in Playlist.all()] + get_leafed_topics() if pl.get("id") == playlist_id), None)
    pl_video_ids, pl_exercise_ids = cls.get_playlist_entry_ids(playlist)
    # Retrieve video, exercise, and quiz logs that appear in this playlist
    user_vid_logs, user_ex_logs = cls.get_user_logs(user, pl_video_ids, pl_exercise_ids)
    # Format & append quiz the quiz log, if it exists
    quiz_exists, quiz_log, quiz_pct_score = cls.get_quiz_log(user, (playlist.get("entries") or playlist.get("children")), playlist.get("id"))
    # Finally, sort an ordered list of the playlist entries, with user progress
    # injected where it exists.
    progress_details = list()
    for ent in (playlist.get("entries") or playlist.get("children")):
        entry = {}
        # Legacy entries carry "entity_kind"; topic children carry "kind".
        kind = ent.get("entity_kind") or ent.get("kind")
        if kind == "Divider":
            # Dividers are presentational only; no progress entry.
            continue
        elif kind == "Video":
            # Legacy entries store slugs; map to ids where needed.
            entity_id = get_slug2id_map().get(ent.get("entity_id")) or ent.get("id")
            vid_log = next((vid_log for vid_log in user_vid_logs if vid_log["video_id"] == entity_id), None)
            if vid_log:
                if vid_log.get("complete"):
                    status = "complete"
                elif vid_log.get("total_seconds_watched"):
                    status = "inprogress"
                else:
                    status = "notstarted"
                leaf_node = get_content_cache().get(vid_log["video_id"])
                entry = {
                    "id": entity_id,
                    "kind": kind,
                    "status": status,
                    # Points as a percentage of the 750-point video maximum.
                    "score": int(float(vid_log.get("points")) / float(750) * 100),
                    "title": leaf_node["title"],
                    "path": leaf_node["path"],
                }
        elif kind == "Exercise":
            entity_id = (ent.get("entity_id") or ent.get("id"))
            ex_log = next((ex_log for ex_log in user_ex_logs if ex_log["exercise_id"] == entity_id), None)
            if ex_log:
                # "struggling" takes precedence over completion/progress.
                if ex_log.get("struggling"):
                    status = "struggling"
                elif ex_log.get("complete"):
                    status = "complete"
                elif ex_log.get("attempts"):
                    status = "inprogress"
                ex_log_id = ex_log.get("exercise_id")
                leaf_node = get_exercise_cache().get(ex_log_id)
                entry = {
                    "id": ex_log_id,
                    "kind": kind,
                    "status": status,
                    "score": ex_log.get("streak_progress"),
                    "title": leaf_node["title"],
                    "path": leaf_node["path"],
                }
        elif kind == "Quiz":
            entity_id = playlist["id"]
            if quiz_log:
                if quiz_log.complete:
                    # Bucket the percentage score into pass/borderline/fail.
                    if quiz_pct_score <= 59:
                        status = "fail"
                    elif quiz_pct_score <= 79:
                        status = "borderline"
                    else:
                        status = "pass"
                elif quiz_log.attempts:
                    status = "inprogress"
                else:
                    status = "notstarted"
                quiz_log_id = quiz_log.quiz
                entry = {
                    "id": quiz_log_id,
                    "kind": "Quiz",
                    "status": status,
                    "score": quiz_pct_score,
                    "title": playlist.get("title"),
                    "path": "",
                }
        if not entry:
            # No user log found for this entity: emit a default entry.
            entry = cls.create_empty_entry(entity_id, kind, playlist)
        progress_details.append(cls(**entry))
    return progress_details
def user_progress_detail(cls, user_id, playlist_id, language=None):
    """ Return a list of video, exercise, and quiz log PlaylistProgressDetail objects associated with a specific user and playlist ID.

    :param user_id: id of the FacilityUser whose progress is reported.
    :param playlist_id: id of the leafed topic treated as the playlist.
    :param language: language code for the caches; falls back to the
        configured default language.
    """
    if not language:
        language = Settings.get(
            "default_language") or settings.LANGUAGE_CODE
    user = FacilityUser.objects.get(id=user_id)
    # NOTE(review): if no leafed topic matches, `playlist` is None and
    # `playlist.get("children")` below raises — presumably only valid ids
    # reach this method; confirm with callers.
    playlist = next(
        (pl for pl in get_leafed_topics() if pl.get("id") == playlist_id),
        None)
    pl_video_ids, pl_exercise_ids = cls.get_playlist_entry_ids(playlist)
    # Retrieve video, exercise, and quiz logs that appear in this playlist
    user_vid_logs, user_ex_logs = cls.get_user_logs(
        user, pl_video_ids, pl_exercise_ids)
    # Format & append quiz the quiz log, if it exists
    # quiz_exists, quiz_log, quiz_pct_score = cls.get_quiz_log(user, (playlist.get("entries") or playlist.get("children")), playlist.get("id"))
    # Finally, sort an ordered list of the playlist entries, with user progress
    # injected where it exists.
    progress_details = list()
    for entity_id in playlist.get("children"):
        entry = {}
        # Content cache first, then exercise cache, then an empty dict.
        leaf_node = get_content_cache(
            language=language).get(entity_id) or get_exercise_cache(
            language=language).get(entity_id) or {}
        kind = leaf_node.get("kind")
        status = "notstarted"
        score = 0
        if kind == "Video":
            vid_log = next((vid_log for vid_log in user_vid_logs
                            if vid_log["video_id"] == entity_id), None)
            if vid_log:
                if vid_log.get("complete"):
                    status = "complete"
                elif vid_log.get("total_seconds_watched"):
                    status = "inprogress"
                # Points as a percentage of the 750-point video maximum.
                score = int(
                    float(vid_log.get("points")) / float(750) * 100)
        elif kind == "Exercise":
            ex_log = next((ex_log for ex_log in user_ex_logs
                           if ex_log["exercise_id"] == entity_id), None)
            if ex_log:
                # "struggling" takes precedence over completion/progress.
                if ex_log.get("struggling"):
                    status = "struggling"
                elif ex_log.get("complete"):
                    status = "complete"
                elif ex_log.get("attempts"):
                    status = "inprogress"
                score = ex_log.get('streak_progress')
        entry = {
            "id": entity_id,
            "kind": kind,
            "status": status,
            "score": score,
            "title": leaf_node["title"],
            "path": leaf_node["path"],
        }
        progress_details.append(cls(**entry))
    return progress_details
def user_progress_detail(cls, user_id, playlist_id):
    """ Return a list of video, exercise, and quiz log PlaylistProgressDetail objects associated with a specific user and playlist ID. """
    user = FacilityUser.objects.get(id=user_id)
    # The "playlist" here is the leafed topic whose id matches playlist_id.
    playlist = next((pl for pl in get_leafed_topics() if pl.get("id") == playlist_id), None)
    pl_video_ids, pl_exercise_ids = cls.get_playlist_entry_ids(playlist)
    # Retrieve video, exercise, and quiz logs that appear in this playlist
    user_vid_logs, user_ex_logs = cls.get_user_logs(user, pl_video_ids, pl_exercise_ids)
    # Format & append quiz the quiz log, if it exists
    # quiz_exists, quiz_log, quiz_pct_score = cls.get_quiz_log(user, (playlist.get("entries") or playlist.get("children")), playlist.get("id"))
    # Finally, sort an ordered list of the playlist entries, with user progress
    # injected where it exists.
    progress_details = list()
    for entity_id in playlist.get("children"):
        entry = {}
        # Content cache first; fall back to the exercise cache, then {}.
        leaf_node = get_content_cache().get(entity_id, get_exercise_cache().get(entity_id, {}))
        kind = leaf_node.get("kind")
        if kind == "Video":
            vid_log = next((vid_log for vid_log in user_vid_logs if vid_log["video_id"] == entity_id), None)
            if vid_log:
                if vid_log.get("complete"):
                    status = "complete"
                elif vid_log.get("total_seconds_watched"):
                    status = "inprogress"
                else:
                    status = "notstarted"
                entry = {
                    "id": entity_id,
                    "kind": kind,
                    "status": status,
                    # Points as a percentage of the 750-point video maximum.
                    "score": int(float(vid_log.get("points")) / float(750) * 100),
                    "title": leaf_node["title"],
                    "path": leaf_node["path"],
                }
        elif kind == "Exercise":
            ex_log = next((ex_log for ex_log in user_ex_logs if ex_log["exercise_id"] == entity_id), None)
            if ex_log:
                # "struggling" takes precedence over completion/progress.
                if ex_log.get("struggling"):
                    status = "struggling"
                elif ex_log.get("complete"):
                    status = "complete"
                elif ex_log.get("attempts"):
                    status = "inprogress"
                entry = {
                    "id": entity_id,
                    "kind": kind,
                    "status": status,
                    "score": ex_log.get("streak_progress"),
                    "title": leaf_node["title"],
                    "path": leaf_node["path"],
                }
        # Oh Quizzes, we hardly knew ye!
        # TODO (rtibbles): Sort out the status of Quizzes, and either reinstate them or remove them.
        # Quizzes were introduced to provide a way of practicing multiple types of exercise at once
        # However, there is currently no way to access them, and the manner for generating them (from the now deprecated Playlist models) is inaccessible
        # elif kind == "Quiz":
        #     entity_id = playlist["id"]
        #     if quiz_log:
        #         if quiz_log.complete:
        #             if quiz_pct_score <= 59:
        #                 status = "fail"
        #             elif quiz_pct_score <= 79:
        #                 status = "borderline"
        #             else:
        #                 status = "pass"
        #         elif quiz_log.attempts:
        #             status = "inprogress"
        #         else:
        #             status = "notstarted"
        #         quiz_log_id = quiz_log.quiz
        #         entry = {
        #             "id": quiz_log_id,
        #             "kind": "Quiz",
        #             "status": status,
        #             "score": quiz_pct_score,
        #             "title": playlist.get("title"),
        #             "path": "",
        #         }
        if not entry:
            # No user log found for this entity: emit a default entry.
            entry = cls.create_empty_entry(entity_id, kind, playlist)
        progress_details.append(cls(**entry))
    return progress_details
def user_progress_detail(cls, user_id, playlist_id):
    """ Return a list of video, exercise, and quiz log PlaylistProgressDetail objects associated with a specific user and playlist ID.

    Entries come either from legacy Playlist objects ("entries") or from
    leafed topics ("children"); both shapes are handled below.
    """
    user = FacilityUser.objects.get(id=user_id)
    # Search legacy Playlists first, then the leafed topic tree.
    playlist = next(
        (
            pl
            for pl in [plist.__dict__ for plist in Playlist.all()] + get_leafed_topics()
            if pl.get("id") == playlist_id
        ),
        None,
    )
    pl_video_ids, pl_exercise_ids = cls.get_playlist_entry_ids(playlist)
    # Retrieve video, exercise, and quiz logs that appear in this playlist
    user_vid_logs, user_ex_logs = cls.get_user_logs(user, pl_video_ids, pl_exercise_ids)
    # Format & append quiz the quiz log, if it exists
    quiz_exists, quiz_log, quiz_pct_score = cls.get_quiz_log(
        user, (playlist.get("entries") or playlist.get("children")), playlist.get("id")
    )
    # Finally, sort an ordered list of the playlist entries, with user progress
    # injected where it exists.
    progress_details = list()
    for ent in playlist.get("entries") or playlist.get("children"):
        entry = {}
        # Legacy entries carry "entity_kind"; topic children carry "kind".
        kind = ent.get("entity_kind") or ent.get("kind")
        if kind == "Divider":
            # Dividers are presentational only; no progress entry.
            continue
        elif kind == "Video":
            # Legacy entries store slugs; map to ids where needed.
            entity_id = get_slug2id_map().get(ent.get("entity_id")) or ent.get("id")
            vid_log = next((vid_log for vid_log in user_vid_logs if vid_log["video_id"] == entity_id), None)
            if vid_log:
                if vid_log.get("complete"):
                    status = "complete"
                elif vid_log.get("total_seconds_watched"):
                    status = "inprogress"
                else:
                    status = "notstarted"
                leaf_node = get_content_cache().get(vid_log["video_id"])
                entry = {
                    "id": entity_id,
                    "kind": kind,
                    "status": status,
                    # Points as a percentage of the 750-point video maximum.
                    "score": int(float(vid_log.get("points")) / float(750) * 100),
                    "title": leaf_node["title"],
                    "path": leaf_node["path"],
                }
        elif kind == "Exercise":
            entity_id = ent.get("entity_id") or ent.get("id")
            ex_log = next((ex_log for ex_log in user_ex_logs if ex_log["exercise_id"] == entity_id), None)
            if ex_log:
                # "struggling" takes precedence over completion/progress.
                if ex_log.get("struggling"):
                    status = "struggling"
                elif ex_log.get("complete"):
                    status = "complete"
                elif ex_log.get("attempts"):
                    status = "inprogress"
                ex_log_id = ex_log.get("exercise_id")
                leaf_node = get_exercise_cache().get(ex_log_id)
                entry = {
                    "id": ex_log_id,
                    "kind": kind,
                    "status": status,
                    "score": ex_log.get("streak_progress"),
                    "title": leaf_node["title"],
                    "path": leaf_node["path"],
                }
        elif kind == "Quiz":
            entity_id = playlist["id"]
            if quiz_log:
                if quiz_log.complete:
                    # Bucket the percentage score into pass/borderline/fail.
                    if quiz_pct_score <= 59:
                        status = "fail"
                    elif quiz_pct_score <= 79:
                        status = "borderline"
                    else:
                        status = "pass"
                elif quiz_log.attempts:
                    status = "inprogress"
                else:
                    status = "notstarted"
                quiz_log_id = quiz_log.quiz
                entry = {
                    "id": quiz_log_id,
                    "kind": "Quiz",
                    "status": status,
                    "score": quiz_pct_score,
                    "title": playlist.get("title"),
                    "path": "",
                }
        if not entry:
            # No user log found for this entity: emit a default entry.
            entry = cls.create_empty_entry(entity_id, kind, playlist)
        progress_details.append(cls(**entry))
    return progress_details
def handle(self, *args, **options):
    """
    Export the KA Lite topic tree (videos, thumbnails, subtitles and
    rendered HTML pages) for one language into a ZIM file via zimwriterfs.

    Takes exactly one positional argument: the destination .zim file path.

    :raises CommandError: on bad arguments, missing zimwriterfs binary,
        a dirty tmp dir without -c/-r, or a failed transcode.
    """
    if len(args) != 1:
        raise CommandError("Takes exactly 1 argument")

    dest_file = os.path.abspath(args[0])

    logger.info("Starting up KA Lite export2zim command")
    beginning = datetime.now()
    logger.info("Begin: {}".format(beginning))

    language = options.get('language')
    if not language:
        raise CommandError("Must specify a language!")

    # Working directory: either user-supplied or a per-language tmp dir.
    if not options.get('tmp_dir'):
        tmp_dir = os.path.join(tempfile.gettempdir(), 'ka-lite-zim_{}'.format(language))
    else:
        tmp_dir = options.get('tmp_dir')

    tmp_dir = os.path.abspath(tmp_dir)

    if os.path.exists(tmp_dir) and os.listdir(tmp_dir):
        if options['clear']:
            logger.info("Clearing directory {}".format(tmp_dir))
            shutil.rmtree(tmp_dir)
        elif options['resume']:
            logger.info("Resuming in dirty tmp directory {}".format(tmp_dir))
        else:
            raise CommandError(
                "{} not empty, use the -c option to clean it, -r to resume, or use an empty destination directory.".format(
                    tmp_dir
                )
            )

    zimwriterfs = options.get("zimwriterfs", None)
    publisher = options.get("publisher")
    transcode2webm = options.get("transcode2webm")
    ffmpeg = find_executable("ffmpeg")

    if not ffmpeg:
        logger.warning("FFMpeg not found in your path, you won't be able to create missing thumbnails or transcode to webm.")

    if not zimwriterfs:
        zimwriterfs = find_executable("zimwriterfs")
        if not zimwriterfs:
            raise CommandError("Could not find zimwriterfs in your path, try specifying --zimwriterfs=/path")

    if not os.path.exists(zimwriterfs):
        raise CommandError("Invalid --zimwriterfs")

    from kalite_zim import __name__ as base_path
    base_path = os.path.abspath(base_path)
    data_path = os.path.join(base_path, 'data')

    # Where subtitles are found in KA Lite
    subtitle_src_dir = i18n.get_srt_path(language)

    logger.info("Will export videos for language: {}".format(language))
    logger.info("Preparing KA Lite topic tree...")

    # Use live data
    if not options.get('test'):
        # This way of doing things will be deprecated in KA Lite 0.16
        topic_tree_json_path = topic_tools_settings.TOPICS_FILEPATHS.get('khan')
        content_cache = get_content_cache(language=language, annotate=True)
        exercise_cache = get_exercise_cache(language=language)
    # Use test data
    else:
        topic_tree_json_path = os.path.join(data_path, 'test_topics.json')
        content_cache = json.load(
            open(os.path.join(data_path, 'test_content.json'))
        )
        exercise_cache = json.load(
            open(os.path.join(data_path, 'test_exercise.json'))
        )

    topic_tree = softload_json(topic_tree_json_path, logger=logger.debug, raises=False)

    content_json_output = {}
    exercise_json_output = {}

    def annotate_tree(topic, depth=0, parent=None):
        """
        Recurse into the topic tree, annotating each element with its
        exercise/content cache data, translated strings, and navigation
        metadata (url, parent, depth).
        """
        children = topic.get('children', [])
        new_children = []
        # Only Video and Topic children are kept in the exported tree.
        for child_topic in children:
            if child_topic.get("kind") in ("Video", "Topic"):
                annotate_tree(child_topic, depth=depth + 1, parent=topic)
                new_children.append(child_topic)
        topic["children"] = new_children
        if topic.get("kind") == "Exercise":
            topic['exercise'] = exercise_cache.get(topic.get("id"), {})
            exercise_json_output[topic.get("id")] = topic['exercise']
        elif topic.get("kind") == "Topic":
            pass
        else:
            topic['exercise'] = None
            topic['content'] = content_cache.get(topic.get("id"), {})
            content_json_output[topic.get("id")] = topic['content']
            if not topic['content']:
                logger.error('No content!?, id is: {}'.format(topic.get('id')))

        # Translate everything for good measure
        with i18n.translate_block(language):
            topic["title"] = _(topic.get("title", ""))
            topic["description"] = _(topic.get("description", "")) if topic.get("description") else ""

        topic["url"] = topic["id"] + ".html"
        topic["parent"] = parent
        topic["depth"] = depth
        # Drop fields that are not needed in the export.
        for key in ("child_data", "keywords", "hide", "contains"):
            topic.pop(key, None)

    # 1. Annotate a topic tree
    annotate_tree(topic_tree)

    # 2. Now go through the tree and copy each element into the destination
    # zim file system
    def copy_media(node):
        """
        Hard-link (or download/transcode) each video, its thumbnail and
        subtitles into tmp_dir, marking nodes available/unavailable and
        pruning empty topics and unavailable videos from the tree.
        """
        if node['kind'] == 'Topic':
            # Don't do anything if it's a topic
            pass
        elif node['kind'] == 'Exercise':
            # Exercises cannot be displayed
            node["content"]["available"] = False
        elif node['kind'] == 'Video':
            if node['content']['format'] == "webm":
                logger.warning("Found a duplicate ID for {}, re-downloading".format(node['id']))
                node['content']['format'] = "mp4"
            # Available is False by default until we locate the file
            node["content"]["available"] = False
            node_dir = os.path.join(tmp_dir, node["path"])
            if not os.path.exists(node_dir):
                os.makedirs(node_dir)
            video_file_name = node['id'] + '.' + node['content']['format']
            thumb_file_name = node['id'] + '.png'
            video_file_src = os.path.join(CONTENT_ROOT, video_file_name)
            video_file_dest = os.path.join(node_dir, video_file_name)
            thumb_file_src = os.path.join(CONTENT_ROOT, thumb_file_name)
            thumb_file_dest = os.path.join(node_dir, thumb_file_name)

            if options['download'] and not os.path.exists(video_file_src):
                logger.info("Video file being downloaded to: {}".format(video_file_src))
                download_video(
                    node['content']['youtube_id'],
                    node['content']['format'],
                    CONTENT_ROOT,
                )

            if os.path.exists(video_file_src):
                if transcode2webm:
                    # Two-pass VP8 transcode; pass log is reset per video.
                    ffmpeg_pass_log = "/tmp/logfile_vp8.fpf"
                    if os.path.isfile(ffmpeg_pass_log):
                        os.unlink(ffmpeg_pass_log)
                    video_file_name = node['id'] + '.webm'
                    video_file_dest = os.path.join(node_dir, video_file_name)
                    if os.path.isfile(video_file_dest):
                        logger.info("Already encoded: {}".format(video_file_dest))
                    else:
                        ffmpeg_base_args = [
                            ffmpeg,
                            "-i", video_file_src,
                            "-codec:v", "libvpx",
                            "-quality", "best",
                            "-cpu-used", "0",
                            "-b:v", "300k",
                            "-qmin", "10",  # 10=lowest value
                            "-qmax", "35",  # 42=highest value
                            "-maxrate", "300k",
                            "-bufsize", "600k",
                            "-threads", "8",
                            # "-vf", "scale=-1",
                            "-codec:a", "libvorbis",
                            # "-b:a", "128k",
                            "-aq", "5",
                            "-f", "webm",
                        ]
                        ffmpeg_pass1 = ffmpeg_base_args + [
                            "-an",  # Disables audio, no effect first pass
                            "-pass", "1",
                            "-passlogfile", ffmpeg_pass_log,
                            video_file_dest,
                        ]
                        ffmpeg_pass2 = ffmpeg_base_args + [
                            "-pass", "2",
                            "-y",
                            "-passlogfile", ffmpeg_pass_log,
                            video_file_dest,
                        ]
                        for cmd in (ffmpeg_pass1, ffmpeg_pass2):
                            process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
                            stdout_data, _stderr_data = process.communicate()
                            if process.returncode != 0:
                                # stderr is not piped, so _stderr_data is None; guard both.
                                logger.error("Error invoking ffmpeg: {}".format((_stderr_data or "") + (stdout_data or "")))
                                logger.error("Command was: {}".format(" ".join(cmd)))
                                raise CommandError("Could not complete transcoding")
                        node['content']['format'] = "webm"
                else:
                    # If not transcoding, just link the original file
                    os.link(video_file_src, video_file_dest)

                node["video_url"] = os.path.join(
                    node["path"],
                    video_file_name
                )
                copy_media.videos_found += 1
                logger.info("Videos processed: {}".format(copy_media.videos_found))
                node["content"]["available"] = True

                # Create thumbnail if it wasn't downloaded
                if not os.path.exists(thumb_file_src):
                    fp = create_thumbnail(video_file_src, output_format="png")
                    if fp is None:
                        logger.error("Failed to create thumbnail for {}".format(video_file_src))
                    else:
                        logger.info("Successfully created thumbnail for {}".format(video_file_src))
                        # BUG FIX: was `file(...).write(...)` (py2 builtin, leaked
                        # the handle); use a context manager instead.
                        with open(thumb_file_src, 'wb') as thumb_f:
                            thumb_f.write(fp.read())

                # Handle thumbnail
                if os.path.exists(thumb_file_src):
                    node["thumbnail_url"] = os.path.join(
                        node["path"],
                        node['id'] + '.png'
                    )
                    if not os.path.exists(thumb_file_dest):
                        os.link(thumb_file_src, thumb_file_dest)
                else:
                    node["thumbnail_url"] = None

                subtitle_srt = os.path.join(
                    subtitle_src_dir,
                    node['id'] + '.srt'
                )
                if os.path.isfile(subtitle_srt):
                    subtitle_vtt = os.path.join(
                        node_dir,
                        node['id'] + '.vtt'
                    )
                    # Convert to .vtt because this format is understood
                    # by latest video.js and the old ones that read
                    # .srt don't work with newer jquery etc.
                    submarine_parser(subtitle_srt, subtitle_vtt)
                    if not os.path.exists(subtitle_vtt):
                        logger.warning("Subtitle not converted: {}".format(subtitle_srt))
                    else:
                        logger.info("Subtitle convert from SRT to VTT: {}".format(subtitle_vtt))
                        node["subtitle_url"] = os.path.join(
                            node["path"],
                            node['id'] + '.vtt'
                        )
            else:
                if options['download']:
                    logger.error("File not found or downloaded: {}".format(video_file_src))
        else:
            logger.error("Invalid node, kind: {}".format(node.get("kind", None)))
            # Exercises cannot be displayed
            node["content"] = {"available": False}

        # Prune empty topics and unavailable videos from the exported tree.
        new_children = []
        for child in node.get('children', []):
            copy_media(child)
            empty_topic = child["kind"] == "Topic" and not child.get("children", [])
            unavailable_video = child["kind"] == "Video" and not child.get("content", {}).get("available", False)
            if not (empty_topic or unavailable_video):
                new_children.append(child)
        node['children'] = new_children
    copy_media.videos_found = 0

    def render_topic_pages(node):
        """Render one HTML page per topic node, recursing into children."""
        parents = [node] if node.get("children") else []
        parent = node["parent"]
        while parent:
            parents.append(parent)
            parent = parent["parent"]

        # Finally, render templates into the destination
        template_context = {
            "topic_tree": topic_tree,
            "topic": node,
            "parents": parents
        }
        with i18n.translate_block(language):
            topic_html = render_to_string("kalite_zim/topic.html", template_context)
            # Replace absolute references to '/static' with relative
            topic_html = topic_html.replace("/static", "static")

        dest_html = os.path.join(tmp_dir, node["id"] + ".html")
        logger.info("Rendering {}".format(dest_html))
        open(dest_html, "w").write(topic_html)

        render_topic_pages.pages_rendered += 1

        for child in node.get('children', []):
            render_topic_pages(child)
    render_topic_pages.pages_rendered = 0

    logger.info("Hard linking video files from KA Lite...")
    copy_media(topic_tree)

    sys.stderr.write("\n")
    logger.info("Done!")

    # Configure django-compressor
    compressor_init(os.path.join(base_path, 'static'))

    # Finally, render templates into the destination
    template_context = {
        "topic_tree": topic_tree,
        "welcome": True,
    }

    with i18n.translate_block(language):
        welcome_html = render_to_string("kalite_zim/welcome.html", template_context)
        about_html = render_to_string("kalite_zim/about.html", template_context)

    # Replace absolute references to '/static' with relative
    welcome_html = welcome_html.replace("/static", "static")
    about_html = about_html.replace("/static", "static")

    # Write the welcome.html file
    open(os.path.join(tmp_dir, 'welcome.html'), 'w').write(welcome_html)
    open(os.path.join(tmp_dir, 'about.html'), 'w').write(about_html)

    # Render all topic html files
    render_topic_pages(topic_tree)

    # Copy in static data after it's been handled by django compressor
    # (this happens during template rendering)
    shutil.copytree(os.path.join(base_path, 'static'), os.path.join(tmp_dir, 'static'))

    ending = datetime.now()
    duration = int((ending - beginning).total_seconds())

    logger.info("Total number of videos found: {}".format(copy_media.videos_found))
    logger.info("Total number of topic pages created: {}".format(render_topic_pages.pages_rendered))
    logger.info("Invoking zimwriterfs, writing to: {}".format(dest_file))

    # NOTE(review): "--description" is passed twice; the second one is likely
    # intended to be "--title" — confirm against the zimwriterfs CLI before
    # changing, since zimwriterfs may reject an unexpected flag.
    zimwriterfs_args = (
        zimwriterfs,
        "--welcome", "welcome.html",
        "--favicon", "static/img/ka_leaf.png",
        "--publisher", publisher,
        "--creator", "KhanAcademy.org",
        "--description", "Khan Academy ({})".format(language),
        "--description", "Videos from Khan Academy",
        "--language", language,
        tmp_dir,
        dest_file,
    )

    process = subprocess.Popen(zimwriterfs_args, stdout=subprocess.PIPE)
    stdout_data, _stderr_data = process.communicate()
    if process.returncode != 0:
        # BUG FIX: was `logger.error("...{}").format(...)` — .format() was
        # called on logger.error's None return value, raising AttributeError
        # and never logging the output. Also guard None stderr (not piped).
        logger.error("Error invoking zimwriterfs: {}".format((_stderr_data or "") + (stdout_data or "")))

    logger.info(
        "Duration: {h:} hours, {m:} minutes, {s:} seconds".format(
            h=duration // 3600,
            m=(duration % 3600) // 60,
            s=duration % 60,
        )
    )
def _get_user_usage_data(users, groups=None, period_start=None, period_end=None, group_id=None):
    """
    Returns facility user data, within the given date range.

    Builds two OrderedDicts:
      - user_data: per-user stats keyed by user pk (logins, hours, videos,
        exercises, mastery percentage, lists of mastered/watched ids).
      - group_data: per-group aggregates keyed by group pk (None = ungrouped).

    period_start/period_end are date strings; period_end is extended to the
    end of its day before filtering (see midnight-bug comment below).
    """
    groups = groups or set([user.group for user in users])

    # compute period start and end
    # Now compute stats, based on queried data
    # NOTE(review): if the exercise cache is empty, num_exercises == 0 and the
    # `1. / num_exercises` below would raise ZeroDivisionError — presumably the
    # cache is always non-empty in practice; confirm.
    num_exercises = len(get_exercise_cache())
    user_data = OrderedDict()
    group_data = OrderedDict()

    # Make queries efficiently
    exercise_logs = ExerciseLog.objects.filter(user__in=users, complete=True)
    video_logs = VideoLog.objects.filter(user__in=users, total_seconds_watched__gt=0)
    login_logs = UserLogSummary.objects.filter(user__in=users)

    # filter results
    login_logs = login_logs.filter(total_seconds__gt=0)
    if period_start:
        exercise_logs = exercise_logs.filter(
            completion_timestamp__gte=period_start)
        video_logs = video_logs.filter(completion_timestamp__gte=period_start)
    if period_end:
        # MUST: Fix the midnight bug where period end covers up to the prior day only because
        # period end is datetime(year, month, day, hour=0, minute=0), meaning midnight of previous day.
        # Example:
        #   If period_end == '2014-12-01', we cannot include the records dated '2014-12-01 09:30'.
        #   So to fix this, we change it to '2014-12-01 23:59.999999'.
        period_end = dateutil.parser.parse(period_end)
        period_end = period_end + dateutil.relativedelta.relativedelta(
            days=+1, microseconds=-1)
        exercise_logs = exercise_logs.filter(
            completion_timestamp__lte=period_end)
        video_logs = video_logs.filter(completion_timestamp__lte=period_end)
    if period_start and period_end:
        # Re-apply both bounds in a single combined Q filter.
        exercise_logs = exercise_logs.filter(
            Q(completion_timestamp__gte=period_start) &
            Q(completion_timestamp__lte=period_end))
        q1 = Q(completion_timestamp__isnull=False) & \
            Q(completion_timestamp__gte=period_start) & \
            Q(completion_timestamp__lte=period_end)
        video_logs = video_logs.filter(q1)
        # Login summaries must start AND end within the window.
        login_q1 = Q(start_datetime__gte=period_start) & Q(start_datetime__lte=period_end) & \
            Q(end_datetime__gte=period_start) & Q(end_datetime__lte=period_end)
        login_logs = login_logs.filter(login_q1)

    # Force results in a single query
    exercise_logs = list(exercise_logs.values("exercise_id", "user__pk"))
    video_logs = list(video_logs.values("video_id", "user__pk"))
    login_logs = list(
        login_logs.values("activity_type", "total_seconds", "user__pk"))

    # Initialize a zeroed stats record for every user.
    for user in users:
        user_data[user.pk] = OrderedDict()
        user_data[user.pk]["id"] = user.pk
        user_data[user.pk]["first_name"] = user.first_name
        user_data[user.pk]["last_name"] = user.last_name
        user_data[user.pk]["username"] = user.username
        user_data[user.pk]["group"] = user.group

        user_data[user.pk][
            "total_report_views"] = 0  #report_stats["count__sum"] or 0
        user_data[
            user.pk]["total_logins"] = 0  # login_stats["count__sum"] or 0
        user_data[user.pk][
            "total_hours"] = 0  #login_stats["total_seconds__sum"] or 0)/3600.

        user_data[user.pk]["total_exercises"] = 0
        user_data[user.pk]["pct_mastery"] = 0.
        user_data[user.pk]["exercises_mastered"] = []

        user_data[user.pk]["total_videos"] = 0
        user_data[user.pk]["videos_watched"] = []

    # Each completed exercise log contributes 1/num_exercises to mastery pct.
    for elog in exercise_logs:
        user_data[elog["user__pk"]]["total_exercises"] += 1
        user_data[elog["user__pk"]]["pct_mastery"] += 1. / num_exercises
        user_data[elog["user__pk"]]["exercises_mastered"].append(
            elog["exercise_id"])

    for vlog in video_logs:
        user_data[vlog["user__pk"]]["total_videos"] += 1
        user_data[vlog["user__pk"]]["videos_watched"].append(vlog["video_id"])

    # Split login summaries into coach-report views vs. actual login time.
    for llog in login_logs:
        if llog["activity_type"] == UserLog.get_activity_int("coachreport"):
            user_data[llog["user__pk"]]["total_report_views"] += 1
        elif llog["activity_type"] == UserLog.get_activity_int("login"):
            user_data[llog["user__pk"]]["total_hours"] += (
                llog["total_seconds"]) / 3600.
            user_data[llog["user__pk"]]["total_logins"] += 1

    # The `[None] * (bool)` trick appends one extra None "group" (the
    # ungrouped bucket) only when no specific group_id was requested.
    for group in list(groups) + [None] * (
            group_id == None or group_id == UNGROUPED
    ):  # None for ungrouped, if no group_id passed.
        group_pk = getattr(group, "pk", None)
        group_name = getattr(group, "name", _(UNGROUPED))
        group_title = getattr(group, "title", _(UNGROUPED))
        group_data[group_pk] = {
            "id": group_pk,
            "name": group_name,
            "title": group_title,
            "total_logins": 0,
            "total_hours": 0,
            "total_users": 0,
            "total_videos": 0,
            "total_exercises": 0,
            "pct_mastery": 0,
        }

    # Add group data. Allow a fake group UNGROUPED
    for user in users:
        group_pk = getattr(user.group, "pk", None)
        if group_pk not in group_data:
            logging.error("User %s still in nonexistent group %s!" % (user.id, group_pk))
            continue
        group_data[group_pk]["total_users"] += 1
        group_data[group_pk]["total_logins"] += user_data[
            user.pk]["total_logins"]
        group_data[group_pk]["total_hours"] += user_data[
            user.pk]["total_hours"]
        group_data[group_pk]["total_videos"] += user_data[
            user.pk]["total_videos"]
        group_data[group_pk]["total_exercises"] += user_data[
            user.pk]["total_exercises"]

        # Running average of mastery pct, updated incrementally per user.
        total_mastery_so_far = (group_data[group_pk]["pct_mastery"] *
                                (group_data[group_pk]["total_users"] - 1) +
                                user_data[user.pk]["pct_mastery"])
        group_data[group_pk][
            "pct_mastery"] = total_mastery_so_far / group_data[group_pk][
                "total_users"]

    # Drop the ungrouped bucket if it's the only group and has no members.
    # (dict.has_key is Python 2 only.)
    if len(group_data) == 1 and group_data.has_key(None):
        if not group_data[None]["total_users"]:
            del group_data[None]

    return (user_data, group_data)
def compute_data(data_types, who, where, language=settings.LANGUAGE_CODE):
    """
    Compute the data in "data_types" for each user in "who", for the topics selected by "where"

    who: list of users
    where: topic_path
    data_types can include:
        pct_mastery
        effort
        attempts
    """
    # None indicates that the data hasn't been queried yet.
    # We'll query it on demand, for efficiency
    topics = None
    exercises = None
    videos = None

    # Initialize an empty dictionary of data, video logs, exercise logs, for each user
    data = OrderedDict(zip([w.id for w in who], [dict() for i in range(len(who))]))  # maintain the order of the users
    vid_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))
    ex_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))
    if UserLog.is_enabled():
        activity_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))

    # Set up queries (but don't run them), so we have really easy aliases.
    # Only do them if they haven't been done yet (tell this by passing in a value to the lambda function)
    # Topics: topics.
    # Exercises: names (ids for ExerciseLog objects)
    # Videos: video_id (ids for VideoLog objects)

    # This lambda partial creates a function to return all items with a particular path from the NODE_CACHE.
    search_fun_single_path = partial(lambda t, p: t["path"].startswith(p), p=tuple(where))
    # This lambda partial creates a function to return all items with paths matching a list of paths from NODE_CACHE.
    search_fun_multi_path = partial(lambda ts, p: any([t["path"].startswith(p) for t in ts]), p=tuple(where))
    # Functions that use the functions defined above to return topics, exercises, and videos based on paths.
    query_topics = partial(lambda t, sf: t if t is not None else [t["id"] for t in filter(sf, get_node_cache('Topic', language=language).values())], sf=search_fun_single_path)
    query_exercises = partial(lambda e, sf: e if e is not None else [ex["id"] for ex in filter(sf, get_exercise_cache(language=language).values())], sf=search_fun_single_path)
    query_videos = partial(lambda v, sf: v if v is not None else [vid["id"] for vid in filter(sf, get_node_cache('Content', language=language).values())], sf=search_fun_single_path)

    # No users, don't bother.
    if len(who) > 0:
        # Query out all exercises, videos, exercise logs, and video logs before looping to limit requests.
        # This means we could pull data for n-dimensional coach report displays with the same number of requests!
        # Note: User activity is polled inside the loop, to prevent possible slowdown for exercise and video reports.
        exercises = query_exercises(exercises)
        videos = query_videos(videos)
        if exercises:
            ex_logs = query_logs(data.keys(), exercises, "exercise", ex_logs)
        if videos:
            vid_logs = query_logs(data.keys(), videos, "video", vid_logs)

        for data_type in (data_types if not hasattr(data_types, "lower") else [data_types]):  # convert list from string, if necessary
            if data_type in data[data.keys()[0]]:  # if the first user has it, then all do; no need to calc again.
                continue

            #
            # These are summary stats: you only get one per user
            #
            if data_type == "pct_mastery":
                # Efficient query out, spread out to dict
                for user in data.keys():
                    data[user][data_type] = 0 if not ex_logs[user] else 100. * sum([el['complete'] for el in ex_logs[user]]) / float(len(exercises))

            elif data_type == "effort":
                if "ex:attempts" in data[data.keys()[0]] and "vid:total_seconds_watched" in data[data.keys()[0]]:
                    # exercises and videos would be initialized already
                    for user in data.keys():
                        avg_attempts = 0 if len(exercises) == 0 else sum(data[user]["ex:attempts"].values()) / float(len(exercises))
                        avg_video_points = 0 if len(videos) == 0 else sum(data[user]["vid:total_seconds_watched"].values()) / float(len(videos))
                        data[user][data_type] = 100. * (0.5 * avg_attempts / 10. + 0.5 * avg_video_points / 750.)
                else:
                    # Defer: queue the prerequisite detail stats, then retry "effort".
                    data_types += ["ex:attempts", "vid:total_seconds_watched", "effort"]

            #
            # These are detail stats: you get many per user
            #

            # Just querying out data directly: Video
            elif data_type.startswith("vid:") and data_type[4:] in [f.name for f in VideoLog._meta.fields]:
                for user in data.keys():
                    data[user][data_type] = OrderedDict([(v['video_id'], v[data_type[4:]]) for v in vid_logs[user]])

            # Just querying out data directly: Exercise
            elif data_type.startswith("ex:") and data_type[3:] in [f.name for f in ExerciseLog._meta.fields]:
                for user in data.keys():
                    data[user][data_type] = OrderedDict([(el['exercise_id'], el[data_type[3:]]) for el in ex_logs[user]])

            # User Log Queries
            # BUG FIX: this branch was corrupted in the source
            # (`startswith("user:"******"", ...`); reconstructed to mirror the
            # "usersum:" branch below against UserLog fields.
            elif data_type.startswith("user:") and data_type[5:] in [f.name for f in UserLog._meta.fields] and UserLog.is_enabled():
                activity_logs = query_logs(data.keys(), "", "activity", activity_logs)
                for user in data.keys():
                    data[user][data_type] = [log[data_type[5:]] for log in activity_logs[user]]

            # User Summary Queries
            elif data_type.startswith("usersum:") and data_type[8:] in [f.name for f in UserLogSummary._meta.fields] and UserLog.is_enabled():
                activity_logs = query_logs(data.keys(), "", "summaryactivity", activity_logs)
                for user in data.keys():
                    data[user][data_type] = sum([log[data_type[8:]] for log in activity_logs[user]])
            # Unknown requested quantity
            else:
                raise Exception("Unknown type: '%s' not in %s" % (data_type, str([f.name for f in ExerciseLog._meta.fields])))

    # Returning empty list instead of None allows javascript on client
    # side to read 'length' property without error.
    exercises = exercises or []
    videos = videos or []

    return {
        "data": data,
        "topics": topics,
        "exercises": exercises,
        "videos": videos,
    }
def aggregate_learner_logs(request):
    """
    Aggregate exercise/video/content log stats (time spent, attempts, average
    mastery, recent events) for the learners selected by the request, over an
    optional date window, and return them as JSON.
    """
    lang = request.language

    learners = get_learners_from_GET(request)

    # BUG FIX: request.GET values are strings; without int() the slice
    # output_logs[:event_limit] and datetime.timedelta(time_window) below
    # raise TypeError whenever these parameters are supplied by the client.
    event_limit = int(request.GET.get("event_limit", 10))

    # Look back a week by default
    time_window = int(request.GET.get("time_window", 7))

    start_date = request.GET.get("start_date", None)
    end_date = request.GET.get("end_date", None)

    topic_ids = request.GET.getlist("topic_id", [])

    log_types = request.GET.getlist("log_type", ["exercise", "video", "content"])

    output_logs = []
    output_dict = {
        "content_time_spent": 0,
        "exercise_attempts": 0,
        "exercise_mastery": None,
    }

    end_date = datetime.datetime.strptime(end_date, '%Y/%m/%d') if end_date else datetime.datetime.now()
    start_date = datetime.datetime.strptime(start_date, '%Y/%m/%d') if start_date else end_date - datetime.timedelta(time_window)

    for log_type in log_types:
        LogModel, fields, id_field, obj_ids, objects = return_log_type_details(log_type, topic_ids)

        log_objects = LogModel.objects.filter(
            user__in=learners,
            latest_activity_timestamp__gte=start_date,
            latest_activity_timestamp__lte=end_date,
            **obj_ids).order_by("-latest_activity_timestamp")

        if log_type == "video":
            output_dict["content_time_spent"] += log_objects.aggregate(Sum("total_seconds_watched"))["total_seconds_watched__sum"] or 0
        elif log_type == "content":
            output_dict["content_time_spent"] += log_objects.aggregate(Sum("time_spent"))["time_spent__sum"] or 0
        elif log_type == "exercise":
            output_dict["exercise_attempts"] = AttemptLog.objects.filter(user__in=learners,
                timestamp__gte=start_date, timestamp__lte=end_date).count()
            if log_objects.aggregate(Avg("streak_progress"))["streak_progress__avg"] is not None:
                output_dict["exercise_mastery"] = round(log_objects.aggregate(Avg("streak_progress"))["streak_progress__avg"])
        output_logs.extend(log_objects)

    # Report total time in hours
    output_dict["content_time_spent"] = round(output_dict["content_time_spent"]/3600.0, 1)

    # Newest activity first; only the most recent `event_limit` events are returned.
    output_logs.sort(key=lambda x: x.latest_activity_timestamp, reverse=True)

    output_dict["learner_events"] = [{
        "learner": log.user.get_name(),
        "complete": log.complete,
        "struggling": getattr(log, "struggling", None),
        "progress": getattr(log, "streak_progress", getattr(log, "progress", None)),
        "content": get_exercise_cache(language=lang).get(getattr(log, "exercise_id", ""))
            or get_content_cache(language=lang).get(getattr(log, "video_id", None) or getattr(log, "content_id", "")) or {}
    } for log in output_logs[:event_limit]]

    output_dict["total_time_logged"] = round((UserLogSummary.objects\
        .filter(user__in=learners, start_datetime__gte=start_date, start_datetime__lte=end_date)\
        .aggregate(Sum("total_seconds")).get("total_seconds__sum") or 0)/3600.0, 1)

    return JsonResponse(output_dict)
def test_detail_view(request, test_id):
    """View details of student performance on specific exams"""
    facility, group_id, context = coach_nav_context(request, "test")
    # get users in this facility and group
    users = get_user_queryset(request, facility, group_id)

    # Get test object
    test_resource = TestResource()
    test_obj = test_resource._read_test(test_id=test_id)

    # get all of the test logs for this specific test object and generated by these specific users
    if group_id:
        test_logs = TestLog.objects.filter(user__group=group_id, test=test_id)
        # Narrow all by ungroup facility user
        if group_id == control_panel_api_resources.UNGROUPED_KEY:
            # NOTE(review): these two branches look inverted — the query with
            # `facility=facility` sits in the branch where `facility` is falsy,
            # and both drop the `test=test_id` filter applied above. Also,
            # TestLog is filtered on a direct `facility` field here but on
            # `user__facility` below — confirm which is the real field name.
            if facility:
                test_logs = TestLog.objects.filter(user__group__isnull=True)
            else:
                test_logs = TestLog.objects.filter(facility=facility, user__group__isnull=True)
    else:
        # covers the all groups case
        test_logs = TestLog.objects.filter(user__facility=facility, test=test_id)

    results_table, scores_dict = OrderedDict(), OrderedDict()
    # build this up now to use in summary stats section
    ex_ids = set(literal_eval(test_obj.ids))
    for ex in ex_ids:
        scores_dict[ex] = []
    for s in users:
        s.name = s.get_name()
        # All attempts this student made within this test's context.
        user_attempts = AttemptLog.objects.filter(user=s, context_type='test', context_id=test_id)
        results_table[s] = []
        attempts_count_total, attempts_count_correct_total = 0, 0
        # One row cell per exercise: percent correct over this student's attempts.
        for ex in ex_ids:
            attempts = [attempt for attempt in user_attempts if attempt.exercise_id == ex]
            attempts_count = len(attempts)
            attempts_count_correct = len(
                [attempt for attempt in attempts if attempt.correct])
            attempts_count_total += attempts_count
            attempts_count_correct_total += attempts_count_correct
            if attempts_count:
                score = round(
                    100 * float(attempts_count_correct) / float(attempts_count),
                    1)
                scores_dict[ex].append(score)
                display_score = "%d%%" % score
            else:
                # No attempts: render an empty cell.
                score = ''
                display_score = ''
            results_table[s].append({
                'display_score': display_score,
                'raw_score': score,
            })

        # Calc overall score
        if attempts_count_total:
            score = round(
                100 * float(attempts_count_correct_total) / float(attempts_count_total),
                1)
            display_score = "%d%%" % score
            fraction_correct = "(%(correct)d/%(attempts)d)" % ({
                'correct': attempts_count_correct_total,
                'attempts': attempts_count_total
            })
        else:
            score = ''
            display_score = ''
            fraction_correct = ''
        # Final column: the student's overall score with a raw fraction tooltip.
        results_table[s].append({
            'display_score': display_score,
            'raw_score': score,
            'title': fraction_correct,
        })

    # This retrieves stats for individual exercises
    stats_dict = OrderedDict()
    for stat in SUMMARY_STATS:
        stats_dict[stat] = []
        for ex in ex_ids:
            scores_list = scores_dict[ex]
            if scores_list:
                stats_dict[stat].append("%d%%" % return_list_stat(scores_list, stat))
            else:
                stats_dict[stat].append('')

    # replace the exercise ids with their full names
    exercises = get_exercise_cache()
    ex_titles = []
    for ex in ex_ids:
        ex_titles.append(exercises[ex]['title'])

    # provide a list of test options to view for this group/facility combo
    if group_id:
        test_logs = TestLog.objects.filter(user__group=group_id)
    else:
        # covers the all/no groups case
        test_logs = TestLog.objects.filter(user__facility=facility)
    test_objects = test_resource._read_tests()
    unique_test_ids = set([test_log.test for test_log in test_logs])
    # Only offer links to tests that actually have logs in this scope.
    test_options = [{
        'id': obj.test_id,
        'url': reverse('test_detail_view', kwargs={'test_id': obj.test_id}),
        'title': obj.title
    } for obj in test_objects if obj.test_id in unique_test_ids]
    context = plotting_metadata_context(request, facility=facility)
    context.update({
        "test_obj": test_obj,
        "ex_cols": ex_titles,
        "results_table": results_table,
        "stats_dict": stats_dict,
        "test_options": test_options,
    })
    return context
def compute_data(data_types, who, where, language=settings.LANGUAGE_CODE):
    """
    Compute the data in "data_types" for each user in "who", for the topics selected by "where"

    who: list of users
    where: topic_path
    data_types can include:
        pct_mastery
        effort
        attempts
    """
    # None indicates that the data hasn't been queried yet.
    # We'll query it on demand, for efficiency
    topics = None
    exercises = None
    videos = None

    # Initialize an empty dictionary of data, video logs, exercise logs, for each user
    data = OrderedDict(
        zip([w.id for w in who],
            [dict() for i in range(len(who))]))  # maintain the order of the users
    vid_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))
    ex_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))
    if UserLog.is_enabled():
        activity_logs = dict(
            zip([w.id for w in who], [[] for i in range(len(who))]))

    # Set up queries (but don't run them), so we have really easy aliases.
    # Only do them if they haven't been done yet (tell this by passing in a value to the lambda function)
    # Topics: topics.
    # Exercises: names (ids for ExerciseLog objects)
    # Videos: video_id (ids for VideoLog objects)

    # This lambda partial creates a function to return all items with a particular path from the NODE_CACHE.
    search_fun_single_path = partial(lambda t, p: t["path"].startswith(p),
                                     p=tuple(where))
    # This lambda partial creates a function to return all items with paths matching a list of paths from NODE_CACHE.
    search_fun_multi_path = partial(
        lambda ts, p: any([t["path"].startswith(p) for t in ts]),
        p=tuple(where))
    # Functions that use the functions defined above to return topics, exercises, and videos based on paths.
    query_topics = partial(lambda t, sf: t if t is not None else [
        t["id"]
        for t in filter(sf, get_node_cache('Topic', language=language).values())
    ],
                           sf=search_fun_single_path)
    query_exercises = partial(lambda e, sf: e if e is not None else [
        ex["id"]
        for ex in filter(sf, get_exercise_cache(language=language).values())
    ],
                              sf=search_fun_single_path)
    query_videos = partial(lambda v, sf: v if v is not None else [
        vid["id"] for vid in filter(
            sf, get_node_cache('Content', language=language).values())
    ],
                           sf=search_fun_single_path)

    # No users, don't bother.
    if len(who) > 0:
        # Query out all exercises, videos, exercise logs, and video logs before looping to limit requests.
        # This means we could pull data for n-dimensional coach report displays with the same number of requests!
        # Note: User activity is polled inside the loop, to prevent possible slowdown for exercise and video reports.
        exercises = query_exercises(exercises)
        videos = query_videos(videos)
        if exercises:
            ex_logs = query_logs(data.keys(), exercises, "exercise", ex_logs)
        if videos:
            vid_logs = query_logs(data.keys(), videos, "video", vid_logs)

        for data_type in (data_types if not hasattr(data_types, "lower") else [
                data_types
        ]):  # convert list from string, if necessary
            if data_type in data[data.keys(
            )[0]]:  # if the first user has it, then all do; no need to calc again.
                continue

            #
            # These are summary stats: you only get one per user
            #
            if data_type == "pct_mastery":
                # Efficient query out, spread out to dict
                for user in data.keys():
                    data[user][data_type] = 0 if not ex_logs[user] else 100. * sum(
                        [el['complete'] for el in ex_logs[user]]) / float(len(exercises))
            elif data_type == "effort":
                if "ex:attempts" in data[data.keys(
                )[0]] and "vid:total_seconds_watched" in data[data.keys()[0]]:
                    # exercises and videos would be initialized already
                    for user in data.keys():
                        avg_attempts = 0 if len(exercises) == 0 else sum(
                            data[user]["ex:attempts"].values()) / float(
                                len(exercises))
                        avg_video_points = 0 if len(videos) == 0 else sum(
                            data[user]["vid:total_seconds_watched"].values(
                            )) / float(len(videos))
                        data[user][data_type] = 100. * (
                            0.5 * avg_attempts / 10. +
                            0.5 * avg_video_points / 750.)
                else:
                    # Defer: queue the prerequisite detail stats, then retry "effort".
                    data_types += [
                        "ex:attempts", "vid:total_seconds_watched", "effort"
                    ]

            #
            # These are detail stats: you get many per user
            #

            # Just querying out data directly: Video
            elif data_type.startswith("vid:") and data_type[4:] in [
                    f.name for f in VideoLog._meta.fields
            ]:
                for user in data.keys():
                    data[user][data_type] = OrderedDict([
                        (v['video_id'], v[data_type[4:]]) for v in vid_logs[user]
                    ])

            # Just querying out data directly: Exercise
            elif data_type.startswith("ex:") and data_type[3:] in [
                    f.name for f in ExerciseLog._meta.fields
            ]:
                for user in data.keys():
                    data[user][data_type] = OrderedDict([
                        (el['exercise_id'], el[data_type[3:]]) for el in ex_logs[user]
                    ])

            # User Log Queries
            # BUG FIX: this branch was corrupted in the source
            # (`startswith("user:"******"", ...`); reconstructed to mirror the
            # "usersum:" branch below against UserLog fields.
            elif data_type.startswith("user:") and data_type[5:] in [
                    f.name for f in UserLog._meta.fields
            ] and UserLog.is_enabled():
                activity_logs = query_logs(data.keys(), "", "activity",
                                           activity_logs)
                for user in data.keys():
                    data[user][data_type] = [
                        log[data_type[5:]] for log in activity_logs[user]
                    ]

            # User Summary Queries
            elif data_type.startswith("usersum:") and data_type[8:] in [
                    f.name for f in UserLogSummary._meta.fields
            ] and UserLog.is_enabled():
                activity_logs = query_logs(data.keys(), "", "summaryactivity",
                                           activity_logs)
                for user in data.keys():
                    data[user][data_type] = sum(
                        [log[data_type[8:]] for log in activity_logs[user]])
            # Unknown requested quantity
            else:
                raise Exception(
                    "Unknown type: '%s' not in %s" %
                    (data_type, str([f.name for f in ExerciseLog._meta.fields])))

    # Returning empty list instead of None allows javascript on client
    # side to read 'length' property without error.
    exercises = exercises or []
    videos = videos or []

    return {
        "data": data,
        "topics": topics,
        "exercises": exercises,
        "videos": videos,
    }
def learner_logs(request):
    """
    Return a JSON summary of learner activity logs for the coach report.

    GET parameters (all optional):
        page, limit        -- pagination over the selected learners
        time_window        -- days to look back when no explicit dates given
        start_date/end_date -- '%Y/%m/%d' bounds on latest_activity_timestamp
        topic_id           -- repeatable; restricts logs to those topics
        log_type           -- repeatable; defaults to exercise/video/content
    """
    lang = request.language
    # GET values arrive as strings -- coerce before any arithmetic,
    # otherwise page*limit and len(learners)/float(limit) blow up whenever
    # the client actually supplies these parameters.
    page = int(request.GET.get("page", 1))
    limit = int(request.GET.get("limit", 50))

    # Look back a week by default
    time_window = int(request.GET.get("time_window", 7))

    start_date = request.GET.get("start_date", None)
    end_date = request.GET.get("end_date", None)

    topic_ids = request.GET.getlist("topic_id", [])

    learners = get_learners_from_GET(request)

    pages = int(ceil(len(learners) / float(limit)))

    if page * limit < len(learners):
        learners = learners[(page - 1) * limit: page * limit]

    log_types = request.GET.getlist("log_type", ["exercise", "video", "content"])

    output_logs = []
    output_objects = []

    end_date = datetime.datetime.strptime(end_date, '%Y/%m/%d') if end_date else datetime.datetime.now()
    start_date = datetime.datetime.strptime(start_date, '%Y/%m/%d') if start_date else end_date - datetime.timedelta(time_window)

    # Fetch the per-language caches once, rather than twice per log row
    # inside the comprehension below.
    content_cache = get_content_cache(language=lang)
    exercise_cache = get_exercise_cache(language=lang)

    for log_type in log_types:
        LogModel, fields, id_field, obj_ids, objects = return_log_type_details(log_type, topic_ids)

        log_objects = LogModel.objects.filter(user__in=learners, **obj_ids).values(*fields)

        if not topic_ids:
            # Restrict to the requested time window, but fall back to all
            # logs if nothing was active inside the window.
            topic_objects = log_objects.filter(
                latest_activity_timestamp__gte=start_date,
                latest_activity_timestamp__lte=end_date)
            if topic_objects.count() == 0:
                topic_objects = log_objects
            # Build a dict keyed by content id to de-duplicate; exercises
            # are the fallback lookup when the id isn't in the content cache.
            objects = dict([
                (obj[id_field],
                 content_cache.get(obj[id_field], exercise_cache.get(obj[id_field])))
                for obj in topic_objects
            ]).values()

        output_objects.extend(objects)
        output_logs.extend(log_objects)

    return JsonResponse({
        "logs": output_logs,
        "contents": output_objects,
        # Sometimes 'learners' gets collapsed to a list from the Queryset.
        # This insures against that eventuality.
        "learners": [{
            "first_name": learner.first_name,
            "last_name": learner.last_name,
            "username": learner.username,
            "pk": learner.pk
        } for learner in learners],
        "page": page,
        "pages": pages,
        "limit": limit
    })
def _get_user_usage_data(users, groups=None, period_start=None, period_end=None, group_id=None):
    """
    Returns facility user data, within the given date range.

    Returns a tuple ``(user_data, group_data)`` of OrderedDicts keyed by
    user pk and group pk respectively; ``None`` is used as the group key
    for ungrouped users.
    """
    groups = groups or set([user.group for user in users])

    # Now compute stats, based on queried data
    num_exercises = len(get_exercise_cache())
    user_data = OrderedDict()
    group_data = OrderedDict()

    # Make queries efficiently
    exercise_logs = ExerciseLog.objects.filter(user__in=users, complete=True)
    video_logs = VideoLog.objects.filter(user__in=users, total_seconds_watched__gt=0)
    login_logs = UserLogSummary.objects.filter(user__in=users)
    # filter results
    login_logs = login_logs.filter(total_seconds__gt=0)

    if period_start:
        exercise_logs = exercise_logs.filter(completion_timestamp__gte=period_start)
        video_logs = video_logs.filter(completion_timestamp__gte=period_start)

    if period_end:
        # MUST: Fix the midnight bug where period end covers up to the prior
        # day only because period end is datetime(year, month, day, hour=0,
        # minute=0), meaning midnight of previous day.
        # Example:
        #   If period_end == '2014-12-01', we cannot include the records
        #   dated '2014-12-01 09:30'.
        # So to fix this, we change it to '2014-12-01 23:59.999999'.
        period_end = dateutil.parser.parse(period_end)
        period_end = period_end + dateutil.relativedelta.relativedelta(days=+1, microseconds=-1)
        exercise_logs = exercise_logs.filter(completion_timestamp__lte=period_end)
        video_logs = video_logs.filter(completion_timestamp__lte=period_end)

    if period_start and period_end:
        # NOTE(review): these re-apply the same bounds already added above;
        # harmless (idempotent filters), kept for behavioral parity.
        exercise_logs = exercise_logs.filter(
            Q(completion_timestamp__gte=period_start) & Q(completion_timestamp__lte=period_end))

        q1 = Q(completion_timestamp__isnull=False) & \
            Q(completion_timestamp__gte=period_start) & \
            Q(completion_timestamp__lte=period_end)
        video_logs = video_logs.filter(q1)

        login_q1 = Q(start_datetime__gte=period_start) & Q(start_datetime__lte=period_end) & \
            Q(end_datetime__gte=period_start) & Q(end_datetime__lte=period_end)
        login_logs = login_logs.filter(login_q1)

    # Force results in a single query
    exercise_logs = list(exercise_logs.values("exercise_id", "user__pk"))
    video_logs = list(video_logs.values("video_id", "user__pk"))
    login_logs = list(login_logs.values("activity_type", "total_seconds", "user__pk"))

    for user in users:
        user_data[user.pk] = OrderedDict()
        user_data[user.pk]["id"] = user.pk
        user_data[user.pk]["first_name"] = user.first_name
        user_data[user.pk]["last_name"] = user.last_name
        user_data[user.pk]["username"] = user.username
        user_data[user.pk]["group"] = user.group
        user_data[user.pk]["total_report_views"] = 0  # was: report_stats["count__sum"] or 0
        user_data[user.pk]["total_logins"] = 0  # was: login_stats["count__sum"] or 0
        user_data[user.pk]["total_hours"] = 0  # was: (login_stats["total_seconds__sum"] or 0)/3600.
        user_data[user.pk]["total_exercises"] = 0
        user_data[user.pk]["pct_mastery"] = 0.
        user_data[user.pk]["exercises_mastered"] = []
        user_data[user.pk]["total_videos"] = 0
        user_data[user.pk]["videos_watched"] = []

    for elog in exercise_logs:
        user_data[elog["user__pk"]]["total_exercises"] += 1
        # Guard against ZeroDivisionError when the exercise cache is empty.
        if num_exercises:
            user_data[elog["user__pk"]]["pct_mastery"] += 1. / num_exercises
        user_data[elog["user__pk"]]["exercises_mastered"].append(elog["exercise_id"])

    for vlog in video_logs:
        user_data[vlog["user__pk"]]["total_videos"] += 1
        user_data[vlog["user__pk"]]["videos_watched"].append(vlog["video_id"])

    for llog in login_logs:
        if llog["activity_type"] == UserLog.get_activity_int("coachreport"):
            user_data[llog["user__pk"]]["total_report_views"] += 1
        elif llog["activity_type"] == UserLog.get_activity_int("login"):
            user_data[llog["user__pk"]]["total_hours"] += (llog["total_seconds"]) / 3600.
            user_data[llog["user__pk"]]["total_logins"] += 1

    # Include a synthetic None group for ungrouped users when no specific
    # (real) group was requested.
    include_ungrouped = group_id is None or group_id == UNGROUPED
    for group in list(groups) + ([None] if include_ungrouped else []):
        group_pk = getattr(group, "pk", None)
        group_name = getattr(group, "name", _(UNGROUPED))
        group_title = getattr(group, "title", _(UNGROUPED))
        group_data[group_pk] = {
            "id": group_pk,
            "name": group_name,
            "title": group_title,
            "total_logins": 0,
            "total_hours": 0,
            "total_users": 0,
            "total_videos": 0,
            "total_exercises": 0,
            "pct_mastery": 0,
        }

    # Add group data.  Allow a fake group UNGROUPED
    for user in users:
        group_pk = getattr(user.group, "pk", None)
        if group_pk not in group_data:
            logging.error("User %s still in nonexistent group %s!" % (user.id, group_pk))
            continue
        group_data[group_pk]["total_users"] += 1
        group_data[group_pk]["total_logins"] += user_data[user.pk]["total_logins"]
        group_data[group_pk]["total_hours"] += user_data[user.pk]["total_hours"]
        group_data[group_pk]["total_videos"] += user_data[user.pk]["total_videos"]
        group_data[group_pk]["total_exercises"] += user_data[user.pk]["total_exercises"]
        # Running average of mastery across the group's users seen so far.
        total_mastery_so_far = (group_data[group_pk]["pct_mastery"] *
                                (group_data[group_pk]["total_users"] - 1) +
                                user_data[user.pk]["pct_mastery"])
        group_data[group_pk]["pct_mastery"] = total_mastery_so_far / group_data[group_pk]["total_users"]

    # Drop the synthetic ungrouped entry if it ended up empty.
    if len(group_data) == 1 and None in group_data:
        if not group_data[None]["total_users"]:
            del group_data[None]

    return (user_data, group_data)
def handle(self, *args, **options):
    """
    Export the KA Lite topic tree for one language as a .zim file.

    Takes exactly one positional argument: the destination .zim path.
    Builds an annotated topic tree, hard-links (or transcodes) videos and
    thumbnails into a temp directory, renders HTML pages, then invokes
    zimwriterfs over the assembled directory.
    """
    if len(args) != 1:
        raise CommandError("Takes exactly 1 argument")

    dest_file = os.path.abspath(args[0])

    logger.info("Starting up KA Lite export2zim command")
    beginning = datetime.now()
    logger.info("Begin: {}".format(beginning))

    language = options.get('language')
    if not language:
        raise CommandError("Must specify a language!")

    if not options.get('tmp_dir'):
        tmp_dir = os.path.join(tempfile.gettempdir(), 'ka-lite-zim_{}'.format(language))
    else:
        tmp_dir = options.get('tmp_dir')
    tmp_dir = os.path.abspath(tmp_dir)

    if os.path.exists(tmp_dir) and os.listdir(tmp_dir):
        if options['clear']:
            logger.info("Clearing directory {}".format(tmp_dir))
            shutil.rmtree(tmp_dir)
        elif options['resume']:
            logger.info("Resuming in dirty tmp directory {}".format(tmp_dir))
        else:
            raise CommandError(
                "{} not empty, use the -c option to clean it, -r to resume, or use an empty destination directory."
                .format(tmp_dir))

    zimwriterfs = options.get("zimwriterfs", None)
    publisher = options.get("publisher")
    transcode2webm = options.get("transcode2webm")

    ffmpeg = find_executable("ffmpeg")
    if not ffmpeg:
        logger.warning(
            "FFMpeg not found in your path, you won't be able to create missing thumbnails or transcode to webm."
        )

    if not zimwriterfs:
        zimwriterfs = find_executable("zimwriterfs")
        if not zimwriterfs:
            raise CommandError(
                "Could not find zimwriterfs in your path, try specifying --zimwriterfs=/path"
            )
    if not os.path.exists(zimwriterfs):
        raise CommandError("Invalid --zimwriterfs")

    from kalite_zim import __name__ as base_path
    base_path = os.path.abspath(base_path)
    data_path = os.path.join(base_path, 'data')

    # Where subtitles are found in KA Lite
    subtitle_src_dir = i18n.get_srt_path(language)

    logger.info("Will export videos for language: {}".format(language))
    logger.info("Preparing KA Lite topic tree...")

    # Use live data
    if not options.get('test'):
        # This way of doing things will be deprecated in KA Lite 0.16
        topic_tree_json_path = topic_tools_settings.TOPICS_FILEPATHS.get('khan')
        content_cache = get_content_cache(language=language, annotate=True)
        exercise_cache = get_exercise_cache(language=language)
    # Use test data
    else:
        topic_tree_json_path = os.path.join(data_path, 'test_topics.json')
        content_cache = json.load(
            open(os.path.join(data_path, 'test_content.json')))
        exercise_cache = json.load(
            open(os.path.join(data_path, 'test_exercise.json')))

    topic_tree = softload_json(topic_tree_json_path, logger=logger.debug, raises=False)

    content_json_output = {}
    exercise_json_output = {}

    def annotate_tree(topic, depth=0, parent=None):
        """
        We need to recurse into the tree in order to annotate elements
        with topic data and exercise data
        """
        children = topic.get('children', [])
        new_children = []
        for child_topic in children:
            # Only Videos and Topics are kept (and recursed into).
            if child_topic.get("kind") in ("Video", "Topic"):
                annotate_tree(child_topic, depth=depth + 1, parent=topic)
                new_children.append(child_topic)
        topic["children"] = new_children
        if topic.get("kind") == "Exercise":
            topic['exercise'] = exercise_cache.get(topic.get("id"), {})
            exercise_json_output[topic.get("id")] = topic['exercise']
        elif topic.get("kind") == "Topic":
            pass
        else:
            topic['exercise'] = None
            topic['content'] = content_cache.get(topic.get("id"), {})
            content_json_output[topic.get("id")] = topic['content']
            if not topic['content']:
                logger.error('No content!?, id is: {}'.format(topic.get('id')))

        # Translate everything for good measure
        with i18n.translate_block(language):
            topic["title"] = _(topic.get("title", ""))
            topic["description"] = _(topic.get("description", "")) if topic.get("description") else ""

        topic["url"] = topic["id"] + ".html"
        topic["parent"] = parent
        topic["depth"] = depth
        # Drop fields the exported pages don't use.
        for key in ("child_data", "keywords", "hide", "contains"):
            topic.pop(key, None)

    # 1. Annotate a topic tree
    annotate_tree(topic_tree)

    # 2. Now go through the tree and copy each element into the destination
    # zim file system
    def copy_media(node):
        if node['kind'] == 'Topic':
            # Don't do anything if it's a topic
            pass
        elif node['kind'] == 'Exercise':
            # Exercises cannot be displayed
            node["content"]["available"] = False
        elif node['kind'] == 'Video':
            if node['content']['format'] == "webm":
                logger.warning("Found a duplicate ID for {}, re-downloading".format(node['id']))
                node['content']['format'] = "mp4"
            # Available is False by default until we locate the file
            node["content"]["available"] = False
            node_dir = os.path.join(tmp_dir, node["path"])
            if not os.path.exists(node_dir):
                os.makedirs(node_dir)
            video_file_name = node['id'] + '.' + node['content']['format']
            thumb_file_name = node['id'] + '.png'
            video_file_src = os.path.join(CONTENT_ROOT, video_file_name)
            video_file_dest = os.path.join(node_dir, video_file_name)
            thumb_file_src = os.path.join(CONTENT_ROOT, thumb_file_name)
            thumb_file_dest = os.path.join(node_dir, thumb_file_name)

            if options['download'] and not os.path.exists(video_file_src):
                logger.info("Video file being downloaded to: {}".format(video_file_src))
                download_video(
                    node['content']['youtube_id'],
                    node['content']['format'],
                    CONTENT_ROOT,
                )

            if os.path.exists(video_file_src):
                if transcode2webm:
                    # Two-pass VP8 encode; the pass log must be fresh.
                    ffmpeg_pass_log = "/tmp/logfile_vp8.fpf"
                    if os.path.isfile(ffmpeg_pass_log):
                        os.unlink(ffmpeg_pass_log)
                    video_file_name = node['id'] + '.webm'
                    video_file_dest = os.path.join(node_dir, video_file_name)
                    if os.path.isfile(video_file_dest):
                        logger.info("Already encoded: {}".format(video_file_dest))
                    else:
                        ffmpeg_base_args = [
                            ffmpeg,
                            "-i", video_file_src,
                            "-codec:v", "libvpx",
                            "-quality", "best",
                            "-cpu-used", "0",
                            "-b:v", "300k",
                            "-qmin", "10",  # 10=lowest value
                            "-qmax", "35",  # 42=highest value
                            "-maxrate", "300k",
                            "-bufsize", "600k",
                            "-threads", "8",
                            # "-vf", "scale=-1",
                            "-codec:a", "libvorbis",
                            # "-b:a", "128k",
                            "-aq", "5",
                            "-f", "webm",
                        ]
                        ffmpeg_pass1 = ffmpeg_base_args + [
                            "-an",  # Disables audio, no effect first pass
                            "-pass", "1",
                            "-passlogfile", ffmpeg_pass_log,
                            video_file_dest,
                        ]
                        ffmpeg_pass2 = ffmpeg_base_args + [
                            "-pass", "2",
                            "-y",
                            "-passlogfile", ffmpeg_pass_log,
                            video_file_dest,
                        ]
                        for cmd in (ffmpeg_pass1, ffmpeg_pass2):
                            process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
                            stdout_data, _stderr_data = process.communicate()
                            if process.returncode != 0:
                                logger.error("Error invoking ffmpeg: {}".format(
                                    (_stderr_data or "") + (stdout_data or "")))
                                logger.error("Command was: {}".format(" ".join(cmd)))
                                raise CommandError("Could not complete transcoding")
                        node['content']['format'] = "webm"
                else:
                    # If not transcoding, just link the original file
                    os.link(video_file_src, video_file_dest)

                node["video_url"] = os.path.join(node["path"], video_file_name)
                copy_media.videos_found += 1
                logger.info("Videos processed: {}".format(copy_media.videos_found))
                node["content"]["available"] = True

                # Create thumbnail if it wasn't downloaded
                if not os.path.exists(thumb_file_src):
                    fp = create_thumbnail(video_file_src, output_format="png")
                    if fp is None:
                        logger.error("Failed to create thumbnail for {}".format(video_file_src))
                    else:
                        logger.info("Successfully created thumbnail for {}".format(video_file_src))
                        # Fixed: was `file(thumb_file_src, 'wb')` (Py2-only
                        # builtin, handle never closed).
                        with open(thumb_file_src, 'wb') as thumb_file:
                            thumb_file.write(fp.read())

                # Handle thumbnail
                if os.path.exists(thumb_file_src):
                    node["thumbnail_url"] = os.path.join(node["path"], node['id'] + '.png')
                    if not os.path.exists(thumb_file_dest):
                        os.link(thumb_file_src, thumb_file_dest)
                else:
                    node["thumbnail_url"] = None

                subtitle_srt = os.path.join(subtitle_src_dir, node['id'] + '.srt')
                if os.path.isfile(subtitle_srt):
                    subtitle_vtt = os.path.join(node_dir, node['id'] + '.vtt')
                    # Convert to .vtt because this format is understood
                    # by latest video.js and the old ones that read
                    # .srt don't work with newer jquery etc.
                    submarine_parser(subtitle_srt, subtitle_vtt)
                    if not os.path.exists(subtitle_vtt):
                        logger.warning("Subtitle not converted: {}".format(subtitle_srt))
                    else:
                        logger.info("Subtitle convert from SRT to VTT: {}".format(subtitle_vtt))
                        node["subtitle_url"] = os.path.join(node["path"], node['id'] + '.vtt')
            else:
                if options['download']:
                    logger.error("File not found or downloaded: {}".format(video_file_src))
        else:
            logger.error("Invalid node, kind: {}".format(node.get("kind", None)))
            # Exercises cannot be displayed
            node["content"] = {"available": False}

        # Prune empty topics and unavailable videos from the output tree.
        new_children = []
        for child in node.get('children', []):
            copy_media(child)
            empty_topic = child["kind"] == "Topic" and not child.get("children", [])
            unavailable_video = child["kind"] == "Video" and not child.get(
                "content", {}).get("available", False)
            if not (empty_topic or unavailable_video):
                new_children.append(child)
        node['children'] = new_children

    copy_media.videos_found = 0

    def render_topic_pages(node):
        """Render one HTML page per node, depth-first."""
        parents = [node] if node.get("children") else []
        parent = node["parent"]
        while parent:
            parents.append(parent)
            parent = parent["parent"]

        # Finally, render templates into the destination
        template_context = {
            "topic_tree": topic_tree,
            "topic": node,
            "parents": parents
        }
        with i18n.translate_block(language):
            topic_html = render_to_string("kalite_zim/topic.html", template_context)
        # Replace absolute references to '/static' with relative
        topic_html = topic_html.replace("/static", "static")

        dest_html = os.path.join(tmp_dir, node["id"] + ".html")
        logger.info("Rendering {}".format(dest_html))
        with open(dest_html, "w") as html_file:
            html_file.write(topic_html)
        render_topic_pages.pages_rendered += 1

        for child in node.get('children', []):
            render_topic_pages(child)

    render_topic_pages.pages_rendered = 0

    logger.info("Hard linking video files from KA Lite...")
    copy_media(topic_tree)

    sys.stderr.write("\n")
    logger.info("Done!")

    # Configure django-compressor
    compressor_init(os.path.join(base_path, 'static'))

    # Finally, render templates into the destination
    template_context = {
        "topic_tree": topic_tree,
        "welcome": True,
    }

    with i18n.translate_block(language):
        welcome_html = render_to_string("kalite_zim/welcome.html", template_context)
        about_html = render_to_string("kalite_zim/about.html", template_context)
    # Replace absolute references to '/static' with relative
    welcome_html = welcome_html.replace("/static", "static")
    about_html = about_html.replace("/static", "static")

    # Write the welcome.html file
    with open(os.path.join(tmp_dir, 'welcome.html'), 'w') as welcome_file:
        welcome_file.write(welcome_html)
    with open(os.path.join(tmp_dir, 'about.html'), 'w') as about_file:
        about_file.write(about_html)

    # Render all topic html files
    render_topic_pages(topic_tree)

    # Copy in static data after it's been handled by django compressor
    # (this happens during template rendering)
    shutil.copytree(os.path.join(base_path, 'static'), os.path.join(tmp_dir, 'static'))

    ending = datetime.now()
    duration = int((ending - beginning).total_seconds())

    logger.info("Total number of videos found: {}".format(copy_media.videos_found))
    logger.info("Total number of topic pages created: {}".format(
        render_topic_pages.pages_rendered))

    logger.info("Invoking zimwriterfs, writing to: {}".format(dest_file))

    zimwriterfs_args = (
        zimwriterfs,
        "--welcome", "welcome.html",
        "--favicon", "static/img/ka_leaf.png",
        "--publisher", publisher,
        "--creator", "KhanAcademy.org",
        "--description", "Khan Academy ({})".format(language),
        # NOTE(review): '--description' is passed twice; one of these was
        # probably meant to be '--title' -- confirm against zimwriterfs CLI.
        "--description", "Videos from Khan Academy",
        "--language", language,
        tmp_dir,
        dest_file,
    )

    process = subprocess.Popen(zimwriterfs_args, stdout=subprocess.PIPE)
    stdout_data, _stderr_data = process.communicate()
    if process.returncode != 0:
        # Fixed: previously `.format` was called on logger.error's return
        # value (None) -> AttributeError, and _stderr_data is None because
        # stderr is not piped -> TypeError on concatenation.
        logger.error("Error invoking zimwriterfs: {}".format(
            (_stderr_data or "") + (stdout_data or "")))

    logger.info("Duration: {h:} hours, {m:} minutes, {s:} seconds".format(
        h=duration // 3600,
        m=(duration % 3600) // 60,
        s=duration % 60,
    ))