Example #1
def initialize_content_caches(force=False):
    """
    Catch-all function to regenerate any in-memory content caches that need
    annotation with file availability.
    """
    for lang in i18n.get_installed_language_packs(force=True).keys():
        logging.info("Preloading exercise data for language {lang}.".format(lang=lang))
        topic_tools.get_exercise_cache(force=force, language=lang)
        logging.info("Preloading content data for language {lang}.".format(lang=lang))
        topic_tools.get_content_cache(force=force, annotate=True, language=lang)
        logging.info("Preloading topic tree data for language {lang}.".format(lang=lang))
        topic_tools.get_topic_tree(force=force, annotate=True, language=lang)
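A minimal usage sketch (the call site is an assumption; the example itself only defines the function):

# Warm the exercise, content, and topic-tree caches for every installed language pack
initialize_content_caches(force=True)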
Example #2
def create_some_learner_data():
    """
    Just create a lil' bit-o-data of each type, to populate the table.
    """
    user = CreateStudentMixin.create_student()
    attempt_states = (  # (name, streak_progress, attempt_count)
        ("not started", 0, 0),
        ("completed", 100, 15),
        ("attempted", 50, 10),
        ("struggling", 30, 25),
    )
    exercises = random.sample(get_exercise_cache().keys(), len(attempt_states))  # Important they are *distinct*
    for state in attempt_states:
        exercise = exercises.pop()
        log, created = ExerciseLog.objects.get_or_create(exercise_id=exercise, user=user)
        if "not started" != state[0]:
            log.streak_progress, log.attempts = state[1:]
            for i in range(0, log.attempts):
                AttemptLog.objects.get_or_create(
                    exercise_id=exercise,
                    user=user,
                    seed=i,
                    timestamp=datetime.datetime.now()
                )
            log.latest_activity_timestamp = datetime.datetime.now()
            log.save()
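The "*distinct*" comment relies on random.sample drawing without replacement, so each attempt state gets paired with a different exercise. A quick illustration:

import random
# sample() never repeats an element, unlike calling random.choice() in a loop
print(random.sample(["a", "b", "c", "d"], 2))  # e.g. ['c', 'a'] -- never ['a', 'a']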
Example #3
def before_scenario(context, scenario):
    base_before_scenario(context, scenario)

    if "with_progress" in context.tags:
        user = FacilityUser.objects.get(username=context.user,
                                        facility=getattr(
                                            context, "facility", None))
        exercises = random.sample(get_exercise_cache().keys(), 2)
        for exercise in exercises:
            log = ExerciseLog(
                exercise_id=exercise,
                user=user,
                streak_progress=50,
                attempts=15,
                latest_activity_timestamp=datetime.datetime.now())
            log.save()
        context.exercises = exercises

        videos = random.sample(get_content_cache().keys(), 2)

        for video in videos:
            log = VideoLog(youtube_id=video,
                           video_id=video,
                           user=user,
                           total_seconds_watched=100,
                           points=600,
                           latest_activity_timestamp=datetime.datetime.now())
            log.save()
        context.videos = videos
Example #4
    def obj_get_list(self, bundle, **kwargs):
        # self.permission_check(bundle.request)
        exercise_logs = self.get_object_list(bundle.request)
        pre_user = None
        filtered_logs = []
        exercises_info = []

        for e in exercise_logs:
            if e.user != pre_user:
                pre_user = e.user
                attempts = exercise_logs.filter(user=e.user).aggregate(
                    Sum("attempts"))["attempts__sum"]
                mastered = exercise_logs.filter(user=e.user,
                                                complete=True).count()
                exercises_info = exercise_logs.filter(user=e.user).values(
                    'exercise_id', 'attempts', 'struggling')
                for i in exercises_info:
                    i["exercise_url"] = get_exercise_cache().get(
                        i['exercise_id']).get("path")

                user_dic = {
                    "user_name": e.user.get_name(),
                    "total_attempts": attempts,
                    "mastered": mastered,
                    "exercises": list(exercises_info)
                }
                filtered_logs.append(e)
                self.user_info.append(user_dic)

        self.user_info.reverse()
        return filtered_logs
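Note that the pre_user check above only skips *consecutive* logs for the same user, so it implicitly assumes the queryset arrives grouped by user. A sketch of how the caller could guarantee that (the order_by clause is an assumption, not shown in the example):

exercise_logs = self.get_object_list(bundle.request).order_by("user")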
Example #5
def before_feature(context, feature):
    base_before_feature(context, feature)

    if "with_progress" in feature.tags:
        user = FacilityUser.objects.get(username=context.user, facility=getattr(context, "facility", None))
        exercises = random.sample(get_exercise_cache().keys(), 2)
        for exercise in exercises:
            log = ExerciseLog(
                exercise_id=exercise,
                user=user,
                streak_progress=50,
                attempts=15,
                latest_activity_timestamp=datetime.datetime.now()
                )
            log.save()
        context.exercises = exercises

        videos = random.sample(get_content_cache().keys(), 2)

        for video in videos:
            log = VideoLog(
                youtube_id=video,
                video_id=video,
                user=user,
                total_seconds_watched=100,
                points=600,
                latest_activity_timestamp=datetime.datetime.now()
                )
            log.save()
        context.videos = videos
Example #6
def create_some_learner_data():
    """
    Just create a lil' bit-o-data of each type, to populate the table.
    """
    user = CreateStudentMixin.create_student()
    attempt_states = (  # (name, streak_progress, attempt_count)
        ("not started", 0, 0),
        ("completed", 100, 15),
        ("attempted", 50, 10),
        ("struggling", 30, 25),
    )
    exercises = random.sample(
        get_exercise_cache().keys(),
        len(attempt_states))  # Important they are *distinct*
    for state in attempt_states:
        exercise = exercises.pop()
        log, created = ExerciseLog.objects.get_or_create(exercise_id=exercise,
                                                         user=user)
        if "not started" != state[0]:
            log.streak_progress, log.attempts = state[1:]
            for i in range(0, log.attempts):
                AttemptLog.objects.get_or_create(
                    exercise_id=exercise,
                    user=user,
                    seed=i,
                    timestamp=datetime.datetime.now())
            log.latest_activity_timestamp = datetime.datetime.now()
            log.save()
Example #7
def learner_logs(request):

    # GET parameters arrive as strings, so cast before doing arithmetic with them
    page = int(request.GET.get("page", 1))
    limit = int(request.GET.get("limit", 50))

    # Look back a week by default
    time_window = int(request.GET.get("time_window", 7))

    start_date = request.GET.get("start_date", None)
    end_date = request.GET.get("end_date", None)
    topic_ids = request.GET.getlist("topic_id", [])

    learners = get_learners_from_GET(request)

    pages = int(ceil(len(learners) / float(limit)))

    if page * limit < len(learners):
        learners = learners[(page - 1) * limit: page * limit]

    log_types = request.GET.getlist("log_type", ["exercise", "video", "content"])

    output_logs = []
    output_objects = []

    end_date = datetime.datetime.strptime(end_date, '%Y/%m/%d') if end_date else datetime.datetime.now()
    start_date = datetime.datetime.strptime(start_date, '%Y/%m/%d') if start_date else end_date - datetime.timedelta(time_window)

    for log_type in log_types:
        LogModel, fields, id_field, obj_ids, objects = return_log_type_details(log_type, topic_ids)

        log_objects = LogModel.objects.filter(user__in=learners, **obj_ids).values(*fields)
        if not topic_ids:
            topic_objects = log_objects.filter(latest_activity_timestamp__gte=start_date, latest_activity_timestamp__lte=end_date)
            if topic_objects.count() == 0:
                topic_objects = log_objects
            objects = dict([(obj[id_field], get_content_cache().get(obj[id_field], get_exercise_cache().get(obj[id_field]))) for obj in topic_objects]).values()
        output_objects.extend(objects)
        output_logs.extend(log_objects)

    return JsonResponse({
        "logs": output_logs,
        "contents": output_objects,
        # Sometimes 'learners' gets collapsed to a list from the QuerySet. This guards against that eventuality.
        "learners": [{
            "first_name": learner.first_name,
            "last_name": learner.last_name,
            "username": learner.username,
            "pk": learner.pk
            } for learner in learners],
        "page": page,
        "pages": pages,
        "limit": limit
    })
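The start_date and end_date query parameters must be in %Y/%m/%d form; anything else makes strptime raise ValueError. For instance:

import datetime
datetime.datetime.strptime("2015/03/01", "%Y/%m/%d")  # -> datetime.datetime(2015, 3, 1, 0, 0)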
Example #8
def aggregate_learner_logs(request):

    learners = get_learners_from_GET(request)

    # Cast GET parameters: event_limit is used as a slice index, time_window in a timedelta
    event_limit = int(request.GET.get("event_limit", 10))

    # Look back a week by default
    time_window = int(request.GET.get("time_window", 7))

    topic_ids = request.GET.getlist("topic_id", [])

    log_types = request.GET.getlist("log_type", ["exercise", "video", "content"])

    output_logs = []

    output_dict = {
        "content_time_spent": 0,
        "exercise_attempts": 0,
        "exercise_mastery": None,
    }
    start_date = datetime.datetime.now() - datetime.timedelta(time_window)
    end_date = datetime.datetime.now()

    for log_type in log_types:

        LogModel, fields, id_field, obj_ids, objects = return_log_type_details(log_type, topic_ids)

        log_objects = LogModel.objects.filter(
            user__in=learners,
            latest_activity_timestamp__gte=start_date,
            latest_activity_timestamp__lte=end_date, **obj_ids).order_by("-latest_activity_timestamp")


        if log_type == "video":
            output_dict["content_time_spent"] += log_objects.aggregate(Sum("total_seconds_watched"))["total_seconds_watched__sum"] or 0
        elif log_type == "content":
            output_dict["content_time_spent"] += log_objects.aggregate(Sum("time_spent"))["time_spent__sum"] or 0
        elif log_type == "exercise":
            output_dict["exercise_attempts"] = AttemptLog.objects.filter(user__in=learners,
                timestamp__gte=start_date,
                timestamp__lte=end_date).count()
            if log_objects.aggregate(Avg("streak_progress"))["streak_progress__avg"] is not None:
                output_dict["exercise_mastery"] = round(log_objects.aggregate(Avg("streak_progress"))["streak_progress__avg"])
        output_logs.extend(log_objects)

    # Report total time in hours
    output_dict["content_time_spent"] = round(output_dict["content_time_spent"]/3600.0,1)
    output_logs.sort(key=lambda x: x.latest_activity_timestamp, reverse=True)
    output_dict["learner_events"] = [{
        "learner": log.user.get_name(),
        "complete": log.complete,
        "struggling": getattr(log, "struggling", None),
        "progress": getattr(log, "streak_progress", getattr(log, "progress", None)),
        "content": get_exercise_cache().get(getattr(log, "exercise_id", ""), get_content_cache().get(getattr(log, "video_id", getattr(log, "content_id", "")), {})),
        } for log in output_logs[:event_limit]]
    output_dict["total_time_logged"] = UserLogSummary.objects\
        .filter(user__in=learners, last_activity_datetime__gte=start_date, last_activity_datetime__lte=end_date)\
        .aggregate(Sum("total_seconds")).get("total_seconds__sum") or 0
    return JsonResponse(output_dict)
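The `or 0` after each aggregate() call above is deliberate: Django returns None, not 0, when the queryset is empty. A minimal sketch of the pattern:

from django.db.models import Sum
total = VideoLog.objects.aggregate(Sum("total_seconds_watched"))["total_seconds_watched__sum"] or 0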
Example #9
def initialize_content_caches(force=False):
    """
    Catch-all function to regenerate any in-memory content caches that need
    annotation with file availability.
    """
    for lang in i18n.get_installed_language_packs(force=True).keys():
        logging.info(
            "Preloading exercise data for language {lang}.".format(lang=lang))
        topic_tools.get_exercise_cache(force=force, language=lang)
        logging.info(
            "Preloading content data for language {lang}.".format(lang=lang))
        topic_tools.get_content_cache(force=force,
                                      annotate=True,
                                      language=lang)
        logging.info("Preloading topic tree data for language {lang}.".format(
            lang=lang))
        topic_tools.get_topic_tree(force=force, annotate=True, language=lang)
Example #10
def get_exercise_prereqs(exercises):
    """Return a list of prequisites (if applicable) for each specified exercise."""

    ex_cache = get_exercise_cache()
    prereqs = []
    for exercise in exercises:
        prereqs += ex_cache[exercise]['prerequisites']

    return prereqs
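A hypothetical call (real keys come from get_exercise_cache(); the slice and the sample output are just for illustration):

some_ids = list(get_exercise_cache().keys())[:2]
print(get_exercise_prereqs(some_ids))  # e.g. ['addition_1', 'counting']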
Example #11
def impl(context):
    exercises = random.sample(get_exercise_cache().keys(), 10)
    for user in FacilityUser.objects.all():
        for exercise in exercises:
            log, created = ExerciseLog.objects.get_or_create(
                exercise_id=exercise,
                user=user,
                streak_progress=100,
                attempts=15,
                latest_activity_timestamp=datetime.datetime.now())
Example #12
def impl(context):
    exercises = random.sample(get_exercise_cache().keys(), 10)
    for user in FacilityUser.objects.all():
        for exercise in exercises:
            log, created = ExerciseLog.objects.get_or_create(
                exercise_id=exercise,
                user=user,
                streak_progress=100,
                attempts=15,
                latest_activity_timestamp=datetime.datetime.now()
                )
Example #13
    def create_empty_entry(cls, entity_id, kind, playlist):
        if kind != "Quiz":
            if kind == "Video":
                topic_node = get_content_cache().get(entity_id)
            elif kind == "Exercise":
                topic_node = get_exercise_cache().get(entity_id)
            title = topic_node["title"]
            path = topic_node["path"]
        else:
            title = playlist["title"]
            path = ""
        entry = {"id": entity_id, "kind": kind, "status": "notstarted", "score": 0, "title": title, "path": path}

        return entry
Example #14
def get_exercise_prereqs(exercise_ids):
    """
    Return a list of prerequisites (if applicable) for each specified exercise.

    :param exercise_ids: A list of exercise ids.
    :return: A list of prerequisite exercises (as dicts), if any are known.
    """
    ex_cache = get_exercise_cache()
    prereqs = []
    for exercise_id in exercise_ids:
        exercise = ex_cache.get(exercise_id)
        prereqs += exercise['prerequisites'] if exercise else []

    return prereqs
Example #15
def _list_all_exercises_with_bad_links():
    """This is a standalone helper method used to provide KA with a list of exercises with bad URLs in them."""
    url_pattern = r"https?://www\.khanacademy\.org/[\/\w\-]*/./(?P<slug>[\w\-]+)"
    assessment_items = json.load(open(settings.KHAN_ASSESSMENT_ITEM_JSON_PATH))
    for ex in get_exercise_cache().values():
        checked_urls = []
        displayed_title = False
        for aidict in ex.get("all_assessment_items", []):
            ai = assessment_items[aidict["id"]]
            for match in re.finditer(url_pattern, ai["item_data"], flags=re.IGNORECASE):
                url = str(match.group(0))
                if url in checked_urls:
                    continue
                checked_urls.append(url)
                status_code = requests.get(url).status_code
                if status_code != 200:
                    if not displayed_title:
                        print "EXERCISE: '%s'" % ex["title"], ex["path"]
                        displayed_title = True
                    print "\t", status_code, url
Example #16
    def create_empty_entry(cls, entity_id, kind, playlist):
        if kind != "Quiz":
            if kind == "Video":
                topic_node = get_content_cache().get(entity_id)
            elif kind == "Exercise":
                topic_node = get_exercise_cache().get(entity_id)
            title = topic_node["title"]
            path = topic_node["path"]
        else:
            title = playlist["title"]
            path = ""
        entry = {
            "id": entity_id,
            "kind": kind,
            "status": "notstarted",
            "score": 0,
            "title": title,
            "path": path,
        }

        return entry
Example #17
def _list_all_exercises_with_bad_links():
    """This is a standalone helper method used to provide KA with a list of exercises with bad URLs in them."""
    url_pattern = r"https?://www\.khanacademy\.org/[\/\w\-]*/./(?P<slug>[\w\-]+)"
    assessment_items = json.load(open(ASSESSMENT_ITEMS_PATH))
    for ex in get_exercise_cache().values():
        checked_urls = []
        displayed_title = False
        for aidict in ex.get("all_assessment_items", []):
            ai = assessment_items[aidict["id"]]
            for match in re.finditer(url_pattern,
                                     ai["item_data"],
                                     flags=re.IGNORECASE):
                url = str(match.group(0))
                if url in checked_urls:
                    continue
                checked_urls.append(url)
                status_code = requests.get(url).status_code
                if status_code != 200:
                    if not displayed_title:
                        print "EXERCISE: '%s'" % ex["title"], ex["path"]
                        displayed_title = True
                    print "\t", status_code, url
Example #18
    def user_progress_detail(cls, user_id, playlist_id, language=None):
        """
        Return a list of video, exercise, and quiz log PlaylistProgressDetail
        objects associated with a specific user and playlist ID.
        """
        if not language:
            language = Settings.get("default_language") or settings.LANGUAGE_CODE

        user = FacilityUser.objects.get(id=user_id)
        playlist = next((pl for pl in get_leafed_topics() if pl.get("id") == playlist_id), None)

        pl_video_ids, pl_exercise_ids = cls.get_playlist_entry_ids(playlist)

        # Retrieve video, exercise, and quiz logs that appear in this playlist
        user_vid_logs, user_ex_logs = cls.get_user_logs(user, pl_video_ids, pl_exercise_ids)

        # Format & append quiz the quiz log, if it exists
        # quiz_exists, quiz_log, quiz_pct_score = cls.get_quiz_log(user, (playlist.get("entries") or playlist.get("children")), playlist.get("id"))

        # Finally, sort an ordered list of the playlist entries, with user progress
        # injected where it exists.
        progress_details = list()
        for entity_id in playlist.get("children"):
            entry = {}
            leaf_node = get_content_cache(language=language).get(entity_id) or get_exercise_cache(language=language).get(entity_id) or {}
            kind = leaf_node.get("kind")

            status = "notstarted"
            score = 0

            if kind == "Video":
                vid_log = next((vid_log for vid_log in user_vid_logs if vid_log["video_id"] == entity_id), None)
                if vid_log:
                    if vid_log.get("complete"):
                        status = "complete"
                    elif vid_log.get("total_seconds_watched"):
                        status = "inprogress"

                    score = int(float(vid_log.get("points")) / float(750) * 100)

            elif kind == "Exercise":
                ex_log = next((ex_log for ex_log in user_ex_logs if ex_log["exercise_id"] == entity_id), None)
                if ex_log:
                    if ex_log.get("struggling"):
                        status = "struggling"
                    elif ex_log.get("complete"):
                        status = "complete"
                    elif ex_log.get("attempts"):
                        status = "inprogress"

                    score = ex_log.get('streak_progress')

            entry = {
                "id": entity_id,
                "kind": kind,
                "status": status,
                "score": score,
                "title": leaf_node["title"],
                "path": leaf_node["path"],
            }

            progress_details.append(cls(**entry))

        return progress_details
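The cache lookup above chains `or`, so the exercise cache is only consulted when the content cache misses, and the final `{}` keeps the subsequent leaf_node.get("kind") from blowing up on None. In miniature:

content, exercises = {"v1": {"kind": "Video"}}, {"e1": {"kind": "Exercise"}}
leaf = content.get("e1") or exercises.get("e1") or {}  # -> {'kind': 'Exercise'}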
Example #19
def test_detail_view(request, test_id):
    """View details of student performance on specific exams"""

    facility, group_id, context = coach_nav_context(request, "test")
    # get users in this facility and group
    users = get_user_queryset(request, facility, group_id)

    # Get test object
    test_resource = TestResource()
    test_obj = test_resource._read_test(test_id=test_id)

    # get all of the test logs for this specific test object and generated by these specific users
    if group_id:
        test_logs = TestLog.objects.filter(user__group=group_id, test=test_id)

        # Narrow to ungrouped facility users
        if group_id == control_panel_api_resources.UNGROUPED_KEY:
            if facility:
                test_logs = TestLog.objects.filter(facility=facility, user__group__isnull=True)
            else:
                test_logs = TestLog.objects.filter(user__group__isnull=True)
    else:
        # covers the all groups case
        test_logs = TestLog.objects.filter(user__facility=facility, test=test_id)

    results_table, scores_dict = OrderedDict(), OrderedDict()
    # build this up now to use in summary stats section
    ex_ids = set(literal_eval(test_obj.ids))
    for ex in ex_ids:
        scores_dict[ex] = []
    for s in users:
        s.name = s.get_name()
        user_attempts = AttemptLog.objects.filter(user=s, context_type='test', context_id=test_id)
        results_table[s] = []
        attempts_count_total, attempts_count_correct_total = 0, 0
        for ex in ex_ids:
            attempts = [attempt for attempt in user_attempts if attempt.exercise_id == ex]

            attempts_count = len(attempts)
            attempts_count_correct = len([attempt for attempt in attempts if attempt.correct])

            attempts_count_total += attempts_count
            attempts_count_correct_total += attempts_count_correct

            if attempts_count:
                score = round(100 * float(attempts_count_correct)/float(attempts_count), 1)
                scores_dict[ex].append(score)
                display_score = "%d%%" % score
            else:
                score = ''
                display_score = ''

            results_table[s].append({
                'display_score': display_score,
                'raw_score': score,
            })

        # Calc overall score
        if attempts_count_total:
            score = round(100 * float(attempts_count_correct_total)/float(attempts_count_total), 1)
            display_score = "%d%%" % score
            fraction_correct = "(%(correct)d/%(attempts)d)" % ({'correct': attempts_count_correct_total, 'attempts': attempts_count_total})
        else:
            score = ''
            display_score = ''
            fraction_correct = ''

        results_table[s].append({
            'display_score': display_score,
            'raw_score': score,
            'title': fraction_correct,
        })

    # This retrieves stats for individual exercises
    stats_dict = OrderedDict()
    for stat in SUMMARY_STATS:
        stats_dict[stat] = []
        for ex in ex_ids:
            scores_list = scores_dict[ex]
            if scores_list:
                stats_dict[stat].append("%d%%" % return_list_stat(scores_list, stat))
            else:
                stats_dict[stat].append('')

    # replace the exercise ids with their full names
    exercises = get_exercise_cache()
    ex_titles = []
    for ex in ex_ids:
        ex_titles.append(exercises[ex]['title'])

    # provide a list of test options to view for this group/facility combo
    if group_id:
        test_logs = TestLog.objects.filter(user__group=group_id)
    else:
        # covers the all/no groups case
        test_logs = TestLog.objects.filter(user__facility=facility)
    test_objects = test_resource._read_tests()
    unique_test_ids = set([test_log.test for test_log in test_logs])
    test_options = [{'id': obj.test_id, 'url': reverse('test_detail_view', kwargs={'test_id':obj.test_id}), 'title': obj.title} for obj in test_objects if obj.test_id in unique_test_ids]
    context = plotting_metadata_context(request, facility=facility)
    context.update({
        "test_obj": test_obj,
        "ex_cols": ex_titles,
        "results_table": results_table,
        "stats_dict": stats_dict,
        "test_options": test_options,
    })
    return context
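The float() casts in the score calculations above are load-bearing under Python 2, where dividing two ints truncates:

# Python 2: 3 / 4 == 0, hence the casts before dividing
print(round(100 * float(3) / float(4), 1))  # -> 75.0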
Example #20
    def user_progress_detail(cls, user_id, playlist_id):
        """
        Return a list of video, exercise, and quiz log PlaylistProgressDetail
        objects associated with a specific user and playlist ID.
        """
        user = FacilityUser.objects.get(id=user_id)
        playlist = next((pl for pl in [plist.__dict__ for plist in Playlist.all()] + get_leafed_topics() if pl.get("id") == playlist_id), None)

        pl_video_ids, pl_exercise_ids = cls.get_playlist_entry_ids(playlist)

        # Retrieve video, exercise, and quiz logs that appear in this playlist
        user_vid_logs, user_ex_logs = cls.get_user_logs(user, pl_video_ids, pl_exercise_ids)

        # Format & append quiz the quiz log, if it exists
        quiz_exists, quiz_log, quiz_pct_score = cls.get_quiz_log(user, (playlist.get("entries") or playlist.get("children")), playlist.get("id"))

        # Finally, sort an ordered list of the playlist entries, with user progress
        # injected where it exists.
        progress_details = list()
        for ent in (playlist.get("entries") or playlist.get("children")):
            entry = {}
            kind = ent.get("entity_kind") or ent.get("kind")
            if kind == "Divider":
                continue
            elif kind == "Video":
                entity_id = get_slug2id_map().get(ent.get("entity_id")) or ent.get("id")
                vid_log = next((vid_log for vid_log in user_vid_logs if vid_log["video_id"] == entity_id), None)
                if vid_log:
                    if vid_log.get("complete"):
                        status = "complete"
                    elif vid_log.get("total_seconds_watched"):
                        status = "inprogress"
                    else:
                        status = "notstarted"

                    leaf_node = get_content_cache().get(vid_log["video_id"])

                    entry = {
                        "id": entity_id,
                        "kind": kind,
                        "status": status,
                        "score": int(float(vid_log.get("points")) / float(750) * 100),
                        "title": leaf_node["title"],
                        "path": leaf_node["path"],
                    }

            elif kind == "Exercise":
                entity_id = (ent.get("entity_id") or ent.get("id"))
                ex_log = next((ex_log for ex_log in user_ex_logs if ex_log["exercise_id"] == entity_id), None)
                if ex_log:
                    if ex_log.get("struggling"):
                        status = "struggling"
                    elif ex_log.get("complete"):
                        status = "complete"
                    elif ex_log.get("attempts"):
                        status = "inprogress"

                    ex_log_id = ex_log.get("exercise_id")
                    leaf_node = get_exercise_cache().get(ex_log_id)

                    entry = {
                        "id": ex_log_id,
                        "kind": kind,
                        "status": status,
                        "score": ex_log.get("streak_progress"),
                        "title": leaf_node["title"],
                        "path": leaf_node["path"],
                    }

            elif kind == "Quiz":
                entity_id = playlist["id"]
                if quiz_log:
                    if quiz_log.complete:
                        if quiz_pct_score <= 59:
                            status = "fail"
                        elif quiz_pct_score <= 79:
                            status = "borderline"
                        else:
                            status = "pass"
                    elif quiz_log.attempts:
                        status = "inprogress"
                    else:
                        status = "notstarted"

                    quiz_log_id = quiz_log.quiz

                    entry = {
                        "id": quiz_log_id,
                        "kind": "Quiz",
                        "status": status,
                        "score": quiz_pct_score,
                        "title": playlist.get("title"),
                        "path": "",
                    }

            if not entry:
                entry = cls.create_empty_entry(entity_id, kind, playlist)

            progress_details.append(cls(**entry))

        return progress_details
Example #21
    def user_progress_detail(cls, user_id, playlist_id, language=None):
        """
        Return a list of video, exercise, and quiz log PlaylistProgressDetail
        objects associated with a specific user and playlist ID.
        """
        if not language:
            language = Settings.get(
                "default_language") or settings.LANGUAGE_CODE

        user = FacilityUser.objects.get(id=user_id)
        playlist = next(
            (pl for pl in get_leafed_topics() if pl.get("id") == playlist_id),
            None)

        pl_video_ids, pl_exercise_ids = cls.get_playlist_entry_ids(playlist)

        # Retrieve video, exercise, and quiz logs that appear in this playlist
        user_vid_logs, user_ex_logs = cls.get_user_logs(
            user, pl_video_ids, pl_exercise_ids)

        # Format & append quiz the quiz log, if it exists
        # quiz_exists, quiz_log, quiz_pct_score = cls.get_quiz_log(user, (playlist.get("entries") or playlist.get("children")), playlist.get("id"))

        # Finally, sort an ordered list of the playlist entries, with user progress
        # injected where it exists.
        progress_details = list()
        for entity_id in playlist.get("children"):
            entry = {}
            leaf_node = get_content_cache(
                language=language).get(entity_id) or get_exercise_cache(
                    language=language).get(entity_id) or {}
            kind = leaf_node.get("kind")

            status = "notstarted"
            score = 0

            if kind == "Video":
                vid_log = next((vid_log for vid_log in user_vid_logs
                                if vid_log["video_id"] == entity_id), None)
                if vid_log:
                    if vid_log.get("complete"):
                        status = "complete"
                    elif vid_log.get("total_seconds_watched"):
                        status = "inprogress"

                    score = int(
                        float(vid_log.get("points")) / float(750) * 100)

            elif kind == "Exercise":
                ex_log = next((ex_log for ex_log in user_ex_logs
                               if ex_log["exercise_id"] == entity_id), None)
                if ex_log:
                    if ex_log.get("struggling"):
                        status = "struggling"
                    elif ex_log.get("complete"):
                        status = "complete"
                    elif ex_log.get("attempts"):
                        status = "inprogress"

                    score = ex_log.get('streak_progress')

            entry = {
                "id": entity_id,
                "kind": kind,
                "status": status,
                "score": score,
                "title": leaf_node["title"],
                "path": leaf_node["path"],
            }

            progress_details.append(cls(**entry))

        return progress_details
Example #22
    def user_progress_detail(cls, user_id, playlist_id):
        """
        Return a list of video, exercise, and quiz log PlaylistProgressDetail
        objects associated with a specific user and playlist ID.
        """
        user = FacilityUser.objects.get(id=user_id)
        playlist = next((pl for pl in get_leafed_topics() if pl.get("id") == playlist_id), None)

        pl_video_ids, pl_exercise_ids = cls.get_playlist_entry_ids(playlist)

        # Retrieve video, exercise, and quiz logs that appear in this playlist
        user_vid_logs, user_ex_logs = cls.get_user_logs(user, pl_video_ids, pl_exercise_ids)

        # Format & append quiz the quiz log, if it exists
        # quiz_exists, quiz_log, quiz_pct_score = cls.get_quiz_log(user, (playlist.get("entries") or playlist.get("children")), playlist.get("id"))

        # Finally, sort an ordered list of the playlist entries, with user progress
        # injected where it exists.
        progress_details = list()
        for entity_id in playlist.get("children"):
            entry = {}
            leaf_node = get_content_cache().get(entity_id, get_exercise_cache().get(entity_id, {}))
            kind = leaf_node.get("kind")

            if kind == "Video":
                vid_log = next((vid_log for vid_log in user_vid_logs if vid_log["video_id"] == entity_id), None)
                if vid_log:
                    if vid_log.get("complete"):
                        status = "complete"
                    elif vid_log.get("total_seconds_watched"):
                        status = "inprogress"
                    else:
                        status = "notstarted"

                    entry = {
                        "id": entity_id,
                        "kind": kind,
                        "status": status,
                        "score": int(float(vid_log.get("points")) / float(750) * 100),
                        "title": leaf_node["title"],
                        "path": leaf_node["path"],
                    }

            elif kind == "Exercise":
                ex_log = next((ex_log for ex_log in user_ex_logs if ex_log["exercise_id"] == entity_id), None)
                if ex_log:
                    if ex_log.get("struggling"):
                        status = "struggling"
                    elif ex_log.get("complete"):
                        status = "complete"
                    elif ex_log.get("attempts"):
                        status = "inprogress"

                    entry = {
                        "id": entity_id,
                        "kind": kind,
                        "status": status,
                        "score": ex_log.get("streak_progress"),
                        "title": leaf_node["title"],
                        "path": leaf_node["path"],
                    }

            # Oh Quizzes, we hardly knew ye!
            # TODO (rtibbles): Sort out the status of Quizzes, and either reinstate them or remove them.
            # Quizzes were introduced to provide a way of practicing multiple types of exercise at once
            # However, there is currently no way to access them, and the manner for generating them (from the now deprecated Playlist models) is inaccessible
            # elif kind == "Quiz":
            #     entity_id = playlist["id"]
            #     if quiz_log:
            #         if quiz_log.complete:
            #             if quiz_pct_score <= 59:
            #                 status = "fail"
            #             elif quiz_pct_score <= 79:
            #                 status = "borderline"
            #             else:
            #                 status = "pass"
            #         elif quiz_log.attempts:
            #             status = "inprogress"
            #         else:
            #             status = "notstarted"

            #         quiz_log_id = quiz_log.quiz

            #         entry = {
            #             "id": quiz_log_id,
            #             "kind": "Quiz",
            #             "status": status,
            #             "score": quiz_pct_score,
            #             "title": playlist.get("title"),
            #             "path": "",
            #         }

            if not entry:
                entry = cls.create_empty_entry(entity_id, kind, playlist)

            progress_details.append(cls(**entry))

        return progress_details
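Unlike the `or` chaining in Example #18, the nested .get(key, default) lookup here builds its fallback eagerly: get_exercise_cache().get(entity_id, {}) runs on every iteration, even when the content cache hits, because Python evaluates default arguments before the call. In miniature:

content, exercises = {"v1": {"kind": "Video"}}, {}
node = content.get("v1", exercises.get("v1", {}))  # fallback lookup runs regardless of the hit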
Example #23
    def user_progress_detail(cls, user_id, playlist_id):
        """
        Return a list of video, exercise, and quiz log PlaylistProgressDetail
        objects associated with a specific user and playlist ID.
        """
        user = FacilityUser.objects.get(id=user_id)
        playlist = next(
            (
                pl
                for pl in [plist.__dict__ for plist in Playlist.all()] + get_leafed_topics()
                if pl.get("id") == playlist_id
            ),
            None,
        )

        pl_video_ids, pl_exercise_ids = cls.get_playlist_entry_ids(playlist)

        # Retrieve video, exercise, and quiz logs that appear in this playlist
        user_vid_logs, user_ex_logs = cls.get_user_logs(user, pl_video_ids, pl_exercise_ids)

        # Format & append quiz the quiz log, if it exists
        quiz_exists, quiz_log, quiz_pct_score = cls.get_quiz_log(
            user, (playlist.get("entries") or playlist.get("children")), playlist.get("id")
        )

        # Finally, sort an ordered list of the playlist entries, with user progress
        # injected where it exists.
        progress_details = list()
        for ent in playlist.get("entries") or playlist.get("children"):
            entry = {}
            kind = ent.get("entity_kind") or ent.get("kind")
            if kind == "Divider":
                continue
            elif kind == "Video":
                entity_id = get_slug2id_map().get(ent.get("entity_id")) or ent.get("id")
                vid_log = next((vid_log for vid_log in user_vid_logs if vid_log["video_id"] == entity_id), None)
                if vid_log:
                    if vid_log.get("complete"):
                        status = "complete"
                    elif vid_log.get("total_seconds_watched"):
                        status = "inprogress"
                    else:
                        status = "notstarted"

                    leaf_node = get_content_cache().get(vid_log["video_id"])

                    entry = {
                        "id": entity_id,
                        "kind": kind,
                        "status": status,
                        "score": int(float(vid_log.get("points")) / float(750) * 100),
                        "title": leaf_node["title"],
                        "path": leaf_node["path"],
                    }

            elif kind == "Exercise":
                entity_id = ent.get("entity_id") or ent.get("id")
                ex_log = next((ex_log for ex_log in user_ex_logs if ex_log["exercise_id"] == entity_id), None)
                if ex_log:
                    if ex_log.get("struggling"):
                        status = "struggling"
                    elif ex_log.get("complete"):
                        status = "complete"
                    elif ex_log.get("attempts"):
                        status = "inprogress"

                    ex_log_id = ex_log.get("exercise_id")
                    leaf_node = get_exercise_cache().get(ex_log_id)

                    entry = {
                        "id": ex_log_id,
                        "kind": kind,
                        "status": status,
                        "score": ex_log.get("streak_progress"),
                        "title": leaf_node["title"],
                        "path": leaf_node["path"],
                    }

            elif kind == "Quiz":
                entity_id = playlist["id"]
                if quiz_log:
                    if quiz_log.complete:
                        if quiz_pct_score <= 59:
                            status = "fail"
                        elif quiz_pct_score <= 79:
                            status = "borderline"
                        else:
                            status = "pass"
                    elif quiz_log.attempts:
                        status = "inprogress"
                    else:
                        status = "notstarted"

                    quiz_log_id = quiz_log.quiz

                    entry = {
                        "id": quiz_log_id,
                        "kind": "Quiz",
                        "status": status,
                        "score": quiz_pct_score,
                        "title": playlist.get("title"),
                        "path": "",
                    }

            if not entry:
                entry = cls.create_empty_entry(entity_id, kind, playlist)

            progress_details.append(cls(**entry))

        return progress_details
Example #24
    def handle(self, *args, **options):
        if len(args) != 1:
            raise CommandError("Takes exactly 1 argument")

        dest_file = os.path.abspath(args[0])

        logger.info("Starting up KA Lite export2zim command")
        beginning = datetime.now()
        logger.info("Begin: {}".format(beginning))

        language = options.get('language')
        if not language:
            raise CommandError("Must specify a language!")

        if not options.get('tmp_dir'):
            tmp_dir = os.path.join(tempfile.gettempdir(), 'ka-lite-zim_{}'.format(language))
        else:
            tmp_dir = options.get('tmp_dir')

        tmp_dir = os.path.abspath(tmp_dir)

        if os.path.exists(tmp_dir) and os.listdir(tmp_dir):
            if options['clear']:
                logger.info("Clearing directory {}".format(tmp_dir))
                shutil.rmtree(tmp_dir)
            elif options['resume']:
                logger.info("Resuming in dirty tmp directory {}".format(tmp_dir))
            else:
                raise CommandError(
                    "{} not empty, use the -c option to clean it, -r to resume, or use an empty destination directory.".format(
                        tmp_dir
                    )
                )

        zimwriterfs = options.get("zimwriterfs", None)
        publisher = options.get("publisher")
        transcode2webm = options.get("transcode2webm")
        ffmpeg = find_executable("ffmpeg")

        if not ffmpeg:
            logger.warning("FFMpeg not found in your path, you won't be able to create missing thumbnails or transcode to webm.")

        if not zimwriterfs:
            zimwriterfs = find_executable("zimwriterfs")
            if not zimwriterfs:
                raise CommandError("Could not find zimwriterfs in your path, try specifying --zimwriterfs=/path")

        if not os.path.exists(zimwriterfs):
            raise CommandError("Invalid --zimwriterfs")

        from kalite_zim import __name__ as base_path
        base_path = os.path.abspath(base_path)
        data_path = os.path.join(base_path, 'data')

        # Where subtitles are found in KA Lite
        subtitle_src_dir = i18n.get_srt_path(language)

        logger.info("Will export videos for language: {}".format(language))
        logger.info("Preparing KA Lite topic tree...")

        # Use live data
        if not options.get('test'):
            # This way of doing things will be deprecated in KA Lite 0.16
            topic_tree_json_path = topic_tools_settings.TOPICS_FILEPATHS.get('khan')
            content_cache = get_content_cache(language=language, annotate=True)
            exercise_cache = get_exercise_cache(language=language)
        # Use test data
        else:
            topic_tree_json_path = os.path.join(data_path, 'test_topics.json')
            content_cache = json.load(
                open(os.path.join(data_path, 'test_content.json'))
            )
            exercise_cache = json.load(
                open(os.path.join(data_path, 'test_exercise.json'))
            )

        topic_tree = softload_json(topic_tree_json_path, logger=logger.debug, raises=False)

        content_json_output = {}
        exercise_json_output = {}

        def annotate_tree(topic, depth=0, parent=None):
            """
            We need to recurse into the tree in order to annotate elements
            with topic data and exercise data
            """
            children = topic.get('children', [])
            new_children = []
            for child_topic in children:
                if child_topic.get("kind") in ("Video", "Topic"):
                    annotate_tree(child_topic, depth=depth + 1, parent=topic)
                    new_children.append(child_topic)
            topic["children"] = new_children
            if topic.get("kind") == "Exercise":
                topic['exercise'] = exercise_cache.get(topic.get("id"), {})
                exercise_json_output[topic.get("id")] = topic['exercise']
            elif topic.get("kind") == "Topic":
                pass
            else:
                topic['exercise'] = None
                topic['content'] = content_cache.get(topic.get("id"), {})
                content_json_output[topic.get("id")] = topic['content']
                if not topic['content']:
                    logger.error('No content!?, id is: {}'.format(topic.get('id')))

            # Translate everything for good measure
            with i18n.translate_block(language):
                topic["title"] = _(topic.get("title", ""))
                topic["description"] = _(topic.get("description", "")) if topic.get("description") else ""

            topic["url"] = topic["id"] + ".html"
            topic["parent"] = parent
            topic["depth"] = depth
            for key in ("child_data", "keywords", "hide", "contains"):
                topic.pop(key, None)

        # 1. Annotate a topic tree
        annotate_tree(topic_tree)

        # 2. Now go through the tree and copy each element into the destination
        # zim file system

        def copy_media(node):
            if node['kind'] == 'Topic':
                # Don't do anything if it's a topic
                pass
            elif node['kind'] == 'Exercise':
                # Exercises cannot be displayed
                node["content"]["available"] = False
            elif node['kind'] == 'Video':

                if node['content']['format'] == "webm":
                    logger.warning("Found a duplicate ID for {}, re-downloading".format(node['id']))
                    node['content']['format'] = "mp4"

                # Available is False by default until we locate the file
                node["content"]["available"] = False
                node_dir = os.path.join(tmp_dir, node["path"])
                if not os.path.exists(node_dir):
                    os.makedirs(node_dir)
                video_file_name = node['id'] + '.' + node['content']['format']
                thumb_file_name = node['id'] + '.png'
                video_file_src = os.path.join(CONTENT_ROOT, video_file_name)
                video_file_dest = os.path.join(node_dir, video_file_name)
                thumb_file_src = os.path.join(CONTENT_ROOT, thumb_file_name)
                thumb_file_dest = os.path.join(node_dir, thumb_file_name)

                if options['download'] and not os.path.exists(video_file_src):
                    logger.info("Video file being downloaded to: {}".format(video_file_src))
                    download_video(
                        node['content']['youtube_id'],
                        node['content']['format'],
                        CONTENT_ROOT,
                    )

                if os.path.exists(video_file_src):
                    if transcode2webm:
                        ffmpeg_pass_log = "/tmp/logfile_vp8.fpf"
                        if os.path.isfile(ffmpeg_pass_log):
                            os.unlink(ffmpeg_pass_log)
                        video_file_name = node['id'] + '.webm'
                        video_file_dest = os.path.join(node_dir, video_file_name)
                        if os.path.isfile(video_file_dest):
                            logger.info("Already encoded: {}".format(video_file_dest))
                        else:
                            ffmpeg_base_args = [
                                ffmpeg,
                                "-i", video_file_src,
                                "-codec:v", "libvpx",
                                "-quality", "best",
                                "-cpu-used", "0",
                                "-b:v", "300k",
                                "-qmin", "10",  # 10=lowest value
                                "-qmax", "35",  # 42=highest value
                                "-maxrate", "300k",
                                "-bufsize", "600k",
                                "-threads", "8",
                                # "-vf", "scale=-1",
                                "-codec:a", "libvorbis",
                                # "-b:a", "128k",
                                "-aq", "5",
                                "-f", "webm",
                            ]
                            ffmpeg_pass1 = ffmpeg_base_args + [
                                "-an",  # Disables audio, no effect first pass
                                "-pass", "1",
                                "-passlogfile", ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            ffmpeg_pass2 = ffmpeg_base_args + [
                                "-pass", "2",
                                "-y", "-passlogfile", ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            for cmd in (ffmpeg_pass1, ffmpeg_pass2):
                                process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
                                stdout_data, _stderr_data = process.communicate()
                                if process.returncode != 0:
                                    logger.error("Error invoking ffmpeg: {}".format((_stderr_data or "") + (stdout_data or "")))
                                    logger.error("Command was: {}".format(" ".join(cmd)))
                                    raise CommandError("Could not complete transcoding")
                        node['content']['format'] = "webm"
                    else:
                        # If not transcoding, just link the original file
                        os.link(video_file_src, video_file_dest)
                    node["video_url"] = os.path.join(
                        node["path"],
                        video_file_name
                    )
                    copy_media.videos_found += 1
                    logger.info("Videos processed: {}".format(copy_media.videos_found))
                    node["content"]["available"] = True

                    # Create thumbnail if it wasn't downloaded
                    if not os.path.exists(thumb_file_src):
                        fp = create_thumbnail(video_file_src, output_format="png")
                        if fp is None:
                            logger.error("Failed to create thumbnail for {}".format(video_file_src))
                        else:
                            logger.info("Successfully created thumbnail for {}".format(video_file_src))
                            file(thumb_file_src, 'wb').write(fp.read())

                    # Handle thumbnail
                    if os.path.exists(thumb_file_src):
                        node["thumbnail_url"] = os.path.join(
                            node["path"],
                            node['id'] + '.png'
                        )
                        if not os.path.exists(thumb_file_dest):
                            os.link(thumb_file_src, thumb_file_dest)
                    else:
                        node["thumbnail_url"] = None

                    subtitle_srt = os.path.join(
                        subtitle_src_dir,
                        node['id'] + '.srt'
                    )
                    if os.path.isfile(subtitle_srt):
                        subtitle_vtt = os.path.join(
                            node_dir,
                            node['id'] + '.vtt'
                        )
                        # Convert to .vtt because this format is understood
                        # by latest video.js and the old ones that read
                        # .srt don't work with newer jquery etc.
                        submarine_parser(subtitle_srt, subtitle_vtt)
                        if not os.path.exists(subtitle_vtt):
                            logger.warning("Subtitle not converted: {}".format(subtitle_srt))
                        else:
                            logger.info("Subtitle convert from SRT to VTT: {}".format(subtitle_vtt))
                            node["subtitle_url"] = os.path.join(
                                node["path"],
                                node['id'] + '.vtt'
                            )

                else:
                    if options['download']:
                        logger.error("File not found or downloaded: {}".format(video_file_src))
            else:
                logger.error("Invalid node, kind: {}".format(node.get("kind", None)))
                # Exercises cannot be displayed
                node["content"] = {"available": False}

            new_children = []
            for child in node.get('children', []):
                copy_media(child)
                empty_topic = child["kind"] == "Topic" and not child.get("children", [])
                unavailable_video = child["kind"] == "Video" and not child.get("content", {}).get("available", False)
                if not (empty_topic or unavailable_video):
                    new_children.append(child)
            node['children'] = new_children
        copy_media.videos_found = 0

        def render_topic_pages(node):

            parents = [node] if node.get("children") else []
            parent = node["parent"]
            while parent:
                parents.append(parent)
                parent = parent["parent"]

            # Finally, render templates into the destination
            template_context = {
                "topic_tree": topic_tree,
                "topic": node,
                "parents": parents
            }
            with i18n.translate_block(language):
                topic_html = render_to_string("kalite_zim/topic.html", template_context)
            # Replace absolute references to '/static' with relative
            topic_html = topic_html.replace("/static", "static")

            dest_html = os.path.join(tmp_dir, node["id"] + ".html")
            logger.info("Rendering {}".format(dest_html))

            open(dest_html, "w").write(topic_html)

            render_topic_pages.pages_rendered += 1

            for child in node.get('children', []):
                render_topic_pages(child)
        render_topic_pages.pages_rendered = 0

        logger.info("Hard linking video files from KA Lite...")
        copy_media(topic_tree)

        sys.stderr.write("\n")
        logger.info("Done!")

        # Configure django-compressor
        compressor_init(os.path.join(base_path, 'static'))

        # Finally, render templates into the destination
        template_context = {
            "topic_tree": topic_tree,
            "welcome": True,
        }

        with i18n.translate_block(language):
            welcome_html = render_to_string("kalite_zim/welcome.html", template_context)
            about_html = render_to_string("kalite_zim/about.html", template_context)
        # Replace absolute references to '/static' with relative
        welcome_html = welcome_html.replace("/static", "static")
        about_html = about_html.replace("/static", "static")

        # Write the welcome.html file
        open(os.path.join(tmp_dir, 'welcome.html'), 'w').write(welcome_html)
        open(os.path.join(tmp_dir, 'about.html'), 'w').write(about_html)

        # Render all topic html files
        render_topic_pages(topic_tree)

        # Copy in static data after it's been handled by django compressor
        # (this happens during template rendering)

        shutil.copytree(os.path.join(base_path, 'static'), os.path.join(tmp_dir, 'static'))

        ending = datetime.now()
        duration = int((ending - beginning).total_seconds())
        logger.info("Total number of videos found: {}".format(copy_media.videos_found))
        logger.info("Total number of topic pages created: {}".format(render_topic_pages.pages_rendered))

        logger.info("Invoking zimwriterfs, writing to: {}".format(dest_file))

        zimwriterfs_args = (
            zimwriterfs,
            "--welcome", "welcome.html",
            "--favicon", "static/img/ka_leaf.png",
            "--publisher", publisher,
            "--creator", "KhanAcademy.org",
            "--description", "Khan Academy ({})".format(language),
            "--description", "Videos from Khan Academy",
            "--language", language,
            tmp_dir,
            dest_file,
        )

        process = subprocess.Popen(zimwriterfs_args,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        stdout_data, stderr_data = process.communicate()

        if process.returncode != 0:
            logger.error("Error invoking zimwriterfs: {}".format(
                (stderr_data or "") + (stdout_data or "")))

        logger.info(
            "Duration: {h:} hours, {m:} minutes, {s:} seconds".format(
                h=duration // 3600,
                m=(duration % 3600) // 60,
                s=duration % 60,
            )
        )
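
The "hard linking" step above uses os.link, which cannot cross filesystem boundaries: if tmp_dir lives on a different mount than the KA Lite content root, the link fails with EXDEV. A minimal fallback sketch under that assumption (the helper name is hypothetical, not part of the command above):

import errno
import os
import shutil

def link_or_copy(src, dest):
    """Hard link src to dest, falling back to a real copy across devices."""
    try:
        os.link(src, dest)
    except OSError as e:
        if e.errno == errno.EXDEV:  # cross-device link not permitted
            shutil.copy2(src, dest)
        else:
            raise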
Example #25
def _get_user_usage_data(users,
                         groups=None,
                         period_start=None,
                         period_end=None,
                         group_id=None):
    """
    Returns facility user data, within the given date range.
    """

    groups = groups or set([user.group for user in users])

    # compute period start and end
    # Now compute stats, based on queried data
    num_exercises = len(get_exercise_cache())
    user_data = OrderedDict()
    group_data = OrderedDict()

    # Make queries efficiently
    exercise_logs = ExerciseLog.objects.filter(user__in=users, complete=True)
    video_logs = VideoLog.objects.filter(user__in=users,
                                         total_seconds_watched__gt=0)
    login_logs = UserLogSummary.objects.filter(user__in=users)

    # filter results
    login_logs = login_logs.filter(total_seconds__gt=0)
    if period_start:
        exercise_logs = exercise_logs.filter(
            completion_timestamp__gte=period_start)
        video_logs = video_logs.filter(completion_timestamp__gte=period_start)
    if period_end:
        # Fix the midnight bug: a bare date parses to datetime(year, month, day,
        # hour=0, minute=0), i.e. midnight at the *start* of that day, so any
        # records from later the same day would be excluded.
        # Example:
        #   If period_end == '2014-12-01', we could not include records dated
        #   '2014-12-01 09:30'. So we shift it to '2014-12-01 23:59:59.999999'.
        period_end = dateutil.parser.parse(period_end)
        period_end = period_end + dateutil.relativedelta.relativedelta(
            days=+1, microseconds=-1)
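        # Concretely, dateutil.parser.parse('2014-12-01') yields
        # datetime(2014, 12, 1, 0, 0); relativedelta(days=+1, microseconds=-1)
        # moves it to datetime(2014, 12, 1, 23, 59, 59, 999999), the last
        # representable instant of that day.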
        exercise_logs = exercise_logs.filter(
            completion_timestamp__lte=period_end)
        video_logs = video_logs.filter(completion_timestamp__lte=period_end)
    if period_start and period_end:
        exercise_logs = exercise_logs.filter(
            Q(completion_timestamp__gte=period_start)
            & Q(completion_timestamp__lte=period_end))

        q1 = Q(completion_timestamp__isnull=False) & \
            Q(completion_timestamp__gte=period_start) & \
            Q(completion_timestamp__lte=period_end)
        video_logs = video_logs.filter(q1)

        login_q1 = Q(start_datetime__gte=period_start) & Q(start_datetime__lte=period_end) & \
            Q(end_datetime__gte=period_start) & Q(end_datetime__lte=period_end)
        login_logs = login_logs.filter(login_q1)
    # Force results in a single query
    exercise_logs = list(exercise_logs.values("exercise_id", "user__pk"))
    video_logs = list(video_logs.values("video_id", "user__pk"))
    login_logs = list(
        login_logs.values("activity_type", "total_seconds", "user__pk"))

    for user in users:
        user_data[user.pk] = OrderedDict()
        user_data[user.pk]["id"] = user.pk
        user_data[user.pk]["first_name"] = user.first_name
        user_data[user.pk]["last_name"] = user.last_name
        user_data[user.pk]["username"] = user.username
        user_data[user.pk]["group"] = user.group

        user_data[user.pk]["total_report_views"] = 0  # report_stats["count__sum"] or 0
        user_data[user.pk]["total_logins"] = 0  # login_stats["count__sum"] or 0
        user_data[user.pk]["total_hours"] = 0  # (login_stats["total_seconds__sum"] or 0) / 3600.

        user_data[user.pk]["total_exercises"] = 0
        user_data[user.pk]["pct_mastery"] = 0.
        user_data[user.pk]["exercises_mastered"] = []

        user_data[user.pk]["total_videos"] = 0
        user_data[user.pk]["videos_watched"] = []

    for elog in exercise_logs:
        user_data[elog["user__pk"]]["total_exercises"] += 1
        user_data[elog["user__pk"]]["pct_mastery"] += 1. / num_exercises
        user_data[elog["user__pk"]]["exercises_mastered"].append(
            elog["exercise_id"])

    for vlog in video_logs:
        user_data[vlog["user__pk"]]["total_videos"] += 1
        user_data[vlog["user__pk"]]["videos_watched"].append(vlog["video_id"])

    for llog in login_logs:
        if llog["activity_type"] == UserLog.get_activity_int("coachreport"):
            user_data[llog["user__pk"]]["total_report_views"] += 1
        elif llog["activity_type"] == UserLog.get_activity_int("login"):
            user_data[llog["user__pk"]]["total_hours"] += (
                llog["total_seconds"]) / 3600.
            user_data[llog["user__pk"]]["total_logins"] += 1

    # Append None for the "ungrouped" pseudo-group when no group_id was passed
    # (or the UNGROUPED sentinel was); the boolean multiplies to 0 or 1 entries.
    for group in list(groups) + [None] * (group_id is None or group_id == UNGROUPED):
        group_pk = getattr(group, "pk", None)
        group_name = getattr(group, "name", _(UNGROUPED))
        group_title = getattr(group, "title", _(UNGROUPED))
        group_data[group_pk] = {
            "id": group_pk,
            "name": group_name,
            "title": group_title,
            "total_logins": 0,
            "total_hours": 0,
            "total_users": 0,
            "total_videos": 0,
            "total_exercises": 0,
            "pct_mastery": 0,
        }

    # Add group data.  Allow a fake group UNGROUPED
    for user in users:
        group_pk = getattr(user.group, "pk", None)
        if group_pk not in group_data:
            logging.error("User %s still in nonexistent group %s!" %
                          (user.id, group_pk))
            continue
        group_data[group_pk]["total_users"] += 1
        group_data[group_pk]["total_logins"] += user_data[
            user.pk]["total_logins"]
        group_data[group_pk]["total_hours"] += user_data[
            user.pk]["total_hours"]
        group_data[group_pk]["total_videos"] += user_data[
            user.pk]["total_videos"]
        group_data[group_pk]["total_exercises"] += user_data[
            user.pk]["total_exercises"]

        total_mastery_so_far = (group_data[group_pk]["pct_mastery"] *
                                (group_data[group_pk]["total_users"] - 1) +
                                user_data[user.pk]["pct_mastery"])
        group_data[group_pk][
            "pct_mastery"] = total_mastery_so_far / group_data[group_pk][
                "total_users"]

    if len(group_data) == 1 and None in group_data:
        if not group_data[None]["total_users"]:
            del group_data[None]

    return (user_data, group_data)
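
A minimal sketch of driving this helper, assuming a hypothetical queryset; note that period_end is passed as a string because the function itself parses it and extends it to end-of-day:

users = FacilityUser.objects.filter(facility=facility)  # hypothetical queryset
user_data, group_data = _get_user_usage_data(
    users,
    period_start=datetime.datetime(2014, 11, 1),
    period_end="2014-12-01",
)
for pk, row in user_data.items():
    print row["username"], row["total_exercises"], round(row["pct_mastery"] * 100, 1)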
Example #26
def compute_data(data_types, who, where, language=settings.LANGUAGE_CODE):
    """
    Compute the data in "data_types" for each user in "who", for the topics selected by "where"

    who: list of users
    where: topic_path
    data_types can include:
        pct_mastery
        effort
        attempts
    """

    # None indicates that the data hasn't been queried yet.
    #   We'll query it on demand, for efficiency
    topics = None
    exercises = None
    videos = None

    # Initialize an empty dictionary of data, video logs, exercise logs, for each user
    data = OrderedDict(zip([w.id for w in who], [dict() for i in range(len(who))]))  # maintain the order of the users
    vid_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))
    ex_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))
    if UserLog.is_enabled():
        activity_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))

    # Set up queries (but don't run them), so we have really easy aliases.
    #   Only do them if they haven't been done yet (tell this by passing in a value to the lambda function)
    # Topics: topics.
    # Exercises: names (ids for ExerciseLog objects)
    # Videos: video_id (ids for VideoLog objects)

    # This lambda partial creates a function to return all items with a particular path from the NODE_CACHE.
    search_fun_single_path = partial(lambda t, p: t["path"].startswith(p), p=tuple(where))
    # This lambda partial creates a function to return all items with paths matching a list of paths from NODE_CACHE.
    search_fun_multi_path = partial(lambda ts, p: any([t["path"].startswith(p) for t in ts]),  p=tuple(where))
    # Functions that use the functions defined above to return topics, exercises, and videos based on paths.
    query_topics = partial(lambda t, sf: t if t is not None else [t["id"] for t in filter(sf, get_node_cache('Topic', language=language).values())], sf=search_fun_single_path)
    query_exercises = partial(lambda e, sf: e if e is not None else [ex["id"] for ex in filter(sf, get_exercise_cache(language=language).values())], sf=search_fun_single_path)
    query_videos = partial(lambda v, sf: v if v is not None else [vid["id"] for vid in filter(sf, get_node_cache('Content', language=language).values())], sf=search_fun_single_path)

    # No users, don't bother.
    if len(who) > 0:

        # Query out all exercises, videos, exercise logs, and video logs before looping to limit requests.
        # This means we could pull data for n-dimensional coach report displays with the same number of requests!
        # Note: User activity is polled inside the loop, to prevent possible slowdown for exercise and video reports.
        exercises = query_exercises(exercises)

        videos = query_videos(videos)

        if exercises:
            ex_logs = query_logs(data.keys(), exercises, "exercise", ex_logs)

        if videos:
            vid_logs = query_logs(data.keys(), videos, "video", vid_logs)

        for data_type in (data_types if not hasattr(data_types, "lower") else [data_types]):  # convert list from string, if necessary
            if data_type in data[data.keys()[0]]:  # if the first user has it, then all do; no need to calc again.
                continue

            #
            # These are summary stats: you only get one per user
            #
            if data_type == "pct_mastery":

                # Efficient query out, spread out to dict
                for user in data.keys():
                    data[user][data_type] = 0 if not ex_logs[user] else 100. * sum([el['complete'] for el in ex_logs[user]]) / float(len(exercises))

            elif data_type == "effort":
                if "ex:attempts" in data[data.keys()[0]] and "vid:total_seconds_watched" in data[data.keys()[0]]:
                    # exercises and videos would be initialized already
                    for user in data.keys():
                        avg_attempts = 0 if len(exercises) == 0 else sum(data[user]["ex:attempts"].values()) / float(len(exercises))
                        avg_video_points = 0 if len(videos) == 0 else sum(data[user]["vid:total_seconds_watched"].values()) / float(len(videos))
                        data[user][data_type] = 100. * (0.5 * avg_attempts / 10. + 0.5 * avg_video_points / 750.)
                else:
                    data_types += ["ex:attempts", "vid:total_seconds_watched", "effort"]

            #
            # These are detail stats: you get many per user
            #
            # Just querying out data directly: Video
            elif data_type.startswith("vid:") and data_type[4:] in [f.name for f in VideoLog._meta.fields]:

                for user in data.keys():
                    data[user][data_type] = OrderedDict([(v['video_id'], v[data_type[4:]]) for v in vid_logs[user]])

            # Just querying out data directly: Exercise
            elif data_type.startswith("ex:") and data_type[3:] in [f.name for f in ExerciseLog._meta.fields]:

                for user in data.keys():
                    data[user][data_type] = OrderedDict([(el['exercise_id'], el[data_type[3:]]) for el in ex_logs[user]])

            # User Log Queries
            elif data_type.startswith("user:"******"", "activity", activity_logs)

                for user in data.keys():
                    data[user][data_type] = [log[data_type[5:]] for log in activity_logs[user]]

            # User Summary Queries
            elif data_type.startswith("usersum:") and data_type[8:] in [f.name for f in UserLogSummary._meta.fields] and UserLog.is_enabled():

                activity_logs = query_logs(data.keys(), "", "summaryactivity", activity_logs)

                for user in data.keys():
                    data[user][data_type] = sum([log[data_type[8:]] for log in activity_logs[user]])
            # Unknown requested quantity
            else:
                raise Exception("Unknown type: '%s' not in %s" % (data_type, str([f.name for f in ExerciseLog._meta.fields])))

    # Returning empty list instead of None allows javascript on client
    # side to read 'length' property without error.
    exercises = exercises or []

    videos = videos or []

    return {
        "data": data,
        "topics": topics,
        "exercises": exercises,
        "videos": videos,
    }
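
A hedged usage sketch (the user queryset and topic path are hypothetical): summary and detail statistics for a group of learners under one subtree:

report = compute_data(
    ["pct_mastery", "ex:attempts", "vid:total_seconds_watched"],
    who=list(FacilityUser.objects.filter(group=group)),  # hypothetical
    where=["khan/math/arithmetic/"],  # hypothetical topic path prefix
)
for user_id, stats in report["data"].items():
    print user_id, stats["pct_mastery"]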
Example #27
def aggregate_learner_logs(request):

    lang = request.language
    learners = get_learners_from_GET(request)

    event_limit = int(request.GET.get("event_limit", 10))

    # Look back a week by default
    time_window = int(request.GET.get("time_window", 7))

    start_date = request.GET.get("start_date", None)

    end_date = request.GET.get("end_date", None)

    topic_ids = request.GET.getlist("topic_id", [])

    log_types = request.GET.getlist("log_type", ["exercise", "video", "content"])

    output_logs = []

    output_dict = {
        "content_time_spent": 0,
        "exercise_attempts": 0,
        "exercise_mastery": None,
    }

    end_date = datetime.datetime.strptime(end_date,'%Y/%m/%d') if end_date else datetime.datetime.now()

    start_date = datetime.datetime.strptime(start_date,'%Y/%m/%d') if start_date else end_date - datetime.timedelta(time_window)

    for log_type in log_types:

        LogModel, fields, id_field, obj_ids, objects = return_log_type_details(log_type, topic_ids)

        log_objects = LogModel.objects.filter(
            user__in=learners,
            latest_activity_timestamp__gte=start_date,
            latest_activity_timestamp__lte=end_date, **obj_ids).order_by("-latest_activity_timestamp")


        if log_type == "video":
            output_dict["content_time_spent"] += log_objects.aggregate(Sum("total_seconds_watched"))["total_seconds_watched__sum"] or 0
        elif log_type == "content":
            output_dict["content_time_spent"] += log_objects.aggregate(Sum("time_spent"))["time_spent__sum"] or 0
        elif log_type == "exercise":
            output_dict["exercise_attempts"] = AttemptLog.objects.filter(user__in=learners,
                timestamp__gte=start_date,
                timestamp__lte=end_date).count()
            if log_objects.aggregate(Avg("streak_progress"))["streak_progress__avg"] is not None:
                output_dict["exercise_mastery"] = round(log_objects.aggregate(Avg("streak_progress"))["streak_progress__avg"])
        output_logs.extend(log_objects)

    # Report total time in hours
    output_dict["content_time_spent"] = round(output_dict["content_time_spent"]/3600.0,1)
    output_logs.sort(key=lambda x: x.latest_activity_timestamp, reverse=True)
    output_dict["learner_events"] = [{
        "learner": log.user.get_name(),
        "complete": log.complete,
        "struggling": getattr(log, "struggling", None),
        "progress": getattr(log, "streak_progress", getattr(log, "progress", None)),
        "content": get_exercise_cache(language=lang).get(getattr(log, "exercise_id", "")) or get_content_cache(language=lang).get(getattr(log, "video_id", None) or getattr(log, "content_id", "")) or {}
        } for log in output_logs[:event_limit]]
    output_dict["total_time_logged"] = round((UserLogSummary.objects\
        .filter(user__in=learners, start_datetime__gte=start_date, start_datetime__lte=end_date)\
        .aggregate(Sum("total_seconds")).get("total_seconds__sum") or 0)/3600.0, 1)
    return JsonResponse(output_dict)
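
Since the view reads everything from GET, a client call is just a query string. A hedged sketch (the URL prefix is hypothetical; the response keys come from output_dict above):

# GET /api/coachreports/aggregate/?event_limit=5&time_window=30&log_type=exercise&log_type=video
# Response keys: content_time_spent (hours), exercise_attempts, exercise_mastery,
# learner_events (at most event_limit entries), total_time_logged (hours)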
Example #28
def test_detail_view(request, test_id):
    """View details of student performance on specific exams"""

    facility, group_id, context = coach_nav_context(request, "test")
    # get users in this facility and group
    users = get_user_queryset(request, facility, group_id)

    # Get test object
    test_resource = TestResource()
    test_obj = test_resource._read_test(test_id=test_id)

    # get all of the test logs for this specific test object and generated by these specific users
    if group_id:
        test_logs = TestLog.objects.filter(user__group=group_id, test=test_id)

        # Narrow to ungrouped users, within the facility when one is given
        if group_id == control_panel_api_resources.UNGROUPED_KEY:
            if facility:
                test_logs = TestLog.objects.filter(user__facility=facility,
                                                   user__group__isnull=True,
                                                   test=test_id)
            else:
                test_logs = TestLog.objects.filter(user__group__isnull=True,
                                                   test=test_id)
    else:
        # covers the all groups case
        test_logs = TestLog.objects.filter(user__facility=facility,
                                           test=test_id)

    results_table, scores_dict = OrderedDict(), OrderedDict()
    # build this up now to use in summary stats section
    ex_ids = set(literal_eval(test_obj.ids))
    for ex in ex_ids:
        scores_dict[ex] = []
    for s in users:
        s.name = s.get_name()
        user_attempts = AttemptLog.objects.filter(user=s,
                                                  context_type='test',
                                                  context_id=test_id)
        results_table[s] = []
        attempts_count_total, attempts_count_correct_total = 0, 0
        for ex in ex_ids:
            attempts = [
                attempt for attempt in user_attempts
                if attempt.exercise_id == ex
            ]

            attempts_count = len(attempts)
            attempts_count_correct = len(
                [attempt for attempt in attempts if attempt.correct])

            attempts_count_total += attempts_count
            attempts_count_correct_total += attempts_count_correct

            if attempts_count:
                score = round(
                    100 * float(attempts_count_correct) /
                    float(attempts_count), 1)
                scores_dict[ex].append(score)
                display_score = "%d%%" % score
            else:
                score = ''
                display_score = ''

            results_table[s].append({
                'display_score': display_score,
                'raw_score': score,
            })

        # Calc overall score
        if attempts_count_total:
            score = round(
                100 * float(attempts_count_correct_total) /
                float(attempts_count_total), 1)
            display_score = "%d%%" % score
            fraction_correct = "(%(correct)d/%(attempts)d)" % (
                {
                    'correct': attempts_count_correct_total,
                    'attempts': attempts_count_total
                })
        else:
            score = ''
            display_score = ''
            fraction_correct = ''

        results_table[s].append({
            'display_score': display_score,
            'raw_score': score,
            'title': fraction_correct,
        })

    # This retrieves stats for individual exercises
    stats_dict = OrderedDict()
    for stat in SUMMARY_STATS:
        stats_dict[stat] = []
        for ex in ex_ids:
            scores_list = scores_dict[ex]
            if scores_list:
                stats_dict[stat].append("%d%%" %
                                        return_list_stat(scores_list, stat))
            else:
                stats_dict[stat].append('')

    # replace the exercise ids with their full names
    exercises = get_exercise_cache()
    ex_titles = []
    for ex in ex_ids:
        ex_titles.append(exercises[ex]['title'])

    # provide a list of test options to view for this group/facility combo
    if group_id:
        test_logs = TestLog.objects.filter(user__group=group_id)
    else:
        # covers the all/no groups case
        test_logs = TestLog.objects.filter(user__facility=facility)
    test_objects = test_resource._read_tests()
    unique_test_ids = set([test_log.test for test_log in test_logs])
    test_options = [{
        'id': obj.test_id,
        'url': reverse('test_detail_view', kwargs={'test_id': obj.test_id}),
        'title': obj.title,
    } for obj in test_objects if obj.test_id in unique_test_ids]
    context = plotting_metadata_context(request, facility=facility)
    context.update({
        "test_obj": test_obj,
        "ex_cols": ex_titles,
        "results_table": results_table,
        "stats_dict": stats_dict,
        "test_options": test_options,
    })
    return context
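
A worked check of the per-exercise score formula used above (the counts are hypothetical):

>>> attempts_count_correct, attempts_count = 3, 4
>>> round(100 * float(attempts_count_correct) / float(attempts_count), 1)
75.0
>>> "%d%%" % 75.0
'75%'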
Example #30
def learner_logs(request):

    lang = request.language
    page = int(request.GET.get("page", 1))

    limit = int(request.GET.get("limit", 50))

    # Look back a week by default
    time_window = int(request.GET.get("time_window", 7))

    start_date = request.GET.get("start_date", None)

    end_date = request.GET.get("end_date", None)

    topic_ids = request.GET.getlist("topic_id", [])

    learners = get_learners_from_GET(request)

    pages = int(ceil(len(learners)/float(limit)))

    # Slicing clamps at the list end, so the last (possibly partial) page works too
    learners = learners[(page - 1)*limit: page*limit]

    log_types = request.GET.getlist("log_type", ["exercise", "video", "content"])

    output_logs = []

    output_objects = []

    end_date = datetime.datetime.strptime(end_date,'%Y/%m/%d') if end_date else datetime.datetime.now()

    start_date = datetime.datetime.strptime(start_date,'%Y/%m/%d') if start_date else end_date - datetime.timedelta(time_window)

    for log_type in log_types:
        LogModel, fields, id_field, obj_ids, objects = return_log_type_details(log_type, topic_ids)

        log_objects = LogModel.objects.filter(user__in=learners, **obj_ids).values(*fields)
        if not topic_ids:
            topic_objects = log_objects.filter(latest_activity_timestamp__gte=start_date, latest_activity_timestamp__lte=end_date)
            if topic_objects.count() == 0:
                topic_objects = log_objects
            objects = dict([(obj[id_field], get_content_cache(language=lang).get(obj[id_field], get_exercise_cache(language=lang).get(obj[id_field]))) for obj in topic_objects]).values()
        output_objects.extend(objects)
        output_logs.extend(log_objects)

    return JsonResponse({
        "logs": output_logs,
        "contents": output_objects,
        # Sometimes 'learners' gets collapsed to a list from the Queryset. This guards against that eventuality.
        "learners": [{
            "first_name": learner.first_name,
            "last_name": learner.last_name,
            "username": learner.username,
            "pk": learner.pk
            } for learner in learners],
        "page": page,
        "pages": pages,
        "limit": limit
    })
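
A quick check of the pagination arithmetic above (counts hypothetical): 120 learners with limit=50 yield 3 pages, and page 2 covers list indices 50 through 99:

>>> from math import ceil
>>> int(ceil(120 / float(50)))
3
>>> (2 - 1) * 50, 2 * 50
(50, 100)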
Example #32
    def handle(self, *args, **options):
        if len(args) != 1:
            raise CommandError("Takes exactly 1 argument")

        dest_file = os.path.abspath(args[0])

        logger.info("Starting up KA Lite export2zim command")
        beginning = datetime.now()
        logger.info("Begin: {}".format(beginning))

        language = options.get('language')
        if not language:
            raise CommandError("Must specify a language!")

        if not options.get('tmp_dir'):
            tmp_dir = os.path.join(tempfile.gettempdir(),
                                   'ka-lite-zim_{}'.format(language))
        else:
            tmp_dir = options.get('tmp_dir')

        tmp_dir = os.path.abspath(tmp_dir)

        if os.path.exists(tmp_dir) and os.listdir(tmp_dir):
            if options['clear']:
                logger.info("Clearing directory {}".format(tmp_dir))
                shutil.rmtree(tmp_dir)
            elif options['resume']:
                logger.info(
                    "Resuming in dirty tmp directory {}".format(tmp_dir))
            else:
                raise CommandError(
                    "{} not empty, use the -c option to clean it, -r to resume, or use an empty destination directory."
                    .format(tmp_dir))

        zimwriterfs = options.get("zimwriterfs", None)
        publisher = options.get("publisher")
        transcode2webm = options.get("transcode2webm")
        ffmpeg = find_executable("ffmpeg")

        if not ffmpeg:
            logger.warning(
                "FFmpeg not found in your path; you won't be able to create missing thumbnails or transcode to webm."
            )

        if not zimwriterfs:
            zimwriterfs = find_executable("zimwriterfs")
            if not zimwriterfs:
                raise CommandError(
                    "Could not find zimwriterfs in your path, try specifying --zimwriterfs=/path"
                )

        if not os.path.exists(zimwriterfs):
            raise CommandError("Invalid --zimwriterfs")

        from kalite_zim import __name__ as base_path
        base_path = os.path.abspath(base_path)
        data_path = os.path.join(base_path, 'data')

        # Where subtitles are found in KA Lite
        subtitle_src_dir = i18n.get_srt_path(language)

        logger.info("Will export videos for language: {}".format(language))
        logger.info("Preparing KA Lite topic tree...")

        # Use live data
        if not options.get('test'):
            # This way of doing things will be deprecated in KA Lite 0.16
            topic_tree_json_path = topic_tools_settings.TOPICS_FILEPATHS.get(
                'khan')
            content_cache = get_content_cache(language=language, annotate=True)
            exercise_cache = get_exercise_cache(language=language)
        # Use test data
        else:
            topic_tree_json_path = os.path.join(data_path, 'test_topics.json')
            content_cache = json.load(
                open(os.path.join(data_path, 'test_content.json')))
            exercise_cache = json.load(
                open(os.path.join(data_path, 'test_exercise.json')))

        topic_tree = softload_json(topic_tree_json_path,
                                   logger=logger.debug,
                                   raises=False)

        content_json_output = {}
        exercise_json_output = {}

        def annotate_tree(topic, depth=0, parent=None):
            """
            We need to recurse into the tree in order to annotate elements
            with topic data and exercise data
            """
            children = topic.get('children', [])
            new_children = []
            for child_topic in children:
                if child_topic.get("kind") in ("Video", "Topic"):
                    annotate_tree(child_topic, depth=depth + 1, parent=topic)
                    new_children.append(child_topic)
            topic["children"] = new_children
            if topic.get("kind") == "Exercise":
                topic['exercise'] = exercise_cache.get(topic.get("id"), {})
                exercise_json_output[topic.get("id")] = topic['exercise']
            elif topic.get("kind") == "Topic":
                pass
            else:
                topic['exercise'] = None
                topic['content'] = content_cache.get(topic.get("id"), {})
                content_json_output[topic.get("id")] = topic['content']
                if not topic['content']:
                    logger.error('No content!?, id is: {}'.format(
                        topic.get('id')))

            # Translate everything for good measure
            with i18n.translate_block(language):
                topic["title"] = _(topic.get("title", ""))
                topic["description"] = _(topic.get(
                    "description", "")) if topic.get("description") else ""

            topic["url"] = topic["id"] + ".html"
            topic["parent"] = parent
            topic["depth"] = depth
            for key in ("child_data", "keywords", "hide", "contains"):
                topic.pop(key, None)

        # 1. Annotate a topic tree
        annotate_tree(topic_tree)

        # 2. Now go through the tree and copy each element into the destination
        # zim file system

        def copy_media(node):
            if node['kind'] == 'Topic':
                # Don't do anything if it's a topic
                pass
            elif node['kind'] == 'Exercise':
                # Exercises cannot be displayed
                node["content"]["available"] = False
            elif node['kind'] == 'Video':

                if node['content']['format'] == "webm":
                    logger.warning(
                        "Stored format for {} is webm; falling back to mp4".format(
                            node['id']))
                    node['content']['format'] = "mp4"

                # Available is False by default until we locate the file
                node["content"]["available"] = False
                node_dir = os.path.join(tmp_dir, node["path"])
                if not os.path.exists(node_dir):
                    os.makedirs(node_dir)
                video_file_name = node['id'] + '.' + node['content']['format']
                thumb_file_name = node['id'] + '.png'
                video_file_src = os.path.join(CONTENT_ROOT, video_file_name)
                video_file_dest = os.path.join(node_dir, video_file_name)
                thumb_file_src = os.path.join(CONTENT_ROOT, thumb_file_name)
                thumb_file_dest = os.path.join(node_dir, thumb_file_name)

                if options['download'] and not os.path.exists(video_file_src):
                    logger.info("Video file being downloaded to: {}".format(
                        video_file_src))
                    download_video(
                        node['content']['youtube_id'],
                        node['content']['format'],
                        CONTENT_ROOT,
                    )

                if os.path.exists(video_file_src):
                    if transcode2webm:
                        ffmpeg_pass_log = "/tmp/logfile_vp8.fpf"
                        if os.path.isfile(ffmpeg_pass_log):
                            os.unlink(ffmpeg_pass_log)
                        video_file_name = node['id'] + '.webm'
                        video_file_dest = os.path.join(node_dir,
                                                       video_file_name)
                        if os.path.isfile(video_file_dest):
                            logger.info(
                                "Already encoded: {}".format(video_file_dest))
                        else:
                            ffmpeg_base_args = [
                                ffmpeg,
                                "-i", video_file_src,
                                "-codec:v", "libvpx",
                                "-quality", "best",
                                "-cpu-used", "0",
                                "-b:v", "300k",
                                "-qmin", "10",  # 10=lowest value
                                "-qmax", "35",  # 42=highest value
                                "-maxrate", "300k",
                                "-bufsize", "600k",
                                "-threads", "8",
                                # "-vf", "scale=-1",
                                "-codec:a", "libvorbis",
                                # "-b:a", "128k",
                                "-aq", "5",
                                "-f", "webm",
                            ]
                            ffmpeg_pass1 = ffmpeg_base_args + [
                                "-an",  # Disables audio, no effect first pass
                                "-pass", "1",
                                "-passlogfile", ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            ffmpeg_pass2 = ffmpeg_base_args + [
                                "-pass", "2",
                                "-y",
                                "-passlogfile", ffmpeg_pass_log,
                                video_file_dest,
                            ]
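                            # Two-pass VP8 encoding: the first pass only
                            # analyses the input and writes rate statistics to
                            # ffmpeg_pass_log; the second pass re-reads that
                            # log to allocate bitrate, and "-y" lets it
                            # overwrite the pass-1 stub with the usable file.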
                            for cmd in (ffmpeg_pass1, ffmpeg_pass2):
                                process = subprocess.Popen(cmd,
                                                           stdout=subprocess.PIPE,
                                                           stderr=subprocess.PIPE)
                                stdout_data, stderr_data = process.communicate()
                                if process.returncode != 0:
                                    logger.error(
                                        "Error invoking ffmpeg: {}".format(
                                            (stderr_data or "") +
                                            (stdout_data or "")))
                                    logger.error("Command was: {}".format(
                                        " ".join(cmd)))
                                    raise CommandError(
                                        "Could not complete transcoding")
                        node['content']['format'] = "webm"
                    else:
                        # If not transcoding, just link the original file
                        os.link(video_file_src, video_file_dest)
                    node["video_url"] = os.path.join(node["path"],
                                                     video_file_name)
                    copy_media.videos_found += 1
                    logger.info("Videos processed: {}".format(
                        copy_media.videos_found))
                    node["content"]["available"] = True

                    # Create thumbnail if it wasn't downloaded
                    if not os.path.exists(thumb_file_src):
                        fp = create_thumbnail(video_file_src,
                                              output_format="png")
                        if fp is None:
                            logger.error(
                                "Failed to create thumbnail for {}".format(
                                    video_file_src))
                        else:
                            logger.info(
                                "Successfully created thumbnail for {}".format(
                                    video_file_src))
                            open(thumb_file_src, 'wb').write(fp.read())

                    # Handle thumbnail
                    if os.path.exists(thumb_file_src):
                        node["thumbnail_url"] = os.path.join(
                            node["path"], node['id'] + '.png')
                        if not os.path.exists(thumb_file_dest):
                            os.link(thumb_file_src, thumb_file_dest)
                    else:
                        node["thumbnail_url"] = None

                    subtitle_srt = os.path.join(subtitle_src_dir,
                                                node['id'] + '.srt')
                    if os.path.isfile(subtitle_srt):
                        subtitle_vtt = os.path.join(node_dir,
                                                    node['id'] + '.vtt')
                        # Convert to .vtt because this format is understood
                        # by latest video.js and the old ones that read
                        # .srt don't work with newer jquery etc.
                        submarine_parser(subtitle_srt, subtitle_vtt)
                        if not os.path.exists(subtitle_vtt):
                            logger.warning("Subtitle not converted: {}".format(
                                subtitle_srt))
                        else:
                            logger.info(
                                "Subtitle converted from SRT to VTT: {}".format(
                                    subtitle_vtt))
                            node["subtitle_url"] = os.path.join(
                                node["path"], node['id'] + '.vtt')

                else:
                    if options['download']:
                        logger.error("File not found or downloaded: {}".format(
                            video_file_src))
            else:
                logger.error("Invalid node, kind: {}".format(
                    node.get("kind", None)))
                # Exercises cannot be displayed
                node["content"] = {"available": False}

            new_children = []
            for child in node.get('children', []):
                copy_media(child)
                empty_topic = child["kind"] == "Topic" and not child.get(
                    "children", [])
                unavailable_video = child["kind"] == "Video" and not child.get(
                    "content", {}).get("available", False)
                if not (empty_topic or unavailable_video):
                    new_children.append(child)
            node['children'] = new_children

        copy_media.videos_found = 0

        def render_topic_pages(node):

            parents = [node] if node.get("children") else []
            parent = node["parent"]
            while parent:
                parents.append(parent)
                parent = parent["parent"]

            # Finally, render templates into the destination
            template_context = {
                "topic_tree": topic_tree,
                "topic": node,
                "parents": parents
            }
            with i18n.translate_block(language):
                topic_html = render_to_string("kalite_zim/topic.html",
                                              template_context)
            # Replace absolute references to '/static' with relative
            topic_html = topic_html.replace("/static", "static")

            dest_html = os.path.join(tmp_dir, node["id"] + ".html")
            logger.info("Rendering {}".format(dest_html))

            open(dest_html, "w").write(topic_html)

            render_topic_pages.pages_rendered += 1

            for child in node.get('children', []):
                render_topic_pages(child)

        render_topic_pages.pages_rendered = 0

        logger.info("Hard linking video files from KA Lite...")
        copy_media(topic_tree)

        sys.stderr.write("\n")
        logger.info("Done!")

        # Configure django-compressor
        compressor_init(os.path.join(base_path, 'static'))

        # Finally, render templates into the destination
        template_context = {
            "topic_tree": topic_tree,
            "welcome": True,
        }

        with i18n.translate_block(language):
            welcome_html = render_to_string("kalite_zim/welcome.html",
                                            template_context)
            about_html = render_to_string("kalite_zim/about.html",
                                          template_context)
        # Replace absolute references to '/static' with relative
        welcome_html = welcome_html.replace("/static", "static")
        about_html = about_html.replace("/static", "static")

        # Write the welcome.html file
        open(os.path.join(tmp_dir, 'welcome.html'), 'w').write(welcome_html)
        open(os.path.join(tmp_dir, 'about.html'), 'w').write(about_html)

        # Render all topic html files
        render_topic_pages(topic_tree)

        # Copy in static data after it's been handled by django compressor
        # (this happens during template rendering)

        shutil.copytree(os.path.join(base_path, 'static'),
                        os.path.join(tmp_dir, 'static'))

        ending = datetime.now()
        duration = int((ending - beginning).total_seconds())
        logger.info("Total number of videos found: {}".format(
            copy_media.videos_found))
        logger.info("Total number of topic pages created: {}".format(
            render_topic_pages.pages_rendered))

        logger.info("Invoking zimwriterfs, writing to: {}".format(dest_file))

        zimwriterfs_args = (
            zimwriterfs,
            "--welcome", "welcome.html",
            "--favicon", "static/img/ka_leaf.png",
            "--publisher", publisher,
            "--creator", "KhanAcademy.org",
            "--title", "Khan Academy ({})".format(language),
            "--description", "Videos from Khan Academy",
            "--language", language,
            tmp_dir,
            dest_file,
        )

        process = subprocess.Popen(zimwriterfs_args,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        stdout_data, stderr_data = process.communicate()

        if process.returncode != 0:
            logger.error("Error invoking zimwriterfs: {}".format(
                (stderr_data or "") + (stdout_data or "")))

        logger.info("Duration: {h:} hours, {m:} minutes, {s:} seconds".format(
            h=duration // 3600,
            m=(duration % 3600) // 60,
            s=duration % 60,
        ))
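
Putting it together, a hedged invocation sketch (the command name comes from the "export2zim" log line above, the flags mirror the options read in handle(), and the exact entry point depends on how KA Lite is installed):

# kalite manage export2zim --language=en --download --transcode2webm \
#     --publisher="My Publisher" /path/to/khan_en.zim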