Example #1
 def create_random_video_file(self):
     """
     Helper function for testing video files.
     """
     video_id = get_node_cache("Video").keys()[0]
     youtube_id = get_node_cache("Video")[video_id]["youtube_id"]
     fake_video_file = os.path.join(settings.CONTENT_ROOT, "%s.mp4" % youtube_id)
     with open(fake_video_file, "w") as fh:
         fh.write("")
     self.assertTrue(os.path.exists(fake_video_file), "Make sure the video file was created, youtube_id='%s'." % youtube_id)
     return (fake_video_file, video_id, youtube_id)
Example #2
def get_video_page_paths(video_id=None):
    try:
        return [
            n["path"] for n in topic_tools.get_node_cache("Video")[video_id]
        ]
    except:
        return []
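A minimal sketch of the same lookup against a toy in-memory cache; the nested shape (a "Video" key mapping each video ID to a list of node dicts with a "path" field) is an assumption inferred from how the snippet indexes get_node_cache.

node_cache = {"Video": {"addition_1": [{"path": "/math/arithmetic/addition_1/"}]}}

def video_page_paths(video_id, cache=node_cache):
    # Return every known page path for the video, or [] when the ID is unknown.
    return [n["path"] for n in cache["Video"].get(video_id, [])]

print(video_page_paths("addition_1"))  # ['/math/arithmetic/addition_1/']
print(video_page_paths("missing"))     # []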
Example #3
def update_all_distributed_callback(request):
    """
    """

    if request.method != "POST":
        raise PermissionDenied("Only POST allowed to this URL endpoint.")

    videos = json.loads(request.POST["video_logs"])
    exercises = json.loads(request.POST["exercise_logs"])
    user = FacilityUser.objects.get(id=request.POST["user_id"])
    node_cache = get_node_cache()
    # Save videos
    n_videos_uploaded = 0
    for video in videos:
        video_id = video['video_id']
        youtube_id = video['youtube_id']

        # Only save video logs for videos that we recognize.
        if video_id not in node_cache["Video"]:
            logging.warn("Skipping unknown video %s" % video_id)
            continue

        try:
            (vl, _) = VideoLog.get_or_initialize(user=user, video_id=video_id, youtube_id=youtube_id)
            for key,val in video.iteritems():
                setattr(vl, key, val)
            logging.debug("Saving video log for %s: %s" % (video_id, vl))
            vl.save()
            n_videos_uploaded += 1
        except KeyError:
            logging.error("Could not save video log for data with missing values: %s" % video)
        except Exception as e:
            error_message = "Unexpected error importing videos: %s" % e
            return JsonResponse({"error": error_message}, status=500)

    # Save exercises
    n_exercises_uploaded = 0
    for exercise in exercises:
        # Only save exercise logs for exercises that we recognize.
        if exercise['exercise_id'] not in node_cache['Exercise']:
            logging.warn("Skipping unknown exercise %s" % exercise['exercise_id'])
            continue

        try:
            (el, _) = ExerciseLog.get_or_initialize(user=user, exercise_id=exercise["exercise_id"])
            for key,val in exercise.iteritems():
                setattr(el, key, val)
            logging.debug("Saving exercise log for %s: %s" % (exercise['exercise_id'], el))
            el.save()
            n_exercises_uploaded += 1
        except KeyError:
            logging.error("Could not save exercise log for data with missing values: %s" % exercise)
        except Exception as e:
            error_message = "Unexpected error importing exercises: %s" % e
            return JsonResponse({"error": error_message}, status=500)

    return JsonResponse({"success": "Uploaded %d exercises and %d videos" % (n_exercises_uploaded, n_videos_uploaded)})
Example #4
    def show_cache(self, force=False):
        """Go through each cacheable page, and show which are cached and which are NOT"""

        for node_type in ['Topic', 'Video', 'Exercise']:
            self.stdout.write("Cached %ss:\n" % node_type)
            for narr in topic_tools.get_node_cache(node_type).values():
                for n in narr:
                    if caching.has_cache_key(path=n["path"]):
                        self.stdout.write("\t%s\n" % n["path"])
Example #5
def get_video_page_paths(video_id=None, video_slug=None):
    assert (video_id or video_slug) and not (video_id and video_slug), "One arg, not two" 

    try:
        if not video_slug:
            video_slug = topic_tools.get_id2slug_map()[video_id]
        
        return topic_tools.get_node_cache("Video")[video_slug]['paths']
    except:
        return []
Example #6
def get_exercise_page_paths(video_id=None):

    try:
        exercise_paths = set()
        for exercise in topic_tools.get_related_exercises(videos=topic_tools.get_node_cache("Video")[video_id]):
            exercise_paths = exercise_paths.union(set([exercise["path"]]))
        return list(exercise_paths)
    except Exception as e:
        logging.debug("Exception while getting exercise paths: %s" % e)
        return []
Example #7
def get_exercise_page_paths(video_id=None, video_slug=None):
    assert (video_id or video_slug) and not (video_id and video_slug), "One arg, not two" 

    try:
        exercise_paths = set()
        for exercise in get_related_exercises(video=topic_tools.get_node_cache("Video")[video_slug]):
            exercise_paths = exercise_paths.union(set(exercise["paths"]))
        return list(exercise_paths)
    except:
        return []
Example #8
def get_video_page_paths(video_id=None, video_slug=None):
    assert (video_id or
            video_slug) and not (video_id and video_slug), "One arg, not two"

    try:
        if not video_slug:
            video_slug = topic_tools.get_id2slug_map()[video_id]

        return topic_tools.get_node_cache("Video")[video_slug]['paths']
    except:
        return []
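A sketch of the id -> slug -> paths chain the two variants above rely on, with toy stand-ins for get_id2slug_map() and the slug-keyed video cache (the data shapes are assumed from the snippets):

id2slug = {"addition_1": "basic-addition"}
video_cache = {"basic-addition": {"paths": ["/math/arithmetic/basic-addition/"]}}

def video_page_paths(video_id=None, video_slug=None):
    assert (video_id or video_slug) and not (video_id and video_slug), "One arg, not two"
    try:
        slug = video_slug or id2slug[video_id]
        return video_cache[slug]["paths"]
    except KeyError:
        # Unknown ID or slug: fall back to an empty list, as the snippets above do.
        return []

print(video_page_paths(video_id="addition_1"))  # ['/math/arithmetic/basic-addition/']
print(video_page_paths(video_slug="unknown"))   # []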
Example #9
def get_exercise_page_paths(video_id=None):

    try:
        exercise_paths = set()
        for exercise in topic_tools.get_related_exercises(
                videos=topic_tools.get_node_cache("Video")[video_id]):
            exercise_paths = exercise_paths.union(set([exercise["path"]]))
        return list(exercise_paths)
    except Exception as e:
        logging.debug("Exception while getting exercise paths: %s" % e)
        return []
Example #10
def get_exercise_page_paths(video_id=None, video_slug=None):
    assert (video_id or
            video_slug) and not (video_id and video_slug), "One arg, not two"

    try:
        exercise_paths = set()
        for exercise in get_related_exercises(
                video=topic_tools.get_node_cache("Video")[video_slug]):
            exercise_paths = exercise_paths.union(set(exercise["paths"]))
        return list(exercise_paths)
    except:
        return []
Example #11
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the distributed server.")

        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")

        # Get list of exercises
        lang_code = lcode_to_ietf(options["lang_code"])
        exercise_ids = options["exercise_ids"].split(",") if options["exercise_ids"] else None
        exercise_ids = exercise_ids or ([ex["id"] for ex in get_topic_exercises(topic_id=options["topic_id"])] if options["topic_id"] else None)
        exercise_ids = exercise_ids or get_node_cache("Exercise").keys()

        # Download the exercises
        for exercise_id in exercise_ids:
            scrape_exercise(exercise_id=exercise_id, lang_code=lang_code, force=options["force"])

        logging.info("Process complete.")
Example #12
def create_youtube_id_to_slug_map(node_cache=None, data_path=settings.PROJECT_PATH + "/static/data/"):
    """
    Go through all videos, and make a map of youtube_id to slug, for fast look-up later
    """

    if not node_cache:
        node_cache = topic_tools.get_node_cache(force=True)

    map_file = os.path.join(data_path, topic_tools.video_remap_file)
    id2slug_map = dict()

    # Make a map from youtube ID to video slug
    for v in node_cache['Video'].values():
        assert v["youtube_id"] not in id2slug_map, "Make sure there's a 1-to-1 mapping between youtube_id and slug"
        id2slug_map[v['youtube_id']] = v['slug']

    # Save the map!
    with open(map_file, "w") as fp:
        fp.write(json.dumps(id2slug_map, indent=2))
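A self-contained sketch of the map-building step above, with made-up video nodes standing in for the real node cache:

import json

video_nodes = {
    "basic-addition": {"youtube_id": "ytid_add_001", "slug": "basic-addition"},
    "basic-subtraction": {"youtube_id": "ytid_sub_002", "slug": "basic-subtraction"},
}
id2slug_map = {}
for v in video_nodes.values():
    # Enforce the same 1-to-1 invariant the command asserts.
    assert v["youtube_id"] not in id2slug_map, "youtube_id/slug mapping must be 1-to-1"
    id2slug_map[v["youtube_id"]] = v["slug"]

print(json.dumps(id2slug_map, indent=2))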
Example #13
def create_youtube_id_to_slug_map(node_cache=None,
                                  data_path=settings.PROJECT_PATH +
                                  "/static/data/"):
    """
    Go through all videos, and make a map of youtube_id to slug, for fast look-up later
    """

    if not node_cache:
        node_cache = topic_tools.get_node_cache(force=True)

    map_file = os.path.join(data_path, topic_tools.video_remap_file)
    id2slug_map = dict()

    # Make a map from youtube ID to video slug
    for v in node_cache['Video'].values():
        assert v[
            "youtube_id"] not in id2slug_map, "Make sure there's a 1-to-1 mapping between youtube_id and slug"
        id2slug_map[v['youtube_id']] = v['slug']

    # Save the map!
    with open(map_file, "w") as fp:
        fp.write(json.dumps(id2slug_map, indent=2))
Example #14
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the distributed server.")

        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")

        # Get list of videos
        lang_code = lcode_to_ietf(options["lang_code"])
        video_map = get_dubbed_video_map(lang_code) or {}
        video_ids = options["video_ids"].split(",") if options["video_ids"] else None
        video_ids = video_ids or (
            [vid["id"] for vid in get_topic_videos(topic_id=options["topic_id"])] if options["topic_id"] else None
        )
        video_ids = video_ids or video_map.keys()

        # Download the videos
        for video_id in video_ids:
            if video_id in video_map:
                youtube_id = video_map[video_id]

            elif video_id in video_map.values():
                # Perhaps they sent in a youtube ID?  We can handle that!
                youtube_id = video_id
            else:
                logging.error("No mapping for video_id=%s; skipping" % video_id)
                continue

            try:
                scrape_video(youtube_id=youtube_id, format=options["format"], force=options["force"])
                # scrape_thumbnail(youtube_id=youtube_id)
                logging.info("Access video %s at %s" % (youtube_id, get_node_cache("Video")[video_id][0]["path"]))
            except Exception as e:
                logging.error("Failed to download video %s: %s" % (youtube_id, e))

        logging.info("Process complete.")
Example #15
    def _setup(self, num_logs=50, **kwargs):
        super(OneHundredRandomLogUpdates, self)._setup(**kwargs)
        node_cache = get_node_cache()

        try:
            self.user = FacilityUser.objects.get(username=self.username)
        except:
            #take username from ExerciseLog
            all_exercises = ExerciseLog.objects.all()
            self.user = FacilityUser.objects.get(id=all_exercises[0].user_id)
            print self.username, " not in FacilityUsers, using ", self.user
        self.num_logs = num_logs
        #give the platform a chance to cache the logs
        ExerciseLog.objects.filter(user=self.user).delete()
        for x in range(num_logs):
            while True:
                ex_idx = int(self.random.random() * len(node_cache["Exercise"].keys()))
                ex_id = node_cache["Exercise"].keys()[ex_idx]
                if not ExerciseLog.objects.filter(user=self.user, exercise_id=ex_id):
                    break
            ex = ExerciseLog(user=self.user, exercise_id=ex_id)
            ex.save()
        self.exercise_list = ExerciseLog.objects.filter(user=self.user)
        self.exercise_count = self.exercise_list.count()

        VideoLog.objects.filter(user=self.user).delete()
        for x in range(num_logs):
            while True:
                vid_idx = int(self.random.random() * len(node_cache["Video"].keys()))
                vid_id = node_cache["Video"].keys()[vid_idx]
                if not VideoLog.objects.filter(user=self.user, video_id=vid_id):
                    break
            vid = VideoLog(user=self.user, video_id=vid_id)
            vid.save()
        self.video_list = VideoLog.objects.filter(user=self.user)
        self.video_count = self.video_list.count()
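The two seeding loops above keep drawing random IDs until they find one without an existing log. A sketch of that rejection loop, with a set standing in for the database lookup:

import random

exercise_ids = ["ex_a", "ex_b", "ex_c", "ex_d"]
used = set()
for _ in range(3):
    while True:
        ex_id = random.choice(exercise_ids)
        if ex_id not in used:  # stands in for the ExerciseLog.objects.filter(...) check
            break
    used.add(ex_id)
print(sorted(used))  # three distinct IDs drawn from the pool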
Example #16
def _get_user_usage_data(users, period_start=None, period_end=None):
    """
    Returns facility user data, within the given date range.
    """

    # compute period start and end
    # Now compute stats, based on queried data
    num_exercises = len(get_node_cache('Exercise'))
    user_data = OrderedDict()
    group_data = OrderedDict()


    # Make queries efficiently
    exercise_logs = ExerciseLog.objects.filter(user__in=users, complete=True)
    video_logs = VideoLog.objects.filter(user__in=users)
    login_logs = UserLogSummary.objects.filter(user__in=users)

    # filter results
    if period_start:
        exercise_logs = exercise_logs.filter(completion_timestamp__gte=period_start)
        video_logs = video_logs.filter(completion_timestamp__gte=period_start)
        login_logs = login_logs.filter(start_datetime__gte=period_start)
    if period_end:
        exercise_logs = exercise_logs.filter(completion_timestamp__lte=period_end)
        video_logs = video_logs.filter(completion_timestamp__lte=period_end)
        login_logs = login_logs.filter(end_datetime__lte=period_end)

    # Force results in a single query
    exercise_logs = list(exercise_logs.values("exercise_id", "user__pk"))
    video_logs = list(video_logs.values("video_id", "user__pk"))
    login_logs = list(login_logs.values("activity_type", "total_seconds", "user__pk"))

    for user in users:
        user_data[user.pk] = OrderedDict()
        user_data[user.pk]["first_name"] = user.first_name
        user_data[user.pk]["last_name"] = user.last_name
        user_data[user.pk]["username"] = user.username
        user_data[user.pk]["group"] = user.group


        user_data[user.pk]["total_report_views"] = 0#report_stats["count__sum"] or 0
        user_data[user.pk]["total_logins"] =0# login_stats["count__sum"] or 0
        user_data[user.pk]["total_hours"] = 0#login_stats["total_seconds__sum"] or 0)/3600.

        user_data[user.pk]["total_exercises"] = 0
        user_data[user.pk]["pct_mastery"] = 0.
        user_data[user.pk]["exercises_mastered"] = []

        user_data[user.pk]["total_videos"] = 0
        user_data[user.pk]["videos_watched"] = []


    for elog in exercise_logs:
        user_data[elog["user__pk"]]["total_exercises"] += 1
        user_data[elog["user__pk"]]["pct_mastery"] += 1. / num_exercises
        user_data[elog["user__pk"]]["exercises_mastered"].append(elog["exercise_id"])

    for vlog in video_logs:
        user_data[vlog["user__pk"]]["total_videos"] += 1
        user_data[vlog["user__pk"]]["videos_watched"].append(vlog["video_id"])

    for llog in login_logs:
        if llog["activity_type"] == UserLog.get_activity_int("coachreport"):
            user_data[llog["user__pk"]]["total_report_views"] += 1
        elif llog["activity_type"] == UserLog.get_activity_int("login"):
            user_data[llog["user__pk"]]["total_hours"] += (llog["total_seconds"]) / 3600.
            user_data[llog["user__pk"]]["total_logins"] += 1

    # Add group data.  Allow a fake group "Ungrouped"
    for user in users:
        group_pk = getattr(user.group, "pk", None)
        group_name = getattr(user.group, "name", _("Ungrouped"))
        if not group_pk in group_data:
            group_data[group_pk] = {
                "name": group_name,
                "total_logins": 0,
                "total_hours": 0,
                "total_users": 0,
                "total_videos": 0,
                "total_exercises": 0,
                "pct_mastery": 0,
            }
        group_data[group_pk]["total_users"] += 1
        group_data[group_pk]["total_logins"] += user_data[user.pk]["total_logins"]
        group_data[group_pk]["total_hours"] += user_data[user.pk]["total_hours"]
        group_data[group_pk]["total_videos"] += user_data[user.pk]["total_videos"]
        group_data[group_pk]["total_exercises"] += user_data[user.pk]["total_exercises"]

        total_mastery_so_far = (group_data[group_pk]["pct_mastery"] * (group_data[group_pk]["total_users"] - 1) + user_data[user.pk]["pct_mastery"])
        group_data[group_pk]["pct_mastery"] =  total_mastery_so_far / group_data[group_pk]["total_users"]

    return (user_data, group_data)
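The group mastery figure above is maintained as a running mean: each user's pct_mastery is folded into the group's average as the member count grows. A small sketch of that update rule:

def fold_into_mean(current_mean, n_members_after, new_value):
    # Same update as the pct_mastery lines above: reweight the old mean, then add the new value.
    return (current_mean * (n_members_after - 1) + new_value) / float(n_members_after)

mean = 0.0
for n, value in enumerate([0.2, 0.6, 0.4], start=1):
    mean = fold_into_mean(mean, n, value)
print(round(mean, 3))  # 0.4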
Example #17
def generate_dubbed_video_mappings(download_url=DUBBED_VIDEOS_SPREADSHEET_CSV_URL, csv_data=None):
    """
    Function to do the heavy lifting in getting the dubbed videos map.

    Could be moved into utils
    """
    if not csv_data:
        logging.info("Downloading dubbed video data from %s" % download_url)
        response = requests.get(download_url)
        if response.status_code != 200:
            raise CommandError("Failed to download dubbed video CSV data: status=%s" % response.status)
        csv_data = response.content

    # This CSV file is in standard format: separated by ",", quoted by '"'
    logging.info("Parsing csv file.")
    reader = csv.reader(StringIO(csv_data))

    # Build a two-level video map.
    #   First key: language name
    #   Second key: english youtube ID
    #   Value: corresponding youtube ID in the new language.
    video_map = {}

    row_num = -1
    try:
        # Loop through each row in the spreadsheet.
        while (True):
            row_num += 1
            row = reader.next()


            if row_num < 5:
                # Rows 1-4 are crap.
                continue

            elif row_num == 5:
                # Row 5 is the header row.
                header_row = [v.lower() for v in row]  # lcase all header row values (including language names)
                slug_idx = header_row.index("titled id")
                english_idx = header_row.index("english")
                assert slug_idx != -1, "Video slug column header should be found."
                assert english_idx != -1, "English video column header should be found."

            else:
                # Rows 6 and beyond are data.
                assert len(row) == len(header_row), "Values line length equals headers line length"

                # Grab the slug and english video ID.
                video_slug = row[slug_idx]
                english_video_id = row[english_idx]
                assert english_video_id, "English Video ID should not be empty"
                assert video_slug, "Slug should not be empty"

                # English video is the first video ID column,
                #   and following columns (until the end) are other languages.
                # Loop through those columns and, if a video exists,
                #   add it to the dictionary.
                for idx in range(english_idx, len(row)):
                    if row[idx]:  # make sure there's a dubbed video
                        lang = header_row[idx]
                        if lang not in video_map:  # add the first level if it doesn't exist
                            video_map[lang] = {}
                        video_map[lang][english_video_id] = row[idx]  # add the corresponding video id for the video, in this language.

    except StopIteration:
        # The loop ends when the CSV file hits the end and throws a StopIteration
        pass

    # Now, validate the mappings with our topic data
    known_videos = get_node_cache("Video").keys()
    missing_videos = set(known_videos) - set(video_map["english"].keys())
    extra_videos = set(video_map["english"].keys()) - set(known_videos)
    if missing_videos:
        logging.warn("There are %d known videos not in the list of dubbed videos" % len(missing_videos))
    if extra_videos:
        logging.warn("There are %d videos in the list of dubbed videos that we have never heard of." % len(extra_videos))

    return (video_map, csv_data)
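A toy version of the spreadsheet parse above: build the two-level language -> English youtube ID -> dubbed youtube ID map from a tiny in-memory CSV (the header fields match the code; the IDs are made up):

import csv

csv_rows = [
    "titled id,english,spanish",
    "basic-addition,ytid_add_en,ytid_add_es",
    "basic-subtraction,ytid_sub_en,",
]
reader = csv.reader(csv_rows)
header_row = [v.lower() for v in next(reader)]
english_idx = header_row.index("english")

video_map = {}
for row in reader:
    english_video_id = row[english_idx]
    for idx in range(english_idx, len(row)):
        if row[idx]:  # only record languages that actually have a dubbed video
            video_map.setdefault(header_row[idx], {})[english_video_id] = row[idx]

print(video_map)  # english maps IDs to themselves; spanish only has the dubbed addition video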
Example #18
 def create_cache(self, force=False):
     for node_type in ['Topic', 'Video', 'Exercise']:
         self.stdout.write("Caching %ss:\n" % node_type)
         for narr in topic_tools.get_node_cache(node_type).values():
             for n in narr:
                 self.create_page_cache(path=n["path"], force=force)
Example #19
from shared import topic_tools

TOPICS = topic_tools.get_topic_tree()
NODE_CACHE = topic_tools.get_node_cache()
SLUG2ID_MAP = topic_tools.get_slug2id_map()
Example #20
 def clean_exercise_id(self):
     """
     Make sure the exercise ID is found.
     """
     if not self.cleaned_data.get("exercise_id", "") in get_node_cache('Exercise'):
         raise forms.ValidationError(_("Exercise ID not recognized"))
Example #21
 def clean_video_id(self):
     """
     Make sure the video ID is found.
     """
     if self.cleaned_data["video_id"] not in get_node_cache("Video"):
         raise forms.ValidationError(_("Video ID not recognized."))
Example #22
def student_view_context(request, xaxis="pct_mastery", yaxis="ex:attempts"):
    """
    Context done separately, to be importable for similar pages.
    """
    user = get_user_from_request(request=request)
    if not user:
        raise Http404

    node_cache = get_node_cache()
    topic_ids = [t["id"] for t in get_knowledgemap_topics()]
    topics = [node_cache["Topic"][id][0] for id in topic_ids]

    user_id = user.id
    exercise_logs = list(ExerciseLog.objects \
        .filter(user=user) \
        .values("exercise_id", "complete", "points", "attempts", "streak_progress", "struggling", "completion_timestamp"))
    video_logs = list(VideoLog.objects \
        .filter(user=user) \
        .values("video_id", "complete", "total_seconds_watched", "points", "completion_timestamp"))

    exercise_sparklines = dict()
    stats = dict()
    topic_exercises = dict()
    topic_videos = dict()
    exercises_by_topic = dict()
    videos_by_topic = dict()

    # Categorize every exercise log into a "midlevel" exercise
    for elog in exercise_logs:
        if not elog["exercise_id"] in node_cache["Exercise"]:
            # Sometimes KA updates their topic tree and eliminates exercises;
            #   we also want to support 3rd party switching of trees arbitrarily.
            logging.debug("Skip unknown exercise log for %s/%s" % (user_id, elog["exercise_id"]))
            continue

        parent_ids = [topic for ex in node_cache["Exercise"][elog["exercise_id"]] for topic in ex["ancestor_ids"]]
        topic = set(parent_ids).intersection(set(topic_ids))
        if not topic:
            logging.error("Could not find a topic for exercise %s (parents=%s)" % (elog["exercise_id"], parent_ids))
            continue
        topic = topic.pop()
        if not topic in topic_exercises:
            topic_exercises[topic] = get_topic_exercises(path=node_cache["Topic"][topic][0]["path"])
        exercises_by_topic[topic] = exercises_by_topic.get(topic, []) + [elog]

    # Categorize every video log into a "midlevel" exercise.
    for vlog in video_logs:
        if not vlog["video_id"] in node_cache["Video"]:
            # Sometimes KA updates their topic tree and eliminates videos;
            #   we also want to support 3rd party switching of trees arbitrarily.
            logging.debug("Skip unknown video log for %s/%s" % (user_id, vlog["video_id"]))
            continue

        parent_ids = [topic for vid in node_cache["Video"][vlog["video_id"]] for topic in vid["ancestor_ids"]]
        topic = set(parent_ids).intersection(set(topic_ids))
        if not topic:
            logging.error("Could not find a topic for video %s (parents=%s)" % (vlog["video_id"], parent_ids))
            continue
        topic = topic.pop()
        if not topic in topic_videos:
            topic_videos[topic] = get_topic_videos(path=node_cache["Topic"][topic][0]["path"])
        videos_by_topic[topic] = videos_by_topic.get(topic, []) + [vlog]


    # Now compute stats
    for id in topic_ids:  # set(topic_exercises.keys()).union(set(topic_videos.keys()))
        n_exercises = len(topic_exercises.get(id, []))
        n_videos = len(topic_videos.get(id, []))

        exercises = exercises_by_topic.get(id, [])
        videos = videos_by_topic.get(id, [])
        n_exercises_touched = len(exercises)
        n_videos_touched = len(videos)

        exercise_sparklines[id] = [el["completion_timestamp"] for el in filter(lambda n: n["complete"], exercises)]

        # total streak currently a pct, but expressed in max 100; convert to
        # proportion (like other percentages here)
        stats[id] = {
            "ex:pct_mastery":      0 if not n_exercises_touched else sum([el["complete"] for el in exercises]) / float(n_exercises),
            "ex:pct_started":      0 if not n_exercises_touched else n_exercises_touched / float(n_exercises),
            "ex:average_points":   0 if not n_exercises_touched else sum([el["points"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_attempts": 0 if not n_exercises_touched else sum([el["attempts"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_streak":   0 if not n_exercises_touched else sum([el["streak_progress"] for el in exercises]) / float(n_exercises_touched) / 100.,
            "ex:total_struggling": 0 if not n_exercises_touched else sum([el["struggling"] for el in exercises]),
            "ex:last_completed": None if not n_exercises_touched else max_none([el["completion_timestamp"] or None for el in exercises]),

            "vid:pct_started":      0 if not n_videos_touched else n_videos_touched / float(n_videos),
            "vid:pct_completed":    0 if not n_videos_touched else sum([vl["complete"] for vl in videos]) / float(n_videos),
            "vid:total_minutes":      0 if not n_videos_touched else sum([vl["total_seconds_watched"] for vl in videos]) / 60.,
            "vid:average_points":   0. if not n_videos_touched else float(sum([vl["points"] for vl in videos]) / float(n_videos_touched)),
            "vid:last_completed": None if not n_videos_touched else max_none([vl["completion_timestamp"] or None for vl in videos]),
        }

    context = plotting_metadata_context(request)

    return {
        "form": context["form"],
        "groups": context["groups"],
        "facilities": context["facilities"],
        "student": user,
        "topics": topics,
        "exercises": topic_exercises,
        "exercise_logs": exercises_by_topic,
        "video_logs": videos_by_topic,
        "exercise_sparklines": exercise_sparklines,
        "no_data": not exercise_logs and not video_logs,
        "stats": stats,
        "stat_defs": [  # this order determines the order of display
            {"key": "ex:pct_mastery",      "title": _("% Mastery"),        "type": "pct"},
            {"key": "ex:pct_started",      "title": _("% Started"),        "type": "pct"},
            {"key": "ex:average_points",   "title": _("Average Points"),   "type": "float"},
            {"key": "ex:average_attempts", "title": _("Average Attempts"), "type": "float"},
            {"key": "ex:average_streak",   "title": _("Average Streak"),   "type": "pct"},
            {"key": "ex:total_struggling", "title": _("Struggling"),       "type": "int"},
            {"key": "ex:last_completed",   "title": _("Last Completed"),   "type": "date"},
            {"key": "vid:pct_completed",   "title": _("% Completed"),      "type": "pct"},
            {"key": "vid:pct_started",     "title": _("% Started"),        "type": "pct"},
            {"key": "vid:total_minutes",   "title": _("Average Minutes Watched"),"type": "float"},
            {"key": "vid:average_points",  "title": _("Average Points"),   "type": "float"},
            {"key": "vid:last_completed",  "title": _("Last Completed"),   "type": "date"},
        ]
    }
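A sketch of the "which knowledge-map topic does this log belong to" step used by both loops above; the ancestor_ids field and the list-of-nodes cache shape are assumptions taken from the snippet:

topic_ids = set(["arithmetic", "algebra"])
exercise_nodes = {"addition_1": [{"ancestor_ids": ["root", "math", "arithmetic"]}]}

def topic_for(exercise_id):
    parent_ids = [t for node in exercise_nodes[exercise_id] for t in node["ancestor_ids"]]
    matches = set(parent_ids).intersection(topic_ids)
    return matches.pop() if matches else None  # None mirrors the "could not find a topic" branch

print(topic_for("addition_1"))  # arithmetic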
Example #23
from shared import topic_tools

TOPICS          = topic_tools.get_topic_tree()
NODE_CACHE      = topic_tools.get_node_cache()
SLUG2ID_MAP     = topic_tools.get_slug2id_map()
Example #24
def create_all_mappings(force=False, frequency_to_save=100, response_to_check=None, date_to_check=None):
    """
    Write or update JSON file that maps from YouTube ID to Amara code and languages available.

    This command updates the json file that records what languages videos have been subtitled in.
    It loops through all video ids, records a list of which languages Amara says it has been subtitled in
    and meta data about the request (e.g. date, response code).

    See the schema in the docstring for fcn update_video_entry.
    """
    videos = get_node_cache('Video').values()

    # Initialize the data
    out_file = settings.SUBTITLES_DATA_ROOT + SRTS_JSON_FILENAME

    if not os.path.exists(out_file):
        ensure_dir(os.path.dirname(out_file))
        srts_dict = {}
    else:
        # Open the file, read, and clean out old videos.
        try:
            with open(out_file, "r") as fp:
                srts_dict = json.load(fp)
        except Exception as e:
            logging.error("JSON file corrupted, using empty json and starting from scratch (%s)" % e)
            srts_dict = {}
        else:
            logging.info("Loaded %d mappings." % (len(srts_dict)))

        # Set of videos no longer used by KA Lite
        removed_videos = set(srts_dict.keys()) - set([v[0]["youtube_id"] for v in videos])
        if removed_videos:
            logging.info("Removing subtitle information for %d videos (no longer used)." % len(removed_videos))
            for vid in removed_videos:
                del srts_dict[vid]
    logging.info("Querying %d mappings." % (len(videos) - (0 if (force or date_to_check) else len(srts_dict))))

    # Once we have the current mapping, proceed through logic to update the mapping
    n_new_entries = 0
    n_failures = 0
    for video_nodes in videos:
        # Decide whether or not to update this video based on the arguments provided at the command line
        youtube_id = video_nodes[0]['youtube_id']
        cached = youtube_id in srts_dict
        if not force and cached:
            # First, check against date
            flag_for_refresh = True # not (response_code or last_attempt)
            last_attempt = srts_dict[youtube_id].get("last_attempt")
            last_attempt = None if not last_attempt else datetime.datetime.strptime(last_attempt, '%Y-%m-%d')
            flag_for_refresh = flag_for_refresh and (not date_to_check or date_to_check > last_attempt)
            if not flag_for_refresh:
                logging.debug("Skipping %s for date-check" % youtube_id)
                continue
            # Second, check against response code
            response_code = srts_dict[youtube_id].get("api_response")
            flag_for_refresh = flag_for_refresh and (not response_to_check or response_to_check == "all" or response_to_check == response_code)
            if not (flag_for_refresh):
                logging.debug("Skipping %s for response-code" % youtube_id)
                continue
            if not response_to_check and not date_to_check and cached: # no flags specified and already cached - skip
                logging.debug("Skipping %s for already-cached and no flags specified" % youtube_id)
                continue
        else:
            if force and not cached:
                logging.debug("Updating %s because force flag (-f) given and video not cached." % youtube_id)
            elif force and cached:
                logging.debug("Updating %s because force flag (-f) given. Video was previously cached." % youtube_id)
            else:
                logging.debug("Updating %s because video not yet cached." % youtube_id)

        # If it makes it to here without hitting a continue, then update the entry
        try:
            srts_dict[youtube_id] = update_video_entry(youtube_id, entry=srts_dict.get(youtube_id, {}))
        except Exception as e:
            logging.warn("Error updating video %s: %s" % (youtube_id, e))
            n_failures += 1
            continue

        if n_new_entries % frequency_to_save == 0:
            logging.info("On loop %d dumping dictionary into %s" % (n_new_entries, out_file))
            with open(out_file, 'wb') as fp:
                json.dump(srts_dict, fp)
        n_new_entries += 1

    # Finished the loop: save and report
    with open(out_file, 'wb') as fp:
        json.dump(srts_dict, fp)
    if n_failures == 0:
        logging.info("Great success! Stored %d fresh entries, %d total." % (n_new_entries, len(srts_dict)))
    else:
        logging.warn("Stored %s fresh entries, but with %s failures." % (n_new_entries, n_failures))
Example #25
def compute_data(data_types, who, where):
    """
    Compute the data in "data_types" for each user in "who", for the topics selected by "where"

    who: list of users
    where: topic_path
    data_types can include:
        pct_mastery
        effort
        attempts
    """

    # None indicates that the data hasn't been queried yet.
    #   We'll query it on demand, for efficiency
    topics = None
    exercises = None
    videos = None

    # Initialize an empty dictionary of data, video logs, exercise logs, for each user
    data = OrderedDict(zip([w.id for w in who], [dict() for i in range(len(who))]))  # maintain the order of the users
    vid_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))
    ex_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))
    if UserLog.is_enabled():
        activity_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))

    # Set up queries (but don't run them), so we have really easy aliases.
    #   Only do them if they haven't been done yet (tell this by passing in a value to the lambda function)
    # Topics: topics.
    # Exercises: names (ids for ExerciseLog objects)
    # Videos: video_id (ids for VideoLog objects)

    # This lambda partial creates a function to return all items with a particular path from the NODE_CACHE.
    search_fun_single_path = partial(lambda t, p: t["path"].startswith(p), p=tuple(where))
    # This lambda partial creates a function to return all items with paths matching a list of paths from NODE_CACHE.
    search_fun_multi_path = partial(lambda ts, p: any([t["path"].startswith(p) for t in ts]),  p=tuple(where))
    # Functions that use the functions defined above to return topics, exercises, and videos based on paths.
    query_topics = partial(lambda t, sf: t if t is not None else [t[0]["id"] for t in filter(sf, get_node_cache('Topic').values())], sf=search_fun_single_path)
    query_exercises = partial(lambda e, sf: e if e is not None else [ex[0]["id"] for ex in filter(sf, get_node_cache('Exercise').values())], sf=search_fun_multi_path)
    query_videos = partial(lambda v, sf: v if v is not None else [vid[0]["id"] for vid in filter(sf, get_node_cache('Video').values())], sf=search_fun_multi_path)

    # No users, don't bother.
    if len(who) > 0:

        # Query out all exercises, videos, exercise logs, and video logs before looping to limit requests.
        # This means we could pull data for n-dimensional coach report displays with the same number of requests!
        # Note: User activity is polled inside the loop, to prevent possible slowdown for exercise and video reports.
        exercises = query_exercises(exercises)

        videos = query_videos(videos)

        if exercises:
            ex_logs = query_logs(data.keys(), exercises, "exercise", ex_logs)

        if videos:
            vid_logs = query_logs(data.keys(), videos, "video", vid_logs)

        for data_type in (data_types if not hasattr(data_types, "lower") else [data_types]):  # wrap a single string in a list, if necessary
            if data_type in data[data.keys()[0]]:  # if the first user has it, then all do; no need to calc again.
                continue

            #
            # These are summary stats: you only get one per user
            #
            if data_type == "pct_mastery":

                # Efficient query out, spread out to dict
                for user in data.keys():
                    data[user][data_type] = 0 if not ex_logs[user] else 100. * sum([el['complete'] for el in ex_logs[user]]) / float(len(exercises))

            elif data_type == "effort":
                if "ex:attempts" in data[data.keys()[0]] and "vid:total_seconds_watched" in data[data.keys()[0]]:
                    # exercises and videos would be initialized already
                    for user in data.keys():
                        avg_attempts = 0 if len(exercises) == 0 else sum(data[user]["ex:attempts"].values()) / float(len(exercises))
                        avg_video_points = 0 if len(videos) == 0 else sum(data[user]["vid:total_seconds_watched"].values()) / float(len(videos))
                        data[user][data_type] = 100. * (0.5 * avg_attempts / 10. + 0.5 * avg_video_points / 750.)
                else:
                    data_types += ["ex:attempts", "vid:total_seconds_watched", "effort"]

            #
            # These are detail stats: you get many per user
            #
            # Just querying out data directly: Video
            elif data_type.startswith("vid:") and data_type[4:] in [f.name for f in VideoLog._meta.fields]:

                for user in data.keys():
                    data[user][data_type] = OrderedDict([(v['video_id'], v[data_type[4:]]) for v in vid_logs[user]])

            # Just querying out data directly: Exercise
            elif data_type.startswith("ex:") and data_type[3:] in [f.name for f in ExerciseLog._meta.fields]:

                for user in data.keys():
                    data[user][data_type] = OrderedDict([(el['exercise_id'], el[data_type[3:]]) for el in ex_logs[user]])

            # User Log Queries
            elif data_type.startswith("user:"******"", "activity", activity_logs)

                for user in data.keys():
                    data[user][data_type] = [log[data_type[5:]] for log in activity_logs[user]]

            # User Summary Queries
            elif data_type.startswith("usersum:") and data_type[8:] in [f.name for f in UserLogSummary._meta.fields] and UserLog.is_enabled():

                activity_logs = query_logs(data.keys(), "", "summaryactivity", activity_logs)

                for user in data.keys():
                    data[user][data_type] = sum([log[data_type[8:]] for log in activity_logs[user]])
            # Unknown requested quantity
            else:
                raise Exception("Unknown type: '%s' not in %s" % (data_type, str([f.name for f in ExerciseLog._meta.fields])))

    # Returning empty list instead of None allows javascript on client
    # side to read 'length' property without error.
    exercises = exercises or []

    videos = videos or []

    return {
        "data": data,
        "topics": topics,
        "exercises": exercises,
        "videos": videos,
    }
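The query_topics/query_exercises/query_videos helpers above bind the selected path prefix into a predicate with functools.partial and then filter the node cache with it. A toy sketch of that pattern, using a single string prefix instead of the tuple used above:

from functools import partial

node_cache = {
    "Exercise": {
        "addition_1": [{"id": "addition_1", "path": "/math/arithmetic/addition_1/"}],
        "slope": [{"id": "slope", "path": "/math/algebra/slope/"}],
    }
}

# Bind the path prefix once, then reuse the predicate over every cached node list.
matches_path = partial(lambda nodes, p: any(n["path"].startswith(p) for n in nodes),
                       p="/math/arithmetic/")
exercise_ids = [nodes[0]["id"] for nodes in filter(matches_path, node_cache["Exercise"].values())]
print(exercise_ids)  # ['addition_1']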
Example #26
def get_video_page_paths(video_id=None):
    try:
        return [n["path"] for n in topic_tools.get_node_cache("Video")[video_id]]
    except:
        return []
Example #27
def update_all_central_callback(request):
    """
    Callback after authentication.

    Parses out the request token verification.
    Then finishes the request by getting an auth token.
    """
    if not "ACCESS_TOKEN" in request.session:
        finish_auth(request)

    exercises = get_api_resource(request, "/api/v1/user/exercises")
    videos = get_api_resource(request, "/api/v1/user/videos")
    node_cache = get_node_cache()

    # Save videos
    video_logs = []
    for video in videos:
        # Assume that KA videos are all english-language, not dubbed (for now)
        video_id = youtube_id = video.get('video', {}).get('youtube_id', "")

        # Only save videos with progress
        if not video.get('seconds_watched', None):
            continue

        # Only save video logs for videos that we recognize.
        if video_id not in node_cache["Video"]:
            logging.warn("Skipping unknown video %s" % video_id)
            continue

        try:
            video_logs.append({
                "video_id": video_id,
                "youtube_id": youtube_id,
                "total_seconds_watched": video['seconds_watched'],
                "points": VideoLog.calc_points(video['seconds_watched'], video['duration']),
                "complete": video['completed'],
                "completion_timestamp": convert_ka_date(video['last_watched']) if video['completed'] else None,
            })
            logging.debug("Got video log for %s: %s" % (video_id, video_logs[-1]))
        except KeyError:
            logging.error("Could not save video log for data with missing values: %s" % video)

    # Save exercises
    exercise_logs = []
    for exercise in exercises:
        # Only save exercises that have any progress.
        if not exercise.get('last_done', None):
            continue

        # Only save exercise logs for exercises that we recognize.
        slug = exercise.get('exercise', "")
        if slug not in node_cache['Exercise']:
            logging.warn("Skipping unknown exercise %s" % slug)
            continue

        try:
            completed = exercise['streak'] >= 10
            basepoints = node_cache['Exercise'][slug][0]['basepoints']
            exercise_logs.append({
                "exercise_id": slug,
                "streak_progress": min(100, 100 * exercise['streak']/10),  # duplicates logic elsewhere
                "attempts": exercise['total_done'],
                "points": ExerciseLog.calc_points(basepoints, ncorrect=exercise['streak'], add_randomness=False),  # no randomness when importing from KA
                "complete": completed,
                "attempts_before_completion": exercise['total_done'] if not exercise['practiced'] else None,  #can't figure this out if they practiced after mastery.
                "completion_timestamp": convert_ka_date(exercise['proficient_date']) if completed else None,
            })
            logging.debug("Got exercise log for %s: %s" % (slug, exercise_logs[-1]))
        except KeyError:
            logging.error("Could not save exercise log for data with missing values: %s" % exercise)

    # POST the data back to the distributed server
    dthandler = lambda obj: obj.isoformat() if isinstance(obj, datetime.datetime) else None
    logging.debug("POST'ing to %s" % request.session["distributed_callback_url"])
    response = requests.post(
        request.session["distributed_callback_url"],
        cookies={ "csrftoken": request.session["distributed_csrf_token"] },
        data = {
            "csrfmiddlewaretoken": request.session["distributed_csrf_token"],
            "video_logs": json.dumps(video_logs, default=dthandler),
            "exercise_logs": json.dumps(exercise_logs, default=dthandler),
            "user_id": request.session["distributed_user_id"],
        }
    )
    logging.debug("Response (%d): %s" % (response.status_code, response.content))
    try:
        message = json.loads(response.content)
    except ValueError as e:
        message = { "error": unicode(e) }
    except Exception as e:
        message = { "error": u"Loading json object: %s" % e }

    # If something broke on the distributed server, we are SCREWED.
    #   For now, just show the error to users.
    #
    # Ultimately, we have a message we would like to share with the distributed server.
#    if response.status_code != 200:
#        return HttpResponseServerError(response.content)

    return HttpResponseRedirect(request.session["distributed_redirect_url"] + "?message_type=%s&message=%s&message_id=id_khanload" % (message.keys()[0], message.values()[0]))
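The exercise import above turns Khan Academy's streak count into KA Lite's progress and completion fields with a simple rule (ten correct in a row means mastery, progress capped at 100). A sketch of that conversion:

def streak_to_progress(streak):
    # Mirrors min(100, 100 * streak / 10) above; // keeps the integer division explicit.
    return min(100, 100 * streak // 10)

for streak in (3, 10, 14):
    print("streak=%d progress=%d complete=%s" % (streak, streak_to_progress(streak), streak >= 10))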
Example #28
def create_all_mappings(force=False,
                        frequency_to_save=100,
                        response_to_check=None,
                        date_to_check=None):
    """Write or update JSON file that maps from YouTube ID to Amara code and languages available"""
    videos = get_node_cache('Video')

    # Initialize the data
    out_file = settings.SUBTITLES_DATA_ROOT + SRTS_JSON_FILENAME

    if not os.path.exists(out_file):
        srts_dict = {}
    else:
        # Open the file, read, and clean out old videos.
        try:
            with open(out_file, "r") as fp:
                srts_dict = json.load(fp)
        except Exception as e:
            logging.error(
                "JSON file corrupted, using empty json and starting from scratch (%s)"
                % e)
            srts_dict = {}
        else:
            logging.info("Loaded %d mappings." % (len(srts_dict)))

        # Set of videos no longer used by KA Lite
        removed_videos = set(srts_dict.keys()) - set(
            [v["youtube_id"] for v in videos.values()])
        if removed_videos:
            logging.info(
                "Removing subtitle information for %d videos (no longer used)."
                % len(removed_videos))
            for vid in removed_videos:
                del srts_dict[vid]
    logging.info("Querying %d mappings." %
                 (len(videos) -
                  (0 if (force or date_to_check) else len(srts_dict))))

    # Once we have the current mapping, proceed through logic to update the mapping
    n_new_entries = 0
    n_failures = 0
    for video, data in videos.iteritems():
        # Decide whether or not to update this video based on the arguments provided at the command line
        youtube_id = data['youtube_id']
        cached = youtube_id in srts_dict
        if not force and cached:
            # First, check against date
            flag_for_refresh = True  # not (response_code or last_attempt)
            last_attempt = srts_dict[youtube_id].get("last_attempt")
            last_attempt = None if not last_attempt else datetime.datetime.strptime(
                last_attempt, '%Y-%m-%d')
            flag_for_refresh = flag_for_refresh and (
                not date_to_check or date_to_check > last_attempt)
            if not flag_for_refresh:
                logging.debug("Skipping %s for date-check" % youtube_id)
                continue
            # Second, check against response code
            response_code = srts_dict[youtube_id].get("api_response")
            flag_for_refresh = flag_for_refresh and (
                not response_to_check or response_to_check == "all"
                or response_to_check == response_code)
            if not (flag_for_refresh):
                logging.debug("Skipping %s for response-code" % youtube_id)
                continue
            if not response_to_check and not date_to_check and cached:  # no flags specified and already cached - skip
                logging.debug(
                    "Skipping %s for already-cached and no flags specified" %
                    youtube_id)
                continue
        else:
            if force and not cached:
                logging.debug(
                    "Updating %s because force flag (-f) given and video not cached."
                    % youtube_id)
            elif force and cached:
                logging.debug(
                    "Updating %s because force flag (-f) given. Video was previously cached."
                    % youtube_id)
            else:
                logging.debug("Updating %s because video not yet cached." %
                              youtube_id)

        # If it makes it to here without hitting a continue, then update the entry
        try:
            srts_dict[youtube_id] = update_video_entry(youtube_id,
                                                       entry=srts_dict.get(
                                                           youtube_id, {}))
        except Exception as e:
            logging.warn("Error updating video %s: %s" % (youtube_id, e))
            n_failures += 1
            continue

        if n_new_entries % frequency_to_save == 0:
            logging.info("On loop %d dumping dictionary into %s" %
                         (n_new_entries, out_file))
            with open(out_file, 'wb') as fp:
                json.dump(srts_dict, fp)
        n_new_entries += 1

    # Finished the loop: save and report
    with open(out_file, 'wb') as fp:
        json.dump(srts_dict, fp)
    if n_failures == 0:
        logging.info("Great success! Stored %d fresh entries, %d total." %
                     (n_new_entries, len(srts_dict)))
    else:
        logging.warn("Stored %s fresh entries, but with %s failures." %
                     (n_new_entries, n_failures))
Example #29
 def validate_mapping(srt_file, srt_issues):
     youtube_id = os.path.basename(srt_file)[:-4]
     if youtube_id not in get_node_cache("Video"):
         srt_issues.append("youtube ID unknown: %s" % youtube_id)