示例#1
0
def add_syncing_models(models):
    """When sync is run, these models will be sync'd.

    Appends each model to the module-level ``_syncing_models`` registry;
    models already registered are skipped with a warning so nothing is
    synced twice.
    """
    for model in models:
        if model in _syncing_models:
            # `warning` instead of the deprecated `warn` alias.
            logging.warning("We are already syncing model %s" % str(model))
        else:
            _syncing_models.append(model)
示例#2
0
    def download_kmap_icons(knowledge_map):
        """Download the icon image for every topic in the knowledge map.

        Rewrites each topic's ``icon_url`` to the local icon path, then
        fetches the icon from khanacademy.org unless a local copy already
        exists (and the closed-over ``force_icons`` flag is unset).  A
        non-200 response substitutes the default icon.
        """
        for key, value in knowledge_map["topics"].items():
            # Note: id here is retrieved from knowledge_map, so we're OK
            #   that we blew away ID in the topic tree earlier.
            if "icon_url" not in value:
                logging.warn("No icon URL for %s" % key)

            value["icon_url"] = iconfilepath + value["id"] + iconextension
            knowledge_map["topics"][key] = value

            out_path = data_path + "../" + value["icon_url"]
            if os.path.exists(out_path) and not force_icons:
                continue

            icon_khan_url = "http://www.khanacademy.org" + value["icon_url"]
            sys.stdout.write("Downloading icon %s from %s..." % (value["id"], icon_khan_url))
            sys.stdout.flush()
            try:
                icon = requests.get(icon_khan_url)
            except Exception as e:
                sys.stdout.write("\n")  # complete the "downloading" output
                sys.stderr.write("Failed to download %-80s: %s\n" % (icon_khan_url, e))
                continue
            if icon.status_code == 200:
                # Bug fix: the original used the Python-2-only `file()` builtin
                # in text mode and never closed the handle.  Write the binary
                # response body via a context manager instead.
                with open(data_path + "../" + value["icon_url"], "wb") as iconfile:
                    iconfile.write(icon.content)
            else:
                sys.stdout.write(" [NOT FOUND]")
                value["icon_url"] = iconfilepath + defaulticon + iconextension
            sys.stdout.write(" done.\n")  # complete the "downloading" output
示例#3
0
def zip_language_packs(lang_codes=None):
    """Zip up and expose all language packs.

    Args:
        lang_codes: iterable of language codes to package; defaults to every
            directory under LOCALE_ROOT.

    Writes one ``<lang>.zip`` per language under
    ``settings.LANGUAGE_PACK_ROOT/<version>/`` containing the json metadata,
    compiled .mo files, and srt subtitles.
    """
    lang_codes = lang_codes or listdir(LOCALE_ROOT)
    logging.info("Zipping up %d language pack(s)" % len(lang_codes))

    ensure_dir(settings.LANGUAGE_PACK_ROOT)
    for lang in lang_codes:
        lang_locale_path = os.path.join(LOCALE_ROOT, lang)

        if not os.path.exists(lang_locale_path):
            logging.warn("Unexpectedly skipping missing directory: %s" % lang)
            continue  # bug fix: actually skip, instead of writing an empty pack
        elif not os.path.isdir(lang_locale_path):
            logging.error("Skipping language where a file exists: %s" % lang)
            continue  # bug fix: actually skip the non-directory entry

        # Create a zipfile for this language
        zip_path = os.path.join(settings.LANGUAGE_PACK_ROOT, version.VERSION)
        ensure_dir(zip_path)
        z = zipfile.ZipFile(os.path.join(zip_path, "%s.zip" % convert_language_code_format(lang)), 'w')
        try:
            # Get every single file in the directory and zip it up
            for metadata_file in glob.glob('%s/*.json' % lang_locale_path):
                z.write(os.path.join(lang_locale_path, metadata_file), arcname=os.path.basename(metadata_file))
            for mo_file in glob.glob('%s/LC_MESSAGES/*.mo' % lang_locale_path):
                z.write(os.path.join(lang_locale_path, mo_file), arcname=os.path.join("LC_MESSAGES", os.path.basename(mo_file)))
            for srt_file in glob.glob('%s/subtitles/*.srt' % lang_locale_path):
                z.write(os.path.join(lang_locale_path, srt_file), arcname=os.path.join("subtitles", os.path.basename(srt_file)))
        finally:
            z.close()  # always release the handle, even if a write fails
    logging.info("Done.")
def move_srts(lang_code):
    """
    Srts live in the locale directory, but that's not exposed at any URL.  So instead,
    we have to move the srts out to /static/subtitles/[lang_code]/

    Moves every *.srt for the language, replacing any existing copy at the
    destination, then removes the source directory once it is empty.
    """
    # Cleanup: dropped the unused `lang_code_ietf` and `subtitles_static_dir`
    # locals the original computed and never read.
    lang_code_django = lcode_to_django_dir(lang_code)

    src_dir = os.path.join(LOCALE_ROOT, lang_code_django, "subtitles")
    dest_dir = get_srt_path(lang_code_django)
    ensure_dir(dest_dir)

    lang_subtitles = glob.glob(os.path.join(src_dir, "*.srt"))
    logging.info("Moving %d subtitles from %s to %s" %
                 (len(lang_subtitles), src_dir, dest_dir))

    for fil in lang_subtitles:
        srt_dest_path = os.path.join(dest_dir, os.path.basename(fil))
        if os.path.exists(srt_dest_path):
            # we're going to replace any srt with a newer version
            os.remove(srt_dest_path)
        shutil.move(fil, srt_dest_path)

    if not os.path.exists(src_dir):
        logging.info("No subtitles for language pack %s" % lang_code)
    elif os.listdir(src_dir):
        logging.warn(
            "%s is not empty; will not remove.  Please check that all subtitles were moved."
            % src_dir)
    else:
        logging.info("Removing empty source directory (%s)." % src_dir)
        shutil.rmtree(src_dir)
示例#5
0
    def verify_inner_zip(self, zip_file):
        """
        Extract contents of outer zip, verify the inner zip.

        Returns the path of the extracted inner zip on success; raises
        Exception if the signature does not verify against the central
        server's key.  When no central server device exists, the file is
        trusted with only a warning.
        """
        # Don't shadow the builtin `zip`, and close the archive when done
        # (the original leaked the handle).
        outer_zip = ZipFile(zip_file, "r")
        try:
            outer_zip.extractall(path=self.working_dir)
        finally:
            outer_zip.close()

        self.signature_file = os.path.join(self.working_dir,
                                           Command.signature_filename)
        self.inner_zip_file = os.path.join(self.working_dir,
                                           Command.inner_zip_filename)

        central_server = Device.get_central_server()
        # Close the signature file (the original left it dangling).
        with open(self.signature_file, "r") as f:
            lines = f.read().split("\n")
        chunk_size = int(lines.pop(0))  # first line carries the signing chunk size
        if not central_server:
            logging.warn(
                "No central server device object found; trusting zip file because you asked me to..."
            )
        elif central_server.key.verify_large_file(self.inner_zip_file,
                                                  signature=lines,
                                                  chunk_size=chunk_size):
            logging.info("Verified file!")
        else:
            raise Exception("Failed to verify inner zip file.")
        return self.inner_zip_file
示例#6
0
def add_syncing_models(models):
    """When sync is run, these models will be sync'd"""
    for candidate in models:
        if candidate not in _syncing_models:
            _syncing_models.append(candidate)
        else:
            # Duplicate registration: leave the registry alone and complain.
            logging.warn("We are already syncing model %s" % str(candidate))
示例#7
0
    def recurse_nodes_to_extract_knowledge_map(node, node_cache):
        """
        Internal function for recursing the topic tree and building the knowledge map.
        Requires rebranding of metadata done by recurse_nodes function.
        """
        assert node["kind"] == "Topic"

        if node.get("in_knowledge_map", None):
            if node["slug"] not in knowledge_map["topics"]:
                logging.debug("Not in knowledge map: %s" % node["slug"])
                node["in_knowledge_map"] = False
                # Bug fix: the original looped `for node in node_cache[...]`,
                # clobbering the current topic's `node` for the rest of this
                # call; use a distinct loop variable.
                for cached_node in node_cache["Topic"][node["slug"]]:
                    cached_node["in_knowledge_map"] = False

            knowledge_topics[node["slug"]] = topic_tools.get_all_leaves(node, leaf_type="Exercise")

            if not knowledge_topics[node["slug"]]:
                sys.stderr.write("Removing topic from topic tree: no exercises. %s" % node["slug"])
                del knowledge_topics[node["slug"]]
                del knowledge_map["topics"][node["slug"]]
                node["in_knowledge_map"] = False
                for cached_node in node_cache["Topic"][node["slug"]]:
                    cached_node["in_knowledge_map"] = False
        else:
            if node["slug"] in knowledge_map["topics"]:
                sys.stderr.write("Removing topic from topic tree; does not belong. '%s'" % node["slug"])
                logging.warn("Removing from knowledge map: %s" % node["slug"])
                del knowledge_map["topics"][node["slug"]]

        # Recurse only into Topic children; leaves are handled above.
        for child in [n for n in node.get("children", []) if n["kind"] == "Topic"]:
            recurse_nodes_to_extract_knowledge_map(child, node_cache)
示例#8
0
    def recurse_nodes_to_clean_related_videos(node):
        """
        Internal function for recursing the topic tree and marking related exercises.
        Requires rebranding of metadata done by recurse_nodes function.
        """
        def get_video_node(video_slug, search_node):
            # Depth-first search for a Video node carrying this slug.
            if search_node["kind"] == "Video" and search_node["slug"] == video_slug:
                return search_node
            if search_node["kind"] == "Topic":
                for child in search_node.get("children", []):
                    found = get_video_node(video_slug, child)
                    if found:
                        return found
            return None

        if node["kind"] == "Exercise":
            # Record indices of related videos absent from the topic tree,
            # then delete back-to-front so earlier indices stay valid.
            stale_indices = [
                idx
                for idx, video_slug in enumerate(node["related_video_readable_ids"])
                if not get_video_node(video_slug, topictree)
            ]
            for idx in reversed(stale_indices):
                logging.warn("Deleting unknown video %s" %
                             node["related_video_readable_ids"][idx])
                del node["related_video_readable_ids"][idx]
        for child in node.get("children", []):
            recurse_nodes_to_clean_related_videos(child)
示例#9
0
    def update_user_activity(cls, user, activity_type="login", update_datetime=None, language=None, suppress_save=False):
        """Helper function to update an existing user activity log entry."""

        # Do nothing if the max # of records is zero
        # (i.e. this functionality is disabled)
        if not cls.is_enabled():
            return

        if not user:
            raise ValidationError("A valid user must always be specified.")
        # Resolve the timestamp at call time; a header default would be
        # evaluated once and become static.
        update_datetime = update_datetime or datetime.now()
        activity_type = cls.get_activity_int(activity_type)

        cur_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)
        if not cur_log:
            # No unstopped starts.  Start should have been called first!
            logging.warn("%s: Had to create a user log entry on an UPDATE(%d)! @ %s" % (user.username, activity_type, update_datetime))
            cur_log = cls.begin_user_activity(user=user, activity_type=activity_type, start_datetime=update_datetime, suppress_save=True)
        elif cur_log.start_datetime > update_datetime:
            # How could you start after you updated??
            raise ValidationError("Update time must always be later than the login time.")

        logging.debug("%s: UPDATE activity (%d) @ %s" % (user.username, activity_type, update_datetime))
        cur_log.last_active_datetime = update_datetime
        cur_log.language = language or cur_log.language  # set the language to the current language, if there is one.
        if not suppress_save:
            cur_log.save()
        return cur_log
示例#10
0
def generate_zipped_srts(lang_codes_to_update, download_path=DOWNLOAD_PATH):
    """Package each language's downloaded .srt files into <lang>_subtitles.zip."""

    # Create media directory if it doesn't yet exist
    ensure_dir(settings.MEDIA_ROOT)
    zip_path = settings.MEDIA_ROOT + "subtitles/"
    ensure_dir(zip_path)
    if not lang_codes_to_update:
        lang_codes_to_update = os.listdir(download_path)

    for lang_code in lang_codes_to_update:
        srt_dir = os.path.join(download_path, lang_code, "subtitles")
        zip_file = os.path.join(zip_path, "%s_subtitles.zip" % lang_code)

        # Remove any old version (as we may not re-create)
        if os.path.exists(zip_file):
            os.remove(zip_file)

        if not os.path.exists(srt_dir):
            logging.warn("No srt directory for %s; skipping." % lang_code)
            continue

        srt_paths = glob.glob(os.path.join(srt_dir, "*.srt"))
        if not srt_paths:
            logging.warn("No srts for %s; skipping." % lang_code)
            continue

        logging.info("Zipping up a new pack for language code: %s" % lang_code)
        archive = zipfile.ZipFile(zip_file, 'w')
        for srt_path in srt_paths:
            archive.write(srt_path, arcname=os.path.basename(srt_path))
        archive.close()
示例#11
0
    def recurse_nodes_to_clean_related_videos(node):
        """
        Internal function for recursing the topic tree and marking related exercises.
        Requires rebranding of metadata done by recurse_nodes function.

        For each Exercise node, drops entries from related_video_slugs whose
        slug cannot be found anywhere in the closed-over `topictree`.
        """
        def get_video_node(video_slug, node):
            # Depth-first search; returns the matching Video node dict,
            # or None if the slug does not exist in this subtree.
            if node["kind"] == "Topic":
                for child in node.get("children", []):
                    video_node = get_video_node(video_slug, child)
                    if video_node:
                        return video_node
            elif node["kind"] == "Video" and node["slug"] == video_slug:
                return node

            return None

        if node["kind"] == "Exercise":
            # Collect indices of related videos missing from the tree.
            videos_to_delete = []
            for vi, video_slug in enumerate(node["related_video_slugs"]):
                if not get_video_node(video_slug, topictree):
                    videos_to_delete.append(vi)
            # Delete back-to-front so the remaining indices stay valid.
            for vi in reversed(videos_to_delete):
                logging.warn("Deleting unknown video %s" % node["related_video_slugs"][vi])
                del node["related_video_slugs"][vi]
        for child in node.get("children", []):
            recurse_nodes_to_clean_related_videos(child)
示例#12
0
    def begin_user_activity(cls, user, activity_type="login", start_datetime=None):
        """Helper function to create a user activity log entry."""

        # Do nothing if the max # of records is zero or None
        # (i.e. this functionality is disabled)
        if not settings.USER_LOG_MAX_RECORDS:
            return

        assert user is not None, "A valid user must always be specified."
        # Resolve the timestamp at call time (a header default would be static).
        if not start_datetime:
            start_datetime = datetime.now()
        activity_type = cls.get_activity_int(activity_type)
        open_entry = get_object_or_None(cls, user=user, end_datetime=None)

        logging.debug("%s: BEGIN activity(%d) @ %s" % (user.username, activity_type, start_datetime))

        # Seems we're logging in without logging out of the previous.
        # Best thing to do is simulate a logout at the previous last update
        # time.  Note: this can be a recursive call.
        if open_entry:
            logging.warn("%s: END activity on a begin @ %s" % (user.username, start_datetime))
            cls.end_user_activity(user=user,
                                  activity_type=activity_type,
                                  end_datetime=open_entry.last_active_datetime)

        # Create a new entry
        new_entry = cls(user=user,
                        activity_type=activity_type,
                        start_datetime=start_datetime,
                        last_active_datetime=start_datetime)
        new_entry.save()
        return new_entry
示例#13
0
    def begin_user_activity(cls, user, activity_type="login", start_datetime=None, language=None, suppress_save=False):
        """Helper function to create a user activity log entry.

        Any still-open log of the same activity type is ended first (stamped
        with its own last_active_datetime) before the new entry is created.
        Returns the new entry (unsaved when suppress_save is True), or None
        when user logging is disabled.  Raises ValidationError if no user
        is supplied.
        """

        # Do nothing if the max # of records is zero
        # (i.e. this functionality is disabled)
        if not cls.is_enabled():
            return

        if not user:
            raise ValidationError("A valid user must always be specified.")
        if not start_datetime:  # must be done outside the function header (else becomes static)
            start_datetime = datetime.now()
        activity_type = cls.get_activity_int(activity_type)

        cur_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)
        if cur_log:
            # Seems we're logging in without logging out of the previous.
            #   Best thing to do is simulate a login
            #   at the previous last update time.
            #
            # Note: this can be a recursive call
            logging.warn("%s: had to END activity on a begin(%d) @ %s" % (user.username, activity_type, start_datetime))
            # Don't mark current language when closing an old one
            cls.end_user_activity(user=user, activity_type=activity_type, end_datetime=cur_log.last_active_datetime)  # can't suppress save
            cur_log = None

        # Create a new entry
        logging.debug("%s: BEGIN activity(%d) @ %s" % (user.username, activity_type, start_datetime))
        cur_log = cls(user=user, activity_type=activity_type, start_datetime=start_datetime, last_active_datetime=start_datetime, language=language)
        if not suppress_save:
            cur_log.save()

        return cur_log
示例#14
0
    def update_user_activity(cls, user, activity_type="login", update_datetime=None, language=None, suppress_save=False):
        """Helper function to update an existing user activity log entry.

        Bug fix: the signature previously read ``language=language``, which
        references an undefined name when the method is defined (the sibling
        definition of this method uses ``language=None``).

        Returns the updated (possibly unsaved) entry, or None when user
        logging is disabled.  Raises ValidationError if no user is supplied
        or the update predates the entry's start.
        """

        # Do nothing if the max # of records is zero
        # (i.e. this functionality is disabled)
        if not cls.is_enabled():
            return

        if not user:
            raise ValidationError("A valid user must always be specified.")
        if not update_datetime:  # must be done outside the function header (else becomes static)
            update_datetime = datetime.now()
        activity_type = cls.get_activity_int(activity_type)

        cur_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)
        if cur_log:
            # How could you start after you updated??
            if cur_log.start_datetime > update_datetime:
                raise ValidationError("Update time must always be later than the login time.")
        else:
            # No unstopped starts.  Start should have been called first!
            logging.warn("%s: Had to create a user log entry on an UPDATE(%d)! @ %s" % (user.username, activity_type, update_datetime))
            cur_log = cls.begin_user_activity(user=user, activity_type=activity_type, start_datetime=update_datetime, suppress_save=True)

        logging.debug("%s: UPDATE activity (%d) @ %s" % (user.username, activity_type, update_datetime))
        cur_log.last_active_datetime = update_datetime
        cur_log.language = language or cur_log.language  # set the language to the current language, if there is one.
        if not suppress_save:
            cur_log.save()
        return cur_log
示例#15
0
    def end_user_activity(cls, user, activity_type="login", end_datetime=None, suppress_save=False):  # don't accept language--we're just closing previous activity.
        """Helper function to complete an existing user activity log entry."""

        # Do nothing if the max # of records is zero
        # (i.e. this functionality is disabled)
        if not cls.is_enabled():
            return

        if not user:
            raise ValidationError("A valid user must always be specified.")
        # Resolve the timestamp at call time (a header default would be static).
        if not end_datetime:
            end_datetime = datetime.now()
        activity_type = cls.get_activity_int(activity_type)

        open_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)

        if not open_log:
            # No unstopped starts.  Start should have been called first!
            logging.warn("%s: Had to BEGIN a user log entry, but ENDING(%d)! @ %s" % (user.username, activity_type, end_datetime))
            open_log = cls.begin_user_activity(user=user, activity_type=activity_type, start_datetime=end_datetime, suppress_save=True)
        elif open_log.start_datetime > end_datetime:
            # How could you start after you ended??
            raise ValidationError("Update time must always be later than the login time.")

        logging.debug("%s: Logging LOGOUT activity @ %s" % (user.username, end_datetime))
        open_log.end_datetime = end_datetime
        if not suppress_save:
            open_log.save()  # total-seconds will be computed here.
        return open_log
def zip_language_packs(lang_codes=None):
    """Zip up and expose all language packs

    converts all into ietf

    Args:
        lang_codes: language codes to package (any format); defaults to every
            directory under LOCALE_ROOT.  Each pack (json metadata + srt
            subtitles) is written to get_language_pack_filepath().
    """

    lang_codes = lang_codes or os.listdir(LOCALE_ROOT)
    lang_codes = [lcode_to_ietf(lc) for lc in lang_codes]
    logging.info("Zipping up %d language pack(s)" % len(lang_codes))

    for lang_code_ietf in lang_codes:
        lang_code_django = lcode_to_django_dir(lang_code_ietf)
        lang_locale_path = os.path.join(LOCALE_ROOT, lang_code_django)

        if not os.path.exists(lang_locale_path):
            logging.warn("Unexpectedly skipping missing directory: %s" % lang_code_django)
            continue  # bug fix: actually skip, instead of writing an empty pack
        elif not os.path.isdir(lang_locale_path):
            logging.error("Skipping language where a file exists where a directory was expected: %s" % lang_code_django)
            continue  # bug fix: actually skip the non-directory entry

        # Create a zipfile for this language
        zip_filepath = get_language_pack_filepath(lang_code_ietf)
        ensure_dir(os.path.dirname(zip_filepath))
        logging.info("Creating zip file in %s" % zip_filepath)
        z = zipfile.ZipFile(zip_filepath, 'w', zipfile.ZIP_DEFLATED)
        try:
            # Get every single file in the directory and zip it up
            for metadata_file in glob.glob('%s/*.json' % lang_locale_path):
                z.write(os.path.join(lang_locale_path, metadata_file), arcname=os.path.basename(metadata_file))

            srt_dirpath = get_srt_path(lang_code_django)
            for srt_file in glob.glob(os.path.join(srt_dirpath, "*.srt")):
                z.write(srt_file, arcname=os.path.join("subtitles", os.path.basename(srt_file)))
        finally:
            z.close()  # always release the handle, even if a write fails
    logging.info("Done.")
示例#17
0
def get_cache_key(path=None, url_name=None, cache=None, failure_ok=False):
    """Call into Django to retrieve a cache key for the given url, or given url name

    NOTE: ONLY RETURNS CACHE_KEY IF THE CACHE_ITEM HAS BEEN CREATED ELSEWHERE!!!

    Args:
        path / url_name: exactly one must be given.
        cache: cache backend to query; defaults to get_web_cache().
        failure_ok: suppress the warning when no key is found.
    """

    assert (path or url_name) and not (
        path
        and url_name), "Must have path or url_name parameter, but not both"

    if not cache:
        cache = get_web_cache()

    request = HttpRequest()
    request.path = path or reverse(url_name)
    request.session = {
        settings.LANGUAGE_COOKIE_NAME: translation.get_language()
    }

    # Bug fix: honor the caller-supplied cache; the original ignored the
    # normalized `cache` local and always queried get_web_cache() here.
    cache_key = django_get_cache_key(request, cache=cache)
    if not cache_key and not failure_ok:
        logging.warn(
            "The cache item does not exist, and so could not be retrieved (path=%s)."
            % request.path)

    return cache_key
示例#18
0
def move_exercises(lang_code):
    """Move downloaded localized exercise html files into the exercise dir.

    Moves each *.html one at a time (so resources already present in the
    destination are preserved), then removes the emptied source directory.
    """
    lang_pack_location = os.path.join(LOCALE_ROOT, lang_code)
    src_exercise_dir = os.path.join(lang_pack_location, "exercises")
    dest_exercise_dir = get_localized_exercise_dirpath(lang_code,
                                                      is_central_server=False)

    if not os.path.exists(src_exercise_dir):
        logging.warn("Could not find downloaded exercises; skipping: %s" %
                     src_exercise_dir)
    else:
        # Move over one at a time, to combine with any other resources that were there before.
        ensure_dir(dest_exercise_dir)
        all_exercise_files = glob.glob(os.path.join(src_exercise_dir,
                                                    "*.html"))
        logging.info("Moving %d downloaded exercises to %s" %
                     (len(all_exercise_files), dest_exercise_dir))

        for exercise_file in all_exercise_files:
            shutil.move(
                exercise_file,
                os.path.join(dest_exercise_dir,
                             os.path.basename(exercise_file)))

        logging.debug("Removing empty directory")  # bug fix: "emtpy" typo
        try:
            shutil.rmtree(src_exercise_dir)
        except Exception as e:
            # Bug fix: the message said "dubbed video directory" (copy-paste
            # from another function); this is the exercise directory.
            logging.error("Error removing exercise directory (%s): %s" %
                          (src_exercise_dir, e))
示例#19
0
def generate_zipped_srts(lang_codes_to_update, download_path):
    """Build one <lang>_subtitles.zip per language under MEDIA_ROOT/subtitles/."""

    # Create media directory if it doesn't yet exist
    ensure_dir(settings.MEDIA_ROOT)
    zip_path = settings.MEDIA_ROOT + "subtitles/"
    ensure_dir(zip_path)
    if not lang_codes_to_update:
        lang_codes_to_update = os.listdir(download_path)

    for code in lang_codes_to_update:
        srt_dir = os.path.join(download_path, code, "subtitles")
        zip_file = os.path.join(zip_path, "%s_subtitles.zip" % code)

        # Remove any old version (as we may not re-create)
        if os.path.exists(zip_file):
            os.remove(zip_file)

        if not os.path.exists(srt_dir):
            logging.warn("No srt directory for %s; skipping." % code)
            continue

        srt_paths = glob.glob(os.path.join(srt_dir, "*.srt"))
        if not srt_paths:
            logging.warn("No srts for %s; skipping." % code)
            continue

        logging.info("Zipping up a new pack for language code: %s" % code)
        archive = zipfile.ZipFile(zip_file, 'w')
        for srt_path in srt_paths:
            archive.write(srt_path, arcname=os.path.basename(srt_path))
        archive.close()
示例#20
0
    def end_user_activity(cls, user, activity_type="login", end_datetime=None):
        """Helper function to complete an existing user activity log entry.

        Stamps the user's open log entry with `end_datetime` (defaulting to
        now) and saves it.  If no entry is open, one is begun at that same
        instant so the logout is still recorded.
        """

        # Do nothing if the max # of records is zero or None
        # (i.e. this functionality is disabled)
        if not settings.USER_LOG_MAX_RECORDS:
            return

        assert user is not None, "A valid user must always be specified."
        if not end_datetime:  # must be done outside the function header (else becomes static)
            end_datetime = datetime.now()
        activity_type = cls.get_activity_int(activity_type)

        cur_user_log_entry = get_object_or_None(cls, user=user, end_datetime=None)

        # No unstopped starts.  Start should have been called first!
        if not cur_user_log_entry:
            logging.warn(
                "%s: Had to create a user log entry, but STOPPING('%d')! @ %s"
                % (user.username, activity_type, end_datetime)
            )
            cur_user_log_entry = cls.begin_user_activity(
                user=user, activity_type=activity_type, start_datetime=end_datetime
            )

        logging.debug("%s: Logging LOGOUT activity @ %s" % (user.username, end_datetime))
        cur_user_log_entry.end_datetime = end_datetime
        cur_user_log_entry.save()  # total-seconds will be computed here.
示例#21
0
    def begin_user_activity(cls, user, activity_type="login", start_datetime=None, language=None, suppress_save=False):
        """Helper function to create a user activity log entry.

        Any still-open log of the same activity type is closed first, then a
        fresh entry stamped with `language` is created (and saved, unless
        suppress_save is set).
        """

        # Do nothing if the max # of records is zero
        # (i.e. this functionality is disabled)
        if not cls.is_enabled():
            return

        if not user:
            raise ValidationError("A valid user must always be specified.")
        # Resolve the timestamp at call time (a header default would be static).
        if not start_datetime:
            start_datetime = datetime.now()
        activity_type = cls.get_activity_int(activity_type)

        open_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)
        if open_log:
            # Seems we're logging in without logging out of the previous;
            # simulate a logout at the previous last update time.
            # Note: this can be a recursive call
            logging.warn("%s: had to END activity on a begin(%d) @ %s" % (user.username, activity_type, start_datetime))
            # Don't mark current language when closing an old one
            cls.end_user_activity(user=user, activity_type=activity_type, end_datetime=open_log.last_active_datetime)  # can't suppress save

        # Create a new entry
        logging.debug("%s: BEGIN activity(%d) @ %s" % (user.username, activity_type, start_datetime))
        fresh_log = cls(
            user=user,
            activity_type=activity_type,
            start_datetime=start_datetime,
            last_active_datetime=start_datetime,
            language=language,
        )
        if not suppress_save:
            fresh_log.save()

        return fresh_log
示例#22
0
    def end_user_activity(cls, user, activity_type="login", end_datetime=None, suppress_save=False):  # don't accept language--we're just closing previous activity.
        """Helper function to complete an existing user activity log entry.

        Stamps the latest open log of this activity type with `end_datetime`
        (defaulting to now).  If none is open, one is begun at that same
        instant so the logout is still recorded.  Returns the (possibly
        unsaved) entry, or None when user logging is disabled.
        """

        # Do nothing if the max # of records is zero
        # (i.e. this functionality is disabled)
        if not cls.is_enabled():
            return

        if not user:
            raise ValidationError("A valid user must always be specified.")
        if not end_datetime:  # must be done outside the function header (else becomes static)
            end_datetime = datetime.now()
        activity_type = cls.get_activity_int(activity_type)

        cur_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)

        if cur_log:
            # How could you start after you ended??
            if cur_log.start_datetime > end_datetime:
                raise ValidationError("Update time must always be later than the login time.")
        else:
            # No unstopped starts.  Start should have been called first!
            logging.warn("%s: Had to BEGIN a user log entry, but ENDING(%d)! @ %s" % (user.username, activity_type, end_datetime))
            cur_log = cls.begin_user_activity(user=user, activity_type=activity_type, start_datetime=end_datetime, suppress_save=True)

        logging.debug("%s: Logging LOGOUT activity @ %s" % (user.username, end_datetime))
        cur_log.end_datetime = end_datetime
        if not suppress_save:
            cur_log.save()  # total-seconds will be computed here.
        return cur_log
示例#23
0
    def end_user_activity(cls, user, activity_type="login", end_datetime=None):
        """Helper function to complete an existing user activity log entry."""

        # Do nothing if the max # of records is zero or None
        # (i.e. this functionality is disabled)
        if not settings.USER_LOG_MAX_RECORDS:
            return

        assert user is not None, "A valid user must always be specified."
        # Resolve the timestamp at call time (a header default would be static).
        if not end_datetime:
            end_datetime = datetime.now()
        activity_type = cls.get_activity_int(activity_type)

        entry = get_object_or_None(cls, user=user, end_datetime=None)

        if not entry:
            # No unstopped starts.  Start should have been called first!
            logging.warn(
                "%s: Had to create a user log entry, but STOPPING('%d')! @ %s"
                % (user.username, activity_type, end_datetime))
            entry = cls.begin_user_activity(user=user,
                                            activity_type=activity_type,
                                            start_datetime=end_datetime)

        logging.debug("%s: Logging LOGOUT activity @ %s" %
                      (user.username, end_datetime))
        entry.end_datetime = end_datetime
        entry.save()  # total-seconds will be computed here.
示例#24
0
    def download_kmap_icons(knowledge_map):
        """Download the icon image for every topic in the knowledge map.

        Rewrites each topic's icon_url to the local icon path, then fetches
        the icon from khanacademy.org unless a local copy already exists
        (and the closed-over `force_icons` flag is unset).  A non-200
        response substitutes the default icon.
        """
        for key, value in knowledge_map["topics"].items():
            # Note: id here is retrieved from knowledge_map, so we're OK
            #   that we blew away ID in the topic tree earlier.
            if "icon_url" not in value:
                logging.warn("No icon URL for %s" % key)

            value["icon_url"] = iconfilepath + value["id"] + iconextension
            knowledge_map["topics"][key] = value

            out_path = data_path + "../" + value["icon_url"]
            if os.path.exists(out_path) and not force_icons:
                continue

            icon_khan_url = "http://www.khanacademy.org" + value["icon_url"]
            sys.stdout.write("Downloading icon %s from %s..." %
                             (value["id"], icon_khan_url))
            sys.stdout.flush()
            try:
                icon = requests.get(icon_khan_url)
            except Exception as e:
                sys.stdout.write("\n")  # complete the "downloading" output
                sys.stderr.write("Failed to download %-80s: %s\n" %
                                 (icon_khan_url, e))
                continue
            if icon.status_code == 200:
                # NOTE(review): Python-2 `file()` handle, opened in text mode
                # and never explicitly closed — relies on GC to flush.
                iconfile = file(data_path + "../" + value["icon_url"], "w")
                iconfile.write(icon.content)
            else:
                sys.stdout.write(" [NOT FOUND]")
                value["icon_url"] = iconfilepath + defaulticon + iconextension
            sys.stdout.write(" done.\n")  # complete the "downloading" output
示例#25
0
def move_srts(lang_code):
    """
    Srts live in the locale directory, but that's not exposed at any URL.  So instead,
    we have to move the srts out to /static/subtitles/[lang_code]/

    Moves every *.srt for the language (replacing older copies at the
    destination), then removes the source directory if it ended up empty.
    """
    # NOTE: the original also computed lang_code_ietf and subtitles_static_dir
    # here, but neither was ever used; removed.
    lang_code_django = lcode_to_django_dir(lang_code)

    src_dir = os.path.join(LOCALE_ROOT, lang_code_django, "subtitles")
    dest_dir = get_srt_path(lang_code_django)
    ensure_dir(dest_dir)

    lang_subtitles = glob.glob(os.path.join(src_dir, "*.srt"))
    logging.info("Moving %d subtitles from %s to %s" % (len(lang_subtitles), src_dir, dest_dir))

    for fil in lang_subtitles:
        srt_dest_path = os.path.join(dest_dir, os.path.basename(fil))
        if os.path.exists(srt_dest_path):
            os.remove(srt_dest_path)  # we're going to replace any srt with a newer version
        shutil.move(fil, srt_dest_path)

    if not os.path.exists(src_dir):
        logging.info("No subtitles for language pack %s" % lang_code)
    elif os.listdir(src_dir):
        # Something non-srt (or a failed move) is still in there -- don't destroy it.
        logging.warn("%s is not empty; will not remove.  Please check that all subtitles were moved." % src_dir)
    else:
        logging.info("Removing empty source directory (%s)." % src_dir)
        shutil.rmtree(src_dir)
示例#26
0
def add_syncing_models(models):
    """When sync is run, these models will be sync'd

    Models are kept ordered so that any model appears AFTER every model it
    has a ForeignKey to; raises if that ordering is impossible.
    """

    # Models that `m` points at via ForeignKey fields.
    get_foreign_key_classes = lambda m: set([field.rel.to for field in m._meta.fields if isinstance(field, ForeignKey)])

    for model in models:
        if model in _syncing_models:
            logging.warn("We are already syncing model %s" % unicode(model))
            continue

        # When we add models to be synced, we need to make sure
        #   that models that depend on other models are synced AFTER
        #   the model it depends on has been synced.

        # Get the dependencies of the new model
        foreign_key_classes = get_foreign_key_classes(model)

        # Find all the existing models that this new model refers to.
        class_indices = [_syncing_models.index(cls) for cls in foreign_key_classes if cls in _syncing_models]

        # Insert just after the last dependency found,
        #   or at the front if no dependencies
        insert_after_idx = 1 + (max(class_indices) if class_indices else -1)

        # Before inserting, make sure that any models referencing *THIS* model
        # appear after this model.
        # BUG FIX: the slice previously stopped at insert_after_idx - 1,
        # skipping the model immediately before the insertion point.
        if [True for synmod in _syncing_models[0:insert_after_idx] if model in get_foreign_key_classes(synmod)]:
            raise Exception("Dependency loop detected in syncing models; cannot proceed.")

        # Now we're ready to insert.
        # BUG FIX: inserting at insert_after_idx + 1 skipped one extra slot,
        # contradicting "just after the last dependency / at the front".
        _syncing_models.insert(insert_after_idx, model)
示例#27
0
    def recurse_nodes_to_extract_knowledge_map(node, node_cache):
        """
        Internal function for recursing the topic tree and building the knowledge map.
        Requires rebranding of metadata done by recurse_nodes function.

        Mutates the closure variables knowledge_map / knowledge_topics, and
        clears "in_knowledge_map" on topic-tree and node-cache entries that
        don't belong in the map.
        """
        assert node["kind"] == "Topic"

        if node.get("in_knowledge_map", None):
            if node["slug"] not in knowledge_map["topics"]:
                logging.debug("Not in knowledge map: %s" % node["slug"])
                node["in_knowledge_map"] = False
                # BUG FIX: the original loop variable was also named "node",
                # clobbering the outer node for the rest of this function.
                for cached_node in node_cache["Topic"][node["slug"]]:
                    cached_node["in_knowledge_map"] = False

            knowledge_topics[node["slug"]] = topic_tools.get_all_leaves(node, leaf_type="Exercise")

            if not knowledge_topics[node["slug"]]:
                sys.stderr.write("Removing topic from topic tree: no exercises. %s" % node["slug"])
                del knowledge_topics[node["slug"]]
                del knowledge_map["topics"][node["slug"]]
                node["in_knowledge_map"] = False
                for cached_node in node_cache["Topic"][node["slug"]]:
                    cached_node["in_knowledge_map"] = False
        else:
            if node["slug"] in knowledge_map["topics"]:
                sys.stderr.write("Removing topic from topic tree; does not belong. '%s'" % node["slug"])
                logging.warn("Removing from knowledge map: %s" % node["slug"])
                del knowledge_map["topics"][node["slug"]]

        # Recurse into sub-topics only.
        for child in [n for n in node.get("children", []) if n["kind"] == "Topic"]:
            recurse_nodes_to_extract_knowledge_map(child, node_cache)
示例#28
0
    def compute_one_way(cls, zone, from_device, to_device):
        """
        Walk backwards from ``from_device`` toward ``to_device`` (or any
        trusted device) within ``zone``, building the chain of trust.

        Returns the chain as a list of dicts; each processed link holds
        "device", "zone_invitation", and "device_zone" entries.  Logs a
        warning (but still returns the chain) when the chain of trust
        cannot be verified.
        """
        assert from_device.is_trusted() or from_device.get_zone() == zone
        # Trace back from this device to the zone-trusted device.
        chain = [{"device": from_device}]
        # NOTE(review): from_device itself is never added to this set, so a
        # loop leading back to the starting device would not be caught below
        # until the chain revisits an intermediate device -- confirm intended.
        devices_in_chain = set([])

        for i in range(cls.MAX_CHAIN_LENGTH
                       ):  # max chain size: 1000 (avoids infinite loops)
            # We're going to traverse the chain backwards, until we get to
            #   the zone_owner (to_device), or a trusted device.
            cur_link = chain[-1]

            # Get a devicezone and/or zone invitation for the current device.
            cur_link["zone_invitation"] = get_object_or_None(
                ZoneInvitation,
                used_by=cur_link["device"].signed_by,
                revoked=False)
            if cur_link["zone_invitation"]:
                cur_link["zone_invitation"].verify(
                )  # make sure it's a valid invitation
            cur_link["device_zone"] = get_object_or_None(
                DeviceZone, device=cur_link["device"].signed_by, revoked=False)

            # Determine the next step.  Three terminal steps, one continuing step
            if not cur_link["zone_invitation"] and not cur_link["device_zone"]:
                # A break in the chain.  No connection between the device and the zone.
                break
            elif cur_link["device"] == to_device or cur_link[
                    "device"].is_trusted():
                logging.debug("Found end of chain!")
                break
            # Prefer the invitation's inviter; otherwise follow the devicezone.
            # NOTE(review): if only the invitation exists and its invited_by is
            # None, getattr(None, "signed_by") below raises AttributeError --
            # presumably that combination cannot occur; confirm.
            next_device = getattr(cur_link["zone_invitation"], "invited_by",
                                  None)
            next_device = next_device or getattr(cur_link["device_zone"],
                                                 "signed_by")
            if next_device in devices_in_chain:
                logging.warn("loop detected.")
                break
            else:
                # So far, we're OK--keep looking for the (valid) end of the chain
                assert next_device.is_trusted() or next_device.get_zone(
                ) == zone
                devices_in_chain.add(next_device)
                chain.append({"device": next_device})

        # Validate the chain of trust to the zone zone_owner
        # NOTE(review): if the loop exhausts MAX_CHAIN_LENGTH right after
        # appending a link, the terminal link lacks the "zone_invitation" /
        # "device_zone" keys and the lookups below raise KeyError -- relies on
        # MAX_CHAIN_LENGTH being comfortably large.
        terminal_link = chain[-1]
        terminal_device = terminal_link["device"]
        obj = terminal_link["zone_invitation"] or terminal_link["device_zone"]
        if obj and not (terminal_device.is_creator(obj)
                        or terminal_device.is_trusted()):
            logging.warn("Could not verify chain of trust.")
        return chain


# Note: device data does not get "synced" through the same sync mechanism as other
#   data--it is only exchanged through the special hand-shaking mechanism
示例#29
0
def update_all_distributed_callback(request):
    """
    Callback for a distributed server: bulk-import the video and exercise
    logs POSTed (as JSON) for a single facility user.

    Unknown videos/exercises are skipped with a warning; rows with missing
    values are logged and skipped; any other error aborts with a
    JsonResponseMessageError.  Returns a JsonResponse upload summary.
    """

    if request.method != "POST":
        raise PermissionDenied("Only POST allowed to this URL endpoint.")

    videos = json.loads(request.POST["video_logs"])
    exercises = json.loads(request.POST["exercise_logs"])
    user = FacilityUser.objects.get(id=request.POST["user_id"])
    node_cache = get_node_cache()

    # Save videos
    n_videos_uploaded = 0
    for video in videos:
        video_id = video['video_id']
        youtube_id = video['youtube_id']

        # Only save video logs for videos that we recognize.
        if video_id not in node_cache["Video"]:
            logging.warn("Skipping unknown video %s" % video_id)
            continue

        try:
            (vl, _) = VideoLog.get_or_initialize(user=user, video_id=video_id, youtube_id=youtube_id)
            for key, val in video.iteritems():
                setattr(vl, key, val)
            logging.debug("Saving video log for %s: %s" % (video_id, vl))
            vl.save()
            n_videos_uploaded += 1
        except KeyError:
            logging.error("Could not save video log for data with missing values: %s" % video)
        except Exception as e:
            error_message = "Unexpected error importing videos: %s" % e
            return JsonResponseMessageError(error_message)

    # Save exercises
    n_exercises_uploaded = 0
    for exercise in exercises:
        # Only save exercise logs for exercises that we recognize.
        if exercise['exercise_id'] not in node_cache['Exercise']:
            # BUG FIX: message previously said "unknown video".
            logging.warn("Skipping unknown exercise %s" % exercise['exercise_id'])
            continue

        try:
            (el, _) = ExerciseLog.get_or_initialize(user=user, exercise_id=exercise["exercise_id"])
            for key, val in exercise.iteritems():
                setattr(el, key, val)
            logging.debug("Saving exercise log for %s: %s" % (exercise['exercise_id'], el))
            el.save()
            n_exercises_uploaded += 1
        except KeyError:
            logging.error("Could not save exercise log for data with missing values: %s" % exercise)
        except Exception as e:
            error_message = "Unexpected error importing exercises: %s" % e
            return JsonResponseMessageError(error_message)

    return JsonResponse({"success": "Uploaded %d exercises and %d videos" % (n_exercises_uploaded, n_videos_uploaded)})
示例#30
0
    def recurse_nodes(node, path=""):
        """
        Internal function for recursing over the topic tree, marking relevant metadata,
        and removing undesired attributes and children.

        Relies on closure variables from the enclosing scope:
        attribute_whitelists, slug_key, title_key, kind_blacklist,
        slug_blacklist, related_exercise, download_khan_data, topic_tools.

        Returns the set of node "kind"s found in this subtree.
        """

        kind = node["kind"]

        # Only keep key data we can use
        # (Python 2: .keys() returns a list snapshot, so deleting while
        # iterating is safe here.)
        for key in node.keys():
            if key not in attribute_whitelists[kind]:
                del node[key]

        # Fix up data
        if slug_key[kind] not in node:
            logging.warn("Could not find expected slug key (%s) on node: %s" % (slug_key[kind], node))
            node[slug_key[kind]] = node["id"]  # put it SOMEWHERE.
        # The root node gets an empty slug so built paths don't include "root".
        node["slug"] = node[slug_key[kind]] if node[slug_key[kind]] != "root" else ""
        node["id"] = node["slug"]  # these used to be the same; now not. Easier if they stay the same (issue #233)

        node["path"] = path + topic_tools.kind_slugs[kind] + node["slug"] + "/"
        node["title"] = node[title_key[kind]]


        kinds = set([kind])

        # For each exercise, need to get related videos
        if kind == "Exercise":
            # NOTE(review): one fetch per exercise; download_khan_data
            # presumably caches to the given filename -- confirm.
            related_video_readable_ids = [vid["readable_id"] for vid in download_khan_data("http://www.khanacademy.org/api/v1/exercises/%s/videos" % node["name"], node["name"] + ".json")]
            node["related_video_readable_ids"] = related_video_readable_ids
            exercise = {
                "slug": node[slug_key[kind]],
                "title": node[title_key[kind]],
                "path": node["path"],
            }
            # Map each related video back to this exercise (last writer wins).
            for video_id in node.get("related_video_readable_ids", []):
                related_exercise[video_id] = exercise

        # Recurse through children, remove any blacklisted items
        children_to_delete = []
        for i, child in enumerate(node.get("children", [])):
            child_kind = child.get("kind", None)
            if child_kind in kind_blacklist:
                children_to_delete.append(i)
                continue
            if child[slug_key[child_kind]] in slug_blacklist:
                children_to_delete.append(i)
                continue
            kinds = kinds.union(recurse_nodes(child, node["path"]))
        # Delete back-to-front so earlier indices stay valid.
        for i in reversed(children_to_delete):
            del node["children"][i]

        # Mark on topics whether they contain Videos, Exercises, or both
        if kind == "Topic":
            node["contains"] = list(kinds)

        return kinds
示例#31
0
    def get_shell_script(self, cmd_glob, location=None):
        """Locate the single shell script matching ``cmd_glob``.

        Searches ``location`` (default: <working_dir>/kalite) for the glob
        with the platform's script extension appended.  Returns the matching
        path, or None (with a warning) when nothing matches; raises
        CommandError when the glob is ambiguous.
        """
        if not location:
            location = self.working_dir + '/kalite'
        cmd_glob += system_script_extension()

        # Find the command
        matches = glob.glob(location + "/" + cmd_glob)
        if len(matches) > 1:
            raise CommandError("Multiple commands found (%s)?  Should choose based on platform, but ... how to do in Python?  Contact us to implement this!" % cmd_glob)
        if not matches:
            logging.warn("No command found: (%s in %s)" % (cmd_glob, location))
            return None
        return matches[0]
示例#32
0
def get_file2lang_map(force=False):
    """Map from youtube_id to language code.

    Builds (and memoizes in the module-level YT2LANG_MAP) the inverse of
    the dubbed-video map; pass force=True to rebuild the cache.
    """
    global YT2LANG_MAP
    if force or YT2LANG_MAP is None:
        YT2LANG_MAP = {}
        for lang_code, video_map in get_dubbed_video_map().iteritems():
            for dubbed_youtube_id in video_map.values():
                # Sanity check, but must be failsafe, since we don't control these data
                if dubbed_youtube_id in YT2LANG_MAP:
                    if YT2LANG_MAP[dubbed_youtube_id] == lang_code:
                        logging.warn("Duplicate entry found in %s language map for dubbed video %s" % (lang_code, dubbed_youtube_id))
                    else:
                        logging.error("Conflicting entry found in language map for video %s; overwriting previous entry of %s to %s." % (dubbed_youtube_id, YT2LANG_MAP[dubbed_youtube_id], lang_code))
                YT2LANG_MAP[dubbed_youtube_id] = lang_code
    return YT2LANG_MAP
示例#33
0
    def clean_orphaned_polylines(knowledge_map):
        """
        We remove some topics (without leaves); need to remove polylines associated with these topics.

        A polyline is dropped when ANY of its path points no longer matches a
        topic coordinate.  Mutates and returns knowledge_map.
        """
        # Use a set for O(1) membership tests (was a list: O(n) per point).
        all_topic_points = set((km["x"], km["y"]) for km in knowledge_map["topics"].values())

        polylines_to_delete = []
        for li, polyline in enumerate(knowledge_map["polylines"]):
            # IDIOM FIX: the original built a list of constant "x" strings
            # inside any(); test the actual per-point condition instead.
            if any((pt["x"], pt["y"]) not in all_topic_points for pt in polyline["path"]):
                polylines_to_delete.append(li)

        logging.warn("Removing %s of %s polylines in top-level knowledge map" % (len(polylines_to_delete), len(knowledge_map["polylines"])))
        # Delete back-to-front so earlier indices stay valid.
        for i in reversed(polylines_to_delete):
            del knowledge_map["polylines"][i]

        return knowledge_map
示例#34
0
def create_cache(path=None, url_name=None, cache=None, force=False):
    """Create a cache entry.

    Exactly one of ``path`` / ``url_name`` must be given; the page is fetched
    via the test Client to populate the cache.  With force=True, any existing
    entry is expired first.  Logs a warning when the entry still isn't there.
    """

    assert (path or url_name) and not (path and url_name), "Must have path or url_name parameter, but not both"

    cache = cache or get_web_cache()
    path = path or reverse(url_name)

    # Force: drop any existing entry so the fetch below repopulates it.
    if force and has_cache_key(path=path, cache=cache):
        expire_page(path=path)
        assert not has_cache_key(path=path, cache=cache)

    if not has_cache_key(path=path, cache=cache):
        Client().get(path)

    if not has_cache_key(path=path, cache=cache):
        logging.warn("Did not create cache entry for %s" % path)
示例#35
0
    def recurse_nodes_to_delete_exercise(node):
        """
        Internal function for recursing the topic tree and removing new exercises.
        Requires rebranding of metadata done by recurse_nodes function.
        Returns a list of exercise slugs for the exercises that were deleted.
        """
        # Stop recursing when we hit leaves
        if node["kind"] != "Topic":
            return []

        slugs_deleted = []

        children_to_delete = []
        for ci, child in enumerate(node.get("children", [])):
            # Mark all unrecognized exercises for deletion
            if child["kind"] == "Exercise":
                if not os.path.exists(exercise_path % child["slug"]):
                    children_to_delete.append(ci)

            # Recurse over children to delete
            elif child.get("children", None):
                slugs_deleted += recurse_nodes_to_delete_exercise(child)

                if not child.get("children", None):
                    # Delete children without children (all their children were removed)
                    logging.warn("Removing now-childless topic node '%s'" %
                                 child["slug"])
                    children_to_delete.append(ci)
                elif not any([
                        ch["kind"] == "Exercise"
                        or "Exercise" in ch.get("contains", [])
                        for ch in child["children"]
                ]):
                    # If there are no longer exercises, be honest about it
                    child["contains"] = list(
                        set(child["contains"]) - set(["Exercise"]))

        # Do the actual deletion (back-to-front, so indices stay valid)
        for i in reversed(children_to_delete):
            deleted_child = node["children"][i]
            logging.warn("Deleting unknown exercise %s" %
                         deleted_child["slug"])
            # BUG FIX: the docstring promises the deleted exercise slugs, but
            # the original never recorded the direct deletions.
            if deleted_child["kind"] == "Exercise":
                slugs_deleted.append(deleted_child["slug"])
            del node["children"][i]

        return slugs_deleted
示例#36
0
    def begin_user_activity(cls,
                            user,
                            activity_type="login",
                            start_datetime=None):
        """Helper function to create a user activity log entry.

        Any dangling open entry for the user is closed first (by simulating a
        logout at its last_active_datetime), then a new entry is created and
        saved.  Returns the saved entry, or None when logging is disabled.
        """

        # Do nothing if the max # of records is zero or None
        # (i.e. this functionality is disabled)
        if not settings.USER_LOG_MAX_RECORDS:
            return

        assert user is not None, "A valid user must always be specified."
        if not start_datetime:  # must be done outside the function header (else becomes static)
            start_datetime = datetime.now()
        # Normalize the string activity type ("login", ...) to its int code.
        activity_type = cls.get_activity_int(activity_type)
        # An entry with no end_datetime is a still-open activity session.
        cur_user_log_entry = get_object_or_None(cls,
                                                user=user,
                                                end_datetime=None)

        logging.debug("%s: BEGIN activity(%d) @ %s" %
                      (user.username, activity_type, start_datetime))

        # Seems we're logging in without logging out of the previous.
        #   Best thing to do is simulate a logout
        #   at the previous last update time.
        #
        # Note: this can be a recursive call
        if cur_user_log_entry:
            logging.warn("%s: END activity on a begin @ %s" %
                         (user.username, start_datetime))
            # NOTE(review): activity_type is already an int here;
            # end_user_activity presumably re-normalizes -- confirm that
            # get_activity_int is idempotent for int inputs.
            cls.end_user_activity(
                user=user,
                activity_type=activity_type,
                end_datetime=cur_user_log_entry.last_active_datetime)

        # Create a new entry
        cur_user_log_entry = cls(user=user,
                                 activity_type=activity_type,
                                 start_datetime=start_datetime,
                                 last_active_datetime=start_datetime)

        cur_user_log_entry.save()
        return cur_user_log_entry
示例#37
0
def get_dubbed_video_map(lang_code=None, force=False):
    """
    Stores a key per language.  Value is a dictionary between video_id and (dubbed) youtube_id

    Lazily builds the module-level DUBBED_VIDEO_MAP cache from the mapping
    file, generating (central server) or downloading (distributed server)
    that file first when missing or when force=True.

    Returns the per-language dict for lang_code, or the whole map.
    """
    global DUBBED_VIDEO_MAP, DUBBED_VIDEO_MAP_RAW, DUBBED_VIDEOS_MAPPING_FILEPATH

    if DUBBED_VIDEO_MAP is None or force:
        try:
            if not os.path.exists(DUBBED_VIDEOS_MAPPING_FILEPATH) or force:
                try:
                    if settings.CENTRAL_SERVER:
                        # Never call commands that could fail from the distributed server.
                        #   Always create a central server API to abstract things (see below)
                        logging.debug("Generating dubbed video mappings.")
                        call_command("generate_dubbed_video_mappings", force=force)
                    else:
                        # Download the pre-generated mapping from the central server.
                        response = requests.get("http://%s/api/i18n/videos/dubbed_video_map" % (settings.CENTRAL_SERVER_HOST))
                        response.raise_for_status()
                        # wait until content has been confirmed before opening file.
                        with open(DUBBED_VIDEOS_MAPPING_FILEPATH, "wb") as fp:
                            # BUG FIX: write the raw bytes.  The original
                            # decoded to unicode and wrote that to a
                            # binary-mode file, triggering an implicit ascii
                            # encode that fails on non-ascii content.
                            fp.write(response.content)
                except Exception as e:
                    if not os.path.exists(DUBBED_VIDEOS_MAPPING_FILEPATH):
                        # Unrecoverable error, so raise
                        raise
                    elif DUBBED_VIDEO_MAP:
                        # No need to recover--allow the downstream dude to catch the error.
                        raise
                    else:
                        # We can recover by NOT forcing reload.
                        logging.warn("%s" % e)

            DUBBED_VIDEO_MAP_RAW = softload_json(DUBBED_VIDEOS_MAPPING_FILEPATH, raises=True)
        except Exception as e:
            logging.info("Failed to get dubbed video mappings; defaulting to empty.")
            DUBBED_VIDEO_MAP_RAW = {}  # setting this will avoid triggering reload on every call

        # Re-key the raw (language-name-keyed) map by language code.
        DUBBED_VIDEO_MAP = {}
        for lang_name, video_map in DUBBED_VIDEO_MAP_RAW.iteritems():
            logging.debug("Adding dubbed video map entry for %s (name=%s)" % (get_langcode_map(lang_name), lang_name))
            DUBBED_VIDEO_MAP[get_langcode_map(lang_name)] = video_map

    return DUBBED_VIDEO_MAP.get(lang_code, {}) if lang_code else DUBBED_VIDEO_MAP
示例#38
0
    def recurse_nodes_to_remove_childless_nodes(node):
        """
        When we remove exercises, we remove dead-end topics.
        Khan just sends us dead-end topics, too.
        Let's remove those too.

        Recursively prunes Topic children that end up with no children,
        mutating the tree in place.
        """
        doomed_indices = []
        for idx, child in enumerate(node.get("children", [])):
            if child["kind"] != "Topic":
                continue  # only Topic nodes can be dead-end containers

            # Prune the subtree first; it may become childless as a result.
            recurse_nodes_to_remove_childless_nodes(child)

            if not child.get("children"):
                doomed_indices.append(idx)
                logging.warn("Removing KA childless topic: %s" % child["slug"])

        # Delete from the end so the earlier indices stay valid.
        for idx in reversed(doomed_indices):
            del node["children"][idx]
示例#39
0
def add_syncing_models(models):
    """When sync is run, these models will be sync'd

    Models are kept ordered so that any model appears AFTER every model it
    has a ForeignKey to; raises if that ordering is impossible.
    """

    # Models that `m` points at via ForeignKey fields.
    get_foreign_key_classes = lambda m: set([
        field.rel.to for field in m._meta.fields
        if isinstance(field, ForeignKey)
    ])

    for model in models:
        if model in _syncing_models:
            logging.warn("We are already syncing model %s" % unicode(model))
            continue

        # When we add models to be synced, we need to make sure
        #   that models that depend on other models are synced AFTER
        #   the model it depends on has been synced.

        # Get the dependencies of the new model
        foreign_key_classes = get_foreign_key_classes(model)

        # Find all the existing models that this new model refers to.
        class_indices = [
            _syncing_models.index(cls) for cls in foreign_key_classes
            if cls in _syncing_models
        ]

        # Insert just after the last dependency found,
        #   or at the front if no dependencies
        insert_after_idx = 1 + (max(class_indices) if class_indices else -1)

        # Before inserting, make sure that any models referencing *THIS* model
        # appear after this model.
        # BUG FIX: the slice previously stopped at insert_after_idx - 1,
        # skipping the model immediately before the insertion point.
        if [
                True for synmod in _syncing_models[0:insert_after_idx]
                if model in get_foreign_key_classes(synmod)
        ]:
            raise Exception(
                "Dependency loop detected in syncing models; cannot proceed.")

        # Now we're ready to insert.
        # BUG FIX: inserting at insert_after_idx + 1 skipped one extra slot,
        # contradicting "just after the last dependency / at the front".
        _syncing_models.insert(insert_after_idx, model)
示例#40
0
def validate_language_map(lang_codes):
    """
    This function will tell you any blockers that you'll hit while
    running this command.

    All srt languages must exist in the language map; missing languages
    will cause errors during command running (which can be long).
    This function avoids that problem by doing the above consistency check.
    """
    if not lang_codes:
        lang_codes = get_all_prepped_lang_codes()

    missing_langs = []
    for code in lang_codes:
        try:
            # Raises when the (IETF-normalized) code is not in the map.
            get_language_name(lcode_to_ietf(code), error_on_missing=True)
        except LanguageNotFoundError:
            missing_langs.append(code)

    if missing_langs:
        logging.warn("Please add the following language codes to %s:\n\t%s" % (
            LANG_LOOKUP_FILEPATH, missing_langs,
        ))
示例#41
0
def get_new_counts(data_path, download_path, language_code):
    """Write a new dictionary of srt file counts in respective download folders.

    Counts the *.srt files for the language, then persists the count via
    write_new_json and update_language_list.
    """

    language_subtitle_count = {}
    subtitles_path = "%s%s/subtitles/" % (download_path, language_code)
    lang_name = get_language_name(language_code)

    try:
        count = len(glob.glob("%s/*.srt" % subtitles_path))
        logging.info("%4d subtitles for %-20s" % (count, lang_name))

        language_subtitle_count[lang_name] = {}
        language_subtitle_count[lang_name]["count"] = count
        language_subtitle_count[lang_name]["code"] = language_code
    except LanguageNameDoesNotExist as ldne:
        logging.warn(ldne)
    except Exception:
        # BUG FIX: was a bare "except:", which also swallowed
        # KeyboardInterrupt / SystemExit.
        logging.info("%-4s subtitles for %-20s" % ("No", lang_name))

    write_new_json(language_subtitle_count, data_path)
    update_language_list(language_subtitle_count, data_path)
示例#42
0
    def scrub_knowledge_map(knowledge_map, node_cache):
        """
        Some topics in the knowledge map, we don't keep in our topic tree / node cache.
        Eliminate them from the knowledge map here.
        """
        # .keys() gives a snapshot (py2 list), so deleting while iterating is safe.
        for slug in knowledge_map["topics"].keys():
            nodecache_node = node_cache["Topic"].get(slug)
            # BUG FIX: the original indexed node_cache["Topic"][slug] directly,
            # which raises KeyError for exactly the unrecognized topics this
            # function is meant to scrub.
            topictree_node = None
            if nodecache_node:
                topictree_node = topic_tools.get_topic_by_path(
                    nodecache_node["path"], root_node=topictree)

            if not nodecache_node or not topictree_node:
                logging.warn("Removing unrecognized knowledge_map topic '%s'" %
                             slug)
            elif not topictree_node.get("children"):
                logging.warn(
                    "Removing knowledge_map topic '%s' with no children." %
                    slug)
            elif "Exercise" not in topictree_node.get("contains"):
                logging.warn(
                    "Removing knowledge_map topic '%s' with no exercises." %
                    slug)
            else:
                continue

            del knowledge_map["topics"][slug]
            # BUG FIX: only clear flags on nodes that exist; the original
            # raised TypeError when either lookup came back None.
            if nodecache_node:
                nodecache_node["in_knowledge_map"] = False
            if topictree_node:
                topictree_node["in_knowledge_map"] = False
示例#43
0
def get_new_counts(language_code,
                   data_path=settings.SUBTITLES_DATA_ROOT,
                   locale_root=LOCALE_ROOT):
    """Write a new dictionary of srt file counts in respective download folders.

    Returns the srt count for the language, or None when counting failed.
    """
    language_subtitle_count = {}
    subtitles_path = get_srt_path(language_code)
    lang_name = get_language_name(language_code)

    try:
        count = len(glob.glob("%s/*.srt" % subtitles_path))
        logging.info("%4d subtitles for %-20s" % (count, lang_name))

        language_subtitle_count[lang_name] = {}
        language_subtitle_count[lang_name]["count"] = count
        language_subtitle_count[lang_name]["code"] = language_code
    except LanguageNameDoesNotExist as ldne:
        logging.warn(ldne)
    except Exception:
        # BUG FIX: was a bare "except:" (also swallowed KeyboardInterrupt).
        logging.info("%-4s subtitles for %-20s" % ("No", lang_name))

    write_new_json(language_subtitle_count, data_path)
    # BUG FIX: when an exception occurred above, lang_name is not a key and
    # the original raised KeyError here; report None instead.
    return language_subtitle_count.get(lang_name, {}).get("count")
示例#44
0
    def recurse_nodes(node, path=""):
        """
        Internal function for recursing over the topic tree, marking relevant metadata,
        and removing undesired attributes and children.

        Relies on closure variables from the enclosing scope:
        attribute_whitelists, slug_key, title_key, kind_blacklist,
        slug_blacklist, related_exercise, download_khan_data, topic_tools.

        Returns the set of node "kind"s found in this subtree.
        """

        kind = node["kind"]

        # Only keep key data we can use
        # (Python 2: .keys() returns a list snapshot, so deleting while
        # iterating is safe here.)
        for key in node.keys():
            if key not in attribute_whitelists[kind]:
                del node[key]

        # Fix up data
        if slug_key[kind] not in node:
            logging.warn("Could not find expected slug key (%s) on node: %s" %
                         (slug_key[kind], node))
            node[slug_key[kind]] = node["id"]  # put it SOMEWHERE.
        # The root node gets an empty slug so built paths don't include "root".
        node["slug"] = node[
            slug_key[kind]] if node[slug_key[kind]] != "root" else ""
        node["id"] = node[
            "slug"]  # these used to be the same; now not. Easier if they stay the same (issue #233)

        node["path"] = path + topic_tools.kind_slugs[kind] + node["slug"] + "/"
        node["title"] = node[title_key[kind]]

        kinds = set([kind])

        # For each exercise, need to get related videos
        #   and compute base points
        if kind == "Exercise":
            # compute base points
            # Paste points onto the exercise
            # NOTE(review): natural log of seconds_per_fast_problem; raises
            # ValueError at 0 -- assumes positive values upstream.
            node["basepoints"] = ceil(7 *
                                      log(node["seconds_per_fast_problem"]))

            # Related videos
            # NOTE(review): one fetch per exercise; download_khan_data
            # presumably caches to the given filename -- confirm.
            related_video_readable_ids = [
                vid["readable_id"] for vid in download_khan_data(
                    "http://www.khanacademy.org/api/v1/exercises/%s/videos" %
                    node["name"], node["name"] + ".json")
            ]
            node["related_video_readable_ids"] = related_video_readable_ids

            exercise = {
                "slug": node[slug_key[kind]],
                "title": node[title_key[kind]],
                "path": node["path"],
            }
            # Map each related video back to this exercise (last writer wins).
            for video_id in node.get("related_video_readable_ids", []):
                related_exercise[video_id] = exercise

        # Recurse through children, remove any blacklisted items
        children_to_delete = []
        for i, child in enumerate(node.get("children", [])):
            child_kind = child.get("kind", None)
            if child_kind in kind_blacklist:
                children_to_delete.append(i)
                continue
            if child[slug_key[child_kind]] in slug_blacklist:
                children_to_delete.append(i)
                continue
            if child_kind == "Video" and set(["mp4", "png"]) - set(
                    child.get("download_urls", {}).keys()):
                # for now, since we expect the missing videos to be filled in soon,
                #   we won't remove these nodes
                sys.stderr.write(
                    "WARNING: No download link for video: %s: authors='%s'\n" %
                    (child["youtube_id"], child["author_names"]))
                # children_to_delete.append(i)
                # continue
            kinds = kinds.union(recurse_nodes(child, node["path"]))
        # Delete back-to-front so earlier indices stay valid.
        for i in reversed(children_to_delete):
            del node["children"][i]

        # Mark on topics whether they contain Videos, Exercises, or both
        if kind == "Topic":
            node["contains"] = list(kinds)

        return kinds
示例#45
0
def update_all_distributed_callback(request):
    """Bulk-import video and exercise logs for a single facility user.

    Expects a POST request whose body contains:
      * ``user_id``       -- id of the FacilityUser the logs belong to
      * ``video_logs``    -- JSON-encoded list of video-log dicts
      * ``exercise_logs`` -- JSON-encoded list of exercise-log dicts

    Logs referencing videos/exercises unknown to the local caches
    (ID2SLUG_MAP / NODE_CACHE) are skipped with a warning; entries with
    missing keys are logged and skipped; any other error aborts the whole
    import with a 500 JsonResponse.

    Returns:
        JsonResponse summarizing how many video and exercise logs were saved.

    Raises:
        PermissionDenied: if the request method is not POST.
    """

    if request.method != "POST":
        raise PermissionDenied("Only POST allowed to this URL endpoint.")

    videos = json.loads(request.POST["video_logs"])
    exercises = json.loads(request.POST["exercise_logs"])
    user = FacilityUser.objects.get(id=request.POST["user_id"])

    # Save videos
    n_videos_uploaded = 0
    for video in videos:
        youtube_id = video["youtube_id"]

        # Only save video logs for videos that we recognize.
        if youtube_id not in ID2SLUG_MAP:
            logging.warn("Skipping unknown video %s" % youtube_id)
            continue

        try:
            (vl, _) = VideoLog.get_or_initialize(user=user,
                                                 youtube_id=youtube_id)
            # .items() (not Py2-only .iteritems()) iterates identically and
            #   keeps this code portable to Python 3.
            for key, val in video.items():
                setattr(vl, key, val)
            logging.debug("Saving video log for %s: %s" % (youtube_id, vl))
            vl.save()
            n_videos_uploaded += 1
        except KeyError:
            logging.error(
                "Could not save video log for data with missing values: %s" %
                video)
        except Exception as e:
            # Unknown failure: abort the import and report it to the caller.
            error_message = "Unexpected error importing videos: %s" % e
            return JsonResponse({"error": error_message}, status=500)

    # Save exercises
    n_exercises_uploaded = 0
    for exercise in exercises:
        exercise_id = exercise["exercise_id"]

        # Only save exercise logs for exercises that we recognize.
        if exercise_id not in NODE_CACHE['Exercise']:
            # Fixed: this message previously (incorrectly) said "video".
            logging.warn("Skipping unknown exercise %s" % exercise_id)
            continue

        try:
            (el, _) = ExerciseLog.get_or_initialize(
                user=user, exercise_id=exercise_id)
            for key, val in exercise.items():
                setattr(el, key, val)
            logging.debug("Saving exercise log for %s: %s" %
                          (exercise_id, el))
            el.save()
            n_exercises_uploaded += 1
        except KeyError:
            logging.error(
                "Could not save exercise log for data with missing values: %s"
                % exercise)
        except Exception as e:
            # Unknown failure: abort the import and report it to the caller.
            error_message = "Unexpected error importing exercises: %s" % e
            return JsonResponse({"error": error_message}, status=500)

    return JsonResponse({
        "success":
        "Uploaded %d exercises and %d videos" %
        (n_exercises_uploaded, n_videos_uploaded)
    })