Пример #1
0
    def __init__(self,
                 xmlstore,
                 course_id,
                 course_dir,
                 policy,
                 error_tracker,
                 parent_tracker,
                 load_error_modules=True,
                 **kwargs):
        """
        A class that handles loading from xml.  Does some munging to ensure that
        all elements have unique slugs.

        xmlstore: the XMLModuleStore to store the loaded modules in
        """
        self.unnamed = defaultdict(
            int)  # category -> num of new url_names for that category
        self.used_names = defaultdict(set)  # category -> set of used url_names
        self.org, self.course, self.url_name = course_id.split('/')
        # cdodge: adding the course_id as passed in for later reference rather than having to recomine the org/course/url_name
        self.course_id = course_id
        self.load_error_modules = load_error_modules

        def process_xml(xml):
            """Takes an xml string, and returns a XModuleDescriptor created from
            that xml.
            """
            def make_name_unique(xml_data):
                """
                Make sure that the url_name of xml_data is unique.  If a previously loaded
                unnamed descriptor stole this element's url_name, create a new one.

                Removes 'slug' attribute if present, and adds or overwrites the 'url_name' attribute.
                """
                # VS[compat]. Take this out once course conversion is done (perhaps leave the uniqueness check)

                # tags that really need unique names--they store (or should store) state.
                need_uniq_names = ('problem', 'sequential', 'video', 'course',
                                   'chapter', 'videosequence', 'poll_question',
                                   'timelimit')

                attr = xml_data.attrib
                tag = xml_data.tag
                id = lambda x: x
                # Things to try to get a name, in order  (key, cleaning function, remove key after reading?)
                lookups = [('url_name', id, False), ('slug', id, True),
                           ('name', Location.clean, False),
                           ('display_name', Location.clean, False)]

                url_name = None
                for key, clean, remove in lookups:
                    if key in attr:
                        url_name = clean(attr[key])
                        if remove:
                            del attr[key]
                        break

                def looks_like_fallback(url_name):
                    """Does this look like something that came from fallback_name()?"""
                    return (url_name is not None and url_name.startswith(tag)
                            and re.search('[0-9a-fA-F]{12}$', url_name))

                def fallback_name(orig_name=None):
                    """Return the fallback name for this module.  This is a function instead of a variable
                    because we want it to be lazy."""
                    if looks_like_fallback(orig_name):
                        # We're about to re-hash, in case something changed, so get rid of the tag_ and hash
                        orig_name = orig_name[len(tag) + 1:-12]
                    # append the hash of the content--the first 12 bytes should be plenty.
                    orig_name = "_" + orig_name if orig_name not in (
                        None, "") else ""
                    xml_bytes = xml.encode('utf8')
                    return tag + orig_name + "_" + hashlib.sha1(
                        xml_bytes).hexdigest()[:12]

                # Fallback if there was nothing we could use:
                if url_name is None or url_name == "":
                    url_name = fallback_name()
                    # Don't log a warning--we don't need this in the log.  Do
                    # put it in the error tracker--content folks need to see it.

                    if tag in need_uniq_names:
                        error_tracker(
                            "PROBLEM: no name of any kind specified for {tag}.  Student "
                            "state will not be properly tracked for this module.  Problem xml:"
                            " '{xml}...'".format(tag=tag, xml=xml[:100]))
                    else:
                        # TODO (vshnayder): We may want to enable this once course repos are cleaned up.
                        # (or we may want to give up on the requirement for non-state-relevant issues...)
                        # error_tracker("WARNING: no name specified for module. xml='{0}...'".format(xml[:100]))
                        pass

                # Make sure everything is unique
                if url_name in self.used_names[tag]:
                    # Always complain about modules that store state.  If it
                    # doesn't store state, don't complain about things that are
                    # hashed.
                    if tag in need_uniq_names:
                        msg = (
                            "Non-unique url_name in xml.  This may break state tracking for content."
                            "  url_name={0}.  Content={1}".format(
                                url_name, xml[:100]))
                        error_tracker("PROBLEM: " + msg)
                        log.warning(msg)
                        # Just set name to fallback_name--if there are multiple things with the same fallback name,
                        # they are actually identical, so it's fragile, but not immediately broken.

                        # TODO (vshnayder): if the tag is a pointer tag, this will
                        # break the content because we won't have the right link.
                        # That's also a legitimate attempt to reuse the same content
                        # from multiple places.  Once we actually allow that, we'll
                        # need to update this to complain about non-unique names for
                        # definitions, but allow multiple uses.
                        url_name = fallback_name(url_name)

                self.used_names[tag].add(url_name)
                xml_data.set('url_name', url_name)

            try:
                # VS[compat]
                # TODO (cpennington): Remove this once all fall 2012 courses
                # have been imported into the cms from xml
                xml = clean_out_mako_templating(xml)
                xml_data = etree.fromstring(xml)

                make_name_unique(xml_data)

                descriptor = XModuleDescriptor.load_from_xml(
                    etree.tostring(xml_data, encoding='unicode'), self,
                    self.org, self.course, xmlstore.default_class)
            except Exception as err:
                if not self.load_error_modules:
                    raise

                # Didn't load properly.  Fall back on loading as an error
                # descriptor.  This should never error due to formatting.

                msg = "Error loading from xml. " + str(err)[:200]
                log.warning(msg)
                # Normally, we don't want lots of exception traces in our logs from common
                # content problems.  But if you're debugging the xml loading code itself,
                # uncomment the next line.
                log.exception(msg)

                self.error_tracker(msg)
                err_msg = msg + "\n" + exc_info_to_str(sys.exc_info())
                descriptor = ErrorDescriptor.from_xml(xml, self, self.org,
                                                      self.course, err_msg)

            setattr(descriptor, 'data_dir', course_dir)

            xmlstore.modules[course_id][descriptor.location] = descriptor

            if hasattr(descriptor, 'children'):
                for child in descriptor.get_children():
                    parent_tracker.add_parent(child.location,
                                              descriptor.location)
            return descriptor

        render_template = lambda: ''
        # TODO (vshnayder): we are somewhat architecturally confused in the loading code:
        # load_item should actually be get_instance, because it expects the course-specific
        # policy to be loaded.  For now, just add the course_id here...
        load_item = lambda location: xmlstore.get_instance(course_id, location)
        resources_fs = OSFS(xmlstore.data_dir / course_dir)

        MakoDescriptorSystem.__init__(self, load_item, resources_fs,
                                      error_tracker, render_template, **kwargs)
        XMLParsingSystem.__init__(self, load_item, resources_fs, error_tracker,
                                  process_xml, policy, **kwargs)
Пример #2
0
    def __init__(self, xmlstore, course_id, course_dir,
                 policy, error_tracker, parent_tracker,
                 load_error_modules=True, **kwargs):
        """
        A class that handles loading from xml.  Does some munging to ensure that
        all elements have unique slugs.

        xmlstore: the XMLModuleStore to store the loaded modules in
        """
        self.unnamed = defaultdict(int)  # category -> num of new url_names for that category
        self.used_names = defaultdict(set)  # category -> set of used url_names
        self.org, self.course, self.url_name = course_id.split('/')
        # cdodge: adding the course_id as passed in for later reference rather than having to recomine the org/course/url_name
        self.course_id = course_id
        self.load_error_modules = load_error_modules

        def process_xml(xml):
            """Takes an xml string, and returns a XModuleDescriptor created from
            that xml.
            """

            def make_name_unique(xml_data):
                """
                Make sure that the url_name of xml_data is unique.  If a previously loaded
                unnamed descriptor stole this element's url_name, create a new one.

                Removes 'slug' attribute if present, and adds or overwrites the 'url_name' attribute.
                """
                # VS[compat]. Take this out once course conversion is done (perhaps leave the uniqueness check)

                # tags that really need unique names--they store (or should store) state.
                need_uniq_names = ('problem', 'sequential', 'video', 'course', 'chapter',
                                   'videosequence', 'poll_question', 'timelimit')

                attr = xml_data.attrib
                tag = xml_data.tag
                id = lambda x: x
                # Things to try to get a name, in order  (key, cleaning function, remove key after reading?)
                lookups = [('url_name', id, False),
                           ('slug', id, True),
                           ('name', Location.clean, False),
                           ('display_name', Location.clean, False)]

                url_name = None
                for key, clean, remove in lookups:
                    if key in attr:
                        url_name = clean(attr[key])
                        if remove:
                            del attr[key]
                        break

                def looks_like_fallback(url_name):
                    """Does this look like something that came from fallback_name()?"""
                    return (url_name is not None
                            and url_name.startswith(tag)
                            and re.search('[0-9a-fA-F]{12}$', url_name))

                def fallback_name(orig_name=None):
                    """Return the fallback name for this module.  This is a function instead of a variable
                    because we want it to be lazy."""
                    if looks_like_fallback(orig_name):
                        # We're about to re-hash, in case something changed, so get rid of the tag_ and hash
                        orig_name = orig_name[len(tag) + 1:-12]
                    # append the hash of the content--the first 12 bytes should be plenty.
                    orig_name = "_" + orig_name if orig_name not in (None, "") else ""
                    xml_bytes = xml.encode('utf8')
                    return tag + orig_name + "_" + hashlib.sha1(xml_bytes).hexdigest()[:12]

                # Fallback if there was nothing we could use:
                if url_name is None or url_name == "":
                    url_name = fallback_name()
                    # Don't log a warning--we don't need this in the log.  Do
                    # put it in the error tracker--content folks need to see it.

                    if tag in need_uniq_names:
                        error_tracker("PROBLEM: no name of any kind specified for {tag}.  Student "
                                      "state will not be properly tracked for this module.  Problem xml:"
                                      " '{xml}...'".format(tag=tag, xml=xml[:100]))
                    else:
                        # TODO (vshnayder): We may want to enable this once course repos are cleaned up.
                        # (or we may want to give up on the requirement for non-state-relevant issues...)
                        # error_tracker("WARNING: no name specified for module. xml='{0}...'".format(xml[:100]))
                        pass

                # Make sure everything is unique
                if url_name in self.used_names[tag]:
                    # Always complain about modules that store state.  If it
                    # doesn't store state, don't complain about things that are
                    # hashed.
                    if tag in need_uniq_names:
                        msg = ("Non-unique url_name in xml.  This may break state tracking for content."
                               "  url_name={0}.  Content={1}".format(url_name, xml[:100]))
                        error_tracker("PROBLEM: " + msg)
                        log.warning(msg)
                        # Just set name to fallback_name--if there are multiple things with the same fallback name,
                        # they are actually identical, so it's fragile, but not immediately broken.

                        # TODO (vshnayder): if the tag is a pointer tag, this will
                        # break the content because we won't have the right link.
                        # That's also a legitimate attempt to reuse the same content
                        # from multiple places.  Once we actually allow that, we'll
                        # need to update this to complain about non-unique names for
                        # definitions, but allow multiple uses.
                        url_name = fallback_name(url_name)

                self.used_names[tag].add(url_name)
                xml_data.set('url_name', url_name)

            try:
                # VS[compat]
                # TODO (cpennington): Remove this once all fall 2012 courses
                # have been imported into the cms from xml
                xml = clean_out_mako_templating(xml)
                xml_data = etree.fromstring(xml)

                make_name_unique(xml_data)

                descriptor = XModuleDescriptor.load_from_xml(
                    etree.tostring(xml_data, encoding='unicode'), self, self.org,
                    self.course, xmlstore.default_class)
            except Exception as err:
                if not self.load_error_modules:
                    raise

                # Didn't load properly.  Fall back on loading as an error
                # descriptor.  This should never error due to formatting.

                msg = "Error loading from xml. " + str(err)[:200]
                log.warning(msg)
                # Normally, we don't want lots of exception traces in our logs from common
                # content problems.  But if you're debugging the xml loading code itself,
                # uncomment the next line.
                log.exception(msg)

                self.error_tracker(msg)
                err_msg = msg + "\n" + exc_info_to_str(sys.exc_info())
                descriptor = ErrorDescriptor.from_xml(
                    xml,
                    self,
                    self.org,
                    self.course,
                    err_msg
                )

            setattr(descriptor, 'data_dir', course_dir)

            xmlstore.modules[course_id][descriptor.location] = descriptor

            if hasattr(descriptor, 'children'):
                for child in descriptor.get_children():
                    parent_tracker.add_parent(child.location, descriptor.location)
            return descriptor

        render_template = lambda: ''
        # TODO (vshnayder): we are somewhat architecturally confused in the loading code:
        # load_item should actually be get_instance, because it expects the course-specific
        # policy to be loaded.  For now, just add the course_id here...
        load_item = lambda location: xmlstore.get_instance(course_id, location)
        resources_fs = OSFS(xmlstore.data_dir / course_dir)

        MakoDescriptorSystem.__init__(self, load_item, resources_fs,
                                      error_tracker, render_template, **kwargs)
        XMLParsingSystem.__init__(self, load_item, resources_fs,
                                  error_tracker, process_xml, policy, **kwargs)