def __init__(self, xmlstore, course_id, course_dir, policy, error_tracker, parent_tracker, load_error_modules=True, **kwargs): """ A class that handles loading from xml. Does some munging to ensure that all elements have unique slugs. xmlstore: the XMLModuleStore to store the loaded modules in """ self.unnamed = defaultdict( int) # category -> num of new url_names for that category self.used_names = defaultdict(set) # category -> set of used url_names self.org, self.course, self.url_name = course_id.split('/') # cdodge: adding the course_id as passed in for later reference rather than having to recomine the org/course/url_name self.course_id = course_id self.load_error_modules = load_error_modules def process_xml(xml): """Takes an xml string, and returns a XModuleDescriptor created from that xml. """ def make_name_unique(xml_data): """ Make sure that the url_name of xml_data is unique. If a previously loaded unnamed descriptor stole this element's url_name, create a new one. Removes 'slug' attribute if present, and adds or overwrites the 'url_name' attribute. """ # VS[compat]. Take this out once course conversion is done (perhaps leave the uniqueness check) # tags that really need unique names--they store (or should store) state. need_uniq_names = ('problem', 'sequential', 'video', 'course', 'chapter', 'videosequence', 'poll_question', 'timelimit') attr = xml_data.attrib tag = xml_data.tag id = lambda x: x # Things to try to get a name, in order (key, cleaning function, remove key after reading?) lookups = [('url_name', id, False), ('slug', id, True), ('name', Location.clean, False), ('display_name', Location.clean, False)] url_name = None for key, clean, remove in lookups: if key in attr: url_name = clean(attr[key]) if remove: del attr[key] break def looks_like_fallback(url_name): """Does this look like something that came from fallback_name()?""" return (url_name is not None and url_name.startswith(tag) and re.search('[0-9a-fA-F]{12}$', url_name)) def fallback_name(orig_name=None): """Return the fallback name for this module. This is a function instead of a variable because we want it to be lazy.""" if looks_like_fallback(orig_name): # We're about to re-hash, in case something changed, so get rid of the tag_ and hash orig_name = orig_name[len(tag) + 1:-12] # append the hash of the content--the first 12 bytes should be plenty. orig_name = "_" + orig_name if orig_name not in ( None, "") else "" xml_bytes = xml.encode('utf8') return tag + orig_name + "_" + hashlib.sha1( xml_bytes).hexdigest()[:12] # Fallback if there was nothing we could use: if url_name is None or url_name == "": url_name = fallback_name() # Don't log a warning--we don't need this in the log. Do # put it in the error tracker--content folks need to see it. if tag in need_uniq_names: error_tracker( "PROBLEM: no name of any kind specified for {tag}. Student " "state will not be properly tracked for this module. Problem xml:" " '{xml}...'".format(tag=tag, xml=xml[:100])) else: # TODO (vshnayder): We may want to enable this once course repos are cleaned up. # (or we may want to give up on the requirement for non-state-relevant issues...) # error_tracker("WARNING: no name specified for module. xml='{0}...'".format(xml[:100])) pass # Make sure everything is unique if url_name in self.used_names[tag]: # Always complain about modules that store state. If it # doesn't store state, don't complain about things that are # hashed. if tag in need_uniq_names: msg = ( "Non-unique url_name in xml. This may break state tracking for content." " url_name={0}. Content={1}".format( url_name, xml[:100])) error_tracker("PROBLEM: " + msg) log.warning(msg) # Just set name to fallback_name--if there are multiple things with the same fallback name, # they are actually identical, so it's fragile, but not immediately broken. # TODO (vshnayder): if the tag is a pointer tag, this will # break the content because we won't have the right link. # That's also a legitimate attempt to reuse the same content # from multiple places. Once we actually allow that, we'll # need to update this to complain about non-unique names for # definitions, but allow multiple uses. url_name = fallback_name(url_name) self.used_names[tag].add(url_name) xml_data.set('url_name', url_name) try: # VS[compat] # TODO (cpennington): Remove this once all fall 2012 courses # have been imported into the cms from xml xml = clean_out_mako_templating(xml) xml_data = etree.fromstring(xml) make_name_unique(xml_data) descriptor = XModuleDescriptor.load_from_xml( etree.tostring(xml_data, encoding='unicode'), self, self.org, self.course, xmlstore.default_class) except Exception as err: if not self.load_error_modules: raise # Didn't load properly. Fall back on loading as an error # descriptor. This should never error due to formatting. msg = "Error loading from xml. " + str(err)[:200] log.warning(msg) # Normally, we don't want lots of exception traces in our logs from common # content problems. But if you're debugging the xml loading code itself, # uncomment the next line. log.exception(msg) self.error_tracker(msg) err_msg = msg + "\n" + exc_info_to_str(sys.exc_info()) descriptor = ErrorDescriptor.from_xml(xml, self, self.org, self.course, err_msg) setattr(descriptor, 'data_dir', course_dir) xmlstore.modules[course_id][descriptor.location] = descriptor if hasattr(descriptor, 'children'): for child in descriptor.get_children(): parent_tracker.add_parent(child.location, descriptor.location) return descriptor render_template = lambda: '' # TODO (vshnayder): we are somewhat architecturally confused in the loading code: # load_item should actually be get_instance, because it expects the course-specific # policy to be loaded. For now, just add the course_id here... load_item = lambda location: xmlstore.get_instance(course_id, location) resources_fs = OSFS(xmlstore.data_dir / course_dir) MakoDescriptorSystem.__init__(self, load_item, resources_fs, error_tracker, render_template, **kwargs) XMLParsingSystem.__init__(self, load_item, resources_fs, error_tracker, process_xml, policy, **kwargs)
def __init__(self, xmlstore, course_id, course_dir, policy, error_tracker, parent_tracker, load_error_modules=True, **kwargs): """ A class that handles loading from xml. Does some munging to ensure that all elements have unique slugs. xmlstore: the XMLModuleStore to store the loaded modules in """ self.unnamed = defaultdict(int) # category -> num of new url_names for that category self.used_names = defaultdict(set) # category -> set of used url_names self.org, self.course, self.url_name = course_id.split('/') # cdodge: adding the course_id as passed in for later reference rather than having to recomine the org/course/url_name self.course_id = course_id self.load_error_modules = load_error_modules def process_xml(xml): """Takes an xml string, and returns a XModuleDescriptor created from that xml. """ def make_name_unique(xml_data): """ Make sure that the url_name of xml_data is unique. If a previously loaded unnamed descriptor stole this element's url_name, create a new one. Removes 'slug' attribute if present, and adds or overwrites the 'url_name' attribute. """ # VS[compat]. Take this out once course conversion is done (perhaps leave the uniqueness check) # tags that really need unique names--they store (or should store) state. need_uniq_names = ('problem', 'sequential', 'video', 'course', 'chapter', 'videosequence', 'poll_question', 'timelimit') attr = xml_data.attrib tag = xml_data.tag id = lambda x: x # Things to try to get a name, in order (key, cleaning function, remove key after reading?) lookups = [('url_name', id, False), ('slug', id, True), ('name', Location.clean, False), ('display_name', Location.clean, False)] url_name = None for key, clean, remove in lookups: if key in attr: url_name = clean(attr[key]) if remove: del attr[key] break def looks_like_fallback(url_name): """Does this look like something that came from fallback_name()?""" return (url_name is not None and url_name.startswith(tag) and re.search('[0-9a-fA-F]{12}$', url_name)) def fallback_name(orig_name=None): """Return the fallback name for this module. This is a function instead of a variable because we want it to be lazy.""" if looks_like_fallback(orig_name): # We're about to re-hash, in case something changed, so get rid of the tag_ and hash orig_name = orig_name[len(tag) + 1:-12] # append the hash of the content--the first 12 bytes should be plenty. orig_name = "_" + orig_name if orig_name not in (None, "") else "" xml_bytes = xml.encode('utf8') return tag + orig_name + "_" + hashlib.sha1(xml_bytes).hexdigest()[:12] # Fallback if there was nothing we could use: if url_name is None or url_name == "": url_name = fallback_name() # Don't log a warning--we don't need this in the log. Do # put it in the error tracker--content folks need to see it. if tag in need_uniq_names: error_tracker("PROBLEM: no name of any kind specified for {tag}. Student " "state will not be properly tracked for this module. Problem xml:" " '{xml}...'".format(tag=tag, xml=xml[:100])) else: # TODO (vshnayder): We may want to enable this once course repos are cleaned up. # (or we may want to give up on the requirement for non-state-relevant issues...) # error_tracker("WARNING: no name specified for module. xml='{0}...'".format(xml[:100])) pass # Make sure everything is unique if url_name in self.used_names[tag]: # Always complain about modules that store state. If it # doesn't store state, don't complain about things that are # hashed. if tag in need_uniq_names: msg = ("Non-unique url_name in xml. This may break state tracking for content." " url_name={0}. Content={1}".format(url_name, xml[:100])) error_tracker("PROBLEM: " + msg) log.warning(msg) # Just set name to fallback_name--if there are multiple things with the same fallback name, # they are actually identical, so it's fragile, but not immediately broken. # TODO (vshnayder): if the tag is a pointer tag, this will # break the content because we won't have the right link. # That's also a legitimate attempt to reuse the same content # from multiple places. Once we actually allow that, we'll # need to update this to complain about non-unique names for # definitions, but allow multiple uses. url_name = fallback_name(url_name) self.used_names[tag].add(url_name) xml_data.set('url_name', url_name) try: # VS[compat] # TODO (cpennington): Remove this once all fall 2012 courses # have been imported into the cms from xml xml = clean_out_mako_templating(xml) xml_data = etree.fromstring(xml) make_name_unique(xml_data) descriptor = XModuleDescriptor.load_from_xml( etree.tostring(xml_data, encoding='unicode'), self, self.org, self.course, xmlstore.default_class) except Exception as err: if not self.load_error_modules: raise # Didn't load properly. Fall back on loading as an error # descriptor. This should never error due to formatting. msg = "Error loading from xml. " + str(err)[:200] log.warning(msg) # Normally, we don't want lots of exception traces in our logs from common # content problems. But if you're debugging the xml loading code itself, # uncomment the next line. log.exception(msg) self.error_tracker(msg) err_msg = msg + "\n" + exc_info_to_str(sys.exc_info()) descriptor = ErrorDescriptor.from_xml( xml, self, self.org, self.course, err_msg ) setattr(descriptor, 'data_dir', course_dir) xmlstore.modules[course_id][descriptor.location] = descriptor if hasattr(descriptor, 'children'): for child in descriptor.get_children(): parent_tracker.add_parent(child.location, descriptor.location) return descriptor render_template = lambda: '' # TODO (vshnayder): we are somewhat architecturally confused in the loading code: # load_item should actually be get_instance, because it expects the course-specific # policy to be loaded. For now, just add the course_id here... load_item = lambda location: xmlstore.get_instance(course_id, location) resources_fs = OSFS(xmlstore.data_dir / course_dir) MakoDescriptorSystem.__init__(self, load_item, resources_fs, error_tracker, render_template, **kwargs) XMLParsingSystem.__init__(self, load_item, resources_fs, error_tracker, process_xml, policy, **kwargs)