def __init__(
        self, store, user_id, data_dir, source_dirs=None,
        default_class='xmodule.raw_module.RawDescriptor',
        load_error_modules=True, static_content_store=None,
        target_id=None, verbose=False, do_import_static=True,
        create_if_not_present=False, raise_on_failure=False
):
    """
    Capture the import settings and build the XML store that is read from.

    Every argument except `default_class` is kept verbatim as an attribute
    of the same name. `data_dir`, `default_class`, `source_dirs`,
    `load_error_modules` and `target_id` (as `target_course_id`) are also
    forwarded to `self.store_class`, together with the `xblock_mixins` and
    `xblock_select` attributes of `store`; the object it returns is kept as
    `self.xml_module_store`.

    Finally the pair returned by `make_error_tracker()` is unpacked into
    `self.logger` and `self.errors`.
    """
    self.store = store
    self.user_id = user_id
    self.data_dir = data_dir
    self.source_dirs = source_dirs
    self.load_error_modules = load_error_modules
    self.static_content_store = static_content_store
    self.target_id = target_id
    self.verbose = verbose
    self.do_import_static = do_import_static
    self.create_if_not_present = create_if_not_present
    self.raise_on_failure = raise_on_failure

    # Options handed to the XML store; the mixins and selector are borrowed
    # from the destination store.
    xml_store_options = dict(
        default_class=default_class,
        source_dirs=source_dirs,
        load_error_modules=load_error_modules,
        xblock_mixins=store.xblock_mixins,
        xblock_select=store.xblock_select,
        target_course_id=target_id,
    )
    self.xml_module_store = self.store_class(data_dir, **xml_store_options)

    logger, errors = make_error_tracker()
    self.logger = logger
    self.errors = errors
def try_load_course(self, course_dir, course_ids=None):
    """
    Attempt to load the course in `course_dir`, recording errors as we go.

    If course_ids is not None, then reject the course unless its id is in
    course_ids. This method does no filtering itself: `course_ids` is passed
    straight through to `self.load_course`.

    Outcomes:
      * `load_course` raises: the failure is logged and recorded, and the
        error log is stored in `self.errored_courses[course_dir]`.
      * `load_course` returns None: nothing is recorded.
      * `load_course` returns an ErrorDescriptor: the error log is stored in
        `self.errored_courses[course_dir]`.
      * otherwise: the descriptor is stored in `self.courses[course_dir]`,
        the error log in `self._course_errors` under the course id, and the
        course's usage id is made known to its parent tracker.
    """
    # The course has no location until it has loaded, so load-time errors go
    # into a fresh tracker that is filed in the right place afterwards.
    load_errors = make_error_tracker()

    try:
        loaded = self.load_course(course_dir, course_ids, load_errors.tracker)
    except Exception as exc:  # pylint: disable=broad-except
        failure = "ERROR: Failed to load course '{0}': {1}".format(course_dir.encode("utf-8"), unicode(exc))
        log.exception(failure)
        load_errors.tracker(failure)
        self.errored_courses[course_dir] = load_errors
        return

    if loaded is None:
        return

    if isinstance(loaded, ErrorDescriptor):
        # Didn't load course. Instead, save the errors elsewhere.
        self.errored_courses[course_dir] = load_errors
        return

    course_id = loaded.id
    self.courses[course_dir] = loaded
    self._course_errors[course_id] = load_errors
    self.parent_trackers[course_id].make_known(loaded.scope_ids.usage_id)
def try_load_course(self, course_dir):
    """
    Attempt to load the course in `course_dir`, recording errors as we go.

    On success the descriptor is stored in `self.courses[course_dir]`, the
    error log in `self._location_errors` under the course location, and the
    location is made known to the course's parent tracker.

    If loading raises, yields None, or yields an ErrorDescriptor, the error
    log is stored in `self.errored_courses[course_dir]` instead.
    """
    # The course has no location until it has loaded, so load-time errors go
    # into a fresh tracker that is filed in the right place afterwards.
    load_errors = make_error_tracker()

    loaded = None
    try:
        loaded = self.load_course(course_dir, load_errors.tracker)
    except Exception as e:
        failure = "ERROR: Failed to load course '{0}': {1}".format(course_dir.encode("utf-8"), unicode(e))
        log.exception(failure)
        load_errors.tracker(failure)

    if loaded is None or isinstance(loaded, ErrorDescriptor):
        # Didn't load course. Instead, save the errors elsewhere.
        self.errored_courses[course_dir] = load_errors
        return

    self.courses[course_dir] = loaded
    self._location_errors[loaded.location] = load_errors
    self.parent_trackers[loaded.id].make_known(loaded.location)
def _get_errorlog(self, location):
    """
    Return the errorlog registered for `location`, creating it on first use.

    `location` is normalised through `Location(...)` before it is used as the
    key into `self._location_errors`.
    """
    key = Location(location)
    if key in self._location_errors:
        return self._location_errors[key]

    # First request for this location: register a fresh tracker.
    self._location_errors[key] = make_error_tracker()
    return self._location_errors[key]
def import_course_draft(
        xml_module_store, store, draft_store, course_data_path,
        static_content_store, source_location_namespace,
        target_location_namespace):
    """
    Parse the draft verticals found in the 'drafts' folder, if it exists.

    NOTE: This is not a full course import, basically in our current
    application only verticals (and downwards) can be in draft.
    Therefore, we need to use slightly different call points into
    the import process_xml as we can't simply call XMLModuleStore() constructor
    (like we do for importing public content)

    Arguments read by the code below:
        xml_module_store: supplies `data_dir` and `xblock_mixins`.
        course_data_path: directory expected to contain `drafts/vertical`.
        target_location_namespace: supplies the `course_id` for ImportSystem.
    `store`, `draft_store`, `static_content_store` and
    `source_location_namespace` are accepted but not referenced here.

    Returns None. Per-file failures are logged and do not stop the walk.
    """
    draft_dir = course_data_path + "/drafts"
    if not os.path.exists(draft_dir):
        return

    # create a new 'System' object which will manage the importing
    errorlog = make_error_tracker()

    # The course_dir as passed to ImportSystem is expected to just be relative, not
    # the complete path including data_dir. ImportSystem will concatenate the two together.
    data_dir = xml_module_store.data_dir
    # Whether or not data_dir ends with a "/" differs in production vs. test.
    if not data_dir.endswith("/"):
        data_dir += "/"
    draft_course_dir = draft_dir.replace(data_dir, '', 1)

    system = ImportSystem(
        xmlstore=xml_module_store,
        course_id=target_location_namespace.course_id,
        course_dir=draft_course_dir,
        error_tracker=errorlog.tracker,
        parent_tracker=ParentTracker(),
        load_error_modules=False,
        mixins=xml_module_store.xblock_mixins,
        field_data=KvsFieldData(kvs=DictKeyValueStore()),
    )

    # Now walk the /vertical directory where each file in there will be a
    # draft copy of the Vertical. The descriptors are grouped by their desired
    # index in the parent's child list, because the order in which os.walk
    # returns them is not guaranteed.
    # NOTE(review): in the code visible here `drafts` is only collected and
    # never consumed - confirm whether the import step lives elsewhere.
    drafts = {}
    for dirname, _dirnames, filenames in os.walk(draft_dir + "/vertical"):
        for filename in filenames:
            module_path = os.path.join(dirname, filename)
            with open(module_path, 'r') as f:
                try:
                    data = f.read()
                    try:
                        xml = data.decode('utf-8')
                    except UnicodeDecodeError:
                        # On OSX local dev the OS drops binary "quarantine"
                        # files (prefixed with '._') into the unzip directory.
                        # They cannot be decoded and are harmless, so skip
                        # them quietly. There is no good cross-OS way to
                        # detect hidden or binary files, hence the filename
                        # convention.
                        if filename.startswith('._'):
                            continue
                        # Not a 'hidden file': re-raise, keeping the original
                        # traceback for the log entry below.
                        raise

                    descriptor = system.process_xml(xml)

                    # HACK: since we are doing partial imports of drafts
                    # the vertical doesn't have the 'url-name' set in the
                    # attributes (they are normally in the parent object,
                    # aka sequential), so we have to replace the location.name
                    # with the XML filename that is part of the pack
                    url_name = os.path.splitext(filename)[0]
                    descriptor.location = descriptor.location.replace(name=url_name)

                    index = int(descriptor.xml_attributes['index_in_children_list'])
                    drafts.setdefault(index, []).append(descriptor)

                except Exception as e:  # pylint: disable=broad-except
                    logging.exception('There was an error. {err}'.format(
                        err=unicode(e)
                    ))
def _import_course_draft( xml_module_store, store, user_id, course_data_path, source_course_id, target_course_id, mongo_runtime ): ''' This will import all the content inside of the 'drafts' folder, if it exists NOTE: This is not a full course import, basically in our current application only verticals (and downwards) can be in draft. Therefore, we need to use slightly different call points into the import process_xml as we can't simply call XMLModuleStore() constructor (like we do for importing public content) ''' draft_dir = course_data_path + "/drafts" if not os.path.exists(draft_dir): return # create a new 'System' object which will manage the importing errorlog = make_error_tracker() # The course_dir as passed to ImportSystem is expected to just be relative, not # the complete path including data_dir. ImportSystem will concatenate the two together. data_dir = xml_module_store.data_dir # Whether or not data_dir ends with a "/" differs in production vs. test. if not data_dir.endswith("/"): data_dir += "/" draft_course_dir = draft_dir.replace(data_dir, '', 1) system = ImportSystem( xmlstore=xml_module_store, course_id=source_course_id, course_dir=draft_course_dir, error_tracker=errorlog.tracker, parent_tracker=ParentTracker(), load_error_modules=False, mixins=xml_module_store.xblock_mixins, field_data=KvsFieldData(kvs=DictKeyValueStore()), ) def _import_module(module): # IMPORTANT: Be sure to update the module location in the NEW namespace module_location = module.location.map_into_course(target_course_id) # Update the module's location to DRAFT revision # We need to call this method (instead of updating the location directly) # to ensure that pure XBlock field data is updated correctly. 
_update_module_location(module, module_location.replace(revision=MongoRevisionKey.draft)) parent_url = get_parent_url(module) index = index_in_children_list(module) # make sure our parent has us in its list of children # this is to make sure private only modules show up # in the list of children since they would have been # filtered out from the non-draft store export. if parent_url is not None and index is not None: course_key = descriptor.location.course_key parent_location = course_key.make_usage_key_from_deprecated_string(parent_url) # IMPORTANT: Be sure to update the parent in the NEW namespace parent_location = parent_location.map_into_course(target_course_id) parent = store.get_item(parent_location, depth=0) non_draft_location = module.location.map_into_course(target_course_id) if not any(child.block_id == module.location.block_id for child in parent.children): parent.children.insert(index, non_draft_location) store.update_item(parent, user_id) _import_module_and_update_references( module, store, user_id, source_course_id, target_course_id, runtime=mongo_runtime, ) for child in module.get_children(): _import_module(child) # now walk the /vertical directory where each file in there # will be a draft copy of the Vertical # First it is necessary to order the draft items by their desired index in the child list # (order os.walk returns them in is not guaranteed). drafts = [] for dirname, _dirnames, filenames in os.walk(draft_dir): for filename in filenames: module_path = os.path.join(dirname, filename) with open(module_path, 'r') as f: try: # note, on local dev it seems like OSX will put # some extra files in the directory with "quarantine" # information. These files are binary files and will # throw exceptions when we try to parse the file # as an XML string. 
Let's make sure we're # dealing with a string before ingesting data = f.read() try: xml = data.decode('utf-8') except UnicodeDecodeError, err: # seems like on OSX localdev, the OS is making # quarantine files in the unzip directory # when importing courses so if we blindly try to # enumerate through the directory, we'll try # to process a bunch of binary quarantine files # (which are prefixed with a '._' character which # will dump a bunch of exceptions to the output, # although they are harmless. # # Reading online docs there doesn't seem to be # a good means to detect a 'hidden' file that works # well across all OS environments. So for now, I'm using # OSX's utilization of a leading '.' in the filename # to indicate a system hidden file. # # Better yet would be a way to figure out if this is # a binary file, but I haven't found a good way # to do this yet. if filename.startswith('._'): continue # Not a 'hidden file', then re-raise exception raise err # process_xml call below recursively processes all descendants. If # we call this on all verticals in a course with verticals nested below # the unit level, we try to import the same content twice, causing naming conflicts. # Therefore only process verticals at the unit level, assuming that any other # verticals must be descendants. 
if 'index_in_children_list' in xml: descriptor = system.process_xml(xml) # HACK: since we are doing partial imports of drafts # the vertical doesn't have the 'url-name' set in the # attributes (they are normally in the parent object, # aka sequential), so we have to replace the location.name # with the XML filename that is part of the pack filename, __ = os.path.splitext(filename) descriptor.location = descriptor.location.replace(name=filename) index = index_in_children_list(descriptor) parent_url = get_parent_url(descriptor, xml) draft_url = descriptor.location.to_deprecated_string() draft = draft_node_constructor( module=descriptor, url=draft_url, parent_url=parent_url, index=index ) drafts.append(draft) except Exception: # pylint: disable=broad-except logging.exception('Error while parsing course xml.')
def _import_course_draft( xml_module_store, store, user_id, course_data_path, source_course_id, target_id, mongo_runtime ): """ This method will import all the content inside of the 'drafts' folder, if content exists. NOTE: This is not a full course import! In our current application, only verticals (and blocks beneath) can be in draft. Therefore, different call points into the import process_xml are used as the XMLModuleStore() constructor cannot simply be called (as is done for importing public content). """ draft_dir = course_data_path + "/drafts" if not os.path.exists(draft_dir): return # create a new 'System' object which will manage the importing errorlog = make_error_tracker() # The course_dir as passed to ImportSystem is expected to just be relative, not # the complete path including data_dir. ImportSystem will concatenate the two together. data_dir = xml_module_store.data_dir # Whether or not data_dir ends with a "/" differs in production vs. test. if not data_dir.endswith("/"): data_dir += "/" # Remove absolute path, leaving relative <course_name>/drafts. draft_course_dir = draft_dir.replace(data_dir, '', 1) system = ImportSystem( xmlstore=xml_module_store, course_id=source_course_id, course_dir=draft_course_dir, error_tracker=errorlog.tracker, load_error_modules=False, mixins=xml_module_store.xblock_mixins, field_data=KvsFieldData(kvs=DictKeyValueStore()), target_course_id=target_id, ) def _import_module(module): # IMPORTANT: Be sure to update the module location in the NEW namespace module_location = module.location.map_into_course(target_id) # Update the module's location to DRAFT revision # We need to call this method (instead of updating the location directly) # to ensure that pure XBlock field data is updated correctly. 
_update_module_location(module, module_location.replace(revision=MongoRevisionKey.draft)) parent_url = get_parent_url(module) index = index_in_children_list(module) # make sure our parent has us in its list of children # this is to make sure private only modules show up # in the list of children since they would have been # filtered out from the non-draft store export. if parent_url is not None and index is not None: course_key = descriptor.location.course_key parent_location = course_key.make_usage_key_from_deprecated_string(parent_url) # IMPORTANT: Be sure to update the parent in the NEW namespace parent_location = parent_location.map_into_course(target_id) parent = store.get_item(parent_location, depth=0) non_draft_location = module.location.map_into_course(target_id) if not any(child.block_id == module.location.block_id for child in parent.children): parent.children.insert(index, non_draft_location) store.update_item(parent, user_id) _update_and_import_module( module, store, user_id, source_course_id, target_id, runtime=mongo_runtime, ) for child in module.get_children(): _import_module(child) # Now walk the /drafts directory. # Each file in the directory will be a draft copy of the vertical. # First it is necessary to order the draft items by their desired index in the child list, # since the order in which os.walk() returns the files is not guaranteed. drafts = [] for rootdir, __, filenames in os.walk(draft_dir): for filename in filenames: if filename.startswith('._'): # Skip any OSX quarantine files, prefixed with a '._'. continue module_path = os.path.join(rootdir, filename) with open(module_path, 'r') as f: try: xml = f.read().decode('utf-8') # The process_xml() call below recursively processes all descendants. If # we call this on all verticals in a course with verticals nested below # the unit level, we try to import the same content twice, causing naming conflicts. 
# Therefore only process verticals at the unit level, assuming that any other # verticals must be descendants. if 'index_in_children_list' in xml: descriptor = system.process_xml(xml) # HACK: since we are doing partial imports of drafts # the vertical doesn't have the 'url-name' set in the # attributes (they are normally in the parent object, # aka sequential), so we have to replace the location.name # with the XML filename that is part of the pack filename, __ = os.path.splitext(filename) descriptor.location = descriptor.location.replace(name=filename) index = index_in_children_list(descriptor) parent_url = get_parent_url(descriptor, xml) draft_url = unicode(descriptor.location) draft = draft_node_constructor( module=descriptor, url=draft_url, parent_url=parent_url, index=index ) drafts.append(draft) except Exception: # pylint: disable=broad-except logging.exception('Error while parsing course drafts xml.') # Sort drafts by `index_in_children_list` attribute. drafts.sort(key=lambda x: x.index) for draft in get_draft_subtree_roots(drafts): try: _import_module(draft.module) except Exception: # pylint: disable=broad-except logging.exception('while importing draft descriptor %s', draft.module)
def import_course_draft(xml_module_store, store, draft_store, course_data_path,
                        static_content_store, target_location_namespace):
    """
    Import all the content inside of the 'drafts' folder, if it exists.

    NOTE: This is not a full course import, basically in our current
    application only verticals (and downwards) can be in draft.
    Therefore, we need to use slightly different call points into
    the import process_xml as we can't simply call XMLModuleStore() constructor
    (like we do for importing public content)

    Arguments:
        xml_module_store: passed to ImportSystem.
        store: non-draft modulestore; used to fetch and update the parent
            sequential of each draft vertical.
        draft_store: store each draft module is imported into.
        course_data_path: directory expected to contain `drafts/vertical`.
        static_content_store: forwarded to `import_module`.
        target_location_namespace: supplies `course_id`, `org` and `course`
            of the course being imported into.

    Returns None. A failure on one file is logged and the walk continues.
    """
    draft_dir = course_data_path + "/drafts"
    if not os.path.exists(draft_dir):
        return

    # create a new 'System' object which will manage the importing
    errorlog = make_error_tracker()
    system = ImportSystem(
        xml_module_store,
        target_location_namespace.course_id,
        draft_dir,
        {},
        errorlog.tracker,
        ParentTracker(),
        None,
    )

    # Defined once, outside the file loop: it only depends on the arguments
    # of the enclosing function.
    def _import_module(module):
        """Import `module` into the draft store, then recurse into children."""
        module.location = module.location._replace(revision='draft')

        # make sure our parent has us in its list of children
        # this is to make sure private only verticals show up in the list of children since
        # they would have been filtered out from the non-draft store export
        if module.location.category == 'vertical':
            # The parent lists the NON-draft location. Use a separate
            # variable so that module.location keeps its draft revision.
            non_draft_url = module.location._replace(revision=None).url()
            sequential_url = module.xml_attributes['parent_sequential_url']
            index = int(module.xml_attributes['index_in_children_list'])

            seq_location = Location(sequential_url)

            # IMPORTANT: Be sure to update the sequential in the NEW namespace
            seq_location = seq_location._replace(
                org=target_location_namespace.org,
                course=target_location_namespace.course,
            )
            sequential = store.get_item(seq_location)

            if non_draft_url not in sequential.children:
                sequential.children.insert(index, non_draft_url)
                store.update_children(sequential.location, sequential.children)

            del module.xml_attributes['parent_sequential_url']
            del module.xml_attributes['index_in_children_list']

        import_module(module, draft_store, course_data_path, static_content_store, allow_not_found=True)
        for child in module.get_children():
            _import_module(child)

    # now walk the /vertical directory where each file in there will be a draft copy of the Vertical
    for dirname, _dirnames, filenames in os.walk(draft_dir + "/vertical"):
        for filename in filenames:
            module_path = os.path.join(dirname, filename)
            with open(module_path) as f:
                try:
                    xml = f.read().decode('utf-8')
                    descriptor = system.process_xml(xml)

                    # HACK: since we are doing partial imports of drafts
                    # the vertical doesn't have the 'url-name' set in the attributes (they are normally in the parent
                    # object, aka sequential), so we have to replace the location.name with the XML filename
                    # that is part of the pack
                    url_name = os.path.splitext(filename)[0]
                    descriptor.location = descriptor.location._replace(name=url_name)

                    _import_module(descriptor)

                except Exception as e:  # pylint: disable=broad-except
                    # Best effort: one unreadable draft must not abort the rest.
                    logging.exception('There was an error. {0}'.format(unicode(e)))
def import_course_draft(xml_module_store, store, draft_store, course_data_path, static_content_store,
                        source_location_namespace, target_location_namespace):
    """
    Import all the content inside of the 'drafts' folder, if it exists.

    NOTE: This is not a full course import, basically in our current
    application only verticals (and downwards) can be in draft.
    Therefore, we need to use slightly different call points into
    the import process_xml as we can't simply call XMLModuleStore() constructor
    (like we do for importing public content)

    Arguments:
        xml_module_store: passed to ImportSystem.
        store: non-draft modulestore; used to fetch and update the parent
            sequential of each draft vertical.
        draft_store: store each draft module is imported into.
        course_data_path: directory expected to contain `drafts/vertical`.
        static_content_store: forwarded to `import_module`.
        source_location_namespace: forwarded to `import_module`.
        target_location_namespace: supplies `course_id`, `org` and `course`
            of the course being imported into; also forwarded to
            `import_module`.

    Returns None. A failure on one file is logged and the walk continues.
    """
    draft_dir = course_data_path + "/drafts"
    if not os.path.exists(draft_dir):
        return

    # create a new 'System' object which will manage the importing
    errorlog = make_error_tracker()
    system = ImportSystem(
        xmlstore=xml_module_store,
        course_id=target_location_namespace.course_id,
        course_dir=draft_dir,
        policy={},
        error_tracker=errorlog.tracker,
        parent_tracker=ParentTracker(),
        load_error_modules=False,
    )

    # Defined once, outside the file loop: it only depends on the arguments
    # of the enclosing function.
    def _import_module(module):
        """Import `module` into the draft store, then recurse into children."""
        module.location = module.location._replace(revision='draft')

        # make sure our parent has us in its list of children
        # this is to make sure private only verticals show up in the list of children since
        # they would have been filtered out from the non-draft store export
        if module.location.category == 'vertical':
            non_draft_location = module.location._replace(revision=None)
            sequential_url = module.xml_attributes['parent_sequential_url']
            index = int(module.xml_attributes['index_in_children_list'])

            seq_location = Location(sequential_url)

            # IMPORTANT: Be sure to update the sequential in the NEW namespace
            seq_location = seq_location._replace(
                org=target_location_namespace.org,
                course=target_location_namespace.course,
            )
            sequential = store.get_item(seq_location, depth=0)

            if non_draft_location.url() not in sequential.children:
                sequential.children.insert(index, non_draft_location.url())
                store.update_children(sequential.location, sequential.children)

        import_module(module, draft_store, course_data_path, static_content_store,
                      source_location_namespace, target_location_namespace, allow_not_found=True)
        for child in module.get_children():
            _import_module(child)

    # now walk the /vertical directory where each file in there will be a draft copy of the Vertical
    for dirname, _dirnames, filenames in os.walk(draft_dir + "/vertical"):
        for filename in filenames:
            module_path = os.path.join(dirname, filename)
            with open(module_path, 'r') as f:
                try:
                    data = f.read()
                    try:
                        xml = data.decode('utf-8')
                    except UnicodeDecodeError:
                        # On OSX local dev the OS drops binary "quarantine"
                        # files (prefixed with '._') into the unzip directory.
                        # They cannot be decoded and are harmless, so skip
                        # them quietly. There is no good cross-OS way to
                        # detect hidden or binary files, hence the filename
                        # convention.
                        if filename.startswith('._'):
                            continue
                        # Not a 'hidden file': re-raise, keeping the original
                        # traceback for the log entry below.
                        raise

                    descriptor = system.process_xml(xml)

                    # HACK: since we are doing partial imports of drafts
                    # the vertical doesn't have the 'url-name' set in the attributes (they are normally in the parent
                    # object, aka sequential), so we have to replace the location.name with the XML filename
                    # that is part of the pack
                    url_name = os.path.splitext(filename)[0]
                    descriptor.location = descriptor.location._replace(name=url_name)

                    _import_module(descriptor)

                except Exception as e:  # pylint: disable=broad-except
                    # Best effort: one unreadable draft must not abort the rest.
                    logging.exception('There was an error. {0}'.format(unicode(e)))
def import_course_draft(xml_module_store, store, draft_store, course_data_path,
                        static_content_store, target_location_namespace):
    """
    Import all the content inside of the 'drafts' folder, if it exists.

    NOTE: This is not a full course import, basically in our current
    application only verticals (and downwards) can be in draft.
    Therefore, we need to use slightly different call points into
    the import process_xml as we can't simply call XMLModuleStore() constructor
    (like we do for importing public content)

    Arguments:
        xml_module_store: passed to ImportSystem.
        store: non-draft modulestore; used to fetch and update the parent
            sequential of each draft vertical.
        draft_store: store each draft module is imported into.
        course_data_path: directory expected to contain `drafts/vertical`.
        static_content_store: forwarded to `import_module`.
        target_location_namespace: supplies `course_id`, `org` and `course`
            of the course being imported into.

    Returns None. A failure on one file is logged and the walk continues.
    """
    draft_dir = course_data_path + "/drafts"
    if not os.path.exists(draft_dir):
        return

    # create a new 'System' object which will manage the importing
    errorlog = make_error_tracker()
    system = ImportSystem(
        xml_module_store,
        target_location_namespace.course_id,
        draft_dir,
        {},
        errorlog.tracker,
        ParentTracker(),
        None,
    )

    # Built a single time instead of once per draft file; nothing it closes
    # over changes between files.
    def _import_module(module):
        """Import `module` into the draft store, then recurse into children."""
        module.location = module.location._replace(revision='draft')

        # make sure our parent has us in its list of children
        # this is to make sure private only verticals show up in the list of children since
        # they would have been filtered out from the non-draft store export
        if module.location.category == 'vertical':
            # Only the entry in the parent's child list is non-draft; the
            # module itself must keep the draft revision set above.
            non_draft_url = module.location._replace(revision=None).url()
            sequential_url = module.xml_attributes['parent_sequential_url']
            index = int(module.xml_attributes['index_in_children_list'])

            seq_location = Location(sequential_url)

            # IMPORTANT: Be sure to update the sequential in the NEW namespace
            seq_location = seq_location._replace(
                org=target_location_namespace.org,
                course=target_location_namespace.course,
            )
            sequential = store.get_item(seq_location)

            if non_draft_url not in sequential.children:
                sequential.children.insert(index, non_draft_url)
                store.update_children(sequential.location, sequential.children)

            del module.xml_attributes['parent_sequential_url']
            del module.xml_attributes['index_in_children_list']

        import_module(module, draft_store, course_data_path,
                      static_content_store, allow_not_found=True)
        for child in module.get_children():
            _import_module(child)

    # now walk the /vertical directory where each file in there will be a draft copy of the Vertical
    for dirname, _dirnames, filenames in os.walk(draft_dir + "/vertical"):
        for filename in filenames:
            module_path = os.path.join(dirname, filename)
            with open(module_path) as f:
                try:
                    xml = f.read().decode('utf-8')
                    descriptor = system.process_xml(xml)

                    # HACK: since we are doing partial imports of drafts
                    # the vertical doesn't have the 'url-name' set in the attributes (they are normally in the parent
                    # object, aka sequential), so we have to replace the location.name with the XML filename
                    # that is part of the pack
                    url_name = os.path.splitext(filename)[0]
                    descriptor.location = descriptor.location._replace(name=url_name)

                    _import_module(descriptor)

                except Exception as e:  # pylint: disable=broad-except
                    # Best effort: one unreadable draft must not abort the rest.
                    logging.exception('There was an error. {0}'.format(
                        unicode(e)))