def _query_children_for_cache_children(self, course_key, items):
    """
    Fetch the stored records for ``items``, swapping in the draft record for
    any location that has one when the branch setting is draft_preferred.

    Args:
        course_key: course the items belong to; its ``run`` is used when
            rebuilding locations from the stored ``_id`` values
        items: deprecated-string usage keys of the children to fetch

    Returns:
        the values of a location -> record mapping, i.e. one record per
        location returned by the parent class's query
    """
    # round-trip 1: whatever the parent class returns for these items
    published_records = super(DraftModuleStore, self)._query_children_for_cache_children(course_key, items)
    records_by_location = {
        Location._from_deprecated_son(record["_id"], course_key.run): record
        for record in published_records
    }

    prefer_drafts = self.get_branch_setting() == ModuleStoreEnum.Branch.draft_preferred
    if prefer_drafts:
        # collect the draft ids to look up; direct-only categories are left out
        draft_ids = []
        for item in items:
            usage_key = course_key.make_usage_key_from_deprecated_string(item)
            if usage_key.category in DIRECT_ONLY_CATEGORIES:
                continue
            draft_ids.append(as_draft(usage_key).to_deprecated_son())

        if draft_ids:
            # round-trip 2: every draft record for those ids
            draft_records = list(self.collection.find({'_id': {'$in': draft_ids}}))

            # a draft takes the place of the non-draft record at the same
            # location, but only when that location was already fetched above
            for draft in draft_records:
                published_loc = as_published(
                    Location._from_deprecated_son(draft["_id"], course_key.run)
                )
                if published_loc in records_by_location:
                    records_by_location[published_loc] = draft

    # the dict was only a lookup aid; callers get the records themselves
    return records_by_location.values()
def _query_children_for_cache_children(self, course_key, items):
    """
    Fetch the stored records for ``items``, swapping in the draft record for
    any location that has one.

    Args:
        course_key: course the items belong to; its ``run`` is used when
            rebuilding locations from the stored ``_id`` values
        items: deprecated-string usage keys of the children to fetch

    Returns:
        the values of a location -> record mapping, i.e. one record per
        location returned by the parent class's query
    """
    # round-trip 1: whatever the parent class returns for these items
    published_records = super(DraftModuleStore, self)._query_children_for_cache_children(course_key, items)
    records_by_location = {
        Location._from_deprecated_son(record["_id"], course_key.run): record
        for record in published_records
    }

    # round-trip 2: the draft version of every requested item
    draft_ids = []
    for item in items:
        usage_key = course_key.make_usage_key_from_deprecated_string(item)
        draft_ids.append(as_draft(usage_key).to_deprecated_son())
    draft_records = list(self.collection.find({'_id': {'$in': draft_ids}}))

    # a draft takes the place of the non-draft record at the same location,
    # but only when that location was already fetched above
    for draft in draft_records:
        full_loc = Location._from_deprecated_son(draft["_id"], course_key.run)
        published_loc = full_loc.replace(revision=None)
        if published_loc in records_by_location:
            records_by_location[published_loc] = draft

    # the dict was only a lookup aid; callers get the records themselves
    return records_by_location.values()
def _query_children_for_cache_children(self, course_key, items):
    """
    Fetch the stored records for ``items``, swapping in the draft record for
    any location that has one when ``branch_setting_func`` reports
    draft_preferred.

    Args:
        course_key: course the items belong to; its ``run`` is used when
            rebuilding locations from the stored ``_id`` values
        items: deprecated-string usage keys of the children to fetch

    Returns:
        the values of a location -> record mapping, i.e. one record per
        location returned by the parent class's query
    """
    # round-trip 1: whatever the parent class returns for these items
    published_records = super(DraftModuleStore, self)._query_children_for_cache_children(course_key, items)
    records_by_location = {
        Location._from_deprecated_son(record["_id"], course_key.run): record
        for record in published_records
    }

    prefer_drafts = self.branch_setting_func() == ModuleStoreEnum.Branch.draft_preferred
    if prefer_drafts:
        # collect the draft ids to look up; direct-only categories are left out
        draft_ids = []
        for item in items:
            usage_key = course_key.make_usage_key_from_deprecated_string(item)
            if usage_key.category in DIRECT_ONLY_CATEGORIES:
                continue
            draft_ids.append(as_draft(usage_key).to_deprecated_son())

        if draft_ids:
            # round-trip 2: every draft record for those ids
            draft_records = list(self.collection.find({'_id': {'$in': draft_ids}}))

            # a draft takes the place of the non-draft record at the same
            # location, but only when that location was already fetched above
            for draft in draft_records:
                published_loc = as_published(
                    Location._from_deprecated_son(draft["_id"], course_key.run)
                )
                if published_loc in records_by_location:
                    records_by_location[published_loc] = draft

    # the dict was only a lookup aid; callers get the records themselves
    return records_by_location.values()
def get_parent_location(self, location, revision=ModuleStoreEnum.RevisionOption.published_only, **kwargs):
    '''
    Find the location that is the parent of this location in this course.

    Returns: version agnostic location (revision always None) as per the rest of mongo,
        or None when no parent matches.

    Args:
        location: the child's location; its revision must be None
        revision:
            ModuleStoreEnum.RevisionOption.published_only
                - return only the PUBLISHED parent if it exists, else returns None
            ModuleStoreEnum.RevisionOption.draft_preferred
                - return either the DRAFT or PUBLISHED parent, preferring DRAFT, if parent(s) exists,
                    else returns None
        **kwargs: accepted and ignored by this implementation

    Raises:
        AssertionError: if ``location`` carries a revision or ``revision`` is not
            one of the two supported options
        ReferentialIntegrityError: if more than one PUBLISHED parent claims ``location``
    '''
    assert location.revision is None
    assert revision == ModuleStoreEnum.RevisionOption.published_only \
        or revision == ModuleStoreEnum.RevisionOption.draft_preferred

    published_only = revision == ModuleStoreEnum.RevisionOption.published_only

    # create a query with tag, org, course, and the children field set to the given location
    query = self._course_key_to_son(location.course_key)
    query['definition.children'] = location.to_deprecated_string()

    # if only looking for the PUBLISHED parent, restrict the query to the published revision
    if published_only:
        query['_id.revision'] = MongoRevisionKey.published

    # query the collection, sorting by DRAFT first
    parents = self.collection.find(query, {'_id': True}, sort=[SORT_REVISION_FAVOR_DRAFT])

    # Count once and reuse the number: the result set is presumably a pymongo
    # cursor, where every count() call is its own query against the server.
    parent_count = parents.count()
    if parent_count == 0:
        # no parents were found
        return None

    if published_only:
        if parent_count > 1:
            # should never have multiple PUBLISHED parents
            raise ReferentialIntegrityError(
                u"{} parents claim {}".format(parent_count, location)
            )
        # return the single PUBLISHED parent
        return Location._from_deprecated_son(parents[0]['_id'], location.course_key.run)

    # there could be 2 different parents if
    #   (1) the draft item was moved or
    #   (2) the parent itself has 2 versions: DRAFT and PUBLISHED
    # since we sorted by SORT_REVISION_FAVOR_DRAFT, the 0'th parent is the one we want
    found_id = parents[0]['_id']
    # don't disclose revision outside modulestore
    return as_published(Location._from_deprecated_son(found_id, location.course_key.run))
def _get_raw_parent_locations(self, location, key_revision):
    """
    Return the locations of the parents of ``location`` without clearing the
    revision on them. Intended for internal use but not restricted.

    Args:
        location (UsageKey): checked with ``_verify_revision_is_published``
            first; which parent revisions come back is decided by
            ``key_revision`` alone
        key_revision:
            MongoRevisionKey.draft - return only the draft parent
            MongoRevisionKey.published - return only the published parent
            ModuleStoreEnum.RevisionOption.all - return both draft and published parents

    Returns:
        list of parent locations, in the order the query yields them
        (sorted by SORT_REVISION_FAVOR_DRAFT)
    """
    _verify_revision_is_published(location)

    # every item in the course that lists the given location as a child
    parent_query = self._course_key_to_son(location.course_key)
    parent_query['definition.children'] = location.to_deprecated_string()
    candidates = self.collection.find(parent_query, {'_id': True}, sort=[SORT_REVISION_FAVOR_DRAFT])

    matching_parents = []
    for candidate in candidates:
        candidate_id = candidate['_id']
        keep = (
            # every revision of the parent is wanted
            key_revision == ModuleStoreEnum.RevisionOption.all or
            # direct-only parents are returned whichever revision was requested
            candidate_id['category'] in DIRECT_ONLY_CATEGORIES or
            # otherwise the parent's revision has to match the requested one
            candidate_id['revision'] == key_revision
        )
        if keep:
            matching_parents.append(
                Location._from_deprecated_son(candidate_id, location.course_key.run)
            )
    return matching_parents
def _cache_children(self, course_key, items, depth=0):
    """
    Returns a dictionary mapping Location -> item data, populated with json data
    for all descendents of items up to the specified depth.
    (0 = no descendents, 1 = children, 2 = grandchildren, etc)
    If depth is None, will load all the children.
    This will make a number of queries that is linear in the depth.

    Args:
        course_key: course the items belong to; its ``run`` is used when
            building the Location keys
        items: iterable of item records to start from
        depth: how many levels of descendents to load, or None for all of them

    Returns:
        dict mapping Location -> item record for every item visited
    """
    data = {}
    to_process = list(items)
    # The parentheses matter: `and` binds tighter than `or`, so without them
    # the test reads `(to_process and depth is None) or depth >= 0`. That
    # compares None with 0 once an unbounded walk runs out of items, and keeps
    # looping over an empty work list while an integer depth counts down.
    while to_process and (depth is None or depth >= 0):
        children = []
        for item in to_process:
            self._clean_item_data(item)
            children.extend(item.get('definition', {}).get('children', []))
            data[Location._from_deprecated_son(item['location'], course_key.run)] = item

        if depth == 0:
            break

        # Load all children by id. See
        # http://www.mongodb.org/display/DOCS/Advanced+Queries#AdvancedQueries-%24or
        # for or-query syntax
        to_process = []
        if children:
            to_process = self._query_children_for_cache_children(course_key, children)

        # If depth is None, then we just recurse until we hit all the descendents
        if depth is not None:
            depth -= 1

    return data
def _get_raw_parent_locations(self, location, key_revision):
    """
    Return the locations of the parents of ``location`` without clearing the
    revision on them. Intended for internal use but not restricted.

    Args:
        location (UsageKey): checked with ``_verify_revision_is_published``
            first; which parent revisions come back is decided by
            ``key_revision`` alone
        key_revision:
            MongoRevisionKey.draft - return only the draft parent
            MongoRevisionKey.published - return only the published parent
            ModuleStoreEnum.RevisionOption.all - return both draft and published parents

    Returns:
        list of parent locations, in the order the query yields them
        (sorted by SORT_REVISION_FAVOR_DRAFT)
    """
    _verify_revision_is_published(location)

    # every item in the course that lists the given location as a child
    parent_query = self._course_key_to_son(location.course_key)
    parent_query['definition.children'] = location.to_deprecated_string()
    candidates = self.collection.find(parent_query, {'_id': True}, sort=[SORT_REVISION_FAVOR_DRAFT])

    matching_parents = []
    for candidate in candidates:
        candidate_id = candidate['_id']
        keep = (
            # every revision of the parent is wanted
            key_revision == ModuleStoreEnum.RevisionOption.all or
            # direct-only parents are returned whichever revision was requested
            candidate_id['category'] in DIRECT_ONLY_CATEGORIES or
            # otherwise the parent's revision has to match the requested one
            candidate_id['revision'] == key_revision
        )
        if keep:
            matching_parents.append(
                Location._from_deprecated_son(candidate_id, location.course_key.run)
            )
    return matching_parents
def _load_item(self, course_key, item, data_cache, apply_cached_metadata=True):
    """
    Build an XModuleDescriptor for ``item``; its children are resolved from
    ``data_cache``.

    Args:
        course_key: course the item belongs to
        item: stored record for the item; ``item['location']`` identifies it
        data_cache: module data handed to the descriptor system
        apply_cached_metadata: when true, the course's cached metadata
            inheritance tree is passed to the descriptor system; otherwise an
            empty dict is passed

    Returns:
        whatever ``CachingDescriptorSystem.load_item`` returns for the item's location
    """
    location = Location._from_deprecated_son(item['location'], course_key.run)

    # NOTE(review): item is indexed like a mapping above, yet this is an
    # attribute lookup, not a key lookup - confirm that is intended
    data_dir = getattr(item, 'data_dir', location.course)
    root = self.fs_root / data_dir
    root.makedirs_p()  # create directory if it doesn't exist
    resource_fs = OSFS(root)

    if apply_cached_metadata:
        cached_metadata = self._get_cached_metadata_inheritance_tree(course_key)
    else:
        cached_metadata = {}

    # the i18n service is only offered when the store has one
    services = {"i18n": self.i18n_service} if self.i18n_service else {}

    system_kwargs = dict(
        modulestore=self,
        course_key=course_key,
        module_data=data_cache,
        default_class=self.default_class,
        resources_fs=resource_fs,
        error_tracker=self.error_tracker,
        render_template=self.render_template,
        cached_metadata=cached_metadata,
        mixins=self.xblock_mixins,
        select=self.xblock_select,
        services=services,
    )
    return CachingDescriptorSystem(**system_kwargs).load_item(location)
def get_courses_for_wiki(self, wiki_slug):
    """
    Find the courses whose wiki root is ``wiki_slug``.

    :param wiki_slug: the course wiki root slug
    :return: list of course locations
    """
    wiki_query = {'_id.category': 'course', 'definition.data.wiki_slug': wiki_slug}
    course_locations = []
    for course in self.collection.find(wiki_query):
        course_id = course['_id']
        # the course's run == its name. It's the only xblock for which that's necessarily true.
        course_locations.append(Location._from_deprecated_son(course_id, course_id['name']))
    return course_locations
def get_courses_for_wiki(self, wiki_slug):
    """
    Return the list of courses which use this wiki_slug

    :param wiki_slug: the course wiki root slug
    :return: list of course locations
    """
    # Only course records are queried: the run passed below is taken from the
    # record's name, which is only a valid run for a course.
    courses = self.collection.find({
        '_id.category': 'course',
        'definition.data.wiki_slug': wiki_slug,
    })
    # the course's run == its name. It's the only xblock for which that's necessarily true.
    return [
        Location._from_deprecated_son(course['_id'], course['_id']['name'])
        for course in courses
    ]
def get_orphans(self, course_key):
    """
    Return an array of all of the locations (deprecated string format) for orphans in the course.

    An orphan here is a non-course, non-detached item that no queried item
    lists among its children.

    Args:
        course_key: the course to inspect; passed through ``fill_in_run`` first

    Returns:
        list of deprecated-string locations, in no particular order
    """
    course_key = self.fill_in_run(course_key)
    detached_categories = [name for name, __ in XBlock.load_tagged_classes("detached")]
    query = self._course_key_to_son(course_key)
    query["_id.category"] = {"$nin": detached_categories}
    all_items = self.collection.find(query)

    all_reachable = set()
    item_locs = set()
    for item in all_items:
        if item["_id"]["category"] != "course":
            # It would be nice to change this method to return UsageKeys instead of the deprecated string.
            item_locs.add(
                as_published(Location._from_deprecated_son(item["_id"], course_key.run)).to_deprecated_string()
            )
        # everything an item lists as a child is reachable, whatever the item's category
        all_reachable.update(item.get("definition", {}).get("children", []))

    # whatever no item claims as a child is an orphan; hand the result back
    # instead of falling off the end of the function with None
    item_locs -= all_reachable
    return list(item_locs)
def get_orphans(self, course_key):
    """
    Return an array of all of the locations (deprecated string format) for orphans in the course.

    An orphan here is a non-course, non-detached item that no queried item
    lists among its children.
    """
    detached_categories = [name for name, __ in XBlock.load_tagged_classes("detached")]
    course_query = self._course_key_to_son(course_key)
    course_query['_id.category'] = {'$nin': detached_categories}

    claimed_children = set()
    candidate_locs = set()
    for record in self.collection.find(course_query):
        record_id = record['_id']
        if record_id['category'] != 'course':
            # It would be nice to change this method to return UsageKeys instead of the deprecated string.
            published_loc = Location._from_deprecated_son(record_id, course_key.run).replace(revision=None)
            candidate_locs.add(published_loc.to_deprecated_string())
        # everything a record lists as a child is reachable, whatever the record's category
        claimed_children.update(record.get('definition', {}).get('children', []))

    # drop every location that some record claims as a child
    candidate_locs.difference_update(claimed_children)
    return list(candidate_locs)
def _compute_metadata_inheritance_tree(self, course_id):
    '''
    Compute, for the given course, the metadata each block inherits from its ancestors.

    TODO (cdodge) This method can be deleted when the 'split module store' work has been completed

    Args:
        course_id: course key; its org, course and run select the records to walk

    Returns:
        dict mapping a child's deprecated-string location to the metadata dict
        it inherits. Empty when no record of category 'course' is found.
    '''
    # get all collections in the course, this query should not return any leaf nodes
    # note this is a bit ugly as when we add new categories of containers, we have to add it here
    block_types_with_children = set(
        name for name, class_ in XBlock.load_classes() if getattr(class_, 'has_children', False)
    )
    query = SON([
        ('_id.tag', 'i4x'),
        ('_id.org', course_id.org),
        ('_id.course', course_id.course),
        ('_id.category', {'$in': list(block_types_with_children)})
    ])
    # we just want the Location, children, and inheritable metadata
    record_filter = {'_id': 1, 'definition.children': 1}

    # just get the inheritable metadata since that is all we need for the computation
    # this minimizes the data pushed over the wire
    for field_name in InheritanceMixin.fields:
        record_filter['metadata.{0}'.format(field_name)] = 1

    # call out to the DB
    resultset = self.collection.find(query, record_filter)

    # it's ok to keep these as deprecated strings b/c the overall cache is indexed by course_key and this
    # is a dictionary relative to that course
    results_by_url = {}
    root = None

    # now go through the results and order them by the location url
    for result in resultset:
        # manually pick it apart b/c the db has tag and we want revision = None regardless
        location = Location._from_deprecated_son(result['_id'], course_id.run).replace(revision=None)
        location_url = location.to_deprecated_string()

        if location_url in results_by_url:
            # found either draft or live to complement the other revision:
            # fold its children into the record already stored. The stored
            # record must not be replaced afterwards, or the merge is lost.
            existing = results_by_url[location_url]
            merged_children = list(existing.get('definition', {}).get('children', []))
            seen = set(merged_children)
            for child in result.get('definition', {}).get('children', []):
                # a child listed by both revisions only needs visiting once
                if child not in seen:
                    seen.add(child)
                    merged_children.append(child)
            existing.setdefault('definition', {})['children'] = merged_children
        else:
            results_by_url[location_url] = result

        if location.category == 'course':
            root = location_url

    # now traverse the tree and compute down the inherited metadata
    metadata_to_inherit = {}

    def _compute_inherited_metadata(url):
        """
        Helper method for computing inherited metadata for a specific location url
        """
        my_metadata = results_by_url[url].get('metadata', {})

        # go through all the children and recurse, but only if we have
        # in the result set. Remember results will not contain leaf nodes
        for child in results_by_url[url].get('definition', {}).get('children', []):
            if child in results_by_url:
                # the child's own metadata overrides what it inherits
                new_child_metadata = copy.deepcopy(my_metadata)
                new_child_metadata.update(results_by_url[child].get('metadata', {}))
                results_by_url[child]['metadata'] = new_child_metadata
                metadata_to_inherit[child] = new_child_metadata
                _compute_inherited_metadata(child)
            else:
                # this is likely a leaf node, so let's record what metadata we need to inherit
                metadata_to_inherit[child] = my_metadata

    if root is not None:
        _compute_inherited_metadata(root)

    return metadata_to_inherit
query["_id.revision"] = MongoRevisionKey.published # query the collection, sorting by DRAFT first parents = self.collection.find(query, {"_id": True}, sort=[SORT_REVISION_FAVOR_DRAFT]) if parents.count() == 0: # no parents were found return None if revision == ModuleStoreEnum.RevisionOption.published_only: if parents.count() > 1: # should never have multiple PUBLISHED parents raise ReferentialIntegrityError(u"{} parents claim {}".format(parents.count(), location)) else: # return the single PUBLISHED parent return Location._from_deprecated_son(parents[0]["_id"], location.course_key.run) else: # there could be 2 different parents if # (1) the draft item was moved or # (2) the parent itself has 2 versions: DRAFT and PUBLISHED # since we sorted by SORT_REVISION_FAVOR_DRAFT, the 0'th parent is the one we want found_id = parents[0]["_id"] # don't disclose revision outside modulestore return Location._from_deprecated_son(found_id, location.course_key.run) def get_parent_location(self, location, revision=ModuleStoreEnum.RevisionOption.published_only, **kwargs): """ Find the location that is the parent of this location in this course. Returns: version agnostic location (revision always None) as per the rest of mongo.