def _associated_units_by_type_cursor(unit_type_id, criteria, associated_unit_ids):
    """
    Retrieve a pymongo cursor for units of a given unit type that are associated
    with a repository and that meet the provided criteria.

    The query is restricted to the supplied unit IDs, filtered by the criteria's
    unit filters and sorted by the criteria's unit sort. When no sort is given,
    the units are sorted ascending by each field of the type's unit key, if the
    type has one.

    :param unit_type_id: ID of the unit type whose collection is queried
    :type  unit_type_id: str
    :param criteria: supplies the unit filters, unit fields and unit sort
    :type  criteria: UnitAssociationCriteria
    :param associated_unit_ids: IDs of the units the query is limited to
    :type  associated_unit_ids: list

    :return: cursor over the matching units
    :rtype:  pymongo.cursor.Cursor
    """
    collection = types_db.type_units_collection(unit_type_id)

    # Copy the filters so the _id clause never leaks into the caller's criteria.
    spec = criteria.unit_filters.copy()
    spec['_id'] = {'$in': associated_unit_ids}

    fields = criteria.unit_fields
    if fields is not None:
        # Always build a new list: it keeps the caller's criteria untouched and,
        # unlike copy.copy(), still works when the fields arrive as a tuple or
        # another non-list iterable that has no append().
        fields = list(fields)

        # The _content_type_id is required for looking up the association.
        if '_content_type_id' not in fields:
            fields.append('_content_type_id')

    cursor = collection.find(spec, fields=fields)

    sort = criteria.unit_sort
    if sort is None:
        # Default to the unit key; a type without a unit key is left unsorted.
        unit_key = types_db.type_units_unit_key(unit_type_id)
        if unit_key is not None:
            sort = [(u, SORT_ASCENDING) for u in unit_key]

    if sort is not None:
        cursor.sort(sort)

    return cursor
def _build_multi_keys_spec(content_type, unit_keys_dicts):
    """
    Build a mongo db spec document for a query on the given content_type
    collection out of multiple content unit key dictionaries.

    Every keys dictionary is validated against the unit key fields of the
    content type. The returned spec matches a document only when the document
    matches one of the keys dictionaries in full.

    @param content_type: unique id of the content type collection
    @type  content_type: str
    @param unit_keys_dicts: list of key dictionaries whose key, value pairs can
                            be used as unique identifiers for a single content
                            unit
    @type  unit_keys_dicts: list of dict
    @return: mongo db spec document for locating documents in a collection
    @rtype:  dict
    @raise ValueError: if any of the key dictionaries do not match the unique
                       fields of the collection
    """
    # keys dicts validation constants
    key_fields = []
    _flatten_keys(key_fields, content_types_db.type_units_unit_key(content_type))
    key_fields_set = set(key_fields)
    extra_keys_msg = _('keys dictionary found with superfluous keys %(a)s, valid keys are %(b)s')
    missing_keys_msg = _('keys dictionary missing keys %(a)s, required keys are %(b)s')
    keys_errors = []

    # keys dictionaries that passed validation, one $or clause each
    valid_keys_dicts = []

    for keys_dict in unit_keys_dicts:
        # keys dict validation
        keys_dict_set = set(keys_dict)
        extra_keys = keys_dict_set.difference(key_fields_set)
        if extra_keys:
            keys_errors.append(extra_keys_msg % {'a': ','.join(extra_keys),
                                                 'b': ','.join(key_fields)})
        missing_keys = key_fields_set.difference(keys_dict_set)
        if missing_keys:
            keys_errors.append(missing_keys_msg % {'a': ','.join(missing_keys),
                                                   'b': ','.join(key_fields)})
        if extra_keys or missing_keys:
            continue
        # validation passed; copy so the spec does not alias the caller's dict
        valid_keys_dicts.append(dict(keys_dict))

    if keys_errors:
        value_error_msg = '\n'.join(keys_errors)
        raise ValueError(value_error_msg)

    if not valid_keys_dicts:
        # $or cannot take an empty list of clauses, so keep returning the
        # per-field empty $in spec this function has always produced for an
        # empty input.
        return dict([(k, {'$in': []}) for k in key_fields])

    # NOTE a spec of one {'$in': [...]} clause per key field is NOT equivalent:
    # it matches the cross product of the values. For [{a: 1, b: 2},
    # {a: 3, b: 4}] it would also match a unit with {a: 1, b: 4}. Each keys
    # dictionary therefore has to be its own $or clause.
    spec = {'$or': valid_keys_dicts}
    return spec
def test_type_units_unique_indexes_missing_def(self):
    """
    Looking up the unit key of a type that was never defined must not raise;
    the lookup is expected to hand back None instead.
    """

    # Test
    missing_type_unit_key = types_db.type_units_unit_key('not_there')

    # Verify
    lookup_returned_none = missing_type_unit_key is None
    self.assertTrue(lookup_returned_none)
def test_type_units_unit_key(self):
    """
    Stores a type definition and checks that type_units_unit_key returns the
    same unit key the definition carries.
    """

    # Setup
    type_id = 'rpm'
    rpm_definition = TypeDefinition(
        type_id,
        'RPM',
        'RPM Packages',
        ['unique_1', 'unique_2'],
        ['name'],
        [],
    )
    types_db._create_or_update_type(rpm_definition)

    # Test
    stored_unit_key = types_db.type_units_unit_key(type_id)

    # Verify
    self.assertEqual(rpm_definition.unit_key, stored_unit_key)
def test_type_units_unit_key(self):
    """
    Stores a type definition and checks that type_units_unit_key returns the
    same unit key the definition carries.
    """

    # Setup
    type_id = "rpm"
    rpm_definition = TypeDefinition(
        type_id,
        "RPM",
        "RPM Packages",
        ["unique_1", "unique_2"],
        ["name"],
        [],
    )
    types_db._create_or_update_type(rpm_definition)

    # Test
    stored_unit_key = types_db.type_units_unit_key(type_id)

    # Verify
    self.assertEqual(rpm_definition.unit_key, stored_unit_key)
def _build_multi_keys_spec(content_type, unit_keys_dicts):
    """
    Validate a list of unit key dictionaries against the unit key fields of a
    content type and wrap them in a single mongo db spec document.

    :param content_type: unique id of the content type collection
    :type  content_type: str
    :param unit_keys_dicts: list of key dictionaries whose key, value pairs can
                            be used as unique identifiers for a single content
                            unit
    :type  unit_keys_dicts: list of dict

    :return: mongo db spec document for locating documents in a collection
    :rtype:  dict

    :raises ValueError: if any of the key dictionaries do not match the unique
                        fields of the collection
    """
    # Fields every keys dictionary must contain, no more and no less
    expected_fields = []
    _flatten_keys(expected_fields, content_types_db.type_units_unit_key(content_type))
    expected = set(expected_fields)

    superfluous_template = _(
        'keys dictionary found with superfluous keys %(a)s, valid keys are %(b)s')
    absent_template = _(
        'keys dictionary missing keys %(a)s, required keys are %(b)s')

    # Collect every problem first so the caller sees all of them at once
    problems = []
    for candidate in unit_keys_dicts:
        present = set(candidate)

        superfluous = present - expected
        if superfluous:
            details = {'a': ','.join(superfluous), 'b': ','.join(expected_fields)}
            problems.append(superfluous_template % details)

        absent = expected - present
        if absent:
            details = {'a': ','.join(absent), 'b': ','.join(expected_fields)}
            problems.append(absent_template % details)

    if problems:
        raise ValueError('\n'.join(problems))

    # One $or clause per keys dictionary
    return {'$or': unit_keys_dicts}
def get_content_unit_keys(self, content_type, unit_ids):
    """
    Look up the unit key fields and values of the content units with the given
    ids.

    @param content_type: unique id of content collection
    @type  content_type: str
    @param unit_ids: list of unique content unit ids
    @type  unit_ids: list of str's
    @return: two tuples of the same length, the first holding ids and the
             second holding key dicts; entries at the same index describe the
             same content unit
    @rtype:  tuple of (possibly empty) tuples
    @raise InvalidValue: if no unit key is found for the content type
    """
    unit_key = content_types_db.type_units_unit_key(content_type)
    if unit_key is None:
        raise InvalidValue(["content_type"])

    # Only the id and the unit key fields are requested from the database
    projection = ["_id"]
    _flatten_keys(projection, unit_key)

    units = content_types_db.type_units_collection(content_type)
    matches = units.find({"_id": {"$in": unit_ids}}, fields=projection)

    found_ids = []
    found_keys = []
    for document in matches:
        # Split each document into its id and the remaining key fields
        key_dict = dict(document)
        found_ids.append(key_dict.pop("_id"))
        found_keys.append(key_dict)

    return (tuple(found_ids), tuple(found_keys))
def get_content_unit_keys(self, content_type, unit_ids):
    """
    Look up the unit key fields and values of the content units with the given
    ids.

    @param content_type: unique id of content collection
    @type  content_type: str
    @param unit_ids: list of unique content unit ids
    @type  unit_ids: list of str's
    @return: two tuples of the same length, the first holding ids and the
             second holding key dicts; entries at the same index describe the
             same content unit
    @rtype:  tuple of (possibly empty) tuples
    @raise InvalidValue: if no unit key is found for the content type
    """
    unit_key = content_types_db.type_units_unit_key(content_type)
    if unit_key is None:
        raise InvalidValue(['content_type'])

    # Only the id and the unit key fields are requested from the database
    projection = ['_id']
    _flatten_keys(projection, unit_key)

    units = content_types_db.type_units_collection(content_type)
    matches = units.find({'_id': {'$in': unit_ids}}, fields=projection)

    found_ids = []
    found_keys = []
    for document in matches:
        # Split each document into its id and the remaining key fields
        key_dict = dict(document)
        found_ids.append(key_dict.pop('_id'))
        found_keys.append(key_dict)

    return (tuple(found_ids), tuple(found_keys))
def _build_multi_keys_spec(content_type, unit_keys_dicts):
    """
    Validate a list of unit key dictionaries against the unit key fields of a
    content type and wrap them in a single mongo db spec document.

    :param content_type: unique id of the content type collection
    :type  content_type: str
    :param unit_keys_dicts: list of key dictionaries whose key, value pairs can
                            be used as unique identifiers for a single content
                            unit
    :type  unit_keys_dicts: list of dict

    :return: mongo db spec document for locating documents in a collection
    :rtype:  dict

    :raises ValueError: if any of the key dictionaries do not match the unique
                        fields of the collection
    """
    # Fields every keys dictionary must contain, no more and no less
    expected_fields = []
    _flatten_keys(expected_fields, content_types_db.type_units_unit_key(content_type))
    expected = set(expected_fields)

    superfluous_template = _(
        'keys dictionary found with superfluous keys %(a)s, valid keys are %(b)s')
    absent_template = _(
        'keys dictionary missing keys %(a)s, required keys are %(b)s')

    # Collect every problem first so the caller sees all of them at once
    problems = []
    for candidate in unit_keys_dicts:
        present = set(candidate)

        superfluous = present - expected
        if superfluous:
            details = {'a': ','.join(superfluous), 'b': ','.join(expected_fields)}
            problems.append(superfluous_template % details)

        absent = expected - present
        if absent:
            details = {'a': ','.join(absent), 'b': ','.join(expected_fields)}
            problems.append(absent_template % details)

    if problems:
        raise ValueError('\n'.join(problems))

    # One $or clause per keys dictionary
    return {'$or': unit_keys_dicts}
def _associated_units_by_type_cursor(unit_type_id, criteria, associated_unit_ids):
    """
    Build a pymongo cursor over the units of one unit type that are associated
    with a repository and satisfy the given criteria.

    :param unit_type_id: ID of the unit type whose collection is queried
    :type  unit_type_id: str
    :param criteria: supplies the unit filters, unit fields and unit sort
    :type  criteria: UnitAssociationCriteria
    :param associated_unit_ids: IDs of the units the query is limited to
    :type  associated_unit_ids: list

    :return: cursor over the matching units
    :rtype:  pymongo.cursor.Cursor
    """
    units_collection = types_db.type_units_collection(unit_type_id)

    # Start from a copy of the unit filters and narrow it to the given units
    query = criteria.unit_filters.copy()
    query['_id'] = {'$in': associated_unit_ids}

    # The _content_type_id is required for looking up the association, so it is
    # added to a new list whenever an explicit field list leaves it out.
    projection = criteria.unit_fields
    type_id_missing = projection is not None and '_content_type_id' not in projection
    if type_id_missing:
        projection = list(projection) + ['_content_type_id']

    cursor = units_collection.find(query, fields=projection)

    # Fall back to sorting by the unit key when no explicit sort was requested
    ordering = criteria.unit_sort
    if ordering is None:
        key_fields = types_db.type_units_unit_key(unit_type_id)
        if key_fields is not None:
            ordering = [(name, SORT_ASCENDING) for name in key_fields]

    if ordering is not None:
        cursor.sort(ordering)

    return cursor
def get_units_by_type(self, repo_id, type_id, criteria=None):
    """
    Retrieves data describing units of the given type associated with the
    given repository. Information on the associations themselves is also
    provided.

    The sort fields may be from either the association data OR the
    unit fields. A mix of both is not supported. Multiple sort fields
    are supported as long as they come from the same area.

    If a sort is not provided, the units will be sorted ascending by each
    value in the unit key for the given type.

    The supplied criteria is only read; its filters are never modified.

    @param repo_id: identifies the repository
    @type  repo_id: str

    @param type_id: limits returned units to the given type
    @type  type_id: str

    @param criteria: if specified will drive the query
    @type  criteria: L{UnitAssociationCriteria}

    @return: list of association documents, each carrying the data of its
             unit under the 'metadata' key
    @rtype:  list of dict
    """

    # For simplicity, create a criteria if one is not provided and use its defaults
    if criteria is None:
        criteria = UnitAssociationCriteria()

    # -- association collection lookup ------------------------------------

    # The repo and type always come from the arguments, so those two keys are
    # dropped if they were accidentally specified in the criteria. A new dict
    # is built so the caller's association filters stay untouched.
    spec = dict([(k, v) for k, v in criteria.association_filters.items()
                 if k not in ('unit_type_id', 'repo_id')])
    spec['repo_id'] = repo_id
    spec['unit_type_id'] = type_id

    cursor = RepoContentUnit.get_collection().find(spec, fields=criteria.association_fields)

    # If the sort clause applies to the association metadata, we
    # apply the limit and skips here as well. If the sort is not
    # provided, it will be defaulted at the unit type level.
    association_sort = criteria.association_sort
    association_sorted = association_sort is not None

    if association_sorted:
        cursor.sort(association_sort)

        if criteria.limit is not None:
            cursor.limit(criteria.limit)

        if criteria.skip is not None:
            cursor.skip(criteria.skip)

    unit_associations = list(cursor)

    # -- remove multiple associations -------------------------------------

    if criteria.remove_duplicates:
        unit_associations = self._remove_duplicate_associations(unit_associations)

    # -- unit lookups -----------------------------------------------------

    # If the sorting was not done on association fields, we do it here. If
    # specified, we can use those fields. If not, we default to the unit key.

    type_collection = types_db.type_units_collection(type_id)

    # Copy the unit filters: an _id clause is added below and it must not
    # leak into the caller's criteria.
    unit_spec = copy.copy(criteria.unit_filters)

    # Depending on where the sort occurs, the algorithm proceeds in
    # drastically different ways. Both of these absolutely must be stress
    # tested individually and we need to make sure QE knows the role of
    # the sort in determining which code branch is followed.

    if association_sorted:
        # The units are already sorted, so we have to maintain the order in
        # the units list. We also haven't applied the unit filters to the
        # list yet, so we're not guaranteed that everything in
        # unit_associations is going to be part of the result.

        # The first step is to figure out which associations actually match
        # the unit filters. This only applies if there is unit filtering.
        if unit_spec:
            association_unit_ids = [a['unit_id'] for a in unit_associations]
            unit_spec['_id'] = {'$in': association_unit_ids}
            matching_unit_id_cursor = type_collection.find(unit_spec, fields=['_id'])

            # A set keeps the membership test below constant time per association
            matching_unit_ids = set([u['_id'] for u in matching_unit_id_cursor])

            # Remove all associations that didn't match the units after the
            # filter was applied
            unit_associations = [a for a in unit_associations
                                 if a['unit_id'] in matching_unit_ids]

        # Batch look up all of the units. This seems like it'd be rough on
        # memory, but since we have to ultimately return all of this data to
        # the caller, it's going to end up there anyway.
        all_unit_ids = [a['unit_id'] for a in unit_associations]
        all_metadata = type_collection.find({'_id': {'$in': all_unit_ids}},
                                            fields=criteria.unit_fields)

        # Convert to dict by unit_id for simple lookup
        metadata_by_id = dict([(u['_id'], u) for u in all_metadata])

        # An explicit loop is used because map() is lazy on Python 3 and would
        # silently skip the merge there.
        for association in unit_associations:
            association['metadata'] = metadata_by_id[association['unit_id']]

        return unit_associations

    # Sorting will be done in the units collection. Since the type is
    # consistent, we can rely on the unit's _id for uniqueness. That
    # means we can transform the associations into a simple dict lookup
    # by _id when we need to merge in the association data.

    # Restructure the associations by ID so we can look them up later and
    # so we have a list of all unit IDs to pass as a filter.
    associations_by_id = dict([(a['unit_id'], a) for a in unit_associations])

    # We only want to return units with an association, so add in all of
    # the unit IDs we found earlier.
    unit_spec['_id'] = {'$in': list(associations_by_id)}

    cursor = type_collection.find(unit_spec, fields=criteria.unit_fields)

    # Determine what our sort criteria will look like
    unit_sort = criteria.unit_sort
    if unit_sort is None:
        # Default the sort to the unit key; the lookup yields None for a type
        # without a definition, in which case the units are left unsorted.
        unit_key_fields = types_db.type_units_unit_key(type_id)
        if unit_key_fields is not None:
            unit_sort = [(u, SORT_ASCENDING) for u in unit_key_fields]

    if unit_sort is not None:
        cursor.sort(unit_sort)

    # Since the sorting is done here, this is the only place we can
    # apply the limit/skip.
    if criteria.limit is not None:
        cursor.limit(criteria.limit)

    if criteria.skip is not None:
        cursor.skip(criteria.skip)

    # Iterating the cursor loads the units filtered, limited, and sorted;
    # all that is left is to merge in the association data.
    merged_units = []
    for unit in cursor:
        association = associations_by_id[unit['_id']]
        association['metadata'] = unit
        merged_units.append(association)

    return merged_units
def get_units_by_type(self, repo_id, type_id, criteria=None):
    """
    Retrieves data describing units of the given type associated with the
    given repository. Information on the associations themselves is also
    provided.

    The sort fields may be from either the association data OR the
    unit fields. A mix of both is not supported. Multiple sort fields
    are supported as long as they come from the same area.

    If a sort is not provided, the units will be sorted ascending by each
    value in the unit key for the given type.

    The supplied criteria is only read; its filters are never modified.

    @param repo_id: identifies the repository
    @type  repo_id: str

    @param type_id: limits returned units to the given type
    @type  type_id: str

    @param criteria: if specified will drive the query
    @type  criteria: L{UnitAssociationCriteria}

    @return: list of association documents, each carrying the data of its
             unit under the 'metadata' key
    @rtype:  list of dict
    """

    # For simplicity, create a criteria if one is not provided and use its defaults
    if criteria is None:
        criteria = UnitAssociationCriteria()

    # -- association collection lookup ------------------------------------

    # The repo and type always come from the arguments, so those two keys are
    # dropped if they were accidentally specified in the criteria. A new dict
    # is built so the caller's association filters stay untouched.
    spec = dict([(k, v) for k, v in criteria.association_filters.items()
                 if k not in ('unit_type_id', 'repo_id')])
    spec['repo_id'] = repo_id
    spec['unit_type_id'] = type_id

    cursor = RepoContentUnit.get_collection().find(
        spec, fields=criteria.association_fields)

    # If the sort clause applies to the association metadata, we
    # apply the limit and skips here as well. If the sort is not
    # provided, it will be defaulted at the unit type level.
    association_sort = criteria.association_sort
    association_sorted = association_sort is not None

    if association_sorted:
        cursor.sort(association_sort)

        if criteria.limit is not None:
            cursor.limit(criteria.limit)

        if criteria.skip is not None:
            cursor.skip(criteria.skip)

    unit_associations = list(cursor)

    # -- remove multiple associations -------------------------------------

    if criteria.remove_duplicates:
        unit_associations = self._remove_duplicate_associations(
            unit_associations)

    # -- unit lookups -----------------------------------------------------

    # If the sorting was not done on association fields, we do it here. If
    # specified, we can use those fields. If not, we default to the unit key.

    type_collection = types_db.type_units_collection(type_id)

    # Copy the unit filters: an _id clause is added below and it must not
    # leak into the caller's criteria.
    unit_spec = copy.copy(criteria.unit_filters)

    # Depending on where the sort occurs, the algorithm proceeds in
    # drastically different ways. Both of these absolutely must be stress
    # tested individually and we need to make sure QE knows the role of
    # the sort in determining which code branch is followed.

    if association_sorted:
        # The units are already sorted, so we have to maintain the order in
        # the units list. We also haven't applied the unit filters to the
        # list yet, so we're not guaranteed that everything in
        # unit_associations is going to be part of the result.

        # The first step is to figure out which associations actually match
        # the unit filters. This only applies if there is unit filtering.
        if unit_spec:
            association_unit_ids = [
                a['unit_id'] for a in unit_associations
            ]
            unit_spec['_id'] = {'$in': association_unit_ids}
            matching_unit_id_cursor = type_collection.find(unit_spec,
                                                           fields=['_id'])

            # A set keeps the membership test below constant time per association
            matching_unit_ids = set(
                [u['_id'] for u in matching_unit_id_cursor])

            # Remove all associations that didn't match the units after the
            # filter was applied
            unit_associations = [
                a for a in unit_associations
                if a['unit_id'] in matching_unit_ids
            ]

        # Batch look up all of the units. This seems like it'd be rough on
        # memory, but since we have to ultimately return all of this data to
        # the caller, it's going to end up there anyway.
        all_unit_ids = [a['unit_id'] for a in unit_associations]
        all_metadata = type_collection.find({'_id': {'$in': all_unit_ids}},
                                            fields=criteria.unit_fields)

        # Convert to dict by unit_id for simple lookup
        metadata_by_id = dict([(u['_id'], u) for u in all_metadata])

        # An explicit loop is used because map() is lazy on Python 3 and would
        # silently skip the merge there.
        for association in unit_associations:
            association['metadata'] = metadata_by_id[association['unit_id']]

        return unit_associations

    # Sorting will be done in the units collection. Since the type is
    # consistent, we can rely on the unit's _id for uniqueness. That
    # means we can transform the associations into a simple dict lookup
    # by _id when we need to merge in the association data.

    # Restructure the associations by ID so we can look them up later and
    # so we have a list of all unit IDs to pass as a filter.
    associations_by_id = dict([(a['unit_id'], a)
                               for a in unit_associations])

    # We only want to return units with an association, so add in all of
    # the unit IDs we found earlier.
    unit_spec['_id'] = {'$in': list(associations_by_id)}

    cursor = type_collection.find(unit_spec, fields=criteria.unit_fields)

    # Determine what our sort criteria will look like
    unit_sort = criteria.unit_sort
    if unit_sort is None:
        # Default the sort to the unit key; the lookup yields None for a type
        # without a definition, in which case the units are left unsorted.
        unit_key_fields = types_db.type_units_unit_key(type_id)
        if unit_key_fields is not None:
            unit_sort = [(u, SORT_ASCENDING) for u in unit_key_fields]

    if unit_sort is not None:
        cursor.sort(unit_sort)

    # Since the sorting is done here, this is the only place we can
    # apply the limit/skip.
    if criteria.limit is not None:
        cursor.limit(criteria.limit)

    if criteria.skip is not None:
        cursor.skip(criteria.skip)

    # Iterating the cursor loads the units filtered, limited, and sorted;
    # all that is left is to merge in the association data.
    merged_units = []
    for unit in cursor:
        association = associations_by_id[unit['_id']]
        association['metadata'] = unit
        merged_units.append(association)

    return merged_units
def _build_multi_keys_spec(content_type, unit_keys_dicts):
    """
    Build a mongo db spec document for a query on the given content_type
    collection out of multiple content unit key dictionaries.

    Every keys dictionary is validated against the unit key fields of the
    content type. The returned spec matches a document only when the document
    matches one of the keys dictionaries in full.

    @param content_type: unique id of the content type collection
    @type  content_type: str
    @param unit_keys_dicts: list of key dictionaries whose key, value pairs can
                            be used as unique identifiers for a single content
                            unit
    @type  unit_keys_dicts: list of dict
    @return: mongo db spec document for locating documents in a collection
    @rtype:  dict
    @raise ValueError: if any of the key dictionaries do not match the unique
                       fields of the collection
    """
    # keys dicts validation constants
    key_fields = []
    _flatten_keys(key_fields, content_types_db.type_units_unit_key(content_type))
    key_fields_set = set(key_fields)
    extra_keys_msg = _(
        'keys dictionary found with superfluous keys %(a)s, valid keys are %(b)s'
    )
    missing_keys_msg = _(
        'keys dictionary missing keys %(a)s, required keys are %(b)s')
    keys_errors = []

    # keys dictionaries that passed validation, one $or clause each
    valid_keys_dicts = []

    for keys_dict in unit_keys_dicts:
        # keys dict validation
        keys_dict_set = set(keys_dict)
        extra_keys = keys_dict_set.difference(key_fields_set)
        if extra_keys:
            keys_errors.append(extra_keys_msg % {
                'a': ','.join(extra_keys),
                'b': ','.join(key_fields)
            })
        missing_keys = key_fields_set.difference(keys_dict_set)
        if missing_keys:
            keys_errors.append(missing_keys_msg % {
                'a': ','.join(missing_keys),
                'b': ','.join(key_fields)
            })
        if extra_keys or missing_keys:
            continue
        # validation passed; copy so the spec does not alias the caller's dict
        valid_keys_dicts.append(dict(keys_dict))

    if keys_errors:
        value_error_msg = '\n'.join(keys_errors)
        raise ValueError(value_error_msg)

    if not valid_keys_dicts:
        # $or cannot take an empty list of clauses, so keep returning the
        # per-field empty $in spec this function has always produced for an
        # empty input.
        return dict([(k, {'$in': []}) for k in key_fields])

    # NOTE a spec of one {'$in': [...]} clause per key field is NOT equivalent:
    # it matches the cross product of the values. For [{a: 1, b: 2},
    # {a: 3, b: 4}] it would also match a unit with {a: 1, b: 4}. Each keys
    # dictionary therefore has to be its own $or clause.
    spec = {'$or': valid_keys_dicts}
    return spec