def get_unit_generator(self):
    """
    Yield every package category, package group and package environment unit
    paired with the comps-context method that will be used to process it.

    :return: generator of ``UnitProcessor`` named tuples exposing the fields
             ``unit`` (the original unit) and ``process`` (the bound method
             used to process that unit)
    :rtype:  generator
    """
    UnitProcessor = namedtuple('UnitProcessor', 'unit process')

    def units_of_type(type_id):
        # One conduit query per unit type, requested as a generator.
        type_criteria = UnitAssociationCriteria(type_ids=[type_id])
        return self.get_conduit().get_units(type_criteria, as_generator=True)

    # Categories are emitted first ...
    for category in units_of_type(TYPE_ID_PKG_CATEGORY):
        yield UnitProcessor(category,
                            self.comps_context.add_package_category_unit_metadata)

    # ... followed by groups ...
    for group in units_of_type(TYPE_ID_PKG_GROUP):
        yield UnitProcessor(group,
                            self.comps_context.add_package_group_unit_metadata)

    # ... and finally environments.
    for environment in units_of_type(TYPE_ID_PKG_ENVIRONMENT):
        yield UnitProcessor(environment,
                            self.comps_context.add_package_environment_unit_metadata)
def _purge_unlinked_manifests(repo, manifest_list): # Find manifest digests referenced by removed manifest lists (orphaned) orphaned = set() for image_man in manifest_list.manifests: orphaned.add(image_man) if manifest_list.amd64_digest: orphaned.add(manifest_list.amd64_digest) if not orphaned: # nothing orphaned return # Find manifest digests still referenced by other manifest lists (adopted) adopted = set() criteria = UnitAssociationCriteria( type_ids=[constants.MANIFEST_LIST_TYPE_ID], unit_filters={'digest': { '$ne': manifest_list.digest }}) for man_list in unit_association.RepoUnitAssociationManager._units_from_criteria( repo, criteria): for image_man in man_list.manifests: adopted.add(image_man) if man_list.amd64_digest: adopted.add(man_list.amd64_digest) # Remove unreferenced manifests orphaned = orphaned.difference(adopted) if not orphaned: # all adopted return # Check if those manifests have tags, tagged manifests cannot be removed criteria = UnitAssociationCriteria(type_ids=[constants.TAG_TYPE_ID], unit_filters={ 'manifest_digest': { '$in': list(orphaned) }, 'manifest_type': constants.MANIFEST_IMAGE_TYPE }) for tag in unit_association.RepoUnitAssociationManager._units_from_criteria( repo, criteria): orphaned.remove(tag.manifest_digest) unit_filter = {'digest': {'$in': sorted(orphaned)}} criteria = UnitAssociationCriteria( type_ids=[constants.MANIFEST_TYPE_ID], unit_filters=unit_filter) manager = manager_factory.repo_unit_association_manager() manager.unassociate_by_criteria(repo_id=repo.repo_id, criteria=criteria, notify_plugins=False) for manifest in models.Manifest.objects.filter( digest__in=sorted(orphaned)): DockerImporter._purge_unlinked_blobs(repo, manifest)
def test_criteria_str(self):
    """Rendering a criteria object as a string must not raise."""
    # Setup: one criteria with only defaults, one with every option supplied
    default_criteria = UnitAssociationCriteria()
    populated_criteria = UnitAssociationCriteria(
        type_ids=['a'],
        association_filters={'a': 'a'},
        unit_filters={'b': 'b'},
        association_sort=['c'],
        unit_sort=['d'],
        limit=1,
        skip=2,
        association_fields=['e'],
        unit_fields=['f'],
        remove_duplicates=True)

    # Test: the only expectation is that no exception is raised
    for criteria in (default_criteria, populated_criteria):
        str(criteria)
def test_get_units_limit(self):
    """A criteria limit caps the result size and keeps the leading results."""
    # Test
    limited = self.manager.get_units_across_types(
        'repo-1', UnitAssociationCriteria(limit=2))
    unlimited = self.manager.get_units_across_types(
        'repo-1', UnitAssociationCriteria(limit=10000))

    # Verify
    self.assertEqual(2, len(limited))
    self.assertEqual(self.repo_1_count, len(unlimited))

    # The limit must have been applied to the front of the results
    for position in (0, 1):
        self.assertEqual(limited[position], unlimited[position])
def test_resolve_dependencies_by_criteria(self):
    """
    Only the units matched by the criteria are handed to the importer, and
    the importer's report is returned unchanged.
    """
    # Setup
    expected_report = 'dep report'
    importer = mock_plugins.MOCK_IMPORTER
    importer.resolve_dependencies.return_value = expected_report

    matching_id = manager_factory.content_manager().add_content_unit(
        'type-1', None, {'key-1': 'unit-id-1'})
    other_id = manager_factory.content_manager().add_content_unit(
        'type-1', None, {'key-1': 'dep-1'})

    associations = manager_factory.repo_unit_association_manager()
    associations.associate_unit_by_id(self.repo_id, 'type-1', matching_id)
    associations.associate_unit_by_id(self.repo_id, 'type-1', other_id)

    criteria = UnitAssociationCriteria(
        type_ids=['type-1'], unit_filters={'key-1': 'unit-id-1'})

    # Test
    actual_report = self.manager.resolve_dependencies_by_criteria(
        self.repo_id, criteria, {})

    # Verify
    self.assertEqual(expected_report, actual_report)
    self.assertEqual(1, importer.resolve_dependencies.call_count)

    positional_args = importer.resolve_dependencies.call_args[0]
    self.assertEqual(1, len(positional_args[1]))
def test_4(self):
    """
    Scenario: Standard repository size but all associations are duplicated
    (unlikely in the real world). Criteria set to remove duplicates.

    Test Properties:
    - 3000 units, single unit type
    - 2 associations per unit
    - Criteria removes duplicates
    """
    # Stress tests only run when explicitly enabled
    if not GetUnitsStressTest.ENABLED:
        return

    # Setup
    repo_id = 'repo-3'
    unit_total = 3000

    for unit_number in range(unit_total):
        unit_id = 'unit_%d' % unit_number
        metadata = {'key_1': unit_id}
        metadata.update(
            ('md_%d' % field_number, 'value_%d' % unit_number)
            for field_number in range(10))

        self.content_manager.add_content_unit('alpha', unit_id, metadata)

        # Associate every unit twice: once by an importer, once by a user
        self.association_manager.associate_unit_by_id(
            repo_id, 'alpha', unit_id,
            association_manager.OWNER_TYPE_IMPORTER, 'stress-importer')
        self.association_manager.associate_unit_by_id(
            repo_id, 'alpha', unit_id,
            association_manager.OWNER_TYPE_USER, 'admin')

    # Test
    dedup_criteria = UnitAssociationCriteria(remove_duplicates=True)
    found = self.manager.get_units_across_types(repo_id, dedup_criteria)

    # Verify
    self.assertEqual(3000, len(found))
def import_units(self, source_repo, dest_repo, import_conduit, config, units=None):
    """
    Import content units into the given repository.

    This method is called in a number of different situations:

    * A user is copying a content unit from one repository into the
      repository that uses this importer.
    * A user is adding an orphaned unit to a repository.

    An importer has two options for handling the requested units:

    * Associate the given units with the destination repository. The
      repository is linked to the existing unit directly, so changes to the
      unit are reflected in every repository that references it.
    * Create a new unit and save it to the repository, acting as a deep copy
      of sorts. The unit key must change for the new unit to be considered
      different from the supplied one.

    The APIs for both approaches are similar to those in the sync conduit.
    For a simple association the init_unit step can be skipped and save_unit
    simply called on each specified unit. This implementation associates each
    unit through the conduit's ``associate_unit``.

    The ``units`` argument is optional. If None, the conduit is queried for
    the source repository's units of the image type and those are imported.
    If specified, only the given units are imported (this is the case where
    the caller passed a filter to Pulp).

    :param source_repo: metadata describing the repository containing the
                        units to import
    :type  source_repo: pulp.plugins.model.Repository
    :param dest_repo: metadata describing the repository to import units into
    :type  dest_repo: pulp.plugins.model.Repository
    :param import_conduit: provides access to relevant Pulp functionality
    :type  import_conduit: pulp.plugins.conduits.unit_import.ImportUnitConduit
    :param config: plugin configuration
    :type  config: pulp.plugins.config.PluginCallConfiguration
    :param units: optional list of pre-filtered units to import
    :type  units: list of pulp.plugins.model.Unit

    :return: list of Unit instances that were associated with the destination
             repository
    :rtype:  list
    """
    if units is None:
        # No explicit selection: take the image units from the source repo.
        image_criteria = UnitAssociationCriteria(type_ids=[constants.IMAGE_TYPE_ID])
        units = import_conduit.get_source_units(criteria=image_criteria)

    for unit in units:
        import_conduit.associate_unit(unit)

    return units
def get_units_by_type(self, repo_id, type_id, criteria=None, as_generator=False):
    """
    Retrieves data describing units of the given type associated with the
    given repository. Information on the associations themselves is also
    provided.

    The sort fields may be from either the association data OR the unit
    fields. A mix of both is not supported. Multiple sort fields are
    supported as long as they come from the same area.

    If a sort is not provided, the units will be sorted ascending by each
    value in the unit key for the given type.

    Note that a supplied criteria object is modified in place: its
    ``type_ids`` attribute is replaced with ``[type_id]``.

    :param repo_id: identifies the repository
    :type  repo_id: str
    :param type_id: limits returned units to the given type
    :type  type_id: str
    :param criteria: if specified will drive the query
    :type  criteria: UnitAssociationCriteria
    :param as_generator: if true, return a generator; if false, a list
    :type  as_generator: bool
    """
    if not criteria:
        criteria = UnitAssociationCriteria()

    # get_units works on a batch of types, so the single requested type is
    # passed as a one-element list. Any type_ids already present on the
    # criteria are deliberately overwritten.
    criteria.type_ids = [type_id]

    return self.get_units(repo_id, criteria, as_generator)
def test_get_units_filter_created(self):
    """Association filters on 'created' support $gt and $lt comparisons."""
    # Test
    created_after = UnitAssociationCriteria(
        association_filters={'created': {'$gt': self.timestamps[0]}})
    units_after = self.manager.get_units_across_types('repo-1', created_after)

    created_before = UnitAssociationCriteria(
        association_filters={'created': {'$lt': self.timestamps[1]}})
    units_before = self.manager.get_units_across_types('repo-1', created_before)

    # Verify
    # The first association in each type/owner combination will be timestamps[0],
    # the second timestamps[1]. There are 4 such type/owner combinations,
    # however the user associations in gamma have timestamp offsets of i+1.
    self.assertEqual(self.repo_1_count - 3, len(units_after))
    self.assertEqual(3, len(units_before))
def unassociate_all_by_ids(self, repo_id, unit_type_id, unit_id_list, notify_plugins=True):
    """
    Removes the association between a repo and a number of units of a single
    type. The units are selected by building a criteria that matches
    associations whose unit_id is in the given list; the removal itself is
    delegated to unassociate_by_criteria, whose result is returned.

    @param repo_id: identifies the repo
    @type  repo_id: str

    @param unit_type_id: identifies the type of units being removed
    @type  unit_type_id: str

    @param unit_id_list: list of unique identifiers for units within the
                         given type
    @type  unit_id_list: list of str

    @param notify_plugins: if true, relevant plugins will be informed of the
                           removal
    @type  notify_plugins: bool
    """
    matching_ids = UnitAssociationCriteria(
        type_ids=[unit_type_id],
        association_filters={'unit_id': {'$in': unit_id_list}})

    return self.unassociate_by_criteria(
        repo_id, matching_ids, notify_plugins=notify_plugins)
def _purge_unlinked_blobs(repo, manifest): """ Purge blobs associated with the given Manifests when removing it would leave them no longer referenced by any remaining Manifests. :param repo: The affected repository. :type repo: pulp.server.db.model.Repository :param units: List of removed units. :type units: list of: pulp.plugins.model.AssociatedUnit """ # Find blob digests referenced by removed manifests (orphaned) orphaned = set() map((lambda layer: orphaned.add(layer.blob_sum)), manifest.fs_layers) # in manifest schema version 2 there is an additional blob layer called config_layer if manifest.config_layer: orphaned.add(manifest.config_layer) if not orphaned: # nothing orphaned return # Find blob digests still referenced by other manifests (adopted) adopted = set() criteria = UnitAssociationCriteria( type_ids=[constants.MANIFEST_TYPE_ID], unit_filters={'digest': { '$ne': manifest.digest }}) for manifest in unit_association.RepoUnitAssociationManager._units_from_criteria( repo, criteria): map((lambda layer: adopted.add(layer.blob_sum)), manifest.fs_layers) if manifest.config_layer: adopted.add(manifest.config_layer) # Remove unreferenced blobs orphaned = orphaned.difference(adopted) if not orphaned: # all adopted return unit_filter = {'digest': {'$in': sorted(orphaned)}} criteria = UnitAssociationCriteria(type_ids=[constants.BLOB_TYPE_ID], unit_filters=unit_filter) manager = manager_factory.repo_unit_association_manager() manager.unassociate_by_criteria(repo_id=repo.repo_id, criteria=criteria, notify_plugins=False)
def get_repo_units(self, repo_id, content_type_id, additional_unit_fields=None): """ Searches for units in the given repository with given content type and returns a plugin unit containing unit id, unit key and any additional fields requested. :param repo_id: repo id :type repo_id: str :param content_type_id: content type id of the units :type content_type_id: str :param additional_unit_fields: additional fields from the unit metadata to be added in the result :type additional_unit_fields: list of str :return: list of unit instances :rtype: list of pulp.plugins.model.Unit """ additional_unit_fields = additional_unit_fields or [] try: unit_key_fields = units_controller.get_unit_key_fields_for_type( content_type_id) # Query repo association manager to get all units of given type # associated with given repo. Limit data by requesting only the fields # that are needed. query_manager = managers.repo_unit_association_query_manager() unit_fields = list(unit_key_fields) + list(additional_unit_fields) criteria = UnitAssociationCriteria(association_fields=['unit_id'], unit_fields=unit_fields) units = query_manager.get_units_by_type(repo_id, content_type_id, criteria) # Convert units to plugin units with unit_key and required metadata values for each unit all_units = [] for unit in units: unit_key = {} metadata = {} for k in unit_key_fields: unit_key[k] = unit['metadata'].pop(k) # Add unit_id and any additional unit fields requested by plugins metadata['unit_id'] = unit.pop('unit_id') for field in additional_unit_fields: metadata[field] = unit['metadata'].pop(field, None) u = Unit(content_type_id, unit_key, metadata, None) all_units.append(u) return all_units except Exception, e: _logger.exception( _('Exception from server getting units from repo [%s]' % repo_id)) raise self.exception_class(e), None, sys.exc_info()[2]
def test_get_units_by_type_unit_metadata_sort_skip(self):
    """Combining a unit metadata sort with skip=1 drops exactly one unit."""
    # Test
    descending_md_2 = [('md_2', association_manager.SORT_DESCENDING)]
    criteria = UnitAssociationCriteria(unit_sort=descending_md_2, skip=1)
    found = self.manager.get_units_by_type('repo-1', 'alpha', criteria)

    # Verify: everything except the skipped first unit is returned
    self.assertEqual(len(self.units['alpha']) - 1, len(found))
def test_get_destination_units(self, mock_get):
    """get_destination_units forwards its criteria to the mixin method."""
    # Test
    search_criteria = UnitAssociationCriteria()
    self.conduit.get_destination_units(criteria=search_criteria)

    # Verify the correct propagation to the mixin method
    mock_get.assert_called_once_with(
        self.dest_repo_id, search_criteria, ImporterConduitException)
def test_get_units_by_type_filter_wildcard(self):
    """A $regex unit filter selects only the units whose field matches."""
    # Test
    starts_with_aa = {'key_1': {'$regex': 'aa.*'}}
    criteria = UnitAssociationCriteria(unit_filters=starts_with_aa)
    matches = self.manager.get_units_by_type('repo-1', 'alpha', criteria)

    # Verify
    self.assertEqual(1, len(matches))
    only_match = matches[0]
    self.assertEqual('aardvark', only_match['metadata']['key_1'])
def test_get_units_filter_owner_type(self):
    """Filtering associations by owner type returns only the matching units."""
    # Test
    importer_only = UnitAssociationCriteria(
        association_filters={'owner_type': OWNER_TYPE_IMPORTER})
    found = self.manager.get_units_across_types('repo-1', importer_only)

    # Verify: the expected total is the number of alpha plus gamma units
    expected_count = sum(
        len(self.units[type_id]) for type_id in ['alpha', 'gamma'])
    self.assertEqual(expected_count, len(found))
def test_get_units_by_type_remove_duplicates(self):
    """With remove_duplicates set, each gamma unit is returned once."""
    # Test
    dedup_criteria = UnitAssociationCriteria(remove_duplicates=True)
    found = self.manager.get_units_by_type('repo-1', 'gamma', dedup_criteria)

    # Verify
    # only one association per gamma unit
    self.assertEqual(len(self.units['gamma']), len(found))
    first_unit = found[0]
    self.assertEqual(first_unit['unit_id'], 'garden')
def _do_import_modules(self, metadata):
    """
    Actual logic of the import. This method will do a best effort per module;
    if an individual module fails it will be recorded and the import will
    continue. This method will only raise an exception in an extreme case
    where it cannot react and continue.
    """

    def unit_key_str(unit_key_dict):
        """
        Converts the unit key dict form into a single string that can be
        used as the key in a dict lookup.
        """
        key_parts = tuple(
            encode_unicode(unit_key_dict[field])
            for field in ('name', 'version', 'author'))
        return '%s-%s-%s' % key_parts

    self.downloader = downloader = self._create_downloader()

    # Ease lookup of modules
    modules_by_key = dict(
        (unit_key_str(module.unit_key()), module) for module in metadata.modules)

    # Collect information about the repository's modules before changing it
    module_criteria = UnitAssociationCriteria(type_ids=[constants.TYPE_PUPPET_MODULE])
    existing_units = self.sync_conduit.get_units(criteria=module_criteria)
    existing_modules = [Module.from_unit(unit) for unit in existing_units]
    existing_module_keys = [
        unit_key_str(module.unit_key()) for module in existing_modules]

    new_unit_keys = self._resolve_new_units(
        existing_module_keys, modules_by_key.keys())
    # NOTE(review): remove_unit_keys is not consumed in the visible part of
    # this method -- confirm that nothing was cut off after the add loop.
    remove_unit_keys = self._resolve_remove_units(
        existing_module_keys, modules_by_key.keys())

    # Once we know how many things need to be processed, we can update the
    # progress report
    report = self.progress_report
    report.modules_total_count = len(new_unit_keys)
    report.modules_finished_count = 0
    report.modules_error_count = 0
    report.update_progress()

    # Add new units
    for key in new_unit_keys:
        if self._canceled:
            break

        module = modules_by_key[key]
        try:
            self._add_new_module(downloader, module)
            self.progress_report.modules_finished_count += 1
        except Exception as e:
            # Best effort: record the failure and carry on with the next module
            self.progress_report.add_failed_module(module, e, sys.exc_info()[2])

        self.progress_report.update_progress()
def test_get_units(self):
    """The profiler conduit returns the repo's units of the requested type."""
    # Setup
    self.populate()

    # Test
    conduit = ProfilerConduit()
    criteria = UnitAssociationCriteria(type_ids=[self.TYPE_1_DEF.id])
    units = conduit.get_units(self.REPO_ID, criteria)

    # Verify
    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(len(units), 9)
def test_get_units_by_type_remove_duplicates(self):
    """
    With remove_duplicates set, each gamma unit is returned once and the
    surviving association is the user-owned one.
    """
    # Test
    dedup_criteria = UnitAssociationCriteria(remove_duplicates=True)
    found = self.manager.get_units_by_type('repo-1', 'gamma', dedup_criteria)

    # Verify
    # only one association per gamma unit
    self.assertEqual(len(self.units['gamma']), len(found))

    # all user associations have earlier created date
    for association in found:
        self.assertEqual(association['owner_type'],
                         association_manager.OWNER_TYPE_USER)
def test_get_units_with_fields(self):
    """Only the requested association fields appear on each returned unit."""
    # Test
    created_only = UnitAssociationCriteria(association_fields=['created'])
    found = self.manager.get_units_across_types('repo-1', created_only)

    # Verify: 'created' was requested, 'updated' was not
    for association in found:
        self.assertTrue('created' in association)
        self.assertFalse('updated' in association)
def test_get_units_by_type_association_filter(self):
    """An exact-match association filter on 'updated' selects a single unit."""
    # Test
    updated_filter = {'updated': self.timestamps[1]}
    criteria = UnitAssociationCriteria(association_filters=updated_filter)
    found = self.manager.get_units_by_type('repo-1', 'gamma', criteria)

    # Verify
    self.assertEqual(1, len(found))
def test_get_units_by_type_association_sort_limit(self):
    """A descending association sort combined with a limit is honoured."""
    # Test
    by_owner_type_desc = [('owner_type', association_manager.SORT_DESCENDING)]
    criteria = UnitAssociationCriteria(
        association_sort=by_owner_type_desc, limit=2)
    found = self.manager.get_units_by_type('repo-1', 'alpha', criteria)

    # Verify
    self.assertEqual(2, len(found))

    # Every unit must sort at or above the unit that follows it
    for current, following in zip(found, found[1:]):
        self.assertTrue(current['owner_type'] >= following['owner_type'])
def test_get_units_by_type_unit_metadata_filter(self):
    """A unit metadata filter returns only the units whose field matches."""
    # Test
    criteria = UnitAssociationCriteria(unit_filters={'md_2': 0})
    found = self.manager.get_units_by_type('repo-1', 'alpha', criteria)

    # Verify: half of the alpha units, rounded up, are expected to match
    alpha_total = len(self.units['alpha'])
    expected_count = math.ceil(alpha_total / 2.0)
    self.assertEqual(expected_count, len(found))

    for unit in found:
        self.assertEqual(unit['metadata']['md_2'], 0)
def test_get_units_by_type_unit_id_filter(self):
    """Filtering on the unit '_id' with $in returns exactly that unit."""
    wanted_id = 'dog'

    # Test
    criteria = UnitAssociationCriteria(
        unit_filters={'_id': {'$in': [wanted_id]}})
    found = self.manager.get_units_by_type('repo-2', 'delta', criteria)

    # Verify
    self.assertEqual(len(found), 1)
    self.assertEqual(found[0]['unit_id'], wanted_id)
def _retrieve_repo_modules(self):
    """
    Retrieves all modules in the repository by querying the publish conduit
    for units of the puppet module type.

    :return: list of modules in the repository; empty list if there are none
    :rtype:  list of pulp.plugins.model.AssociatedUnit
    """
    module_criteria = UnitAssociationCriteria(
        type_ids=[constants.TYPE_PUPPET_MODULE])
    return self.publish_conduit.get_units(criteria=module_criteria)
def test_get_units_filter_type(self):
    """Restricting type_ids returns only units of the listed types."""
    # Test
    criteria = UnitAssociationCriteria(type_ids=['alpha', 'beta'])
    found = self.manager.get_units_across_types('repo-1', criteria)

    # Verify
    expected_count = sum(
        len(self.units[type_id]) for type_id in ['alpha', 'beta'])
    self.assertEqual(expected_count, len(found))

    for unit in found:
        self._assert_unit_integrity(unit)
        self.assertTrue(unit['unit_type_id'] in ['alpha', 'beta'])
def test_get_units_sort(self):
    """A descending association sort orders the units across all types."""
    # Test
    # owner_type will produce a non-default sort
    by_owner_type_desc = [('owner_type', association_manager.SORT_DESCENDING)]
    sorted_criteria = UnitAssociationCriteria(association_sort=by_owner_type_desc)
    sorted_units = self.manager.get_units_across_types('repo-1', sorted_criteria)

    # Verify
    self.assertEqual(self.repo_1_count, len(sorted_units))

    # Every unit must sort at or above the unit that follows it
    for current, following in zip(sorted_units, sorted_units[1:]):
        self.assertTrue(current['owner_type'] >= following['owner_type'])
def test_criteria_unit_fields(self, mock_find):
    """
    Ensure that the criteria unit_fields are passed on to the
    find_repo_content_units function.
    """
    criteria = UnitAssociationCriteria(
        unit_fields=['secret_location', 'pasword'])

    self.manager._units_from_criteria(self.repo, criteria)

    self.assertEqual(mock_find.call_count, 1)
    # Index 2 of a recorded mock call holds its keyword arguments
    first_call_kwargs = mock_find.mock_calls[0][2]
    self.assertEqual(first_call_kwargs['unit_fields'],
                     ['secret_location', 'pasword'])
def test_get_units_by_type_sort_unit_data(self):
    """A descending unit metadata sort orders the returned units."""
    # Test
    by_md_2_desc = [('md_2', association_manager.SORT_DESCENDING)]
    criteria = UnitAssociationCriteria(unit_sort=by_md_2_desc)
    sorted_units = self.manager.get_units_by_type('repo-1', 'alpha', criteria)

    # Verify
    self.assertEqual(len(self.units['alpha']), len(sorted_units))

    # Every unit must sort at or above the unit that follows it
    for current, following in zip(sorted_units, sorted_units[1:]):
        self.assertTrue(
            current['metadata']['md_2'] >= following['metadata']['md_2'])