def _do_import_modules(self, metadata):
    """
    Actual logic of the import.

    This method will do a best effort per module; if an individual module
    fails it will be recorded and the import will continue. This method will
    only raise an exception in an extreme case where it cannot react and
    continue.

    :param metadata: parsed repository metadata whose ``modules`` attribute
                     lists the modules available in the remote repository
    """
    downloader = self._create_downloader()
    self.downloader = downloader

    # Ease module lookup (generator expression avoids building a throwaway list)
    metadata_modules_by_key = dict((m.unit_key_str, m) for m in metadata.modules)

    # Collect information about the repository's modules before changing it
    existing_module_ids_by_key = {}
    modules = repo_controller.find_repo_content_units(
        self.repo.repo_obj, unit_fields=Module.unit_key_fields,
        yield_content_unit=True)
    for module in modules:
        existing_module_ids_by_key[module.unit_key_str] = module.id

    new_unit_keys = self._resolve_new_units(existing_module_ids_by_key.keys(),
                                            metadata_modules_by_key.keys())

    # Once we know how many things need to be processed, we can update the progress report
    self.progress_report.modules_total_count = len(new_unit_keys)
    self.progress_report.modules_finished_count = 0
    self.progress_report.modules_error_count = 0
    self.progress_report.update_progress()

    # Add new units; a failure for one module is recorded and the loop continues
    for key in new_unit_keys:
        if self._canceled:
            break
        module = metadata_modules_by_key[key]
        try:
            self._add_new_module(downloader, module)
            self.progress_report.modules_finished_count += 1
        except Exception as e:
            self.progress_report.add_failed_module(module, e, sys.exc_info()[2])
        self.progress_report.update_progress()

    # Remove missing units if the configuration indicates to do so
    if self._should_remove_missing():
        remove_unit_keys = self._resolve_remove_units(existing_module_ids_by_key.keys(),
                                                      metadata_modules_by_key.keys())
        doomed_ids = [existing_module_ids_by_key[key] for key in remove_unit_keys]
        doomed_module_iterator = Module.objects.in_bulk(doomed_ids).itervalues()
        # Pass the db Repository object (repo_obj) to match the object handed to
        # find_repo_content_units above; the original passed self.repo here,
        # which is inconsistent with the rest of this method (and with the
        # sibling implementation of this method elsewhere in this file).
        repo_controller.disassociate_units(self.repo.repo_obj, doomed_module_iterator)

    self.downloader = None
def remove_repo_duplicate_nevra(repo_id):
    """
    Removes duplicate units that have same NEVRA from a repo, keeping only the most recent unit

    This function is for bulk operations on an entire repo, such as after syncing a repo.
    When operating on single units, consider using
    :py:func:`remove_unit_duplicate_nevra` instead.

    :param repo_id: ID of the repo from which units with duplicate nevra will be unassociated
    :type repo_id: str
    """
    # The Repository document is loop-invariant; the original re-fetched it for
    # every batch of duplicates. Fetch it lazily (only once a duplicate is
    # actually found, preserving the original's "no query when no duplicates"
    # behavior) and at most once.
    repo = None
    for unit_type in (models.RPM, models.SRPM, models.DRPM):
        for unit_ids in _duplicate_key_id_generator(unit_type):
            # q objects don't deal with order_by, so they can't be used with repo_controller
            # funcs. disassociate_units only uses the unit_id, so limit the resultset to only
            # that field.
            rcus = model.RepositoryContentUnit.objects.filter(
                repo_id=repo_id, unit_id__in=unit_ids).order_by('-updated').only('unit_id')

            # 0 or 1 packages from the duplicate nevra search match this repo means no duplicates
            if rcus.count() < 2:
                continue

            if repo is None:
                repo = model.Repository.objects.get(repo_id=repo_id)

            # Since the queryset is ordered by the updated field (descending), the
            # first repo content unit is the latest. All other RCUs should be disassociated.
            duplicate_units = (unit_type(id=rcu.unit_id) for rcu in rcus[1:])
            repo_controller.disassociate_units(repo, duplicate_units)
def perform_sync(self):
    """
    Perform the sync operation according to the config, and return a report.
    The sync progress will be reported through the sync_conduit.

    :return: The sync report
    :rtype: pulp.plugins.model.SyncReport
    """
    # Get the manifest and download the ISOs that we are missing
    self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
    try:
        manifest = self._download_manifest()
    except (IOError, ValueError):
        # The IOError will happen if the file can't be retrieved at all, and the ValueError will
        # happen if the PULP_MANIFEST file isn't in the expected format.
        return self.progress_report.build_final_report()

    # Discover what files we need to download and what we already have
    filtered_isos = self._filter_missing_isos(manifest, self.download_deferred)
    local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

    # Associate units that are already in Pulp
    if local_available_isos:
        search_dicts = [unit.unit_key for unit in local_available_isos]
        self.sync_conduit.associate_existing(models.ISO._content_type_id.default, search_dicts)

    # Deferred downloading (Lazy) entries.
    # NOTE(review): this registers catalog entries for the *missing* ISOs; a
    # sibling version of this method in this file passes local_available_isos
    # here and additionally adds a catalog entry per freshly-saved ISO in the
    # deferred loop below — confirm which set is intended.
    self.add_catalog_entries(local_missing_isos)

    self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS

    # Download files and add units.
    if self.download_deferred:
        for iso in local_missing_isos:
            # Deferred (lazy) mode: record the unit as not-downloaded and
            # associate it; content is fetched later on demand.
            iso.downloaded = False
            try:
                iso.save()
            except NotUniqueError:
                # The unit already exists in the database — adopt it instead.
                iso = iso.__class__.objects.filter(**iso.unit_key).first()
            repo_controller.associate_single_unit(self.sync_conduit.repo, iso)
    else:
        self._download_isos(local_missing_isos)

    # Remove unwanted iso units
    if self._remove_missing_units:
        repo_controller.disassociate_units(self.sync_conduit.repo, remote_missing_isos)

    # Report that we are finished. Note that setting the
    # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if the
    # progress report has collected any errors. See the progress_report's _set_state() method
    # for the implementation of this logic.
    self.progress_report.state = self.progress_report.STATE_COMPLETE
    report = self.progress_report.build_final_report()
    return report
def perform_sync(self):
    """
    Run the sync as dictated by the configuration and produce a report.

    Progress is communicated through the sync_conduit while the sync runs.

    :return: The sync report
    :rtype: pulp.plugins.model.SyncReport
    """
    # Fetch the manifest first; without it there is nothing to sync.
    self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
    try:
        manifest = self._download_manifest()
    except (IOError, ValueError):
        # IOError: the manifest could not be retrieved at all.
        # ValueError: the PULP_MANIFEST file was not in the expected format.
        return self.progress_report.build_final_report()

    # Split the manifest into what must be downloaded, what Pulp already has,
    # and what the remote side no longer carries.
    missing, available, remote_missing = self._filter_missing_isos(
        manifest, self.download_deferred)

    # Units already known to Pulp only need to be associated with the repo.
    if available:
        self.sync_conduit.associate_existing(
            models.ISO._content_type_id.default,
            [unit.unit_key for unit in available])

    # Deferred downloading (Lazy) entries.
    self.add_catalog_entries(available)

    self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS

    # Download files and add units.
    if not self.download_deferred:
        self._download_isos(missing)
    else:
        for iso in missing:
            iso.downloaded = False
            try:
                iso.save()
            except NotUniqueError:
                # Another copy already exists in the database; adopt it.
                iso = iso.__class__.objects.filter(**iso.unit_key).first()
            else:
                # Only a freshly saved unit needs a new catalog entry.
                self.add_catalog_entries([iso])
            repo_controller.associate_single_unit(self.sync_conduit.repo, iso)

    # Drop units the remote repository no longer provides, if configured to.
    if self._remove_missing_units:
        repo_controller.disassociate_units(self.sync_conduit.repo, remote_missing)

    # Setting the state to STATE_ISOS_COMPLETE automatically flips it to
    # STATE_ISOS_FAILED when the progress report has collected any errors;
    # see the progress_report's _set_state() method for that logic.
    self.progress_report.state = self.progress_report.STATE_COMPLETE
    report = self.progress_report.build_final_report()
    return report
def migrate(*args, **kwargs):
    """
    For each puppet module check and if needed update module name format.

    There was a discrepancy in the way puppet module's name was stored in pulp, depending
    if it was synced from filesystem or uploaded. This migration finds puppet module units
    that have wrong format name and replaces it with a correct format name.
    """
    modules = Module.objects.filter(Q(name__contains='/') | Q(name__contains='-'))
    repos_to_rebuild = set()
    for puppet_unit in modules:
        try:
            author, name = puppet_unit['name'].split('-', 1)
        except ValueError:
            # This is the forge format, but Puppet still allows it
            author, name = puppet_unit['name'].split('/', 1)
        try:
            puppet_unit.name = name
            puppet_unit.save()
        except NotUniqueError:
            # find all repos that have this unit
            repos_with_unit = model.RepositoryContentUnit.objects.filter(unit_id=puppet_unit.id)
            repos_to_rebuild.update(repos_with_unit)

            # find unit with correct name
            # NOTE(review): this matches on name only; if modules from several
            # authors share a name this may pick the wrong unit — confirm.
            correct_unit = Module.objects.filter(name=name).first()
            for repo in repos_with_unit:
                # unassociate wrong unit
                repo_controller.disassociate_units(repo, [puppet_unit])
                # associate correct unit to the list of the repos
                repo_controller.associate_single_unit(repo, correct_unit)

    repo_list = []
    for repo in repos_to_rebuild:
        repo_obj = model.Repository.objects.get_repo_or_missing_resource(repo.repo_id)
        repo_controller.rebuild_content_unit_counts(repo_obj)
        repo_list.append(repo.repo_id)

    repos_to_republish = model.Distributor.objects.filter(repo_id__in=repo_list,
                                                          last_publish__ne=None)
    # Write the affected repo ids to a file. A context manager guarantees the
    # file is closed even if the write fails (the original leaked the handle
    # on error).
    path = os.path.join('/var/lib/pulp', '0005_puppet_module_name_change.txt')
    with open(path, 'w') as f:
        f.write(str([repo.repo_id for repo in repos_to_republish]))
    # Interpolate AFTER the gettext lookup so the msgid stays stable for
    # translators (the original interpolated before calling _()).
    msg = _('***Note. You may want to re-publish the list of repos found in %s.\n'
            ' This migration fixed an issue with modules installation related to wrong '
            'puppet_module name.') % path
    _log.info(msg)
def _do_import_modules(self, metadata):
    """
    Carry out the module import described by ``metadata``.

    Works on a best-effort, per-module basis: a failing module is recorded in
    the progress report and the import moves on. An exception propagates only
    in an extreme situation where the import cannot continue at all.
    """
    self.downloader = self._create_downloader()
    downloader = self.downloader

    # Index the remote modules by unit key for constant-time lookup.
    remote_by_key = {m.unit_key_as_named_tuple: m for m in metadata.modules}

    # Snapshot the repository's current modules before any mutation.
    local_ids_by_key = {}
    for unit in repo_controller.find_repo_content_units(
            self.repo.repo_obj, unit_fields=Module.unit_key_fields,
            yield_content_unit=True):
        local_ids_by_key[unit.unit_key_as_named_tuple] = unit.id

    units_to_add = self._resolve_new_units(local_ids_by_key.keys(),
                                           remote_by_key.keys())

    # With the workload known, seed the progress report counters.
    self.progress_report.modules_total_count = len(units_to_add)
    self.progress_report.modules_finished_count = 0
    self.progress_report.modules_error_count = 0
    self.progress_report.update_progress()

    # Download and add each new unit, stopping early on cancellation.
    for unit_key in units_to_add:
        if self._canceled:
            break
        remote_module = remote_by_key[unit_key]
        try:
            self._add_new_module(downloader, remote_module)
            self.progress_report.modules_finished_count += 1
        except Exception as e:
            self.progress_report.add_failed_module(remote_module, e, sys.exc_info()[2])
        self.progress_report.update_progress()

    # Optionally drop local units the remote repository no longer carries.
    if self._should_remove_missing():
        stale_keys = self._resolve_remove_units(local_ids_by_key.keys(),
                                                remote_by_key.keys())
        stale_ids = [local_ids_by_key[k] for k in stale_keys]
        stale_units = Module.objects.in_bulk(stale_ids).itervalues()
        repo_controller.disassociate_units(self.repo.repo_obj, stale_units)

    self.downloader = None
def _remove_missing(self, existing_module_ids_by_key, remote_unit_keys):
    """
    Disassociate local modules that no longer exist in the remote repository.

    :param existing_module_ids_by_key: A dict keyed on Module unit key associated with the
                                       current repository. The values are the mongoengine id
                                       of the corresponding Module.
    :type existing_module_ids_by_key: dict of Module.id values keyed on unit_key_str
    :param remote_unit_keys: A list of all the Module keys in the remote repository
    :type remote_unit_keys: list of strings
    """
    # Anything present locally but absent remotely is doomed.
    missing_keys = set(existing_module_ids_by_key.keys()) - set(remote_unit_keys)
    doomed_ids = [existing_module_ids_by_key[key] for key in missing_keys]
    doomed_modules = Module.objects.in_bulk(doomed_ids).itervalues()
    repo_controller.disassociate_units(self.repo, doomed_modules)
def migrate(*args, **kwargs):
    """
    For each puppet module check and if needed update module name format.

    There was a discrepancy in the way puppet module's name was stored in pulp, depending
    if it was synced from filesystem or uploaded. This migration finds puppet module units
    that have wrong format name and replaces it with a correct format name.
    """
    modules = Module.objects.filter(Q(name__contains="/") | Q(name__contains="-"))
    repos_to_rebuild = set()
    for puppet_unit in modules:
        try:
            author, name = puppet_unit["name"].split("-", 1)
        except ValueError:
            # This is the forge format, but Puppet still allows it
            author, name = puppet_unit["name"].split("/", 1)
        try:
            puppet_unit.name = name
            puppet_unit.save()
        except NotUniqueError:
            # find all repos that have this unit
            repos_with_unit = model.RepositoryContentUnit.objects.filter(unit_id=puppet_unit.id)
            repos_to_rebuild.update(repos_with_unit)

            # find unit with correct name
            # NOTE(review): this matches on name only; if modules from several
            # authors share a name this may pick the wrong unit — confirm.
            correct_unit = Module.objects.filter(name=name).first()
            for repo in repos_with_unit:
                # unassociate wrong unit
                repo_controller.disassociate_units(repo, [puppet_unit])
                # associate correct unit to the list of the repos
                repo_controller.associate_single_unit(repo, correct_unit)

    repo_list = []
    for repo in repos_to_rebuild:
        repo_obj = model.Repository.objects.get_repo_or_missing_resource(repo.repo_id)
        repo_controller.rebuild_content_unit_counts(repo_obj)
        repo_list.append(repo.repo_id)

    repos_to_republish = model.Distributor.objects.filter(repo_id__in=repo_list,
                                                          last_publish__ne=None)
    # Write the affected repo ids to a file. A context manager guarantees the
    # file is closed even if the write fails (the original leaked the handle
    # on error).
    path = os.path.join("/var/lib/pulp", "0005_puppet_module_name_change.txt")
    with open(path, "w") as f:
        f.write(str([repo.repo_id for repo in repos_to_republish]))
    # Interpolate AFTER the gettext lookup so the msgid stays stable for
    # translators (the original interpolated before calling _()).
    msg = _(
        "***Note. You may want to re-publish the list of repos found in %s.\n"
        " This migration fixed an issue with modules installation related to wrong "
        "puppet_module name."
    ) % path
    _log.info(msg)
def _associate_unit(self, repo, unit):
    """
    Associate an iso unit with a repository but first check if there's already any with the
    same name and if so, remove them.

    :param repo: An ISO repository that is being synced
    :type repo: pulp.server.db.model.Repository
    :param unit: An ISO unit to associate with repo
    :type unit: pulp_rpm.plugins.db.models.ISO
    """
    if not self.repo_units:
        # Store the existing repo units to prevent querying mongo multiple times.
        # The generator MUST be materialized into a list: the original cached the
        # generator object itself, which is always truthy and is exhausted after
        # the first pass — so every later call silently saw an empty unit set
        # and never removed same-named duplicates.
        self.repo_units = list(
            repo_controller.find_repo_content_units(repo, yield_content_unit=True))
    units_to_remove = [iso for iso in self.repo_units if iso['name'] == unit['name']]

    repo_controller.disassociate_units(repo, units_to_remove)
    repo_controller.associate_single_unit(repo, unit)
def remove_unit_duplicate_nevra(unit, repo):
    """
    Removes units from the repo that have same NEVRA, ignoring the checksum
    and checksum type.

    :param unit: The unit whose NEVRA should be removed
    :type unit: ContentUnit
    :param repo: the repo from which units will be unassociated
    :type repo: pulp.server.db.model.Repository
    """
    # NEVRA is the unit key minus the checksum fields.
    nevra = unit.unit_key.copy()
    nevra.pop('checksum')
    nevra.pop('checksumtype')

    # AND together one Q object per NEVRA field.
    nevra_q = None
    for field, value in nevra.iteritems():
        clause = Q(**{field: value})
        nevra_q = clause if nevra_q is None else (nevra_q & clause)

    type_q = Q(unit_type_id=unit._content_type_id)
    matches = repo_controller.find_repo_content_units(
        repo, repo_content_unit_q=type_q, units_q=nevra_q,
        yield_content_unit=True)
    repo_controller.disassociate_units(repo, matches)
def associate_copy_for_repo(unit, dest_repo, set_content=False):
    """
    Copy a unit into a destination repository, for unit types whose unit key
    includes the repo ID (so a plain association is not possible).

    :param unit: Unit to be copied
    :type unit: pulp_rpm.plugins.db.models.Package
    :param dest_repo: destination repo
    :type dest_repo: pulp.server.db.model.Repository
    :param set_content: if True, the set_storage_path() method will be called on the
                        new unit. Default is False.
    :type set_content: bool

    :return: new unit that was saved and associated
    :rtype: pulp_rpm.plugins.db.models.Package
    """
    copied = unit.clone()
    copied.repo_id = dest_repo.repo_id
    if set_content:
        # The unit key changed, so the storage path must be recomputed.
        copied.set_storage_path(os.path.basename(unit._storage_path))

    try:
        copied.save()
    except mongoengine.NotUniqueError:
        # A previous copy may linger as an orphan; it is safe to delete it and
        # replace it with this fresh version.
        _LOGGER.debug(_('replacing pre-existing copy of %(u)s' % {'u': copied}))
        stale = copied.__class__.objects.filter(**copied.unit_key)
        repo_controller.disassociate_units(dest_repo, stale)
        stale.delete()
        copied.save()

    if set_content:
        copied.safe_import_content(unit._storage_path)

    repo_controller.associate_single_unit(repository=dest_repo, unit=copied)
    return copied
def _associate_unit(self, repo, unit):
    """
    Attach an ISO unit to ``repo``, first dropping any already-associated unit
    that carries the same name.

    :param repo: An ISO repository that is being synced
    :type repo: pulp.server.db.model.Repository
    :param unit: An ISO unit to associate with repo
    :type unit: pulp_rpm.plugins.db.models.ISO
    """
    if not self.repo_units:
        # Cache the repo's units once so mongo isn't queried on every call.
        found = repo_controller.find_repo_content_units(repo, yield_content_unit=True)
        self.repo_units = list(found)

    name = unit['name']
    duplicates = [iso for iso in self.repo_units if iso['name'] == name]
    repo_controller.disassociate_units(repo, duplicates)
    repo_controller.associate_single_unit(repo, unit)
def associate_copy_for_repo(unit, dest_repo, set_content=False):
    """
    Associate a unit with ``dest_repo`` by first cloning it, as required for
    unit types whose unit key embeds the repository ID.

    :param unit: Unit to be copied
    :type unit: pulp_rpm.plugins.db.models.Package
    :param dest_repo: destination repo
    :type dest_repo: pulp.server.db.model.Repository
    :param set_content: if True, the set_storage_path() method will be called on the
                        new unit. Default is False.
    :type set_content: bool

    :return: new unit that was saved and associated
    :rtype: pulp_rpm.plugins.db.models.Package
    """
    replica = unit.clone()
    replica.repo_id = dest_repo.repo_id
    if set_content:
        # A changed unit key implies a new storage location.
        replica.set_storage_path(os.path.basename(unit._storage_path))

    try:
        replica.save()
    except mongoengine.NotUniqueError:
        # An orphaned earlier copy can exist; drop it and save this one instead.
        _LOGGER.debug(_('replacing pre-existing copy of %(u)s' % {'u': replica}))
        orphans = replica.__class__.objects.filter(**replica.unit_key)
        repo_controller.disassociate_units(dest_repo, orphans)
        orphans.delete()
        replica.save()

    if set_content:
        replica.safe_import_content(unit._storage_path)

    repo_controller.associate_single_unit(repository=dest_repo, unit=replica)
    return replica
class ISOImporter(Importer):
    """
    All methods that are missing docstrings are documented in the Importer superclass.
    """

    def import_units(self, source_repo, dest_repo, import_conduit, config, units=None):
        """
        Import content units into the given repository. This method will be called in a number
        of different situations:

        * A user is attempting to copy a content unit from one repository into the repository
          that uses this importer
        * A user is attempting to add an orphaned unit into a repository.

        This call has two options for handling the requested units:

        * Associate the given units with the destination repository. This will link the
          repository with the existing unit directly; changes to the unit will be reflected in
          all repositories that reference it.
        * Create a new unit and save it to the repository. This would act as a deep copy of
          sorts, creating a unique unit in the database. Keep in mind that the unit key must
          change in order for the unit to be considered different than the supplied one.

        The APIs for both approaches are similar to those in the sync conduit. In the case of
        a simple association, the init_unit step can be skipped and save_unit simply called on
        each specified unit.

        The units argument is optional. If None, all units in the source repository should be
        imported. The conduit is used to query for those units. If specified, only the units
        indicated should be imported (this is the case where the caller passed a filter to
        Pulp).

        :param source_repo: metadata describing the repository containing the units to import
        :type source_repo: pulp.plugins.model.Repository
        :param dest_repo: metadata describing the repository to import units into
        :type dest_repo: pulp.plugins.model.Repository
        :param import_conduit: provides access to relevant Pulp functionality
        :type import_conduit: pulp.plugins.conduits.unit_import.ImportUnitConduit
        :param config: plugin configuration
        :type config: pulp.plugins.config.PluginCallConfiguration
        :param units: optional list of pre-filtered units to import
        :type units: list of pulp.plugins.model.Unit

        :return: list of Unit instances that were saved to the destination repository
        :rtype: list
        """
        if units is None:
            # No filter supplied: import every ISO unit from the source repo.
            criteria = UnitAssociationCriteria(type_ids=[ids.TYPE_ID_ISO])
            units = import_conduit.get_source_units(criteria=criteria)

        # Materialize so the list can be both iterated and returned.
        units = list(units)
        for u in units:
            import_conduit.associate_unit(u)

        return units

    @classmethod
    def metadata(cls):
        """
        Return plugin metadata: importer id, display name, and handled types.

        :rtype: dict
        """
        return {
            'id': ids.TYPE_ID_IMPORTER_ISO,
            'display_name': 'ISO Importer',
            'types': [ids.TYPE_ID_ISO]
        }

    def sync_repo(self, transfer_repo, sync_conduit, config):
        """
        Synchronize the repository from its feed and return the sync report.

        :raises ValueError: if the repository has no feed configured
        """
        sync_conduit.repo = transfer_repo.repo_obj
        if config.get(importer_constants.KEY_FEED) is None:
            raise ValueError('Repository without feed cannot be synchronized')
        # Keep a handle on the running sync so it can be canceled externally.
        self.iso_sync = sync.ISOSyncRun(sync_conduit, config)
        report = self.iso_sync.perform_sync()
        self.iso_sync = None
        return report

    def upload_unit(self, transfer_repo, type_id, unit_key, metadata, file_path, conduit,
                    config):
        """
        Handles the creation and association of an ISO.

        :param transfer_repo: The repository to import the package into
        :type transfer_repo: pulp.server.db.model.Repository
        :param type_id: The type_id of the package being uploaded
        :type type_id: str
        :param unit_key: A dictionary of fields to overwrite introspected field values
        :type unit_key: dict
        :param metadata: A dictionary of fields to overwrite introspected field values, or None
        :type metadata: dict or None
        :param file_path: The path to the uploaded package
        :type file_path: str
        :param conduit: provides access to relevant Pulp functionality
        :type conduit: pulp.plugins.conduits.upload.UploadConduit
        :param config: plugin configuration for the repository
        :type config: pulp.plugins.config.PluginCallConfiguration
        """
        qs = models.ISO.objects.filter(**unit_key)
        if qs:
            # iso with this key already exists, use it
            iso = qs.first()
        else:
            # this is a new ISO, create it
            iso = models.ISO(**unit_key)

        validate = config.get_boolean(importer_constants.KEY_VALIDATE)
        validate = validate if validate is not None else constants.CONFIG_VALIDATE_DEFAULT
        try:
            # Let's validate the ISO. This will raise a
            # ValueError if the ISO does not validate correctly.
            iso.validate_iso(file_path, full_validation=validate)
        except ValueError as e:
            # `except X as e` replaces the deprecated py2-only `except X, e`
            # comma syntax (works on Python 2.6+ and is forward compatible).
            return {'success_flag': False, 'summary': e.message, 'details': None}

        try:
            iso.save_and_import_content(file_path)
        except NotUniqueError:
            iso = iso.__class__.objects.get(**iso.unit_key)

        # remove any existing units with the same name
        units = repo_controller.find_repo_content_units(
            transfer_repo.repo_obj, units_q=Q(name=iso['name']), yield_content_unit=True)
        repo_controller.disassociate_units(transfer_repo.repo_obj, units)

        repo_controller.associate_single_unit(transfer_repo.repo_obj, iso)
        return {'success_flag': True, 'summary': None, 'details': None}