def delete_orphan_content_units_by_type(type_id): """ Delete the orphaned content units for the given content type. This method only applies to new style content units that are loaded via entry points NOTE: this method deletes the content unit's bits from disk, if applicable. :param type_id: id of the content type :type type_id: basestring """ # get the model matching the type content_model = plugin_api.get_unit_model_by_id(type_id) content_units = content_model.objects().only('id', 'storage_path') # Paginate the content units for units_group in plugin_misc.paginate(content_units): # Build the list of ids to search for an easier way to access units in the group by id unit_dict = dict() for unit in units_group: unit_dict[unit.id] = unit id_list = list(unit_dict.iterkeys()) # Clear the units that are currently associated from unit_dict non_orphan = model.RepositoryContentUnit.objects(unit_id__in=id_list)\ .distinct('unit_id') for non_orphan_id in non_orphan: unit_dict.pop(non_orphan_id) # Remove the unit and any references on disk for unit_to_delete in unit_dict.itervalues(): unit_to_delete.delete() if unit_to_delete.storage_path: OrphanManager.delete_orphaned_file(unit_to_delete.storage_path)
def _get_deferred_content_units():
    """
    Generate the content units referenced by the DeferredDownload collection.

    Entries whose unit type has no registered model are logged and skipped, as
    are entries whose unit has been removed (e.g. by an orphan cleanup).

    :return: A generator of content units that correspond to DeferredDownload entries.
    :rtype:  generator of pulp.server.db.model.FileContentUnit
    """
    for entry in DeferredDownload.objects.filter():
        try:
            model_class = plugin_api.get_unit_model_by_id(entry.unit_type_id)
            if model_class is None:
                _logger.error(
                    _('Unable to find the model object for the {type} type.').format(
                        type=entry.unit_type_id))
                continue
            yield model_class.objects.filter(id=entry.unit_id).get()
        except DoesNotExist:
            # This is normal if the content unit in question has been purged
            # during an orphan cleanup.
            _logger.debug(
                _('Unable to find the {type}:{id} content unit.').format(
                    type=entry.unit_type_id, id=entry.unit_id))
def get_unit_key_fields_for_type(type_id):
    """
    Based on a unit type ID, determine the fields that compose that type's unit key.

    This supports both the new mongoengine models and the old "types_def"
    collection, so the caller need not worry about whether a type has been
    converted to use mongoengine or not.

    :param type_id: unique ID for a unit type
    :type type_id: str

    :return: tuple containing the name of each field in the unit key
    :rtype: tuple

    :raises ValueError: if the type ID is not found
    """
    mongoengine_model = plugin_api.get_unit_model_by_id(type_id)
    if mongoengine_model is None:
        # Not a mongoengine type; fall back to the legacy "types_def" collection.
        legacy_def = types_db.type_definition(type_id)
        if legacy_def is None:
            raise ValueError
        return tuple(legacy_def['unit_key'])
    return mongoengine_model.unit_key_fields
def _resolve_new_units(self, existing, wanted):
    """
    Decide what units are needed to be downloaded.

    Filter out units which are already in a repository,
    associate units which are already downloaded,

    :param existing: units which are already in a repository
    :type existing: list of unit keys as namedtuples

    :param wanted: units which should be imported into a repository
    :type wanted: list of unit keys as namedtuples

    :return: list of unit keys to download; empty list if all units are already downloaded
    :rtype:  list of unit keys as namedtuples
    """
    model = plugin_api.get_unit_model_by_id(constants.TYPE_PUPPET_MODULE)
    candidates = (model(**key._asdict()) for key in wanted)
    remaining = set(wanted)
    for found in units_controller.find_units(candidates):
        # Only units whose file is actually present on disk count as downloaded.
        on_disk = found._storage_path is not None and os.path.isfile(found._storage_path)
        if not on_disk:
            continue
        if found.unit_key_as_named_tuple not in existing:
            # Already downloaded but not yet in this repository: just associate it.
            repo_controller.associate_single_unit(self.repo.repo_obj, found)
        remaining.discard(found.unit_key_as_named_tuple)
    return list(remaining)
def _resolve_new_units(self, existing, wanted):
    """
    Decide what units are needed to be downloaded.

    Filter out units which are already in a repository,
    associate units which are already downloaded,

    :param existing: units which are already in a repository
    :type existing: list of unit keys as namedtuples

    :param wanted: units which should be imported into a repository
    :type wanted: list of unit keys as namedtuples

    :return: list of unit keys to download; empty list if all units are already downloaded
    :rtype:  list of unit keys as namedtuples
    """
    model = plugin_api.get_unit_model_by_id(constants.TYPE_PUPPET_MODULE)
    to_download = set(wanted)
    generator = (model(**named_tuple._asdict()) for named_tuple in wanted)
    for known_unit in units_controller.find_units(generator):
        path = known_unit._storage_path
        # A unit is "already downloaded" only if its file exists on disk.
        if path is not None and os.path.isfile(path):
            key = known_unit.unit_key_as_named_tuple
            if key not in existing:
                # Downloaded but missing from this repo: associate it now.
                repo_controller.associate_single_unit(self.repo.repo_obj, known_unit)
            to_download.discard(key)
    return list(to_download)
def get_model_serializer_for_type(type_id):
    """
    Get a ModelSerializer instance associated with a given unit type id

    Serializers are only needed with mongoengine models. This will return None
    for pymongo models or mongoengine models that do not have a serializer.

    :param type_id: unique ID for a unit type
    :type type_id: str

    :return: model serializer instance, if available; otherwise the function
             falls through and implicitly returns None (it does not raise for
             unknown type IDs)
    :rtype: pulp.server.webservices.views.serializers.ModelSerializer or None
    """
    model_class = plugin_api.get_unit_model_by_id(type_id)
    # mongoengine models have a SERIALIZER attr, which is exposed via the serializer
    # property as an instance with the associated model
    if model_class is not None and hasattr(model_class, 'SERIALIZER'):
        serializer = model_class.SERIALIZER
        # model serializer methods currently take the model class as an arg
        # so stash the model class on the serializer for now, and this all
        # gets made better with https://pulp.plan.io/issues/1555
        serializer.model = model_class
        # instantiate the serializer before returning
        return serializer()
def delete_orphan_content_units_by_type(type_id): """ Delete the orphaned content units for the given content type. This method only applies to new style content units that are loaded via entry points NOTE: this method deletes the content unit's bits from disk, if applicable. :param type_id: id of the content type :type type_id: basestring """ # get the model matching the type content_model = plugin_api.get_unit_model_by_id(type_id) content_units = content_model.objects().only('id', 'storage_path') # Paginate the content units for units_group in plugin_misc.paginate(content_units): # Build the list of ids to search for an easier way to access units in the group by id unit_dict = dict() for unit in units_group: unit_dict[unit.id] = unit id_list = list(unit_dict.iterkeys()) # Clear the units that are currently associated from unit_dict non_orphan = model.RepositoryContentUnit.objects(unit_id__in=id_list)\ .distinct('unit_id') for non_orphan_id in non_orphan: unit_dict.pop(non_orphan_id) # Remove the unit and any references on disk for unit_to_delete in unit_dict.itervalues(): unit_to_delete.delete() if unit_to_delete.storage_path: OrphanManager.delete_orphaned_file( unit_to_delete.storage_path)
def _handle_group_category_comps(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the creation of a package group, category or environment.

    If a file was uploaded, treat this as upload of a comps.xml file. If no file was uploaded,
    the process only creates the unit.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository

    :param type_id: The type_id of the package being uploaded
    :type  type_id: str

    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict

    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None

    :param file_path: The path to the uploaded package
    :type  file_path: str

    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit

    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration
    """
    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    # A non-empty file means a full comps.xml was uploaded; parse out each kind
    # of element and save/associate the resulting units.
    if file_path is not None and os.path.getsize(file_path) > 0:
        # uploading a comps.xml
        _get_and_save_file_units(file_path, group.process_group_element,
                                 group.GROUP_TAG, conduit, repo)
        _get_and_save_file_units(file_path, group.process_category_element,
                                 group.CATEGORY_TAG, conduit, repo)
        _get_and_save_file_units(file_path, group.process_environment_element,
                                 group.ENVIRONMENT_TAG, conduit, repo)
        _get_and_save_file_units(file_path, group.process_langpacks_element,
                                 group.LANGPACKS_TAG, conduit, repo)
    else:
        # uploading a package group, package category or package environment
        unit_data = {}
        unit_data.update(metadata or {})
        unit_data.update(unit_key or {})  # unit_key wins over metadata on conflict
        try:
            unit = model_class(**unit_data)
        except TypeError:
            # unit_data contained fields the model does not accept
            raise ModelInstantiationError()
        try:
            unit.save()
        except NotUniqueError:
            # The unit already exists; re-fetch it so the association below
            # points at the stored document rather than the duplicate.
            unit = unit.__class__.objects.filter(**unit.unit_key).first()
        repo_controller.associate_single_unit(repo, unit)
def _handle_group_category_comps(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the creation of a package group or category.

    If a file was uploaded, treat this as upload of a comps.xml file. If no file was uploaded,
    the process only creates the unit.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository

    :param type_id: The type_id of the package being uploaded
    :type  type_id: str

    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict

    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None

    :param file_path: The path to the uploaded package
    :type  file_path: str

    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit

    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration
    """
    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    # A non-empty file means a full comps.xml was uploaded; parse out each kind
    # of element and save/associate the resulting units.
    if file_path is not None and os.path.getsize(file_path) > 0:
        # uploading a comps.xml
        repo_id = repo.repo_id
        _get_and_save_file_units(file_path, group.process_group_element,
                                 group.GROUP_TAG, conduit, repo_id)
        _get_and_save_file_units(file_path, group.process_category_element,
                                 group.CATEGORY_TAG, conduit, repo_id)
        _get_and_save_file_units(file_path, group.process_environment_element,
                                 group.ENVIRONMENT_TAG, conduit, repo_id)
    else:
        # uploading a package group or package category
        unit_data = {}
        unit_data.update(metadata or {})
        unit_data.update(unit_key or {})  # unit_key wins over metadata on conflict
        try:
            unit = model_class(**unit_data)
        except TypeError:
            # unit_data contained fields the model does not accept
            raise ModelInstantiationError()

        unit.save()

        if file_path:
            # file_path was non-None but zero bytes; still record a storage
            # location and import whatever content is there.
            unit.set_storage_path(os.path.basename(file_path))
            unit.safe_import_content(file_path)

        repo_controller.associate_single_unit(repo, unit)
def _handle_erratum(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the upload for an erratum. There is no file uploaded so the only
    steps are to save the metadata and optionally link the erratum to RPMs
    in the repository.

    NOTE: For now errata is handled differently than other units. Uploaded erratum should not
    overwrite the existing one if the latter exists, they should be merged. This is only because
    of the way erratum is stored in the MongoDB and it is in `our plans`_ to re-think how to do
    it correctly.

    .. _our plans: https://pulp.plan.io/issues/1803

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository

    :param type_id: The type_id of the package being uploaded
    :type  type_id: str

    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict

    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None

    :param file_path: The path to the uploaded package
    :type  file_path: str

    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit

    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration
    """
    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    unit_data = {}
    unit_data.update(metadata or {})
    unit_data.update(unit_key or {})  # unit_key wins over metadata on conflict

    existing_unit = model_class.objects.filter(**unit_key).first()
    new_unit = model_class(**unit_data)

    # Add repo_id to each collection of the pkglist of the new erratum
    for collection in new_unit.pkglist:
        collection['_pulp_repo_id'] = repo.repo_id

    unit = new_unit
    if existing_unit:
        # Merge rather than overwrite (see note in the docstring above).
        existing_unit.merge_errata(new_unit)
        unit = existing_unit

    unit.save()
    # Linking the erratum to the repo is skippable via config.
    if not config.get_boolean(CONFIG_SKIP_ERRATUM_LINK):
        repo_controller.associate_single_unit(repo, unit)
def delete_orphan_content_units_by_type(type_id, content_unit_ids=None): """ Delete the orphaned content units for the given content type. This method only applies to new style content units that are loaded via entry points NOTE: this method deletes the content unit's bits from disk, if applicable. :param type_id: id of the content type :type type_id: basestring :param content_unit_ids: list of content unit ids to delete; None means delete them all :type content_unit_ids: iterable or None :return: count of units deleted :rtype: int """ # get the model matching the type content_model = plugin_api.get_unit_model_by_id(type_id) if content_unit_ids: query_sets = [] for page in plugin_misc.paginate(content_unit_ids): qs = content_model.objects(id__in=page).only( 'id', '_storage_path') query_sets.append(qs) content_units = itertools.chain(*query_sets) else: content_units = content_model.objects.only('id', '_storage_path') count = 0 # Paginate the content units for units_group in plugin_misc.paginate(content_units): # Build the list of ids to search for an easier way to access units in the group by id unit_dict = dict() for unit in units_group: unit_dict[unit.id] = unit id_list = list(unit_dict.iterkeys()) # Clear the units that are currently associated from unit_dict non_orphan = model.RepositoryContentUnit.objects(unit_id__in=id_list)\ .distinct('unit_id') for non_orphan_id in non_orphan: unit_dict.pop(non_orphan_id) # Remove the unit, lazy catalog entries, and any content in storage. for unit_to_delete in unit_dict.itervalues(): model.LazyCatalogEntry.objects( unit_id=str(unit_to_delete.id), unit_type_id=str(type_id)).delete() unit_to_delete.delete() if unit_to_delete._storage_path: OrphanManager.delete_orphaned_file( unit_to_delete._storage_path) count += 1 return count
def _download(self, catalog_entry, request, responder):
    """
    Build a nectar downloader and download the content from the catalog entry.

    The download is performed by the alternate content container, so it is possible
    to use the streamer in conjunction with alternate content sources.

    :param catalog_entry: The catalog entry to download.
    :type  catalog_entry: pulp.server.db.model.LazyCatalogEntry
    :param request:       The client content request.
    :type  request:       twisted.web.server.Request
    :param responder:     The file-like object that nectar should write to.
    :type  responder:     Responder
    """
    # Configure the primary downloader for alternate content sources
    plugin_importer, config, db_importer = repo_controller.get_importer_by_id(
        catalog_entry.importer_id)
    # There is an unfortunate mess of configuration classes and attributes, and
    # multiple "models" floating around. The MongoEngine class that corresponds
    # to the database entry only contains the repository config. The ``config``
    # variable above contains the repository configuration _and_ the plugin-wide
    # configuration, so here we override the db_importer.config because it doesn't
    # have the whole config. In the future the importer object should seemlessly
    # load and apply the plugin-wide configuration.
    db_importer.config = config.flatten()
    primary_downloader = plugin_importer.get_downloader_for_db_importer(
        db_importer, catalog_entry.url, working_dir='/tmp')
    pulp_request = request.getHeader(PULP_STREAM_REQUEST_HEADER)
    listener = StreamerListener(request, self.config, catalog_entry, pulp_request)
    primary_downloader.session = self.session
    primary_downloader.event_listener = listener

    # Build the alternate content source download request
    unit_model = plugins_api.get_unit_model_by_id(catalog_entry.unit_type_id)
    # Only the unit key fields are needed to build the download request.
    qs = unit_model.objects.filter(id=catalog_entry.unit_id).only(*unit_model.unit_key_fields)
    try:
        unit = qs.get()
        download_request = content_models.Request(
            catalog_entry.unit_type_id,
            unit.unit_key,
            catalog_entry.url,
            responder,
        )
        alt_content_container = content_container.ContentContainer(threaded=False)
        alt_content_container.download(primary_downloader, [download_request], listener)
    except DoesNotExist:
        # A catalog entry is referencing a unit that doesn't exist which is bad.
        msg = _('The catalog entry for {path} references {unit_type}:{id}, but '
                'that unit is not in the database.')
        logger.error(msg.format(path=catalog_entry.path,
                                unit_type=catalog_entry.unit_type_id,
                                id=catalog_entry.unit_id))
        request.setResponseCode(NOT_FOUND)
    finally:
        # Always release downloader resources, even on failure.
        primary_downloader.config.finalize()
def delete_orphan_content_units_by_type(type_id, content_unit_ids=None): """ Delete the orphaned content units for the given content type. This method only applies to new style content units that are loaded via entry points NOTE: this method deletes the content unit's bits from disk, if applicable. :param type_id: id of the content type :type type_id: basestring :param content_unit_ids: list of content unit ids to delete; None means delete them all :type content_unit_ids: iterable or None :return: count of units deleted :rtype: int """ # get the model matching the type content_model = plugin_api.get_unit_model_by_id(type_id) if content_unit_ids: query_sets = [] for page in plugin_misc.paginate(content_unit_ids): qs = content_model.objects(id__in=page).only('id', '_storage_path') query_sets.append(qs) content_units = itertools.chain(*query_sets) else: content_units = content_model.objects.only('id', '_storage_path') count = 0 # Paginate the content units for units_group in plugin_misc.paginate(content_units): # Build the list of ids to search for an easier way to access units in the group by id unit_dict = dict() for unit in units_group: unit_dict[unit.id] = unit id_list = list(unit_dict.iterkeys()) # Clear the units that are currently associated from unit_dict non_orphan = model.RepositoryContentUnit.objects(unit_id__in=id_list)\ .distinct('unit_id') for non_orphan_id in non_orphan: unit_dict.pop(non_orphan_id) # Remove the unit, lazy catalog entries, and any content in storage. for unit_to_delete in unit_dict.itervalues(): model.LazyCatalogEntry.objects( unit_id=str(unit_to_delete.id), unit_type_id=str(type_id) ).delete() unit_to_delete.delete() if unit_to_delete._storage_path: OrphanManager.delete_orphaned_file(unit_to_delete._storage_path) count += 1 return count
def _download(self, catalog_entry, request, responder):
    """
    Build a nectar downloader and download the content from the catalog entry.

    The download is performed by the alternate content container, so it is possible
    to use the streamer in conjunction with alternate content sources.

    :param catalog_entry: The catalog entry to download.
    :type  catalog_entry: pulp.server.db.model.LazyCatalogEntry
    :param request:       The client content request.
    :type  request:       twisted.web.server.Request
    :param responder:     The file-like object that nectar should write to.
    :type  responder:     Responder
    """
    # Configure the primary downloader for alternate content sources
    plugin_importer, config, db_importer = repo_controller.get_importer_by_id(
        catalog_entry.importer_id)
    primary_downloader = plugin_importer.get_downloader_for_db_importer(
        db_importer, catalog_entry.url, working_dir='/tmp')
    pulp_request = request.getHeader(PULP_STREAM_REQUEST_HEADER)
    listener = StreamerListener(request, self.config, catalog_entry, pulp_request)
    primary_downloader.session = self.session
    primary_downloader.event_listener = listener

    # Build the alternate content source download request
    unit_model = plugins_api.get_unit_model_by_id(
        catalog_entry.unit_type_id)
    # Only the unit key fields are needed to build the download request.
    qs = unit_model.objects.filter(id=catalog_entry.unit_id).only(
        *unit_model.unit_key_fields)
    try:
        unit = qs.get()
        download_request = content_models.Request(
            catalog_entry.unit_type_id,
            unit.unit_key,
            catalog_entry.url,
            responder,
        )
        alt_content_container = content_container.ContentContainer(
            threaded=False)
        alt_content_container.download(primary_downloader,
                                       [download_request], listener)
    except DoesNotExist:
        # A catalog entry is referencing a unit that doesn't exist which is bad.
        msg = _(
            'The catalog entry for {path} references {unit_type}:{id}, but '
            'that unit is not in the database.')
        logger.error(
            msg.format(path=catalog_entry.path,
                       unit_type=catalog_entry.unit_type_id,
                       id=catalog_entry.unit_id))
        request.setResponseCode(NOT_FOUND)
    finally:
        # Always release downloader resources, even on failure.
        primary_downloader.config.finalize()
def check_all_and_associate(wanted, conduit, config, download_deferred, catalog):
    """
    Given a set of unit keys as namedtuples, this function checks if a unit
    already exists in Pulp and returns the set of tuples that were not found.

    This checks for the unit in the db as well as for the actual file
    on the filesystem. If a unit exists in the db and the filesystem, this function
    also associates the unit to the given repo. Note that the check for the actual file
    is performed only for the supported unit types.

    :param wanted:            iterable of units as namedtuples
    :type  wanted:            iterable
    :param conduit:           repo sync conduit
    :type  conduit:           pulp.plugins.conduits.repo_sync.RepoSync
    :param config:            configuration instance passed to the importer
    :type  config:            pulp.plugins.config.PluginCallConfiguration
    :param download_deferred: indicates downloading is deferred (or not).
    :type  download_deferred: bool
    :param catalog:           Deferred downloading catalog.
    :type  catalog:           pulp_rpm.plugins.importers.yum.sync.PackageCatalog

    :return:    set of unit keys as namedtuples, identifying which of the
                named tuples received as input were not found on the server.
    :rtype:     set
    """
    sorted_units = _sort_by_type(wanted)
    for unit_type, values in sorted_units.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)
        # FIXME "fields" does not get used, but it should
        # fields = model.unit_key_fields + ('_storage_path',)
        unit_generator = (model(**unit_tuple._asdict()) for unit_tuple in values.copy())
        for unit in units_controller.find_units(unit_generator):
            # Existing RPMs, DRPMs and SRPMs are disqualified when the associated
            # package file does not exist and downloading is not deferred.
            if not download_deferred and unit_type in (
                    ids.TYPE_ID_RPM, ids.TYPE_ID_SRPM, ids.TYPE_ID_DRPM):
                if unit._storage_path is None or not os.path.isfile(unit._storage_path):
                    continue
            catalog.add(unit)
            # When signature checking is enabled, units failing the filter are
            # neither associated nor removed from the wanted set.
            if rpm_parse.signature_enabled(config):
                try:
                    rpm_parse.filter_signature(unit, config)
                except PulpCodedException as e:
                    _LOGGER.debug(e)
                    continue
            repo_controller.associate_single_unit(conduit.repo, unit)
            values.discard(unit.unit_key_as_named_tuple)
    still_wanted = set()
    still_wanted.update(*sorted_units.values())
    return still_wanted
def delete_orphans_by_type(content_type_id, content_unit_ids=None):
    """
    Delete the orphaned content units for the given content type.

    If the content_unit_ids parameter is not None, is acts as a filter of
    the specific orphaned content units that may be deleted.

    NOTE: this method deletes the content unit's bits from disk, if applicable.

    :param content_type_id: id of the content type
    :type content_type_id: basestring
    :param content_unit_ids: list of content unit ids to delete; None means delete them all
    :type content_unit_ids: iterable or None

    :return: count of units deleted
    :rtype: int
    """
    content_units_collection = content_types_db.type_units_collection(
        content_type_id)
    content_model = plugin_api.get_unit_model_by_id(content_type_id)
    try:
        unit_key_fields = units_controller.get_unit_key_fields_for_type(
            content_type_id)
    except ValueError:
        # Unknown type id: surface it as a missing-resource error to callers.
        raise MissingResource(content_type_id=content_type_id)

    # Fetch only what is needed to remove the unit and its file.
    fields = ('_id', '_storage_path') + unit_key_fields
    count = 0
    for content_unit in OrphanManager.generate_orphans_by_type(
            content_type_id, fields=fields):
        # When a filter was given, skip units that are not in it.
        if content_unit_ids is not None and content_unit[
                '_id'] not in content_unit_ids:
            continue

        # Remove lazy catalog entries, the unit document, then run any
        # model-specific post-delete hooks.
        model.LazyCatalogEntry.objects(
            unit_id=content_unit['_id'],
            unit_type_id=content_type_id).delete()
        content_units_collection.remove(content_unit['_id'])
        if hasattr(content_model, 'do_post_delete_actions'):
            content_model.do_post_delete_actions(content_unit)

        storage_path = content_unit.get('_storage_path', None)
        if storage_path is not None:
            OrphanManager.delete_orphaned_file(storage_path)
        count += 1

    return count
def check_repo(wanted): """ Given an iterable of units as namedtuples, this function will search for them using the given search method and return the set of tuples that were not found. This checks for the unit in the db as well as for the actual file on the filesystem. Note that the check for the actual file is performed only for the supported unit types. This is useful in a case where you know what units you want to have in a repo, but need to know which you need to actually download by eliminating the ones you already have. :param wanted: iterable of units as namedtuples :type wanted: iterable :param sync_conduit: :type sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit :return: set of unit keys as namedtuples, identifying which of the named tuples received as input were not found by the search method. :rtype: set """ # sort by type sorted_units = _sort_by_type(wanted) # UAQ for each type for unit_type, values in sorted_units.iteritems(): model = plugin_api.get_unit_model_by_id(unit_type) fields = model.unit_key_fields + ('_storage_path', ) rpm_srpm_drpm = unit_type in (ids.TYPE_ID_RPM, ids.TYPE_ID_SRPM, ids.TYPE_ID_DRPM) # FIXME this function being called doesn't have a fields parameter unit_generator = (model(**unit_tuple._asdict()) for unit_tuple in values.copy()) for unit in units_controller.find_units(unit_generator, fields=fields): if rpm_srpm_drpm: # For RPMs, SRPMs and DRPMs, also check if the file exists on the filesystem. # If not, we do not want to skip downloading the unit. if unit._storage_path is None or not os.path.isfile( unit._storage_path): continue values.discard(unit.unit_key_as_named_tuple) ret = set() ret.update(*sorted_units.values()) return ret
def check_repo(wanted):
    """
    Determine which of the wanted unit keys are not already present in Pulp.

    A unit counts as present when it exists in the db and, for RPM, SRPM and
    DRPM types, its file also exists on the filesystem. Useful for deciding
    which units still need to be downloaded.

    :param wanted:          iterable of units as namedtuples
    :type  wanted:          iterable
    :param sync_conduit:
    :type  sync_conduit:    pulp.plugins.conduits.repo_sync.RepoSyncConduit

    :return:    set of unit keys as namedtuples, identifying which of the
                named tuples received as input were not found by the
                search method.
    :rtype:     set
    """
    by_type = _sort_by_type(wanted)
    for type_id, key_set in by_type.iteritems():
        model = plugin_api.get_unit_model_by_id(type_id)
        fields = model.unit_key_fields + ('_storage_path',)
        needs_file_check = type_id in (ids.TYPE_ID_RPM,
                                       ids.TYPE_ID_SRPM,
                                       ids.TYPE_ID_DRPM)
        # FIXME this function being called doesn't have a fields parameter
        candidates = (model(**key._asdict()) for key in key_set.copy())
        for found in units_controller.find_units(candidates, fields=fields):
            if needs_file_check and (found._storage_path is None or
                                     not os.path.isfile(found._storage_path)):
                # File missing on disk: keep it in the wanted set.
                continue
            key_set.discard(found.unit_key_as_named_tuple)

    missing = set()
    missing.update(*by_type.values())
    return missing
def _download(self, catalog_entry, request, responder):
    """
    Build a nectar downloader and download the content from the catalog entry.

    The download is performed by the alternate content container, so it is possible
    to use the streamer in conjunction with alternate content sources.

    :param catalog_entry: The catalog entry to download.
    :type  catalog_entry: pulp.server.db.model.LazyCatalogEntry
    :param request:       The client content request.
    :type  request:       twisted.web.server.Request
    :param responder:     The file-like object that nectar should write to.
    :type  responder:     Responder
    """
    # Configure the primary downloader for alternate content sources
    plugin_importer, config, db_importer = repo_controller.get_importer_by_id(
        catalog_entry.importer_id)
    primary_downloader = plugin_importer.get_downloader_for_db_importer(
        db_importer, catalog_entry.url, working_dir='/tmp')
    pulp_request = request.getHeader(PULP_STREAM_REQUEST_HEADER)
    listener = StreamerListener(request, self.config, catalog_entry, pulp_request)
    primary_downloader.session = self.session
    primary_downloader.event_listener = listener

    # Build the alternate content source download request
    unit_model = plugins_api.get_unit_model_by_id(catalog_entry.unit_type_id)
    # Only the unit key fields are needed to build the download request.
    qs = unit_model.objects.filter(id=catalog_entry.unit_id).only(*unit_model.unit_key_fields)
    try:
        unit = qs.get()
        download_request = content_models.Request(
            catalog_entry.unit_type_id,
            unit.unit_key,
            catalog_entry.url,
            responder,
        )
        alt_content_container = content_container.ContentContainer(threaded=False)
        alt_content_container.download(primary_downloader, [download_request], listener)
    except DoesNotExist:
        # A catalog entry is referencing a unit that doesn't exist which is bad.
        msg = _('The catalog entry for {path} references {unit_type}:{id}, but '
                'that unit is not in the database.')
        logger.error(msg.format(path=catalog_entry.path,
                                unit_type=catalog_entry.unit_type_id,
                                id=catalog_entry.unit_id))
        request.setResponseCode(NOT_FOUND)
    finally:
        # Always release downloader resources, even on failure.
        primary_downloader.config.finalize()
def get_repo_unit_models(repo_id):
    """
    Retrieve all the MongoEngine models for units in a given repository.

    If a unit type is in the repository and does not have a MongoEngine model,
    that unit type is excluded from the returned list.

    :param repo_id: ID of the repo whose unit models should be retrieved.
    :type  repo_id: str

    :return: A list of sub-classes of ContentUnit that define a unit model.
    :rtype:  list of pulp.server.db.model.ContentUnit
    """
    type_ids = model.RepositoryContentUnit.objects(
        repo_id=repo_id).distinct('unit_type_id')
    unit_models = []
    for type_id in type_ids:
        unit_models.append(plugin_api.get_unit_model_by_id(type_id))
    # Filter any non-MongoEngine content types.
    return filter(None, unit_models)
def download_succeeded(self, report): """ Marks the individual file for the unit as downloaded and moves it into its final storage location if its checksum value matches the value in the catalog entry (if present). Inherited from DownloadEventListener. :param report: the report associated with the download request. :type report: nectar.report.DownloadReport """ # Reload the content unit unit_model = plugin_api.get_unit_model_by_id(report.data[TYPE_ID]) unit_qs = unit_model.objects.filter(id=report.data[UNIT_ID]) content_unit = unit_qs.only('_content_type_id', 'id', '_last_updated').get() path_entry = report.data[UNIT_FILES][report.destination] # Validate the file and update the progress. catalog_entry = path_entry[CATALOG_ENTRY] try: self.validate_file(report.destination, catalog_entry.checksum_algorithm, catalog_entry.checksum) relative_path = os.path.relpath(catalog_entry.path, FileStorage.get_path(content_unit)) if len(report.data[UNIT_FILES]) == 1: # If the unit is single-file, update the storage path to point to the file content_unit.set_storage_path(relative_path) unit_qs.update_one( set___storage_path=content_unit._storage_path) content_unit.import_content(report.destination) else: content_unit.import_content(report.destination, location=relative_path) self.progress_successes += 1 path_entry[PATH_DOWNLOADED] = True except (InvalidChecksumType, VerificationException, IOError), e: _logger.debug( _('Download of {path} failed: {reason}.').format( path=catalog_entry.path, reason=str(e))) path_entry[PATH_DOWNLOADED] = False self.progress_failures += 1
def download_succeeded(self, report):
    """
    Mark the downloaded file for the unit as complete and move it into its
    final storage location, provided its checksum matches the catalog entry
    (when a checksum is present).

    Inherited from DownloadEventListener.

    :param report: the report associated with the download request.
    :type  report: nectar.report.DownloadReport
    """
    # Reload the content unit from the database.
    model_cls = plugin_api.get_unit_model_by_id(report.data[TYPE_ID])
    queryset = model_cls.objects.filter(id=report.data[UNIT_ID])
    unit = queryset.only('_content_type_id', 'id', '_last_updated').get()
    file_entry = report.data[UNIT_FILES][report.destination]
    entry = file_entry[CATALOG_ENTRY]
    try:
        # Verify the downloaded bits against the catalog checksum.
        self.validate_file(
            report.destination,
            entry.checksum_algorithm,
            entry.checksum
        )
        rel_path = os.path.relpath(
            entry.path,
            FileStorage.get_path(unit)
        )
        if len(report.data[UNIT_FILES]) != 1:
            # Multi-file unit: drop the file at its relative location.
            unit.import_content(report.destination, location=rel_path)
        else:
            # Single-file unit: repoint the storage path at the file itself.
            unit.set_storage_path(rel_path)
            queryset.update_one(set___storage_path=unit._storage_path)
            unit.import_content(report.destination)
        self.progress_successes += 1
        file_entry[PATH_DOWNLOADED] = True
    except (InvalidChecksumType, VerificationException, IOError) as e:
        _logger.debug(_('Download of {path} failed: {reason}.').format(
            path=entry.path, reason=str(e)))
        file_entry[PATH_DOWNLOADED] = False
        self.progress_failures += 1
def check_all_and_associate(wanted, conduit, download_deferred, catalog):
    """
    Given a set of unit keys as namedtuples, this function checks if a unit
    already exists in Pulp and returns the set of tuples that were not found.

    This checks for the unit in the db as well as for the actual file
    on the filesystem. If a unit exists in the db and the filesystem, this function
    also associates the unit to the given repo. Note that the check for the actual file
    is performed only for the supported unit types.

    :param wanted:            iterable of units as namedtuples
    :type  wanted:            iterable
    :param conduit:           repo sync conduit
    :type  conduit:           pulp.plugins.conduits.repo_sync.RepoSync
    :param download_deferred: indicates downloading is deferred (or not).
    :type  download_deferred: bool
    :param catalog:           Deferred downloading catalog.
    :type  catalog:           pulp_rpm.plugins.importers.yum.sync.PackageCatalog

    :return: set of unit keys as namedtuples, identifying which of the named
             tuples received as input were not found on the server.
    :rtype:  set
    """
    sorted_units = _sort_by_type(wanted)
    for unit_type, values in sorted_units.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)
        # FIXME "fields" does not get used, but it should
        # fields = model.unit_key_fields + ('_storage_path',)
        # Iterate a copy of ``values``: found units are discarded from the
        # original set below while this generator is still being consumed.
        unit_generator = (model(**unit_tuple._asdict())
                          for unit_tuple in values.copy())
        for unit in units_controller.find_units(unit_generator):
            # Existing RPMs, DRPMs and SRPMs are disqualified when the associated
            # package file does not exist and downloading is not deferred.
            if not download_deferred and unit_type in (ids.TYPE_ID_RPM,
                                                       ids.TYPE_ID_SRPM,
                                                       ids.TYPE_ID_DRPM):
                if unit._storage_path is None or not os.path.isfile(
                        unit._storage_path):
                    continue
            catalog.add(unit)
            # Unit already exists in Pulp; just associate it with the repo.
            repo_controller.associate_single_unit(conduit.repo, unit)
            values.discard(unit.unit_key_as_named_tuple)
    # Whatever was not discarded above was not found on the server.
    still_wanted = set()
    still_wanted.update(*sorted_units.values())
    return still_wanted
def _handle_erratum(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the upload for an erratum. There is no file uploaded so the only
    steps are to save the metadata and optionally link the erratum to RPMs
    in the repository.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository
    :param type_id: The type_id of the package being uploaded
    :type  type_id: str
    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict
    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None
    :param file_path: The path to the uploaded package
    :type  file_path: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit
    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration
    """
    model_class = plugin_api.get_unit_model_by_id(type_id)
    # Translate any API-level field names into their model equivalents.
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    # Merge metadata and unit key; unit-key values take precedence.
    fields = dict(metadata or {})
    fields.update(unit_key or {})

    unit = model_class(**fields)
    unit.save()

    # Linking the erratum to repository RPMs can be disabled by configuration.
    if not config.get_boolean(CONFIG_SKIP_ERRATUM_LINK):
        repo_controller.associate_single_unit(repo, unit)
def _handle_erratum(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the upload for an erratum. There is no file uploaded so the only
    steps are to save the metadata and optionally link the erratum to RPMs
    in the repository.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository
    :param type_id: The type_id of the package being uploaded
    :type  type_id: str
    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict
    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None
    :param file_path: The path to the uploaded package
    :type  file_path: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit
    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration
    """
    model_class = plugin_api.get_unit_model_by_id(type_id)
    # Translate any API-level field names into their model equivalents.
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    # Merge metadata and unit key; unit-key values take precedence.
    unit_data = {}
    unit_data.update(metadata or {})
    unit_data.update(unit_key or {})

    unit = model_class(**unit_data)
    unit.save()

    if not config.get_boolean(CONFIG_SKIP_ERRATUM_LINK):
        # NOTE(review): this loop is an unfinished placeholder -- it iterates
        # the RPM/SRPM model types but performs no work yet (see TODO below).
        for model_type in [models.RPM, models.SRPM]:
            pass
            # TODO Find out if the unit exists, if it does, associated, if not, create
def _handle_yum_metadata_file(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the upload for a Yum repository metadata file.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository
    :param type_id: The type_id of the package being uploaded
    :type  type_id: str
    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict
    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None
    :param file_path: The path to the uploaded package
    :type  file_path: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit
    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration
    """
    model_class = plugin_api.get_unit_model_by_id(type_id)
    # Translate any API-level field names into their model equivalents.
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    # Merge metadata and unit key; unit-key values win on conflict.
    combined = dict(metadata or {})
    combined.update(unit_key or {})

    unit = models.YumMetadataFile(**combined)
    unit.set_storage_path(os.path.basename(file_path))
    try:
        unit.save_and_import_content(file_path)
    except NotUniqueError:
        # The unit already exists; re-fetch the saved copy by its unit key.
        unit = unit.__class__.objects.get(**unit.unit_key)
    repo_controller.associate_single_unit(conduit.repo, unit)
def check_all_and_associate(wanted, sync_conduit):
    """
    Given a set of unit keys as namedtuples, this function checks if a unit
    already exists in Pulp and returns the set of tuples that were not found.

    This checks for the unit in the db as well as for the actual file
    on the filesystem. If a unit exists in the db and the filesystem, this function
    also associates the unit to the given repo. Note that the check for the actual file
    is performed only for the supported unit types.

    :param wanted:       iterable of units as namedtuples
    :type  wanted:       iterable
    :param sync_conduit: repo sync conduit
    :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSync

    :return: set of unit keys as namedtuples, identifying which of the named
             tuples received as input were not found on the server.
    :rtype:  set
    """
    sorted_units = _sort_by_type(wanted)
    for unit_type, values in sorted_units.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)
        # FIXME "fields" does not get used, but it should
        # fields = model.unit_key_fields + ('_storage_path',)
        rpm_srpm_drpm = unit_type in (ids.TYPE_ID_RPM,
                                      ids.TYPE_ID_SRPM,
                                      ids.TYPE_ID_DRPM)
        # Iterate over a copy of ``values``: values.discard() below mutates the
        # set while this generator is still being consumed, which would raise
        # "Set changed size during iteration" when iterating the set directly.
        unit_generator = (model(**unit_tuple._asdict())
                          for unit_tuple in values.copy())
        for unit in units_controller.find_units(unit_generator):
            if rpm_srpm_drpm:
                # For RPMs, SRPMs and DRPMs, also check if the file exists on the filesystem.
                # If not, we do not want to skip downloading the unit.
                if unit._storage_path is None or not os.path.isfile(unit._storage_path):
                    continue
            # Add the existing unit to the repository
            repo_controller.associate_single_unit(sync_conduit.repo, unit)
            values.discard(unit.unit_key_as_named_tuple)
    # Everything not discarded above still needs to be fetched.
    ret = set()
    ret.update(*sorted_units.values())
    return ret
def _get_deferred_content_units():
    """
    Retrieve a list of units that have been added to the DeferredDownload collection.

    :return: A generator of content units that correspond to DeferredDownload entries.
    :rtype:  generator of pulp.server.db.model.FileContentUnit
    """
    for entry in DeferredDownload.objects.filter():
        try:
            model_cls = plugin_api.get_unit_model_by_id(entry.unit_type_id)
            if model_cls is None:
                # No model is registered for this unit type; nothing to yield.
                _logger.error(_('Unable to find the model object for the {type} type.').format(
                    type=entry.unit_type_id))
            else:
                yield model_cls.objects.filter(id=entry.unit_id).get()
        except DoesNotExist:
            # This is normal if the content unit in question has been purged during an
            # orphan cleanup.
            _logger.debug(_('Unable to find the {type}:{id} content unit.').format(
                type=entry.unit_type_id, id=entry.unit_id))
def _get_unit(entry):
    """
    Get the content unit referenced by the catalog entry.

    :param entry: A catalog entry.
    :type  entry: LazyCatalogEntry
    :return: The unit.
    :raises DoesNotExist: when not found.
    """
    try:
        unit_model = plugin_api.get_unit_model_by_id(entry.unit_type_id)
        # Fetch only the unit-key fields; that is all the caller needs here.
        return unit_model.objects.filter(
            id=entry.unit_id).only(*unit_model.unit_key_fields).get()
    except DoesNotExist:
        # Log which entry pointed at the missing unit, then let it propagate.
        msg = _('The catalog entry for {path} references unknown unit: {unit_type}:{id}')
        logger.error(msg.format(
            path=entry.path, unit_type=entry.unit_type_id, id=entry.unit_id))
        raise
def remap_fields_with_serializer(content_unit):
    """Remap fields in place in a pymongo object using a mongoengine serializer

    :param content_unit: Content unit to modify
    :type content_unit: dict

    This is a small workaround to help in cases where REST views are returning the older
    objects coming out of pymongo, but still need to have their fields remapped according
    to the rules of the pymongo serializer.

    As a workaround, this is a "best effort" function, so serialization failures will be
    written to the debug log and not raise exceptions.

    Usage of pymongo objects is deprecated. Since this function is only concerned with
    serializing pymongo objects, its usage is also deprecated. Furthermore, this function
    is only intended to be used in the final serialization of objects before presentation
    in the REST API.
    """
    try:
        content_type_id = content_unit['_content_type_id']
    except KeyError:
        # content unit didn't have a content type id, usually means we're testing...
        _logger.debug('No _content_type_id found in content unit when remapping fields: '
                      '{0!r}'.format(content_unit))
        return

    cu_document = api.get_unit_model_by_id(content_type_id)
    if hasattr(cu_document, 'SERIALIZER'):
        # Move each value from its original key to the serializer's remapped key.
        for original_field, remapped_field in cu_document.SERIALIZER()._remapped_fields.items():
            try:
                content_unit[remapped_field] = content_unit.pop(original_field)
            except KeyError:
                # If the original field doesn't exist, log and move on
                _logger.debug('original field not found when attempting to remap: '
                              '{0}'.format(original_field))
                continue
def _download(self, catalog_entry, request, responder): """ Build a nectar downloader and download the content from the catalog entry. The download is performed by the alternate content container, so it is possible to use the streamer in conjunction with alternate content sources. :param catalog_entry: The catalog entry to download. :type catalog_entry: pulp.server.db.model.LazyCatalogEntry :param request: The client content request. :type request: twisted.web.server.Request :param responder: The file-like object that nectar should write to. :type responder: Responder """ # Configure the primary downloader for alternate content sources importer, config = repo_controller.get_importer_by_id(catalog_entry.importer_id) primary_downloader = importer.get_downloader(config, catalog_entry.url, **catalog_entry.data) pulp_request = request.getHeader(PULP_STREAM_REQUEST_HEADER) listener = StreamerListener(request, self.config, catalog_entry, pulp_request) primary_downloader.session = self.session primary_downloader.event_listener = listener # Build the alternate content source download request unit_model = plugins_api.get_unit_model_by_id(catalog_entry.unit_type_id) qs = unit_model.objects.filter(id=catalog_entry.unit_id).only(*unit_model.unit_key_fields) unit = qs.get() download_request = content_models.Request( catalog_entry.unit_type_id, unit.unit_key, catalog_entry.url, responder ) alt_content_container = content_container.ContentContainer(threaded=False) alt_content_container.download(primary_downloader, [download_request], listener) primary_downloader.config.finalize()
def upload_unit(self, transfer_repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Import a single uploaded file into the repository: build the unit from the
    file, save it, and associate it with the repo. Returns a report dict with
    ``success_flag``, ``summary`` and ``details``.
    """
    # Reject anything this importer does not know how to handle.
    if type_id not in SUPPORTED_TYPES:
        return self.fail_report(
            "Unsupported unit type {0}".format(type_id))

    model_class = plugin_api.get_unit_model_by_id(type_id)
    repo = transfer_repo.repo_obj
    conduit.repo = repo

    metadata = metadata or {}
    # Merge metadata and unit key; unit-key values take precedence.
    unit_data = dict(metadata)
    unit_data.update(unit_key or {})

    try:
        unit = model_class.from_file(file_path, unit_data)
    except models.Error as e:
        return self.fail_report(str(e))

    unit = unit.save_and_associate(file_path, repo)
    unit_report = dict(unit_key=unit.unit_key, metadata=unit.all_properties)
    return dict(success_flag=True, summary="", details=dict(unit=unit_report))
def upload_unit(self, transfer_repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Import a single uploaded file into the repository: build the unit from the
    file, save it, and associate it with the repo. Returns a report dict with
    ``success_flag``, ``summary`` and ``details``.
    """
    # Reject anything this importer does not know how to handle.
    if type_id not in SUPPORTED_TYPES:
        return self.fail_report(
            "Unsupported unit type {0}".format(type_id))
    model_class = plugin_api.get_unit_model_by_id(type_id)
    repo = transfer_repo.repo_obj
    conduit.repo = repo
    metadata = metadata or {}
    # Merge metadata and unit key; unit-key values take precedence.
    unit_data = {}
    unit_data.update(metadata or {})
    unit_data.update(unit_key or {})
    try:
        # Introspect the uploaded file and build the model instance from it.
        unit = model_class.from_file(file_path, unit_data)
    except models.Error as e:
        return self.fail_report(str(e))
    unit = unit.save_and_associate(file_path, repo)
    return dict(success_flag=True, summary="", details=dict(
        unit=dict(unit_key=unit.unit_key, metadata=unit.all_properties)))
def _handle_package(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the upload for an RPM, SRPM or DRPM.

    This inspects the package contents to determine field values. The unit_key
    and metadata fields overwrite field values determined through package inspection.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository
    :param type_id: The type_id of the package being uploaded
    :type  type_id: str
    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict
    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None
    :param file_path: The path to the uploaded package
    :type  file_path: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit
    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration
    :raises PulpCodedException PLP1005: if the checksum type from the user is not recognized
    :raises PulpCodedException PLP1013: if the checksum value from the user does not validate
    """
    try:
        # DRPMs are introspected differently from RPMs/SRPMs.
        if type_id == models.DRPM._content_type_id.default:
            rpm_data = _extract_drpm_data(file_path)
        else:
            rpm_data = _extract_rpm_data(type_id, file_path)
    except:
        # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit;
        # it does re-raise, so only the logging is affected.
        _LOGGER.exception('Error extracting RPM metadata for [%s]' % file_path)
        raise

    # metadata can be None
    metadata = metadata or {}

    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    with open(file_path) as fp:
        sums = util.calculate_checksums(fp, models.RpmBase.DEFAULT_CHECKSUM_TYPES)

    # validate checksum if possible
    if metadata.get('checksum'):
        checksumtype = metadata.pop('checksum_type', util.TYPE_SHA256)
        checksumtype = util.sanitize_checksum_type(checksumtype)
        if checksumtype not in sums:
            raise PulpCodedException(error_code=error_codes.RPM1009,
                                     checksumtype=checksumtype)
        if metadata['checksum'] != sums[checksumtype]:
            raise PulpCodedException(error_code=platform_errors.PLP1013)
        _LOGGER.debug(_('Upload checksum matches.'))

    # Save all uploaded RPMs with sha256 in the unit key, since we can now publish with other
    # types, regardless of what is in the unit key.
    rpm_data['checksumtype'] = util.TYPE_SHA256
    rpm_data['checksum'] = sums[util.TYPE_SHA256]
    # keep all available checksum values on the model
    rpm_data['checksums'] = sums

    # Update the RPM-extracted data with anything additional the user specified.
    # Allow the user-specified values to override the extracted ones.
    rpm_data.update(metadata or {})
    rpm_data.update(unit_key or {})

    # Validate the user specified data by instantiating the model
    try:
        unit = model_class(**rpm_data)
    except TypeError:
        raise ModelInstantiationError()

    if type_id != models.DRPM._content_type_id.default:
        # Extract/adjust the repodata snippets
        repodata = rpm_parse.get_package_xml(file_path, sumtype=unit.checksumtype)
        _update_provides_requires(unit, repodata)
        _update_files(unit, repodata)
        unit.modify_xml(repodata)

    # check if the unit has duplicate nevra
    purge.remove_unit_duplicate_nevra(unit, repo)

    unit.set_storage_path(os.path.basename(file_path))
    try:
        unit.save_and_import_content(file_path)
    except NotUniqueError:
        # The unit already exists; use the stored copy instead.
        unit = unit.__class__.objects.filter(**unit.unit_key).first()

    if rpm_parse.signature_enabled(config):
        rpm_parse.filter_signature(unit, config)
    repo_controller.associate_single_unit(repo, unit)
def find_repo_content_units(
        repository, repo_content_unit_q=None,
        units_q=None, unit_fields=None, limit=None, skip=None,
        yield_content_unit=False):
    """
    Search content units associated with a given repository.

    If yield_content_unit is not specified, or is set to false, then the
    RepositoryContentUnit representing the association will be returned with an
    attribute "unit" set to the actual ContentUnit. If yield_content_unit is set
    to true then the ContentUnit will be yielded instead of the RepoContentUnit.

    :param repository: The repository to search.
    :type  repository: pulp.server.db.model.Repository
    :param repo_content_unit_q: Any query filters to apply to the RepoContentUnits.
    :type  repo_content_unit_q: mongoengine.Q
    :param units_q: Any query filters to apply to the ContentUnits.
    :type  units_q: mongoengine.Q
    :param unit_fields: List of fields to fetch for the unit objects, defaults to all fields.
    :type  unit_fields: List of str
    :param limit: The maximum number of items to return for the given query.
    :type  limit: int
    :param skip: The starting offset.
    :type  skip: int
    :param yield_content_unit: Whether we should yield a ContentUnit or
                               RepositoryContentUnit. If True then a ContentUnit will be yielded.
                               Defaults to False
    :type  yield_content_unit: bool

    :return: Content unit assoociations matching the query.
    :rtype:  generator of pulp.server.db.model.ContentUnit or
             pulp.server.db.model.RepositoryContentUnit
    """
    qs = model.RepositoryContentUnit.objects(q_obj=repo_content_unit_q,
                                             repo_id=repository.repo_id)

    type_map = {}
    content_units = {}

    yield_count = 1
    skip_count = 0

    # Bucket the association documents by unit type so that each type's
    # collection is queried exactly once below.
    for repo_content_unit in qs:
        id_set = type_map.setdefault(repo_content_unit.unit_type_id, set())
        id_set.add(repo_content_unit.unit_id)
        content_unit_set = content_units.setdefault(repo_content_unit.unit_type_id, dict())
        content_unit_set[repo_content_unit.unit_id] = repo_content_unit

    for unit_type, unit_ids in type_map.iteritems():
        qs = plugin_api.get_unit_model_by_id(unit_type).objects(
            q_obj=units_q, __raw__={'_id': {'$in': list(unit_ids)}})
        if unit_fields:
            # only() takes field names as positional arguments; passing the
            # list itself would be interpreted as a single (bogus) field name.
            qs = qs.only(*unit_fields)

        for unit in qs:
            if skip and skip_count < skip:
                skip_count += 1
                continue

            if yield_content_unit:
                yield unit
            else:
                # Attach the unit to its association document and yield that.
                cu = content_units[unit_type][unit.id]
                cu.unit = unit
                yield cu

            if limit:
                if yield_count >= limit:
                    return

            yield_count += 1
def _download(self, catalog_entry, request, responder):
    """
    Build a nectar downloader and download the content from the catalog entry.
    The download is performed by the alternate content container, so it is
    possible to use the streamer in conjunction with alternate content sources.

    :param catalog_entry: The catalog entry to download.
    :type  catalog_entry: pulp.server.db.model.LazyCatalogEntry
    :param request:       The client content request.
    :type  request:       twisted.web.server.Request
    :param responder:     The file-like object that nectar should write to.
    :type  responder:     Responder
    """
    # Configure the primary downloader for alternate content sources
    plugin_importer, config, db_importer = repo_controller.get_importer_by_id(
        catalog_entry.importer_id)
    # There is an unfortunate mess of configuration classes and attributes, and
    # multiple "models" floating around. The MongoEngine class that corresponds
    # to the database entry only contains the repository config. The ``config``
    # variable above contains the repository configuration _and_ the plugin-wide
    # configuration, so here we override the db_importer.config because it doesn't
    # have the whole config. In the future the importer object should seemlessly
    # load and apply the plugin-wide configuration.
    db_importer.config = config.flatten()
    primary_downloader = plugin_importer.get_downloader_for_db_importer(
        db_importer, catalog_entry.url, working_dir='/tmp')
    pulp_request = request.getHeader(PULP_STREAM_REQUEST_HEADER)
    listener = StreamerListener(request, self.config, catalog_entry, pulp_request)
    primary_downloader.session = self.session
    primary_downloader.event_listener = listener

    # Build the alternate content source download request
    unit_model = plugins_api.get_unit_model_by_id(
        catalog_entry.unit_type_id)
    qs = unit_model.objects.filter(id=catalog_entry.unit_id).only(
        *unit_model.unit_key_fields)
    try:
        unit = qs.get()
        download_request = content_models.Request(
            catalog_entry.unit_type_id,
            unit.unit_key,
            catalog_entry.url,
            responder,
        )
        # threaded=False keeps the download on the caller's thread.
        alt_content_container = content_container.ContentContainer(
            threaded=False)
        alt_content_container.download(primary_downloader,
                                       [download_request], listener)
    except DoesNotExist:
        # A catalog entry is referencing a unit that doesn't exist which is bad.
        msg = _(
            'The catalog entry for {path} references {unit_type}:{id}, but '
            'that unit is not in the database.')
        logger.error(
            msg.format(path=catalog_entry.path,
                       unit_type=catalog_entry.unit_type_id,
                       id=catalog_entry.unit_id))
        request.setResponseCode(NOT_FOUND)
    finally:
        # Always release downloader resources, even when the unit was missing.
        primary_downloader.config.finalize()
def _handle_package(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the upload for an RPM or SRPM.

    This inspects the package contents to determine field values. The unit_key
    and metadata fields overwrite field values determined through package inspection.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository
    :param type_id: The type_id of the package being uploaded
    :type  type_id: str
    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict
    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None
    :param file_path: The path to the uploaded package
    :type  file_path: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit
    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration
    :raises PulpCodedException PLP1005: if the checksum type from the user is not recognized
    :raises PulpCodedException PLP1013: if the checksum value from the user does not validate
    """
    try:
        rpm_data = _extract_rpm_data(type_id, file_path)
    except:
        # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit;
        # it does re-raise, so only the logging is affected.
        _LOGGER.exception('Error extracting RPM metadata for [%s]' % file_path)
        raise

    # metadata can be None
    metadata = metadata or {}

    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    with open(file_path) as fp:
        sums = util.calculate_checksums(fp, models.RpmBase.DEFAULT_CHECKSUM_TYPES)

    # validate checksum if possible
    if metadata.get('checksum'):
        checksumtype = metadata.pop('checksum_type', util.TYPE_SHA256)
        checksumtype = util.sanitize_checksum_type(checksumtype)
        if checksumtype not in sums:
            raise PulpCodedException(error_code=error_codes.RPM1009,
                                     checksumtype=checksumtype)
        if metadata['checksum'] != sums[checksumtype]:
            raise PulpCodedException(error_code=platform_errors.PLP1013)
        _LOGGER.debug(_('Upload checksum matches.'))

    # Save all uploaded RPMs with sha256 in the unit key, since we can now publish with other
    # types, regardless of what is in the unit key.
    rpm_data['checksumtype'] = util.TYPE_SHA256
    rpm_data['checksum'] = sums[util.TYPE_SHA256]
    # keep all available checksum values on the model
    rpm_data['checksums'] = sums

    # Update the RPM-extracted data with anything additional the user specified.
    # Allow the user-specified values to override the extracted ones.
    rpm_data.update(metadata or {})
    rpm_data.update(unit_key or {})

    # Validate the user specified data by instantiating the model
    try:
        unit = model_class(**rpm_data)
    except TypeError:
        raise ModelInstantiationError()

    # Extract/adjust the repodata snippets
    unit.repodata = rpm_parse.get_package_xml(file_path, sumtype=unit.checksumtype)
    _update_provides_requires(unit)
    unit.modify_xml()

    # check if the unit has duplicate nevra
    purge.remove_unit_duplicate_nevra(unit, repo)

    unit.set_storage_path(os.path.basename(file_path))
    try:
        unit.save_and_import_content(file_path)
    except NotUniqueError:
        # The unit already exists; use the stored copy instead.
        unit = unit.__class__.objects.filter(**unit.unit_key).first()

    repo_controller.associate_single_unit(repo, unit)
def check_all_and_associate(wanted, conduit, config, download_deferred, catalog):
    """
    Given a set of unit keys as namedtuples, this function checks if a unit
    already exists in Pulp and returns the set of tuples that were not found.

    This checks for the unit in the db as well as for the actual file
    on the filesystem. If a unit exists in the db and the filesystem, this function
    also associates the unit to the given repo. Note that the check for the actual file
    is performed only for the supported unit types.

    :param wanted:            dict where keys are units as namedtuples, and values are
                              WantedUnitInfo instances
    :type  wanted:            dict
    :param conduit:           repo sync conduit
    :type  conduit:           pulp.plugins.conduits.repo_sync.RepoSync
    :param config:            configuration instance passed to the importer
    :type  config:            pulp.plugins.config.PluginCallConfiguration
    :param download_deferred: indicates downloading is deferred (or not).
    :type  download_deferred: bool
    :param catalog:           Deferred downloading catalog.
    :type  catalog:           pulp_rpm.plugins.importers.yum.sync.PackageCatalog

    :return: set of unit keys as namedtuples, identifying which of the named
             tuples received as input were not found on the server.
    :rtype:  set
    """
    rpm_drpm_srpm = (ids.TYPE_ID_RPM, ids.TYPE_ID_SRPM, ids.TYPE_ID_DRPM)
    all_associated_units = set()
    # Collect the ids of every package unit already associated with the repo,
    # so that already-associated units are not re-associated below.
    for unit_type in rpm_drpm_srpm:
        units_generator = repo_controller.get_associated_unit_ids(
            conduit.repo.repo_id, unit_type)
        all_associated_units.update(units_generator)

    sorted_units = _sort_by_type(wanted.iterkeys())
    for unit_type, values in sorted_units.iteritems():
        model = plugin_api.get_unit_model_by_id(unit_type)
        # FIXME "fields" does not get used, but it should
        # fields = model.unit_key_fields + ('_storage_path',)
        # Iterate a copy of ``values``: found units are discarded from the
        # original set below while this generator is still being consumed.
        unit_generator = (model(**unit_tuple._asdict())
                          for unit_tuple in values.copy())
        for unit in units_controller.find_units(unit_generator):
            is_rpm_drpm_srpm = unit_type in rpm_drpm_srpm
            file_exists = unit._storage_path is not None and os.path.isfile(
                unit._storage_path)
            if is_rpm_drpm_srpm:
                # no matter what is the download policy, if existing unit has a
                # valid storage_path, we need to set the downloaded flag to True
                if file_exists and not unit.downloaded:
                    unit.downloaded = True
                    unit.save()
                # Existing RPMs, DRPMs and SRPMs are disqualified when the associated
                # package file does not exist and downloading is not deferred.
                if not download_deferred and not file_exists:
                    continue
            catalog.add(unit, wanted[unit.unit_key_as_named_tuple].download_path)
            if unit.id not in all_associated_units:
                if rpm_parse.signature_enabled(config):
                    # A unit that fails the signature filter is neither
                    # associated nor removed from the wanted set.
                    try:
                        rpm_parse.filter_signature(unit, config)
                    except PulpCodedException as e:
                        _LOGGER.debug(e)
                        continue
                repo_controller.associate_single_unit(conduit.repo, unit)
            values.discard(unit.unit_key_as_named_tuple)
    # Whatever was not discarded above was not found on the server.
    still_wanted = set()
    still_wanted.update(*sorted_units.values())
    return still_wanted
def _handle_package(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the upload for an RPM, SRPM or DRPM.

    This inspects the package contents to determine field values. The unit_key
    and metadata fields overwrite field values determined through package inspection.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository
    :param type_id: The type_id of the package being uploaded
    :type  type_id: str
    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict
    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None
    :param file_path: The path to the uploaded package
    :type  file_path: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit
    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration
    :raises PulpCodedException PLP1005: if the checksum type from the user is not recognized
    :raises PulpCodedException PLP1013: if the checksum value from the user does not validate
    """
    try:
        if type_id == models.DRPM._content_type_id.default:
            # DRPMs are built directly from the introspected file data.
            unit = models.DRPM(**_extract_drpm_data(file_path))
        else:
            # RPMs/SRPMs are built from the primary repodata snippet.
            repodata = rpm_parse.get_package_xml(file_path, sumtype=util.TYPE_SHA256)
            package_xml = (utils.fake_xml_element(
                repodata['primary'],
                constants.COMMON_NAMESPACE).find(primary.PACKAGE_TAG))
            unit = primary.process_package_element(package_xml)
    except Exception:
        # Any introspection failure is reported as a single coded error.
        raise PulpCodedException(error_codes.RPM1016)

    # metadata can be None
    metadata = metadata or {}

    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    with open(file_path) as fp:
        sums = util.calculate_checksums(fp, models.RpmBase.DEFAULT_CHECKSUM_TYPES)

    # validate checksum if possible
    if metadata.get('checksum'):
        checksumtype = metadata.pop('checksum_type', util.TYPE_SHA256)
        checksumtype = util.sanitize_checksum_type(checksumtype)
        if checksumtype not in sums:
            raise PulpCodedException(error_code=error_codes.RPM1009,
                                     checksumtype=checksumtype)
        if metadata['checksum'] != sums[checksumtype]:
            raise PulpCodedException(error_code=platform_errors.PLP1013)
        _LOGGER.debug(_('Upload checksum matches.'))

    # Save all uploaded RPMs with sha256 in the unit key, since we can now publish with other
    # types, regardless of what is in the unit key.
    unit.checksumtype = util.TYPE_SHA256
    unit.checksum = sums[util.TYPE_SHA256]
    # keep all available checksum values on the model
    unit.checksums = sums

    # Update the RPM-extracted data with anything additional the user specified.
    # Allow the user-specified values to override the extracted ones.
    for key, value in metadata.items():
        setattr(unit, key, value)
    for key, value in unit_key.items():
        setattr(unit, key, value)

    if type_id != models.DRPM._content_type_id.default:
        # Extract/adjust the repodata snippets
        unit.signing_key = rpm_parse.package_signature(
            rpm_parse.package_headers(file_path))
        # construct filename from metadata (BZ #1101168)
        if type_id == models.SRPM._content_type_id.default:
            rpm_basefilename = "%s-%s-%s.src.rpm" % (unit.name,
                                                     unit.version,
                                                     unit.release)
        else:
            rpm_basefilename = "%s-%s-%s.%s.rpm" % (unit.name,
                                                    unit.version,
                                                    unit.release,
                                                    unit.arch)
        unit.relativepath = rpm_basefilename
        unit.filename = rpm_basefilename
        _update_files(unit, repodata)
        unit.modify_xml(repodata)

    # check if the unit has duplicate nevra
    purge.remove_unit_duplicate_nevra(unit, repo)

    unit.set_storage_path(os.path.basename(file_path))
    try:
        unit.save_and_import_content(file_path)
    except TypeError:
        raise ModelInstantiationError()
    except NotUniqueError:
        # The unit already exists; use the stored copy instead.
        unit = unit.__class__.objects.filter(**unit.unit_key).first()

    if rpm_parse.signature_enabled(config):
        rpm_parse.filter_signature(unit, config)
    repo_controller.associate_single_unit(repo, unit)
def test_get_unit_model_by_id(self, mock_manager, mock_is_initialized):
    """
    get_unit_model_by_id() should return whatever the plugin manager's
    unit_models mapping holds for the requested type id, looked up exactly once.
    """
    mock_is_initialized.return_value = True
    mock_manager.unit_models.get.return_value = 'apples'

    return_val = api.get_unit_model_by_id('foo')

    # assertEqual, not the deprecated assertEquals alias
    self.assertEqual(return_val, 'apples')
    mock_manager.unit_models.get.assert_called_once_with('foo')
def _handle_package(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the upload for an RPM or SRPM.

    This inspects the package contents to determine field values. The unit_key
    and metadata fields overwrite field values determined through package inspection.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository
    :param type_id: The type_id of the package being uploaded
    :type  type_id: str
    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict
    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None
    :param file_path: The path to the uploaded package
    :type  file_path: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit
    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration

    :raises PulpCodedException PLP1005: if the checksum type from the user is not recognized
    :raises PulpCodedException PLP1013: if the checksum value from the user does not validate
    """
    try:
        rpm_data = _extract_rpm_data(type_id, file_path)
    except Exception:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # pass through untouched; log with traceback, then re-raise.
        _LOGGER.exception('Error extracting RPM metadata for [%s]' % file_path)
        raise

    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    # set checksum and checksumtype
    if metadata:
        checksumtype = metadata.pop('checksumtype', verification.TYPE_SHA256)
        rpm_data['checksumtype'] = verification.sanitize_checksum_type(checksumtype)
        if 'checksum' in metadata:
            rpm_data['checksum'] = metadata.pop('checksum')
            try:
                # Binary mode: the checksum must be verified over raw bytes.
                with open(file_path, 'rb') as dest_file:
                    verification.verify_checksum(dest_file, rpm_data['checksumtype'],
                                                 rpm_data['checksum'])
            except verification.VerificationException:
                raise PulpCodedException(error_code=platform_errors.PLP1013)
        else:
            rpm_data['checksum'] = _calculate_checksum(rpm_data['checksumtype'], file_path)
    else:
        rpm_data['checksumtype'] = verification.TYPE_SHA256
        rpm_data['checksum'] = _calculate_checksum(rpm_data['checksumtype'], file_path)

    # Update the RPM-extracted data with anything additional the user specified.
    # Allow the user-specified values to override the extracted ones.
    rpm_data.update(metadata or {})
    rpm_data.update(unit_key or {})

    # Validate the user specified data by instantiating the model
    try:
        unit = model_class(**rpm_data)
    except TypeError:
        raise ModelInstantiationError()

    # Extract/adjust the repodata snippets
    unit.repodata = rpm_parse.get_package_xml(file_path, sumtype=unit.checksumtype)
    _update_provides_requires(unit)
    _update_location(unit)

    # check if the unit has duplicate nevra
    purge.remove_unit_duplicate_nevra(unit, repo)

    unit.set_storage_path(os.path.basename(file_path))
    try:
        unit.save_and_import_content(file_path)
    except NotUniqueError:
        # The unit already exists; fetch the stored copy and associate that instead.
        unit = unit.__class__.objects.filter(**unit.unit_key).first()

    repo_controller.associate_single_unit(repo, unit)
def _handle_package(repo, type_id, unit_key, metadata, file_path, conduit, config):
    """
    Handles the upload for an RPM or SRPM.

    This inspects the package contents to determine field values. The unit_key
    and metadata fields overwrite field values determined through package inspection.

    :param repo: The repository to import the package into
    :type  repo: pulp.server.db.model.Repository
    :param type_id: The type_id of the package being uploaded
    :type  type_id: str
    :param unit_key: A dictionary of fields to overwrite introspected field values
    :type  unit_key: dict
    :param metadata: A dictionary of fields to overwrite introspected field values, or None
    :type  metadata: dict or None
    :param file_path: The path to the uploaded package
    :type  file_path: str
    :param conduit: provides access to relevant Pulp functionality
    :type  conduit: pulp.plugins.conduits.upload.UploadConduit
    :param config: plugin configuration for the repository
    :type  config: pulp.plugins.config.PluginCallConfiguration

    :raises PulpCodedException PLP1005: if the checksum type from the user is not recognized
    :raises PulpCodedException PLP1013: if the checksum value from the user does not validate
    """
    try:
        rpm_data = _extract_rpm_data(type_id, file_path)
    except Exception:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # pass through untouched; log with traceback, then re-raise.
        _LOGGER.exception('Error extracting RPM metadata for [%s]' % file_path)
        raise

    model_class = plugin_api.get_unit_model_by_id(type_id)
    update_fields_inbound(model_class, unit_key or {})
    update_fields_inbound(model_class, metadata or {})

    # set checksum and checksumtype
    if metadata:
        checksumtype = metadata.pop('checksumtype', verification.TYPE_SHA256)
        rpm_data['checksumtype'] = verification.sanitize_checksum_type(checksumtype)
        if 'checksum' in metadata:
            rpm_data['checksum'] = metadata.pop('checksum')
            try:
                # Binary mode: the checksum must be verified over raw bytes.
                with open(file_path, 'rb') as dest_file:
                    verification.verify_checksum(dest_file, rpm_data['checksumtype'],
                                                 rpm_data['checksum'])
            except verification.VerificationException:
                raise PulpCodedException(error_code=platform_errors.PLP1013)
        else:
            rpm_data['checksum'] = _calculate_checksum(rpm_data['checksumtype'], file_path)
    else:
        rpm_data['checksumtype'] = verification.TYPE_SHA256
        rpm_data['checksum'] = _calculate_checksum(rpm_data['checksumtype'], file_path)

    # Update the RPM-extracted data with anything additional the user specified.
    # Allow the user-specified values to override the extracted ones.
    rpm_data.update(metadata or {})
    rpm_data.update(unit_key or {})

    # Validate the user specified data by instantiating the model
    try:
        unit = model_class(**rpm_data)
    except TypeError:
        raise ModelInstantiationError()

    # Extract the repodata snippets
    unit.repodata = rpm_parse.get_package_xml(file_path, sumtype=unit.checksumtype)
    _update_provides_requires(unit)

    # check if the unit has duplicate nevra
    purge.remove_unit_duplicate_nevra(unit, repo)

    unit.set_storage_path(os.path.basename(file_path))
    unit.save_and_import_content(file_path)
    repo_controller.associate_single_unit(repo, unit)