class DownloadStep(PluginStep, listener.DownloadEventListener):
    """
    A PluginStep that downloads a collection of files with nectar.

    The step registers itself as the nectar event listener so the per-file
    success/failure callbacks can update the step's progress counters.
    """

    def __init__(self, step_type, downloads=None, repo=None, conduit=None, config=None,
                 working_dir=None, plugin_type=None, description=''):
        """
        Set the default parent and step_type for the Download step

        :param step_type: The id of the step this processes
        :type  step_type: str
        :param downloads: A list (or lazy iterable) of DownloadRequests
        :type  downloads: list of nectar.request.DownloadRequest
        :param repo: The repo to be published
        :type  repo: pulp.plugins.model.Repository
        :param conduit: The conduit for the repo
        :type  conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: The publish configuration
        :type  config: PluginCallConfiguration
        :param working_dir: The temp directory this step should use for processing
        :type  working_dir: str
        :param plugin_type: The type of the plugin
        :type  plugin_type: str
        :param description: The text description that will be displayed to users
        :type  description: basestring
        """
        super(DownloadStep, self).__init__(step_type, repo=repo, conduit=conduit,
                                           config=config, working_dir=working_dir,
                                           plugin_type=plugin_type)
        # Never share a mutable default between instances; each step gets its
        # own list when no downloads are supplied.
        self._downloads = downloads if downloads is not None else []
        self.step_type = step_type
        self.repo = repo
        self.conduit = conduit
        self.config = config
        self.working_dir = working_dir
        self.plugin_type = plugin_type
        self.description = description

    def initialize(self):
        """
        Set up the nectar downloader

        Originally based on the ISO sync setup
        """
        config = self.get_config()
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE, default=True)
        # NOTE(review): this assumes KEY_FEED is present in the config; a None
        # feed would make encode_unicode fail -- confirm callers validate it.
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use
        # urljoin to determine the path later
        if not self._repo_url.endswith('/'):
            self._repo_url += '/'

        downloader_config = importer_config_to_nectar_config(config.flatten())

        # We will pass self as the event_listener, so that we can receive the
        # callbacks in this class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)

    @property
    def downloads(self):
        """
        This lets the class be instantiated with "downloads" as a generator that
        gets lazily evaluated. This is helpful, because at the time of
        instantiation, it is probably not known what downloads will be required.

        :return: list of download requests (nectar.request.DownloadRequest)
        :rtype:  list
        """
        if not isinstance(self._downloads, list):
            # Materialize the generator exactly once; later accesses reuse the
            # resulting list.
            self._downloads = list(self._downloads)
        return self._downloads

    def get_total(self):
        """
        Get total number of items to download

        :returns: number of DownloadRequests
        :rtype:   int
        """
        return len(self.downloads)

    def _process_block(self):
        """
        the main "do stuff" method. In this case, just kick off all the
        downloads.
        """
        self.downloader.download(self.downloads)

    # from listener.DownloadEventListener
    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when any
        individual download succeeds. Bump the successes counter and report progress.

        :param report: report (passed in from nectar but currently not used)
        :type  report: pulp.plugins.model.PublishReport
        """
        self.progress_successes += 1
        self.report_progress()

    # from listener.DownloadEventListener
    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any
        individual download fails. Bump the failure counter and report progress.

        :param report: report (passed in from nectar but currently not used)
        :type  report: pulp.plugins.model.PublishReport
        """
        self.progress_failures += 1
        self.report_progress()

    def cancel(self):
        """
        Cancel the current step
        """
        super(DownloadStep, self).cancel()
        self.downloader.cancel()
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses
    DownloadEventListener so it can pass itself to the downloader library and receive the
    callbacks when downloads are complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        # Behavior flags pulled from the importer config, with plugin defaults.
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        # NOTE(review): the fallback here is CONFIG_VALIDATE_DEFAULT (the
        # download-validation default), not an SSL-specific constant -- confirm
        # that is intended.
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # Flag the report as cancelled first so callbacks observe the new
        # state, then stop the downloader.
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.

        :param report: the report of the failed download request
        :type  report: nectar.report.DownloadReport
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            # NOTE(review): the log line above uses report.error_msg but the
            # stored message uses report.error_report -- confirm which field is
            # intended for the user-visible error.
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # report.data carries the ISO object attached to the request.
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            # Only the delta since the last callback is added to the running
            # total; the ISO itself remembers how much was already counted.
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading
        stage, and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    # Raises ValueError when the checksum/size do not match.
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                # Treat a failed validation the same as a failed download.
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return: The sync report
        :rtype:  pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError
            # will happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO.TYPE, search_dicts)

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished. Note that setting the state to STATE_COMPLETE will
        # automatically set the state to STATE_ISOS_FAILED if the progress report has collected
        # any errors. See the progress_report's _set_state() method for the implementation of
        # this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [request.DownloadRequest(iso.url, iso.storage_path, iso)
                             for iso in manifest]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the
        # manifest. download_failed() flips the state for us.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})

        # Rewind the in-memory buffer before parsing it.
        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _filter_missing_isos(self, manifest):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that
        are not available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that
                         we should retrieve from the feed_url. The second element of the
                         tuple is a list of Units that are available locally already, but
                         are not currently associated with the repository. The third
                         element of the tuple is a list of Units that represent the ISOs
                         that we have in our local repo that were not found in the remote
                         repo.
        :rtype:          tuple
        """
        def _unit_key_str(iso):
            """
            Return a simple string representation of the unit key of the ISO.

            :param iso: The ISO for which we want a unit key string representation
            :type  iso: pulp_rpm.plugins.db.models.ISO
            """
            return '%s-%s-%s' % (iso.name, iso.checksum, iso.size)

        # A list of all the ISOs we have in Pulp
        search_criteria = Criteria(fields=models.ISO.UNIT_KEY_ISO)
        existing_units = self.sync_conduit.search_all_units(models.ISO.TYPE, search_criteria)
        existing_units_by_key = dict([(_unit_key_str(models.ISO.from_unit(unit)), unit)
                                      for unit in existing_units])
        existing_unit_keys = set([_unit_key_str(models.ISO.from_unit(unit))
                                  for unit in existing_units])

        # A list of units currently associated with the repository
        search_criteria = UnitAssociationCriteria(type_ids=[models.ISO.TYPE])
        existing_repo_units = self.sync_conduit.get_units(search_criteria)
        existing_repo_units_by_key = dict([(_unit_key_str(models.ISO.from_unit(unit)), unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set([_unit_key_str(models.ISO.from_unit(unit))
                                       for unit in existing_repo_units])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(_unit_key_str(iso), iso) for iso in manifest])
        available_iso_keys = set([_unit_key_str(iso) for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set([iso for iso in available_iso_keys
                                        if iso in existing_unit_keys])
        # Drop anything already associated with the repo.
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys)
        remote_missing_units = [existing_repo_units_by_key[k] for k in remote_missing_unit_keys]

        return local_missing_isos, local_available_units, remote_missing_units

    def _remove_units(self, units):
        """
        Use the sync_conduit's remove_unit call for each unit in units.

        :param units: List of pulp.plugins.model.Units that we want to remove from the
                      repository
        :type  units: list
        """
        for unit in units:
            self.sync_conduit.remove_unit(unit)
class LazyUnitDownloadStep(Step, DownloadEventListener):
    """
    A Step that downloads all the given requests. The downloader is
    configured to download from the Pulp Streamer components.

    :ivar download_requests: The download requests the step will process.
    :type download_requests: list of nectar.request.DownloadRequest
    :ivar download_config:   The keyword args used to initialize the Nectar
                             downloader configuration.
    :type download_config:   dict
    :ivar downloader:        The Nectar downloader used to fetch the requests.
    :type downloader:        nectar.downloaders.threaded.HTTPThreadedDownloader
    """

    def __init__(self, step_type, step_description, lazy_status_conduit, download_requests):
        """
        Initializes a Step that downloads all the download requests provided.

        :param lazy_status_conduit: Conduit used to update the task status.
        :type  lazy_status_conduit: LazyStatusConduit
        :param download_requests:   List of download requests to process.
        :type  download_requests:   list of nectar.request.DownloadRequest
        """
        super(LazyUnitDownloadStep, self).__init__(
            step_type=step_type,
            status_conduit=lazy_status_conduit,
        )
        self.description = step_description
        self.download_requests = download_requests
        # Bounded concurrency, a marker header so the streamer can recognize
        # Pulp's own requests, and SSL validation turned on.
        concurrency = int(pulp_conf.get('lazy', 'download_concurrency'))
        self.download_config = {
            MAX_CONCURRENT: concurrency,
            HEADERS: {PULP_STREAM_REQUEST_HEADER: 'true'},
            SSL_VALIDATION: True,
        }
        self.downloader = HTTPThreadedDownloader(
            DownloaderConfig(**self.download_config), self)

    def _process_block(self, item=None):
        """
        Kick off all the downloads at once.

        Success and failure accounting happens in the DownloadEventListener
        callbacks, so this does not use `process_main` per item.

        Inherited from Step.

        :param item: Unused.
        :type  item: None
        """
        self.downloader.download(self.download_requests)

    def get_total(self):
        """
        Report progress at the file level.

        Inherited from Step.

        :return: The number of download requests this step will process.
        :rtype:  int
        """
        return len(self.download_requests)

    def download_started(self, report):
        """
        Checks the filesystem for the file that we are about to download,
        and if it exists, raise an exception which will cause Nectar to
        skip the download.

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type  report: nectar.report.DownloadReport

        :raises SkipLocation: if the file is already downloaded and matches
                              the checksum stored in the catalog.
        """
        _logger.debug(_('Starting download of {url}.').format(url=report.url))

        # The download is underway, so its deferred-download entry is obsolete.
        DeferredDownload.objects.filter(
            unit_id=report.data[UNIT_ID],
            unit_type_id=report.data[TYPE_ID]).delete()

        try:
            # A file already on disk with a valid checksum needs no download.
            file_entry = report.data[UNIT_FILES][report.destination]
            entry = file_entry[CATALOG_ENTRY]
            self.validate_file(entry.path, entry.checksum_algorithm, entry.checksum)
            file_entry[PATH_DOWNLOADED] = True
            self.progress_successes += 1
            self.report_progress()
            _logger.debug(_('{path} has already been downloaded.').format(
                path=file_entry[CATALOG_ENTRY].path))
            raise SkipLocation()
        except (InvalidChecksumType, VerificationException, IOError):
            # Missing or invalid on disk -- let the download proceed.
            pass

    def download_succeeded(self, report):
        """
        Marks the individual file for the unit as downloaded and moves it into
        its final storage location if its checksum value matches the value in
        the catalog entry (if present).

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type  report: nectar.report.DownloadReport
        """
        # Reload the content unit from the database.
        model = plugin_api.get_unit_model_by_id(report.data[TYPE_ID])
        unit_query = model.objects.filter(id=report.data[UNIT_ID])
        unit = unit_query.only('_content_type_id', 'id', '_last_updated').get()
        file_entry = report.data[UNIT_FILES][report.destination]

        # Verify the downloaded file against the catalog, then import it.
        entry = file_entry[CATALOG_ENTRY]
        try:
            self.validate_file(report.destination, entry.checksum_algorithm, entry.checksum)

            rel_path = os.path.relpath(entry.path, FileStorage.get_path(unit))
            if len(report.data[UNIT_FILES]) == 1:
                # A single-file unit stores the file itself at the storage path.
                unit.set_storage_path(rel_path)
                unit_query.update_one(set___storage_path=unit._storage_path)
                unit.import_content(report.destination)
            else:
                unit.import_content(report.destination, location=rel_path)
            self.progress_successes += 1
            file_entry[PATH_DOWNLOADED] = True
        except (InvalidChecksumType, VerificationException, IOError) as e:
            _logger.debug(_('Download of {path} failed: {reason}.').format(
                path=entry.path, reason=str(e)))
            file_entry[PATH_DOWNLOADED] = False
            self.progress_failures += 1
        self.report_progress()

        # Once every file of the unit is present, flag the unit as downloaded.
        if all(item[PATH_DOWNLOADED] for item in report.data[UNIT_FILES].values()):
            _logger.debug(_('Marking content unit {type}:{id} as downloaded.').format(
                type=unit.type_id, id=unit.id))
            unit_query.update_one(set__downloaded=True)
class ISOSyncRun(listener.DownloadEventListener): """ This class maintains state for a single repository sync (do not reuse it). We need to keep the state so that we can cancel a sync that is in progress. It subclasses DownloadEventListener so it can pass itself to the downloader library and receive the callbacks when downloads are complete. """ def __init__(self, sync_conduit, config): """ Initialize an ISOSyncRun. :param sync_conduit: the sync conduit to use for this sync run. :type sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit :param config: plugin configuration :type config: pulp.plugins.config.PluginCallConfiguration """ self.sync_conduit = sync_conduit self.config = config self._remove_missing_units = config.get( importer_constants.KEY_UNITS_REMOVE_MISSING, default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT) self._validate_downloads = config.get( importer_constants.KEY_VALIDATE, default=constants.CONFIG_VALIDATE_DEFAULT) self._repo_url = encode_unicode(config.get( importer_constants.KEY_FEED)) # The _repo_url must end in a trailing slash, because we will use urljoin to determine # the path to # PULP_MANIFEST later if self._repo_url[-1] != '/': self._repo_url = self._repo_url + '/' # Cast our config parameters to the correct types and use them to build a Downloader max_speed = config.get(importer_constants.KEY_MAX_SPEED) if max_speed is not None: max_speed = float(max_speed) max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS) if max_downloads is not None: max_downloads = int(max_downloads) else: max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT ssl_validation = config.get_boolean( importer_constants.KEY_SSL_VALIDATION) ssl_validation = ssl_validation if ssl_validation is not None else \ constants.CONFIG_VALIDATE_DEFAULT downloader_config = { 'max_speed': max_speed, 'max_concurrent': max_downloads, 'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT), 'ssl_client_key': 
config.get(importer_constants.KEY_SSL_CLIENT_KEY), 'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT), 'ssl_validation': ssl_validation, 'proxy_url': config.get(importer_constants.KEY_PROXY_HOST), 'proxy_port': config.get(importer_constants.KEY_PROXY_PORT), 'proxy_username': config.get(importer_constants.KEY_PROXY_USER), 'proxy_password': config.get(importer_constants.KEY_PROXY_PASS), 'basic_auth_username': config.get(importer_constants.KEY_BASIC_AUTH_USER), 'basic_auth_password': config.get(importer_constants.KEY_BASIC_AUTH_PASS), 'working_dir': common_utils.get_working_directory() } downloader_config = DownloaderConfig(**downloader_config) # We will pass self as the event_listener, so that we can receive the callbacks in this # class if self._repo_url.lower().startswith('file'): self.downloader = LocalFileDownloader(downloader_config, self) else: self.downloader = HTTPThreadedDownloader(downloader_config, self) self.progress_report = SyncProgressReport(sync_conduit) self.repo_units = [] @property def download_deferred(self): """ Test the download policy to determine if downloading is deferred. :return: True if deferred. :rtype: bool """ policy = self.config.get(importer_constants.DOWNLOAD_POLICY, importer_constants.DOWNLOAD_IMMEDIATE) return policy != importer_constants.DOWNLOAD_IMMEDIATE def download_failed(self, report): """ This is the callback that we will get from the downloader library when any individual download fails. """ # If we have a download failure during the manifest phase, we should set the report to # failed for that phase. 
msg = _('Failed to download %(url)s: %(error_msg)s.') msg = msg % {'url': report.url, 'error_msg': report.error_msg} _logger.error(msg) if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS: self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED self.progress_report.error_message = report.error_report elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS: iso = report.data self.progress_report.add_failed_iso(iso, report.error_report) self.progress_report.update_progress() def download_progress(self, report): """ We will get notified from time to time about some bytes we've downloaded. We can update our progress report with this information so the client can see the progress. :param report: The report of the file we are downloading :type report: nectar.report.DownloadReport """ if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS: iso = report.data additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded self.progress_report.finished_bytes += additional_bytes_downloaded iso.bytes_downloaded = report.bytes_downloaded self.progress_report.update_progress() def download_succeeded(self, report): """ This is the callback that we will get from the downloader library when it succeeds in downloading a file. This method will check to see if we are in the ISO downloading stage, and if we are, it will add the new ISO to the database. :param report: The report of the file we downloaded :type report: nectar.report.DownloadReport """ # If we are in the isos stage, then this must be one of our ISOs. 
if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS: # This will update our bytes downloaded self.download_progress(report) iso = report.data iso.set_storage_path(os.path.basename(report.destination)) try: if self._validate_downloads: iso.validate_iso(report.destination) try: iso.save() except NotUniqueError: iso = iso.__class__.objects.filter(**iso.unit_key).first() self._associate_unit(self.sync_conduit.repo, iso) iso.safe_import_content(report.destination) # We can drop this ISO from the url --> ISO map self.progress_report.num_isos_finished += 1 self.progress_report.update_progress() except ValueError: self.download_failed(report) def add_catalog_entries(self, units): """ Add entries to the deferred downloading (lazy) catalog. Skip entries which are not eligible for lazy catalog. (Don't have url attribute.) :param units: A list of: pulp_rpm.plugins.db.models.ISO. :type units: list """ for unit in units: # Unit is from pulp manifest if not hasattr(unit, "url"): continue if not unit.storage_path: unit.set_storage_path(unit.name) entry = LazyCatalogEntry() entry.path = unit.storage_path entry.importer_id = str(self.sync_conduit.importer_object_id) entry.unit_id = unit.id entry.unit_type_id = unit.type_id entry.url = unit.url entry.checksum = unit.checksum # The current ISO model does not define a checksum type, but appears to use sha256. # Once the model includes the checksum type, this should use that field. entry.checksum_algorithm = 'sha256' entry.save_revision() def perform_sync(self): """ Perform the sync operation according to the config, and return a report. The sync progress will be reported through the sync_conduit. 
        :return: The sync report
        :rtype: pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError
            # will happen if the PULP_MANIFEST file isn't in the expected format. Either way the
            # failure state was already recorded on the progress report by _download_manifest().
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest, self.download_deferred)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            for iso in local_available_isos:
                self._associate_unit(self.sync_conduit.repo, iso)
            # Deferred downloading (Lazy) entries.
            self.add_catalog_entries(local_available_isos)

        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS

        # Download files and add units.
        if self.download_deferred:
            # Deferred (lazy) policy: record the units and catalog entries now; the bits are
            # fetched later by the streamer, so nothing is downloaded here.
            for iso in local_missing_isos:
                iso.downloaded = False
                try:
                    iso.save()
                except NotUniqueError:
                    # Another sync already saved this unit; re-fetch the canonical copy.
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                else:
                    # Only a freshly-saved unit needs a new catalog entry.
                    self.add_catalog_entries([iso])
                self._associate_unit(self.sync_conduit.repo, iso)
        else:
            self._download_isos(local_missing_isos)

        # Remove unwanted iso units
        if self._remove_missing_units:
            repo_controller.disassociate_units(self.sync_conduit.repo, remote_missing_isos)
            # Also drop the lazy catalog entries for the removed units so the streamer
            # no longer serves them for this importer.
            for unit in remote_missing_isos:
                qs = LazyCatalogEntry.objects.filter(
                    importer_id=str(self.sync_conduit.importer_object_id),
                    unit_id=unit.id,
                    unit_type_id=unit.type_id)
                qs.delete()

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if the
        # progress report has collected any errors. See the progress_report's _set_state() method
        # for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests. Each ISO gets its own temp directory so
        # that same-named files from different URLs cannot collide; presumably these are cleaned
        # up with the task working directory (TODO confirm).
        download_directory = common_utils.get_working_directory()
        download_requests = []
        for iso in manifest:
            iso_tmp_dir = tempfile.mkdtemp(dir=download_directory)
            iso_name = os.path.basename(iso.url)
            iso_download_path = os.path.join(iso_tmp_dir, iso_name)
            download_requests.append(
                request.DownloadRequest(iso.url, iso_download_path, iso))
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest
        :raises IOError:    if the manifest could not be retrieved at all
        :raises ValueError: if the retrieved manifest is not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the manifest.
        # The download_failed() callback sets STATE_MANIFEST_FAILED on this progress report.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(
                _("Could not retrieve %(url)s") % {'url': manifest_url})
        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _(
                'The PULP_MANIFEST file was not in the ' +
                'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)
        return manifest

    def _associate_unit(self, repo, unit):
        """
        Associate an iso unit with a repository but first check if there's already any with the
        same name and if so, remove them.

        :param repo: An ISO repository that is being synced
        :type  repo: pulp.server.db.model.Repository
        :param unit: An ISO unit to associate with repo
        :type  unit: pulp_rpm.plugins.db.models.ISO
        """
        if not self.repo_units:
            # store the existing repo units to prevent querying mongo multiple times;
            # list() materializes the generator so the cache survives repeated calls
            self.repo_units = list(
                repo_controller.find_repo_content_units(
                    repo, yield_content_unit=True))
        units_to_remove = [
            iso for iso in self.repo_units if iso['name'] == unit['name']
        ]
        repo_controller.disassociate_units(repo, units_to_remove)
        repo_controller.associate_single_unit(repo, unit)

    def _filter_missing_isos(self, manifest, download_deferred):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that
        are not available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :param download_deferred: indicates downloading is deferred (or not).
        :type  download_deferred: bool
        :return: A 3-tuple. The first element of the tuple is a list of ISOs that we should
                 retrieve from the feed_url. The second element of the tuple is a list of
                 Units that are available locally already, but are not currently associated
                 with the repository. The third element of the tuple is a list of Units that
                 represent the ISOs that we have in our local repo that were not found in
                 the remote repo.
        :rtype:  tuple
        """
        # A list of all the ISOs we have in Pulp. Note: when download_deferred is True the
        # filter condition is always False, so nothing counts as locally available and every
        # remote ISO is treated as missing (to be recorded for lazy download).
        existing_units = models.ISO.objects()
        existing_units_by_key = dict([
            (unit.unit_key_str, unit) for unit in existing_units
            if not download_deferred and os.path.isfile(unit.storage_path)
        ])
        # The queryset cursor was consumed by the pass above; rewind it so the second
        # pass below sees all units again (MongoEngine cursors are single-pass).
        existing_units.rewind()
        existing_unit_keys = set([
            unit.unit_key_str for unit in existing_units
            if not download_deferred and os.path.isfile(unit.storage_path)
        ])
        # A list of units currently associated with the repository
        existing_repo_units = repo_controller.find_repo_content_units(
            self.sync_conduit.repo, yield_content_unit=True)
        existing_repo_units = list(existing_repo_units)
        existing_repo_units_by_key = dict([(unit.unit_key_str, unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set(
            [unit.unit_key_str for unit in existing_repo_units])
        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(iso.unit_key_str, iso)
                                      for iso in manifest])
        available_iso_keys = set([iso.unit_key_str for iso in manifest])
        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set(
            [iso for iso in available_iso_keys if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [
            existing_units_by_key[k] for k in local_available_iso_keys
        ]
        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [
            available_isos_by_key[k] for k in local_missing_iso_keys
        ]
        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys)
        remote_missing_units = [
            existing_repo_units_by_key[k] for k in remote_missing_unit_keys
        ]
        return local_missing_isos, local_available_units, remote_missing_units
class LazyUnitDownloadStep(Step, DownloadEventListener):
    """
    A Step that downloads all the given requests. The downloader is
    configured to download from the Pulp Streamer components.

    :ivar download_requests: The download requests the step will process.
    :type download_requests: list of nectar.request.DownloadRequest
    :ivar download_config: The keyword args used to initialize the Nectar
                           downloader configuration.
    :type download_config: dict
    :ivar downloader: The Nectar downloader used to fetch the requests.
    :type downloader: nectar.downloaders.threaded.HTTPThreadedDownloader
    """

    def __init__(self, step_type, step_description, lazy_status_conduit, download_requests):
        """
        Initializes a Step that downloads all the download requests provided.

        :param step_type:           The id of the step this processes.
        :type  step_type:           str
        :param step_description:    The text description displayed to users.
        :type  step_description:    basestring
        :param lazy_status_conduit: Conduit used to update the task status.
        :type  lazy_status_conduit: LazyStatusConduit
        :param download_requests:   List of download requests to process.
        :type  download_requests:   list of nectar.request.DownloadRequest
        """
        super(LazyUnitDownloadStep, self).__init__(
            step_type=step_type,
            status_conduit=lazy_status_conduit,
        )
        self.description = step_description
        self.download_requests = download_requests
        # The special request header marks these requests so the streamer knows they
        # originate from Pulp itself rather than from an end client.
        self.download_config = {
            MAX_CONCURRENT: int(pulp_conf.get('lazy', 'download_concurrency')),
            HEADERS: {PULP_STREAM_REQUEST_HEADER: 'true'},
            SSL_VALIDATION: True
        }
        self.downloader = HTTPThreadedDownloader(
            DownloaderConfig(**self.download_config),
            self
        )

    def _process_block(self, item=None):
        """
        This block is called by the `process` loop. This is overridden because
        success and failure is determined during the EventListener callbacks,
        which will handle updating the progress. Since `item` is not used, this
        does not make use of `process_main` and simply calls the downloader.

        Inherited from Step.

        :param item: Unused.
        :type  item: None
        """
        self.downloader.download(self.download_requests)

    def get_total(self):
        """
        The total number of download requests so progress reporting occurs at
        the file level.

        Inherited from Step.

        :return: The number of download requests this step will process.
        :rtype:  int
        """
        return len(self.download_requests)

    def download_started(self, report):
        """
        Checks the filesystem for the file that we are about to download,
        and if it exists, raise an exception which will cause Nectar to
        skip the download.

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type  report: nectar.report.DownloadReport

        :raises SkipLocation: if the file is already downloaded and matches
                              the checksum stored in the catalog.
        """
        _logger.debug(_('Starting download of {url}.').format(url=report.url))

        # Remove the deferred entry now that the download has started.
        query_set = DeferredDownload.objects.filter(
            unit_id=report.data[UNIT_ID],
            unit_type_id=report.data[TYPE_ID]
        )
        query_set.delete()

        try:
            # If the file exists and the checksum is valid, don't download it
            path_entry = report.data[UNIT_FILES][report.destination]
            catalog_entry = path_entry[CATALOG_ENTRY]
            self.validate_file(
                catalog_entry.path,
                catalog_entry.checksum_algorithm,
                catalog_entry.checksum
            )
            path_entry[PATH_DOWNLOADED] = True
            self.progress_successes += 1
            self.report_progress()
            msg = _('{path} has already been downloaded.').format(
                path=path_entry[CATALOG_ENTRY].path)
            _logger.debug(msg)
            # SkipLocation is raised from inside the try and deliberately not caught
            # below, so it propagates to Nectar which then skips this request.
            raise SkipLocation()
        except (InvalidChecksumType, VerificationException, IOError):
            # It's either missing or incorrect, so download it
            pass

    def download_succeeded(self, report):
        """
        Marks the individual file for the unit as downloaded and moves it into
        its final storage location if its checksum value matches the value in
        the catalog entry (if present).

        Inherited from DownloadEventListener.

        :param report: the report associated with the download request.
        :type  report: nectar.report.DownloadReport
        """
        # Reload the content unit
        unit_model = plugin_api.get_unit_model_by_id(report.data[TYPE_ID])
        unit_qs = unit_model.objects.filter(id=report.data[UNIT_ID])
        content_unit = unit_qs.only('_content_type_id', 'id', '_last_updated').get()
        path_entry = report.data[UNIT_FILES][report.destination]

        # Validate the file and update the progress.
        catalog_entry = path_entry[CATALOG_ENTRY]
        try:
            self.validate_file(
                report.destination,
                catalog_entry.checksum_algorithm,
                catalog_entry.checksum
            )

            relative_path = os.path.relpath(
                catalog_entry.path,
                FileStorage.get_path(content_unit)
            )
            if len(report.data[UNIT_FILES]) == 1:
                # If the unit is single-file, update the storage path to point to the file
                content_unit.set_storage_path(relative_path)
                unit_qs.update_one(set___storage_path=content_unit._storage_path)
                content_unit.import_content(report.destination)
            else:
                content_unit.import_content(report.destination, location=relative_path)
            self.progress_successes += 1
            path_entry[PATH_DOWNLOADED] = True
        except (InvalidChecksumType, VerificationException, IOError) as e:
            # `except X as e` replaces the Python-2-only `except X, e` form; it is valid
            # on Python 2.6+ and required on Python 3.
            _logger.debug(_('Download of {path} failed: {reason}.').format(
                path=catalog_entry.path, reason=str(e)))
            path_entry[PATH_DOWNLOADED] = False
            self.progress_failures += 1

        self.report_progress()

        # Mark the entire unit as downloaded, if necessary.
        download_flags = [entry[PATH_DOWNLOADED] for entry in
                          report.data[UNIT_FILES].values()]
        if all(download_flags):
            _logger.debug(_('Marking content unit {type}:{id} as downloaded.').format(
                type=content_unit.type_id, id=content_unit.id))
            unit_qs.update_one(set__downloaded=True)
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses
    DownloadEventListener so it can pass itself to the downloader library and receive the
    callbacks when downloads are complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self.config = config
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS),
            'basic_auth_username': config.get(importer_constants.KEY_BASIC_AUTH_USER),
            'basic_auth_password': config.get(importer_constants.KEY_BASIC_AUTH_PASS),
            'working_dir': common_utils.get_working_directory()}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)
        # Cache of the repo's existing units, filled lazily by _associate_unit().
        self.repo_units = []

    @property
    def download_deferred(self):
        """
        Test the download policy to determine if downloading is deferred.

        :return: True if deferred.
        :rtype: bool
        """
        policy = self.config.get(
            importer_constants.DOWNLOAD_POLICY,
            importer_constants.DOWNLOAD_IMMEDIATE)
        return policy != importer_constants.DOWNLOAD_IMMEDIATE

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.

        :param report: The report of the file we failed to download
        :type  report: nectar.report.DownloadReport
        """
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase.
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        _logger.error(msg)
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
            self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            # Only add the delta since the last callback; the report carries cumulative bytes.
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading
        stage, and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            iso.set_storage_path(os.path.basename(report.destination))
            try:
                if self._validate_downloads:
                    iso.validate_iso(report.destination)
                try:
                    iso.save()
                except NotUniqueError:
                    # Another sync saved this unit concurrently; use the canonical copy.
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                self._associate_unit(self.sync_conduit.repo, iso)
                iso.safe_import_content(report.destination)

                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                self.download_failed(report)

    def add_catalog_entries(self, units):
        """
        Add entries to the deferred downloading (lazy) catalog.

        Skip entries which are not eligible for lazy catalog.
        (Don't have url attribute.)

        :param units: A list of: pulp_rpm.plugins.db.models.ISO.
        :type  units: list
        """
        for unit in units:
            # Unit is from pulp manifest
            if not hasattr(unit, "url"):
                continue
            if not unit.storage_path:
                unit.set_storage_path(unit.name)
            entry = LazyCatalogEntry()
            entry.path = unit.storage_path
            entry.importer_id = str(self.sync_conduit.importer_object_id)
            entry.unit_id = unit.id
            entry.unit_type_id = unit.type_id
            entry.url = unit.url
            entry.checksum = unit.checksum
            # The current ISO model does not define a checksum type, but appears to use sha256.
            # Once the model includes the checksum type, this should use that field.
            entry.checksum_algorithm = 'sha256'
            entry.save_revision()

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return: The sync report
        :rtype:  pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError
            # will happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest, self.download_deferred)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO._content_type_id.default,
                                                 search_dicts)
            # Deferred downloading (Lazy) entries.
            self.add_catalog_entries(local_available_isos)

        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS

        # Download files and add units.
        if self.download_deferred:
            for iso in local_missing_isos:
                iso.downloaded = False
                try:
                    iso.save()
                except NotUniqueError:
                    iso = iso.__class__.objects.filter(**iso.unit_key).first()
                else:
                    self.add_catalog_entries([iso])
                self._associate_unit(self.sync_conduit.repo, iso)
        else:
            self._download_isos(local_missing_isos)

        # Remove unwanted iso units
        if self._remove_missing_units:
            repo_controller.disassociate_units(self.sync_conduit.repo, remote_missing_isos)
            for unit in remote_missing_isos:
                qs = LazyCatalogEntry.objects.filter(
                    importer_id=str(self.sync_conduit.importer_object_id),
                    unit_id=unit.id,
                    unit_type_id=unit.type_id)
                qs.delete()

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED if
        # the progress report has collected any errors. See the progress_report's _set_state()
        # method for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_directory = common_utils.get_working_directory()
        download_requests = []
        for iso in manifest:
            # Each ISO gets its own temp dir so same-named files cannot collide.
            iso_tmp_dir = tempfile.mkdtemp(dir=download_directory)
            iso_name = os.path.basename(iso.url)
            iso_download_path = os.path.join(iso_tmp_dir, iso_name)
            download_requests.append(request.DownloadRequest(iso.url, iso_download_path, iso))
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest
        :raises IOError:    if the manifest could not be retrieved
        :raises ValueError: if the manifest is not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the
        # manifest (download_failed() sets STATE_MANIFEST_FAILED).
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})
        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)
        return manifest

    def _associate_unit(self, repo, unit):
        """
        Associate an iso unit with a repository but first check if there's already any with the
        same name and if so, remove them.

        :param repo: An ISO repository that is being synced
        :type  repo: pulp.server.db.model.Repository
        :param unit: An ISO unit to associate with repo
        :type  unit: pulp_rpm.plugins.db.models.ISO
        """
        if not self.repo_units:
            # Store the existing repo units to prevent querying mongo multiple times.
            # BUGFIX: materialize the generator with list(). find_repo_content_units()
            # returns a generator, which is always truthy and is exhausted by the first
            # pass over it — caching the generator itself meant every subsequent call saw
            # an empty cache and duplicate-named units were never removed.
            self.repo_units = list(
                repo_controller.find_repo_content_units(repo, yield_content_unit=True))
        units_to_remove = [iso for iso in self.repo_units if iso['name'] == unit['name']]
        repo_controller.disassociate_units(repo, units_to_remove)
        repo_controller.associate_single_unit(repo, unit)

    def _filter_missing_isos(self, manifest, download_deferred):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that
        are not available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :param download_deferred: indicates downloading is deferred (or not).
        :type  download_deferred: bool
        :return: A 3-tuple. The first element of the tuple is a list of ISOs that we should
                 retrieve from the feed_url. The second element of the tuple is a list of
                 Units that are available locally already, but are not currently associated
                 with the repository. The third element of the tuple is a list of Units that
                 represent the ISOs that we have in our local repo that were not found in the
                 remote repo.
        :rtype:  tuple
        """
        # A list of all the ISOs we have in Pulp. When download_deferred is True the filter
        # condition is always False, so every remote ISO is treated as locally missing.
        existing_units = models.ISO.objects()
        existing_units_by_key = dict([(unit.unit_key_str, unit)
                                      for unit in existing_units if not download_deferred and
                                      os.path.isfile(unit.storage_path)])
        # MongoEngine cursors are single-pass; rewind before the second iteration.
        existing_units.rewind()
        existing_unit_keys = set([unit.unit_key_str
                                  for unit in existing_units if not download_deferred and
                                  os.path.isfile(unit.storage_path)])

        # A list of units currently associated with the repository
        existing_repo_units = repo_controller.find_repo_content_units(
            self.sync_conduit.repo, yield_content_unit=True)
        existing_repo_units = list(existing_repo_units)
        existing_repo_units_by_key = dict([(unit.unit_key_str, unit)
                                           for unit in existing_repo_units])
        existing_repo_unit_keys = set([unit.unit_key_str for unit in existing_repo_units])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(iso.unit_key_str, iso) for iso in manifest])
        available_iso_keys = set([iso.unit_key_str for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        local_available_iso_keys = set([iso for iso in available_iso_keys
                                        if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [existing_units_by_key[k] for k in local_available_iso_keys]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [available_isos_by_key[k] for k in local_missing_iso_keys]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys)
        remote_missing_units = [existing_repo_units_by_key[k]
                                for k in remote_missing_unit_keys]

        return local_missing_isos, local_available_units, remote_missing_units
class DownloadStep(PluginStep, listener.DownloadEventListener):

    def __init__(self, step_type, downloads=None, repo=None, conduit=None, config=None,
                 working_dir=None, plugin_type=None, description=''):
        """
        Set the default parent and step_type for the Download step.

        :param step_type: The id of the step this processes
        :type  step_type: str
        :param downloads: A list of DownloadRequests
        :type  downloads: list of nectar.request.DownloadRequest
        :param repo: The repo to be published
        :type  repo: pulp.plugins.model.Repository
        :param conduit: The conduit for the repo
        :type  conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: The publish configuration
        :type  config: PluginCallConfiguration
        :param working_dir: The temp directory this step should use for processing
        :type  working_dir: str
        :param plugin_type: The type of the plugin
        :type  plugin_type: str
        :param description: The text description that will be displayed to users
        :type  description: basestring
        """
        super(DownloadStep, self).__init__(step_type, repo=repo, conduit=conduit,
                                           config=config, working_dir=working_dir,
                                           plugin_type=plugin_type)
        # May be a generator; materialized lazily by the `downloads` property.
        self._downloads = downloads if downloads is not None else []
        self.step_type = step_type
        self.repo = repo
        self.conduit = conduit
        self.config = config
        self.working_dir = working_dir
        self.plugin_type = plugin_type
        self.description = description

    def initialize(self):
        """
        Set up the nectar downloader.

        Originally based on the ISO sync setup.
        """
        config = self.get_config()
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE, default=True)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # urljoin is used later to build paths, so the feed URL needs a trailing slash.
        if self._repo_url[-1] != '/':
            self._repo_url += '/'
        downloader_config = importer_config_to_nectar_config(config.flatten())
        # Register self as the event listener so the download_* callbacks land on this step.
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)

    @property
    def downloads(self):
        """
        This lets the class be instantiated with "downloads" as a generator that gets lazily
        evaluated, which is helpful because at instantiation time it is probably not known
        what downloads will be required. The first access materializes and caches the list.

        :return: list of download requests (nectar.request.DownloadRequest)
        :rtype:  list
        """
        cached = self._downloads
        if not isinstance(cached, list):
            cached = list(cached)
            self._downloads = cached
        return cached

    def get_total(self):
        """
        Get total number of items to download.

        :returns: number of DownloadRequests
        :rtype:   int
        """
        return len(self.downloads)

    def _process_block(self):
        """
        The main "do stuff" method: just kick off all the downloads.
        """
        self.downloader.download(self.downloads)

    # from listener.DownloadEventListener
    def download_succeeded(self, report):
        """
        Callback from the downloader library when an individual download succeeds.
        Bump the successes counter and report progress.

        :param report: report (passed in from nectar but currently not used)
        :type  report: pulp.plugins.model.PublishReport
        """
        self.progress_successes += 1
        self.report_progress()

    # from listener.DownloadEventListener
    def download_failed(self, report):
        """
        Callback from the downloader library when an individual download fails.
        Bump the failure counter and report progress.

        :param report: report (passed in from nectar but currently not used)
        :type  report: pulp.plugins.model.PublishReport
        """
        self.progress_failures += 1
        self.report_progress()

    def cancel(self):
        """
        Cancel the current step, then cancel any in-flight downloads.
        """
        super(DownloadStep, self).cancel()
        self.downloader.cancel()
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses
    DownloadEventListener so it can pass itself to the downloader library and receive the
    callbacks when downloads are complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(importer_constants.KEY_VALIDATE,
                                              default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to
        # determine the path to PULP_MANIFEST later
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)}
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in
        # this class
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # We used to support sync cancellation, but the current downloader implementation
        # does not support it and so for now we will just pass
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.

        :param report: The report of the file we failed to download
        :type  report: nectar.report.DownloadReport
        """
        # If we have a download failure during the manifest phase, we should set the report
        # to failed for that phase.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
        self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can
        update our progress report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            # Only add the delta since the last callback; the report carries cumulative bytes.
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading
        stage, and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report.
        The sync progress will be reported through the sync_conduit.

        :return: The sync report
        :rtype:  pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the
            # ValueError will happen if the PULP_MANIFEST file isn't in the expected format.
            return self.progress_report.build_final_report()

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        local_missing_isos, remote_missing_isos = self._filter_missing_isos(manifest)
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished. Note that setting the
        # state to STATE_ISOS_COMPLETE will automatically set the state to STATE_ISOS_FAILED
        # if the progress report has collected any errors. See the progress_report's
        # _set_state() method for the implementation of this logic.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: list
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want it
        # to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests
        download_requests = [request.DownloadRequest(iso.url, iso.storage_path, iso)
                             for iso in manifest]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.common.models.ISOManifest
        :raises IOError:    if the manifest could not be retrieved
        :raises ValueError: if the manifest is not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # I probably should have called this manifest destination, but I couldn't help myself
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the
        # manifest (download_failed() sets STATE_MANIFEST_FAILED).
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(_("Could not retrieve %(url)s") % {'url': manifest_url})
        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            # The bound exception variable of the old `except ValueError, e:` clause was
            # unused, and the comma form is Python-2-only syntax; drop the binding.
            self.progress_report.error_message = _('The PULP_MANIFEST file was not in the ' +
                                                   'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)
        return manifest
class ISOSyncRun(listener.DownloadEventListener):
    """
    This class maintains state for a single repository sync (do not reuse it). We need to keep
    the state so that we can cancel a sync that is in progress. It subclasses
    DownloadEventListener so it can pass itself to the downloader library and receive the
    callbacks when downloads are complete.
    """

    def __init__(self, sync_conduit, config):
        """
        Initialize an ISOSyncRun: read the importer configuration, build the nectar
        downloader that will be used for both the manifest and the ISO files, and create
        the progress report that tracks this run's state.

        :param sync_conduit: the sync conduit to use for this sync run.
        :type  sync_conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config:       plugin configuration
        :type  config:       pulp.plugins.config.PluginCallConfiguration
        """
        self.sync_conduit = sync_conduit
        # Sync behavior flags, falling back to the plugin-wide defaults.
        self._remove_missing_units = config.get(
            importer_constants.KEY_UNITS_REMOVE_MISSING,
            default=constants.CONFIG_UNITS_REMOVE_MISSING_DEFAULT)
        self._validate_downloads = config.get(
            importer_constants.KEY_VALIDATE,
            default=constants.CONFIG_VALIDATE_DEFAULT)
        self._repo_url = encode_unicode(config.get(
            importer_constants.KEY_FEED))
        # The _repo_url must end in a trailing slash, because we will use urljoin to determine
        # the path to PULP_MANIFEST later. NOTE(review): this indexing assumes the feed is a
        # non-empty string — a missing feed would raise here rather than fail gracefully.
        if self._repo_url[-1] != '/':
            self._repo_url = self._repo_url + '/'

        # Cast our config parameters to the correct types and use them to build a Downloader
        max_speed = config.get(importer_constants.KEY_MAX_SPEED)
        if max_speed is not None:
            max_speed = float(max_speed)
        max_downloads = config.get(importer_constants.KEY_MAX_DOWNLOADS)
        if max_downloads is not None:
            max_downloads = int(max_downloads)
        else:
            max_downloads = constants.CONFIG_MAX_DOWNLOADS_DEFAULT
        ssl_validation = config.get_boolean(
            importer_constants.KEY_SSL_VALIDATION)
        ssl_validation = ssl_validation if ssl_validation is not None else \
            constants.CONFIG_VALIDATE_DEFAULT
        downloader_config = {
            'max_speed': max_speed,
            'max_concurrent': max_downloads,
            'ssl_client_cert': config.get(importer_constants.KEY_SSL_CLIENT_CERT),
            'ssl_client_key': config.get(importer_constants.KEY_SSL_CLIENT_KEY),
            'ssl_ca_cert': config.get(importer_constants.KEY_SSL_CA_CERT),
            'ssl_validation': ssl_validation,
            'proxy_url': config.get(importer_constants.KEY_PROXY_HOST),
            'proxy_port': config.get(importer_constants.KEY_PROXY_PORT),
            'proxy_username': config.get(importer_constants.KEY_PROXY_USER),
            'proxy_password': config.get(importer_constants.KEY_PROXY_PASS)
        }
        downloader_config = DownloaderConfig(**downloader_config)

        # We will pass self as the event_listener, so that we can receive the callbacks in this
        # class. file:// feeds get the local-file downloader; everything else goes through the
        # threaded HTTP downloader.
        if self._repo_url.lower().startswith('file'):
            self.downloader = LocalFileDownloader(downloader_config, self)
        else:
            self.downloader = HTTPThreadedDownloader(downloader_config, self)
        self.progress_report = SyncProgressReport(sync_conduit)

    def cancel_sync(self):
        """
        This method will cancel a sync that is in progress.
        """
        # Mark the run as cancelled in the progress report and ask the downloader to stop any
        # in-flight downloads.
        self.progress_report.state = self.progress_report.STATE_CANCELLED
        self.downloader.cancel()

    def download_failed(self, report):
        """
        This is the callback that we will get from the downloader library when any individual
        download fails.

        :param report: the report for the failed download
        :type  report: nectar.report.DownloadReport
        """
        msg = _('Failed to download %(url)s: %(error_msg)s.')
        msg = msg % {'url': report.url, 'error_msg': report.error_msg}
        _logger.error(msg)
        # If we have a download failure during the manifest phase, we should set the report to
        # failed for that phase; _download_manifest() inspects this state afterwards to decide
        # whether to raise.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_IN_PROGRESS:
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            self.progress_report.error_message = report.error_report
        elif self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # During the ISO phase, record the individual failure and keep going.
            iso = report.data
            self.progress_report.add_failed_iso(iso, report.error_report)
            self.progress_report.update_progress()

    def download_progress(self, report):
        """
        We will get notified from time to time about some bytes we've downloaded. We can update
        our progress report with this information so the client can see the progress.

        :param report: The report of the file we are downloading
        :type  report: nectar.report.DownloadReport
        """
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            iso = report.data
            # report.bytes_downloaded is cumulative for this file, so only add the delta since
            # the last callback for this ISO.
            additional_bytes_downloaded = report.bytes_downloaded - iso.bytes_downloaded
            self.progress_report.finished_bytes += additional_bytes_downloaded
            iso.bytes_downloaded = report.bytes_downloaded
            self.progress_report.update_progress()

    def download_succeeded(self, report):
        """
        This is the callback that we will get from the downloader library when it succeeds in
        downloading a file. This method will check to see if we are in the ISO downloading
        stage, and if we are, it will add the new ISO to the database.

        :param report: The report of the file we downloaded
        :type  report: nectar.report.DownloadReport
        """
        # If we are in the isos stage, then this must be one of our ISOs.
        if self.progress_report.state == self.progress_report.STATE_ISOS_IN_PROGRESS:
            # This will update our bytes downloaded (accounts for any final bytes not yet
            # reported through download_progress).
            self.download_progress(report)
            iso = report.data
            try:
                if self._validate_downloads:
                    iso.validate()
                iso.save_unit(self.sync_conduit)
                # We can drop this ISO from the url --> ISO map
                self.progress_report.num_isos_finished += 1
                self.progress_report.update_progress()
            except ValueError:
                # Validation failed; route through the failure path so the report records it.
                self.download_failed(report)

    def perform_sync(self):
        """
        Perform the sync operation according to the config, and return a report. The sync
        progress will be reported through the sync_conduit.

        :return: The sync report
        :rtype:  pulp.plugins.model.SyncReport
        """
        # Get the manifest and download the ISOs that we are missing
        self.progress_report.state = self.progress_report.STATE_MANIFEST_IN_PROGRESS
        try:
            manifest = self._download_manifest()
        except (IOError, ValueError):
            # The IOError will happen if the file can't be retrieved at all, and the ValueError
            # will happen if the PULP_MANIFEST file isn't in the expected format. The progress
            # report was already marked failed by _download_manifest(), so just build the final
            # report.
            return self.progress_report.build_final_report()

        # Discover what files we need to download and what we already have
        filtered_isos = self._filter_missing_isos(manifest)
        local_missing_isos, local_available_isos, remote_missing_isos = filtered_isos

        # Associate units that are already in Pulp (no download needed, just link them to this
        # repository)
        if local_available_isos:
            search_dicts = [unit.unit_key for unit in local_available_isos]
            self.sync_conduit.associate_existing(models.ISO.TYPE, search_dicts)

        # Go get them filez
        self.progress_report.state = self.progress_report.STATE_ISOS_IN_PROGRESS
        self._download_isos(local_missing_isos)
        if self._remove_missing_units:
            self._remove_units(remote_missing_isos)

        # Report that we are finished. NOTE(review): per the progress report's _set_state()
        # logic, assigning a "complete" state is expected to automatically become a failed
        # state if the report has collected any errors — confirm against SyncProgressReport.
        self.progress_report.state = self.progress_report.STATE_COMPLETE
        report = self.progress_report.build_final_report()
        return report

    def _download_isos(self, manifest):
        """
        Makes the calls to retrieve the ISOs from the manifest, storing them on disk and
        recording them in the Pulp database.

        :param manifest: The manifest containing a list of ISOs we want to download.
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        """
        self.progress_report.total_bytes = 0
        self.progress_report.num_isos = len(manifest)
        # For each ISO in the manifest, we need to determine a relative path where we want
        # it to be stored, and initialize the Unit that will represent it
        for iso in manifest:
            iso.init_unit(self.sync_conduit)
            # Per-ISO byte counter used by download_progress() to compute deltas.
            iso.bytes_downloaded = 0
            # Set the total bytes onto the report
            self.progress_report.total_bytes += iso.size
        self.progress_report.update_progress()
        # We need to build a list of DownloadRequests; each request carries its ISO as `data`
        # so the callbacks can find it again.
        download_requests = [
            request.DownloadRequest(iso.url, iso.storage_path, iso)
            for iso in manifest
        ]
        self.downloader.download(download_requests)

    def _download_manifest(self):
        """
        Download the manifest file, and process it to return an ISOManifest.

        :return: manifest of available ISOs
        :rtype:  pulp_rpm.plugins.db.models.ISOManifest

        :raises IOError:    if the manifest could not be retrieved at all
        :raises ValueError: if the retrieved PULP_MANIFEST is not in the expected format
        """
        manifest_url = urljoin(self._repo_url, models.ISOManifest.FILENAME)
        # Download the manifest into an in-memory buffer rather than to disk.
        # (I probably should have called this manifest destination, but I couldn't help myself.)
        manifest_destiny = StringIO()
        manifest_request = request.DownloadRequest(manifest_url, manifest_destiny)
        self.downloader.download([manifest_request])
        # We can inspect the report status to see if we had an error when retrieving the
        # manifest; download_failed() sets STATE_MANIFEST_FAILED on failure.
        if self.progress_report.state == self.progress_report.STATE_MANIFEST_FAILED:
            raise IOError(
                _("Could not retrieve %(url)s") % {'url': manifest_url})

        # Rewind the buffer before parsing it.
        manifest_destiny.seek(0)
        try:
            manifest = models.ISOManifest(manifest_destiny, self._repo_url)
        except ValueError:
            self.progress_report.error_message = _(
                'The PULP_MANIFEST file was not in the ' + 'expected format.')
            self.progress_report.state = self.progress_report.STATE_MANIFEST_FAILED
            raise ValueError(self.progress_report.error_message)

        return manifest

    def _filter_missing_isos(self, manifest):
        """
        Use the sync_conduit and the manifest to determine which ISOs are at the feed_url
        that are not in our local store, as well as which ISOs are in our local store that
        are not available at the feed_url.

        :param manifest: An ISOManifest describing the ISOs that are available at the
                         feed_url that we are synchronizing with
        :type  manifest: pulp_rpm.plugins.db.models.ISOManifest
        :return:         A 3-tuple. The first element of the tuple is a list of ISOs that we
                         should retrieve from the feed_url. The second element of the tuple
                         is a list of Units that are available locally already, but are not
                         currently associated with the repository. The third element of the
                         tuple is a list of Units that represent the ISOs that we have in
                         our local repo that were not found in the remote repo.
        :rtype:          tuple
        """
        def _unit_key_str(iso):
            """
            Return a simple string representation of the unit key of the ISO, used as a
            hashable key for the set/dict comparisons below.

            :param iso: The ISO for which we want a unit key string representation
            :type  iso: pulp_rpm.plugins.db.models.ISO
            """
            return '%s-%s-%s' % (iso.name, iso.checksum, iso.size)

        # A list of all the ISOs we have in Pulp (across all repositories)
        search_criteria = Criteria(fields=models.ISO.UNIT_KEY_ISO)
        existing_units = self.sync_conduit.search_all_units(
            models.ISO.TYPE, search_criteria)
        existing_units_by_key = dict([
            (_unit_key_str(models.ISO.from_unit(unit)), unit)
            for unit in existing_units
        ])
        existing_unit_keys = set([
            _unit_key_str(models.ISO.from_unit(unit))
            for unit in existing_units
        ])

        # A list of units currently associated with the repository
        search_criteria = UnitAssociationCriteria(type_ids=[models.ISO.TYPE])
        existing_repo_units = self.sync_conduit.get_units(search_criteria)
        existing_repo_units_by_key = dict([
            (_unit_key_str(models.ISO.from_unit(unit)), unit)
            for unit in existing_repo_units
        ])
        existing_repo_unit_keys = set([
            _unit_key_str(models.ISO.from_unit(unit))
            for unit in existing_repo_units
        ])

        # A list of the ISOs in the remote repository
        available_isos_by_key = dict([(_unit_key_str(iso), iso)
                                      for iso in manifest])
        available_iso_keys = set([_unit_key_str(iso) for iso in manifest])

        # Content that is available locally and just needs to be associated with the repository
        # (in Pulp somewhere, but not yet in this repo)
        local_available_iso_keys = set(
            [iso for iso in available_iso_keys if iso in existing_unit_keys])
        local_available_iso_keys = local_available_iso_keys - existing_repo_unit_keys
        local_available_units = [
            existing_units_by_key[k] for k in local_available_iso_keys
        ]

        # Content that is missing locally and must be downloaded
        local_missing_iso_keys = list(available_iso_keys - existing_unit_keys)
        local_missing_isos = [
            available_isos_by_key[k] for k in local_missing_iso_keys
        ]

        # Content that is missing from the remote repository that is present locally
        remote_missing_unit_keys = list(existing_repo_unit_keys - available_iso_keys)
        remote_missing_units = [
            existing_repo_units_by_key[k] for k in remote_missing_unit_keys
        ]

        return local_missing_isos, local_available_units, remote_missing_units

    def _remove_units(self, units):
        """
        Use the sync_conduit's remove_unit call for each unit in units.

        :param units: List of pulp.plugins.model.Units that we want to remove from the
                      repository
        :type  units: list
        """
        for unit in units:
            self.sync_conduit.remove_unit(unit)