Example #1
    def __call__(self):
        """
        Invoke the callable object.

        All work is performed in the repository working directory and cleaned up after the call.

        :return: The final synchronization report.
        :rtype: SyncProgressReport
        """
        self.canceled = False
        self.report = SyncProgressReport(self.conduit)
        self.tmp_dir = mkdtemp(dir=self.repo.working_dir)
        try:
            manifest = self._fetch_manifest()
            if manifest is not None:
                module_paths = self._fetch_modules(manifest)
                self._import_modules(module_paths)
        finally:
            # Update the progress report one last time
            self.report.update_progress()

            shutil.rmtree(self.tmp_dir)
            self.tmp_dir = None

        return self.report
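
Example #1 shows the importer's sync entry point as a callable object. A hedged usage sketch (the constructor signature is taken from Example #24 below; `conduit` and `config` are placeholders for objects the Pulp plugin framework supplies):

# `conduit` and `config` are assumed to be provided by the plugin framework.
method = SynchronizeWithDirectory(conduit, config)
report = method()  # runs the whole sync; tmp_dir is removed in the finally block
if report.metadata_state == constants.STATE_FAILED:
    print('sync failed: %s' % report.metadata_error_message)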
Example #2
    def __init__(self, repo, sync_conduit, config, is_cancelled_call):
        self.repo = repo
        self.sync_conduit = sync_conduit
        self.config = config
        self.is_cancelled_call = is_cancelled_call

        self.progress_report = SyncProgressReport(sync_conduit)
Example #3
    def __init__(self, repo, sync_conduit, config):
        self.repo = repo
        self.sync_conduit = sync_conduit
        self.config = config

        self.progress_report = SyncProgressReport(sync_conduit)
        self.downloader = None
        # Since SynchronizeWithPuppetForge creates a Nectar downloader for each unit, we cannot
        # rely on telling the current downloader to cancel. Therefore, we need another state
        # tracker to check in the download units loop.
        self._canceled = False
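
The comment above explains why cancellation needs its own flag: a fresh Nectar downloader is created per unit, so cancelling only the current downloader is not enough. A self-contained sketch of that cooperative-cancel pattern (names are illustrative, not from the source):

class CancellableSync(object):
    def __init__(self):
        self._canceled = False
        self.downloader = None

    def cancel(self):
        self._canceled = True          # stops the loop before the next unit
        if self.downloader is not None:
            self.downloader.cancel()   # interrupts the in-flight download

    def download_units(self, units):
        for unit in units:
            if self._canceled:         # checked once per unit
                break
            # a fresh per-unit downloader would be created and used here
            pass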
Example #4
    def __call__(self, repository):
        """
        Invoke the callable object.
        All work is performed in the repository working directory and
        cleaned up after the call.

        :param repository: A Pulp repository object.
        :type repository: pulp.server.plugins.model.Repository
        :return: The final synchronization report.
        :rtype: SyncProgressReport
        """
        self.canceled = False
        self.report = SyncProgressReport(self.conduit)
        self.tmp_dir = mkdtemp(dir=repository.working_dir)
        try:
            inventory = Inventory(self.conduit)
            self._run(inventory)
        finally:
            # Update the progress report one last time
            self.report.update_progress()

            shutil.rmtree(self.tmp_dir)
            self.tmp_dir = None

        return self.report
Example #5
    def __init__(self, repo, sync_conduit, config):
        self.repo = repo
        self.sync_conduit = sync_conduit
        self.config = config

        self.progress_report = SyncProgressReport(sync_conduit)
        self.downloader = None
Example #6
    def __init__(self, repo, sync_conduit, config, is_cancelled_call):
        self.repo = repo
        self.sync_conduit = sync_conduit
        self.config = config
        self.is_cancelled_call = is_cancelled_call

        self.progress_report = SyncProgressReport(sync_conduit)
Example #7
    def __call__(self):
        """
        Invoke the callable object.

        All work is performed in the repository working directory and cleaned up after the call.

        :return: The final synchronization report.
        :rtype: SyncProgressReport
        """
        self.canceled = False
        self.report = SyncProgressReport(self.conduit)
        self.tmp_dir = mkdtemp(dir=self.repo.working_dir)
        try:
            manifest = self._fetch_manifest()
            if manifest is not None:
                module_paths = self._fetch_modules(manifest)
                self._import_modules(module_paths)
        finally:
            # Update the progress report one last time
            self.report.update_progress()

            shutil.rmtree(self.tmp_dir)
            self.tmp_dir = None

        return self.report
Example #8
    def setUp(self):
        super(PuppetStatusRendererTests, self).setUp()
        self.renderer = PuppetStatusRenderer(self.context)

        self.config['logging'] = {'filename': 'test-extension-status.log'}

        self.sync_report = SyncProgressReport.from_progress_dict(IMPORTER_REPORT)
        self.publish_report = PublishProgressReport.from_progress_dict(DISTRIBUTOR_REPORT)
Example #9
    def setUp(self):
        super(PuppetStatusRendererTests, self).setUp()
        self.renderer = PuppetStatusRenderer(self.context)

        self.sync_report = SyncProgressReport.from_progress_dict(
            IMPORTER_REPORT)
        self.publish_report = PublishProgressReport.from_progress_dict(
            DISTRIBUTOR_REPORT)
Example #10
    def test_directory_synchronization(self, forge_call, mock_call):
        conduit = Mock()
        repository = Mock()
        config = {constants.CONFIG_FEED: 'http://host/tmp/%s' % constants.MANIFEST_FILENAME}
        progress_report = SyncProgressReport(conduit)
        progress_report.metadata_state = constants.STATE_SUCCESS
        progress_report.modules_state = constants.STATE_SUCCESS
        mock_call.return_value = progress_report

        # test

        plugin = PuppetModuleImporter()
        report = plugin.sync_repo(repository, conduit, config)

        # validation

        mock_call.assert_called_with(repository)
        self.assertEqual(report, conduit.build_success_report.return_value)
        self.assertFalse(forge_call.called)
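
The two mock arguments imply this test is wrapped in two `mock.patch` decorators, and with `mock.patch` the decorator closest to the function supplies the first mock argument. A hedged sketch of the stack these tests appear to assume (the patch targets and class name are illustrative, not taken from the source):

import unittest
from mock import patch

class PuppetModuleImporterTests(unittest.TestCase):  # name is illustrative

    @patch('pulp_puppet.plugins.importers.importer.SynchronizeWithDirectory.__call__')   # -> mock_call
    @patch('pulp_puppet.plugins.importers.importer.SynchronizeWithPuppetForge.__call__')  # -> forge_call
    def test_directory_synchronization(self, forge_call, mock_call):
        pass  # body as in the example above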
Example #11
    def test_directory_synchronization(self, forge_call, mock_call):
        conduit = Mock()
        repository = Mock()
        config = {constants.CONFIG_FEED: 'http://host/tmp/%s' % constants.MANIFEST_FILENAME}
        progress_report = SyncProgressReport(conduit)
        progress_report.metadata_state = constants.STATE_SUCCESS
        progress_report.modules_state = constants.STATE_SUCCESS
        mock_call.return_value = progress_report

        # test

        plugin = PuppetModuleImporter()
        report = plugin.sync_repo(repository, conduit, config)

        # validation

        mock_call.assert_called_with()
        self.assertEqual(report, conduit.build_success_report.return_value)
        self.assertFalse(forge_call.called)
Example #12
    def setUp(self):
        super(PuppetStatusRendererTests, self).setUp()
        self.renderer = PuppetStatusRenderer(self.context)

        self.config['logging'] = {'filename': 'test-extension-status.log'}

        self.sync_report = SyncProgressReport.from_progress_dict(
            IMPORTER_REPORT)
        self.publish_report = PublishProgressReport.from_progress_dict(
            DISTRIBUTOR_REPORT)
Example #13
    def __init__(self, repo, sync_conduit, config):
        self.repo = repo
        self.sync_conduit = sync_conduit
        self.config = config

        self.progress_report = SyncProgressReport(sync_conduit)
        self.downloader = None
        # Since SynchronizeWithPuppetForge creates a Nectar downloader for each unit, we cannot
        # rely on telling the current downloader to cancel. Therefore, we need another state
        # tracker to check in the download units loop.
        self._canceled = False
Example #14
    def display_report(self, progress_report):

        # Sync Steps
        if constants.IMPORTER_ID in progress_report:
            sync_report = SyncProgressReport.from_progress_dict(progress_report[constants.IMPORTER_ID])
            self._display_sync_metadata_step(sync_report)
            self._display_sync_modules_step(sync_report)

        # Publish Steps
        if constants.DISTRIBUTOR_ID in progress_report:
            publish_report = PublishProgressReport.from_progress_dict(progress_report[constants.DISTRIBUTOR_ID])
            self._display_publish_modules_step(publish_report)
            self._display_publish_metadata_step(publish_report)
            self._display_publish_http_https_step(publish_report)
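
`display_report` dispatches on which plugin IDs appear in the dict, so the incoming `progress_report` is evidently keyed by importer/distributor ID, each value being that plugin's raw progress dict. A hedged illustration (the inner dicts are abbreviated to empty here):

progress_report = {
    constants.IMPORTER_ID: {},     # raw sync progress -> SyncProgressReport.from_progress_dict
    constants.DISTRIBUTOR_ID: {},  # raw publish progress -> PublishProgressReport.from_progress_dict
}
renderer.display_report(progress_report)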
Example #15
    def test_forge_synchronization(self, failed_call, mock_call):
        conduit = Mock()
        repository = Mock()
        config = {constants.CONFIG_FEED: 'http://host/tmp/forge'}

        # directory synchronization failure needed so the importer
        # will retry using the forge synchronization.
        failed_report = SyncProgressReport(conduit)
        failed_report.metadata_state = constants.STATE_FAILED
        failed_call.return_value = failed_report

        progress_report = SyncProgressReport(conduit)
        progress_report.metadata_state = constants.STATE_FAILED
        mock_call.return_value = progress_report

        # test

        plugin = PuppetModuleImporter()
        report = plugin.sync_repo(repository, conduit, config)

        # validation
        mock_call.assert_called_with()
        self.assertEqual(report, conduit.build_failure_report.return_value)
Example #16
    def display_report(self, progress_report):

        # Sync Steps
        if constants.IMPORTER_ID in progress_report:
            sync_report = SyncProgressReport.from_progress_dict(
                progress_report[constants.IMPORTER_ID])
            self._display_sync_metadata_step(sync_report)
            self._display_sync_modules_step(sync_report)

        # Publish Steps
        if constants.DISTRIBUTOR_ID in progress_report:
            publish_report = PublishProgressReport.from_progress_dict(
                progress_report[constants.DISTRIBUTOR_ID])
            self._display_publish_modules_step(publish_report)
            self._display_publish_metadata_step(publish_report)
            self._display_publish_http_https_step(publish_report)
Example #17
    def test_forge_synchronization(self, failed_call, mock_call):
        conduit = Mock()
        repository = Mock()
        config = {constants.CONFIG_FEED: 'http://host/tmp/forge'}

        # directory synchronization failure needed so the importer
        # will retry using the forge synchronization.
        failed_report = SyncProgressReport(conduit)
        failed_report.metadata_state = constants.STATE_FAILED
        failed_call.return_value = failed_report

        progress_report = SyncProgressReport(conduit)
        progress_report.metadata_state = constants.STATE_FAILED
        mock_call.return_value = progress_report

        # test

        plugin = PuppetModuleImporter()
        report = plugin.sync_repo(repository, conduit, config)

        # validation
        mock_call.assert_called_with()
        self.assertEqual(report, conduit.build_failure_report.return_value)
Example #18
class SynchronizeWithPuppetForge(object):
    """
    Used to perform a single sync of a puppet repository. This class will
    maintain state relevant to the run and should not be reused across runs.
    """

    def __init__(self, repo, sync_conduit, config):
        self.repo = repo
        self.sync_conduit = sync_conduit
        self.config = config

        self.progress_report = SyncProgressReport(sync_conduit)
        self.downloader = None
        # Since SynchronizeWithPuppetForge creates a Nectar downloader for each unit, we cannot
        # rely on telling the current downloader to cancel. Therefore, we need another state
        # tracker to check in the download units loop.
        self._canceled = False

    def __call__(self):
        """
        Performs the sync operation according to the configured state of the
        instance. The report to be sent back to Pulp is returned from this
        call. This call will make calls into the conduit's progress update
        as appropriate.

        This call executes serially. No threads are created by this call. It
        will not return until either a step fails or the entire sync is
        completed.

        :return: the report object to return to Pulp from the sync call
        :rtype:  SyncProgressReport
        """
        _logger.info('Beginning sync for repository <%s>' % self.repo.id)

        # quit now if there is no feed URL defined
        if not self.config.get(constants.CONFIG_FEED):
            self.progress_report.metadata_state = STATE_FAILED
            self.progress_report.metadata_error_message = _(
                'Cannot perform repository sync on a repository with no feed')
            self.progress_report.update_progress()
            return self.progress_report.build_final_report()

        try:
            metadata = self._parse_metadata()
            if not metadata:
                report = self.progress_report.build_final_report()
                return report

            self._import_modules(metadata)
        finally:
            # One final progress update before finishing
            self.progress_report.update_progress()

            return self.progress_report

    def cancel(self):
        """
        Cancel an in-progress sync, if there is one.
        """
        self._canceled = True
        if self.downloader is None:
            return
        self.downloader.cancel()

    def _parse_metadata(self):
        """
        Takes the necessary actions (according to the run configuration) to
        retrieve and parse the repository's metadata. This call will return
        either the successfully parsed metadata or None if it could not
        be retrieved or parsed. The progress report will be updated with the
        appropriate description of what went wrong in the event of an error,
        so the caller should interpret a None return as an error occurring and
        not continue the sync.

        :return: object representation of the metadata
        :rtype:  RepositoryMetadata
        """
        _logger.info('Beginning metadata retrieval for repository <%s>' % self.repo.id)

        self.progress_report.metadata_state = STATE_RUNNING
        self.progress_report.update_progress()

        start_time = datetime.now()

        # Retrieve the metadata from the source
        try:
            downloader = self._create_downloader()
            self.downloader = downloader
            metadata_json_docs = downloader.retrieve_metadata(self.progress_report)

        except Exception as e:
            if self._canceled:
                _logger.warn('Exception occurred on canceled metadata download: %s' % e)
                self.progress_report.metadata_state = STATE_CANCELED
                return None
            _logger.exception('Exception while retrieving metadata for repository <%s>' % self.repo.id)
            self.progress_report.metadata_state = STATE_FAILED
            self.progress_report.metadata_error_message = _('Error downloading metadata')
            self.progress_report.metadata_exception = e
            self.progress_report.metadata_traceback = sys.exc_info()[2]

            end_time = datetime.now()
            duration = end_time - start_time
            self.progress_report.metadata_execution_time = duration.seconds

            self.progress_report.update_progress()

            return None

        finally:
            self.downloader = None
Example #19
class SynchronizeWithPuppetForge(object):
    """
    Used to perform a single sync of a puppet repository. This class will
    maintain state relevant to the run and should not be reused across runs.
    """

    def __init__(self, repo, sync_conduit, config):
        self.repo = repo
        self.sync_conduit = sync_conduit
        self.config = config

        self.progress_report = SyncProgressReport(sync_conduit)
        self.downloader = None
        # Since SynchronizeWithPuppetForge creates a Nectar downloader for each unit, we cannot
        # rely on telling the current downloader to cancel. Therefore, we need another state
        # tracker to check in the download units loop.
        self._canceled = False

    def __call__(self):
        """
        Sync according to the configured state of the instance and return a report.

        This function will update progress as appropriate.

        This function executes serially, and does not create any threads. It will not return until
        either a step fails or the entire sync is complete.

        :return: the report object to return to Pulp from the sync call
        :rtype: SyncProgressReport
        """
        msg = _('Beginning sync for repository <%(repo_id)s>')
        msg_dict = {'repo_id': self.repo.id}
        _logger.info(msg, msg_dict)

        # quit now if there is no feed URL defined
        if not self.config.get(constants.CONFIG_FEED):
            self.progress_report.metadata_state = STATE_FAILED
            msg = _('Cannot perform repository sync on a repository with no feed')
            self.progress_report.metadata_error_message = msg
            self.progress_report.update_progress()
            return self.progress_report.build_final_report()

        try:
            metadata = self._parse_metadata()
            if not metadata:
                report = self.progress_report.build_final_report()
                return report

            self._import_modules(metadata)
        finally:
            # One final progress update before finishing
            self.progress_report.update_progress()

            return self.progress_report

    def cancel(self):
        """
        Cancel an in-progress sync, if there is one.
        """
        self._canceled = True
        if self.downloader is None:
            return
        self.downloader.cancel()

    def _parse_metadata(self):
        """
        Takes the necessary actions (according to the run configuration) to
        retrieve and parse the repository's metadata. This call will return
        either the successfully parsed metadata or None if it could not
        be retrieved or parsed. The progress report will be updated with the
        appropriate description of what went wrong in the event of an error,
        so the caller should interpret a None return as an error occurring and
        not continue the sync.

        :return: object representation of the metadata
        :rtype:  RepositoryMetadata
        """
        msg = _('Beginning metadata retrieval for repository <%(repo_id)s>')
        msg_dict = {'repo_id': self.repo.id}
        _logger.info(msg, msg_dict)

        self.progress_report.metadata_state = STATE_RUNNING
        self.progress_report.update_progress()

        start_time = datetime.now()

        # Retrieve the metadata from the source
        try:
            downloader = self._create_downloader()
            self.downloader = downloader
            metadata_json_docs = downloader.retrieve_metadata(self.progress_report)

        except Exception as e:
            if self._canceled:
                msg = _('Exception occurred on canceled metadata download: %(exc)s')
                msg_dict = {'exc': e}
                _logger.warn(msg, msg_dict)
                self.progress_report.metadata_state = STATE_CANCELED
                return None
            msg = _('Exception while retrieving metadata for repository <%(repo_id)s>')
            msg_dict = {'repo_id': self.repo.id}
            _logger.exception(msg, msg_dict)
            self.progress_report.metadata_state = STATE_FAILED
            self.progress_report.metadata_error_message = _('Error downloading metadata')
            self.progress_report.metadata_exception = e
            self.progress_report.metadata_traceback = sys.exc_info()[2]

            end_time = datetime.now()
            duration = end_time - start_time
            self.progress_report.metadata_execution_time = duration.seconds

            self.progress_report.update_progress()

            return None

        finally:
            self.downloader = None

        # Parse the retrieved metadata documents
        try:
            metadata = RepositoryMetadata()
            for doc in metadata_json_docs:
                metadata.update_from_json(doc)
        except Exception as e:
            msg = _('Exception parsing metadata for repository <%(repo_id)s>')
            msg_dict = {'repo_id': self.repo.id}
            _logger.exception(msg, msg_dict)
            self.progress_report.metadata_state = STATE_FAILED
            msg = _("Error parsing repository modules metadata document")
            self.progress_report.metadata_error_message = msg
            self.progress_report.metadata_exception = e
            self.progress_report.metadata_traceback = sys.exc_info()[2]

            end_time = datetime.now()
            duration = end_time - start_time
            self.progress_report.metadata_execution_time = duration.seconds

            self.progress_report.update_progress()

            return None

        # Last update to the progress report before returning
        self.progress_report.metadata_state = STATE_SUCCESS

        end_time = datetime.now()
        duration = end_time - start_time
        self.progress_report.metadata_execution_time = duration.seconds

        self.progress_report.update_progress()

        return metadata

    def _import_modules(self, metadata):
        """
        Imports each module in the repository into Pulp.

        This method is mostly just a wrapper on top of the actual logic
        of performing an import to set the stage for the progress report and
        more importantly catch any rogue exceptions that crop up.

        :param metadata: object representation of the repository metadata
               containing the modules to import
        :type  metadata: RepositoryMetadata
        """
        msg = _('Retrieving modules for repository <%(repo_id)s>')
        msg_dict = {'repo_id': self.repo.id}
        _logger.info(msg, msg_dict)

        self.progress_report.modules_state = STATE_RUNNING

        # Do not send the update about the state yet. The counts need to be
        # set later once we know how many are new, so to prevent a situation
        # where the report reflects running but does not have counts, wait
        # until they are populated before sending the update to Pulp.

        start_time = datetime.now()

        try:
            self._do_import_modules(metadata)
        except Exception as e:
            msg = _('Exception importing modules for repository <%(repo_id)s>')
            msg_dict = {'repo_id': self.repo.id}
            _logger.exception(msg, msg_dict)
            self.progress_report.modules_state = STATE_FAILED
            self.progress_report.modules_error_message = _('Error retrieving modules')
            self.progress_report.modules_exception = e
            self.progress_report.modules_traceback = sys.exc_info()[2]

            end_time = datetime.now()
            duration = end_time - start_time
            self.progress_report.modules_execution_time = duration.seconds

            self.progress_report.update_progress()

            return

        # Last update to the progress report before returning
        self.progress_report.modules_state = STATE_SUCCESS

        end_time = datetime.now()
        duration = end_time - start_time
        self.progress_report.modules_execution_time = duration.seconds

        self.progress_report.update_progress()

    def _do_import_modules(self, metadata):
        """
        Actual logic of the import. This method will do a best effort per module;
        if an individual module fails it will be recorded and the import will
        continue. This method will only raise an exception in an extreme case
        where it cannot react and continue.
        """
        downloader = self._create_downloader()
        self.downloader = downloader

        # Ease module lookup
        metadata_modules_by_key = dict([(m.unit_key_as_named_tuple, m) for m in metadata.modules])

        # Collect information about the repository's modules before changing it
        existing_module_ids_by_key = {}
        modules = repo_controller.find_repo_content_units(
            self.repo.repo_obj, unit_fields=Module.unit_key_fields, yield_content_unit=True)

        for module in modules:
            existing_module_ids_by_key[module.unit_key_as_named_tuple] = module.id

        new_unit_keys = self._resolve_new_units(existing_module_ids_by_key.keys(),
                                                metadata_modules_by_key.keys())

        # Once we know how many things need to be processed, we can update the progress report
        self.progress_report.modules_total_count = len(new_unit_keys)
        self.progress_report.modules_finished_count = 0
        self.progress_report.modules_error_count = 0
        self.progress_report.update_progress()

        # Add new units
        for key in new_unit_keys:
            if self._canceled:
                break
            module = metadata_modules_by_key[key]
            try:
                self._add_new_module(downloader, module)
                self.progress_report.modules_finished_count += 1
            except Exception as e:
                self.progress_report.add_failed_module(module, e, sys.exc_info()[2])

            self.progress_report.update_progress()

        # Remove missing units if the configuration indicates to do so
        if self._should_remove_missing():
            remove_unit_keys = self._resolve_remove_units(existing_module_ids_by_key.keys(),
                                                          metadata_modules_by_key.keys())
            doomed_ids = [existing_module_ids_by_key[key] for key in remove_unit_keys]
            doomed_module_iterator = Module.objects.in_bulk(doomed_ids).itervalues()
            repo_controller.disassociate_units(self.repo.repo_obj, doomed_module_iterator)

        self.downloader = None

    def _add_new_module(self, downloader, module):
        """
        Performs the tasks for downloading and saving a new unit in Pulp.

        This method entirely skips modules that are already in the repository.

        :param downloader: downloader instance to use for retrieving the unit
        :type downloader: child of pulp_puppet.plugins.importers.downloaders.base.BaseDownloader

        :param module: module to download and add
        :type  module: pulp_puppet.plugins.db.models.Module
        """
        try:
            # Download the bits
            downloaded_filename = downloader.retrieve_module(self.progress_report, module)

            # Extract the extra metadata into the module
            metadata = metadata_module.extract_metadata(downloaded_filename,
                                                        self.repo.working_dir)

            # Overwrite the author and name
            metadata.update(Module.split_filename(metadata['name']))

            # Create and save the Module
            module = Module.from_metadata(metadata)
            module.set_storage_path(os.path.basename(downloaded_filename))
            try:
                module.save_and_import_content(downloaded_filename)
            except NotUniqueError:
                module = module.__class__.objects.get(**module.unit_key)
            # Associate the module with the repo
            repo_controller.associate_single_unit(self.repo.repo_obj, module)
        finally:
            downloader.cleanup_module(module)

    def _resolve_new_units(self, existing, wanted):
        """
        Decide what units are needed to be downloaded.

        Filter out units which are already in a repository,
        associate units which are already downloaded,

        :param existing: units which are already in a repository
        :type existing: list of unit keys as namedtuples
        :param wanted: units which should be imported into a repository
        :type wanted: list of unit keys as namedtuples

        :return: list of unit keys to download; empty list if all units are already downloaded
        :rtype:  list of unit keys as namedtuples
        """
        model = plugin_api.get_unit_model_by_id(constants.TYPE_PUPPET_MODULE)
        unit_generator = (model(**unit_tuple._asdict()) for unit_tuple in wanted)
        still_wanted = set(wanted)
        for unit in units_controller.find_units(unit_generator):
            file_exists = unit._storage_path is not None and os.path.isfile(unit._storage_path)
            if file_exists:
                if unit.unit_key_as_named_tuple not in existing:
                    repo_controller.associate_single_unit(self.repo.repo_obj, unit)
                still_wanted.discard(unit.unit_key_as_named_tuple)

        return list(still_wanted)

    def _resolve_remove_units(self, existing_unit_keys, metadata_unit_keys):
        """
        Returns a list of unit keys that are in the repository but not in
        the current repository metadata.

        :return: list of unit keys; empty list if none have been removed
        :rtype:  list
        """
        return list(set(existing_unit_keys) - set(metadata_unit_keys))

    def _create_downloader(self):
        """
        Uses the configuration to determine which downloader style to use
        for this run.

        :return: one of the *Downloader classes in the downloaders module
        """

        feed = self.config.get(constants.CONFIG_FEED)
        return downloader_factory.get_downloader(feed, self.repo, self.sync_conduit, self.config)

    def _should_remove_missing(self):
        """
        Returns whether or not missing units should be removed.

        :return: true if missing units should be removed; false otherwise
        :rtype:  bool
        """

        if constants.CONFIG_REMOVE_MISSING not in self.config.keys():
            return constants.DEFAULT_REMOVE_MISSING
        else:
            return self.config.get_boolean(constants.CONFIG_REMOVE_MISSING)
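
`_resolve_new_units` and `_resolve_remove_units` reduce unit reconciliation to set differences over unit keys (the real code uses unit-key namedtuples; plain strings stand in for them here). A self-contained illustration with hypothetical keys:

existing = ['puppetlabs/stdlib', 'puppetlabs/apache']  # already in the repository
wanted = ['puppetlabs/stdlib', 'puppetlabs/nginx']     # listed in the fetched metadata

new_units = set(wanted) - set(existing)    # {'puppetlabs/nginx'}: to be downloaded
removed = set(existing) - set(wanted)      # {'puppetlabs/apache'}: removal candidates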
Example #20
class PuppetModuleSyncRun(object):
    """
    Used to perform a single sync of a puppet repository. This class will
    maintain state relevant to the run and should not be reused across runs.
    """

    def __init__(self, repo, sync_conduit, config, is_cancelled_call):
        self.repo = repo
        self.sync_conduit = sync_conduit
        self.config = config
        self.is_cancelled_call = is_cancelled_call

        self.progress_report = SyncProgressReport(sync_conduit)

    def perform_sync(self):
        """
        Performs the sync operation according to the configured state of the
        instance. The report to be sent back to Pulp is returned from this
        call. This call will make calls into the conduit's progress update
        as appropriate.

        This call executes serially. No threads are created by this call. It
        will not return until either a step fails or the entire sync is
        completed.

        :return: the report object to return to Pulp from the sync call
        :rtype:  pulp.plugins.model.SyncReport
        """
        _LOG.info('Beginning sync for repository <%s>' % self.repo.id)

        try:
            metadata = self._parse_metadata()
            if not metadata:
                report = self.progress_report.build_final_report()
                return report

            self._import_modules(metadata)
        finally:
            # One final progress update before finishing
            self.progress_report.update_progress()

            report = self.progress_report.build_final_report()
            return report

    def _parse_metadata(self):
        """
        Takes the necessary actions (according to the run configuration) to
        retrieve and parse the repository's metadata. This call will return
        either the successfully parsed metadata or None if it could not
        be retrieved or parsed. The progress report will be updated with the
        appropriate description of what went wrong in the event of an error,
        so the caller should interpret a None return as an error occurring and
        not continue the sync.

        :return: object representation of the metadata
        :rtype:  RepositoryMetadata
        """
        _LOG.info('Beginning metadata retrieval for repository <%s>' % self.repo.id)

        self.progress_report.metadata_state = STATE_RUNNING
        self.progress_report.update_progress()

        start_time = datetime.now()

        # Retrieve the metadata from the source
        try:
            downloader = self._create_downloader()
            metadata_json_docs = downloader.retrieve_metadata(self.progress_report)
        except Exception as e:
            _LOG.exception('Exception while retrieving metadata for repository <%s>' % self.repo.id)
            self.progress_report.metadata_state = STATE_FAILED
            self.progress_report.metadata_error_message = _('Error downloading metadata')
            self.progress_report.metadata_exception = e
            self.progress_report.metadata_traceback = sys.exc_info()[2]

            end_time = datetime.now()
            duration = end_time - start_time
            self.progress_report.metadata_execution_time = duration.seconds

            self.progress_report.update_progress()

            return None

        # Parse the retrieved metadata documents
        try:
            metadata = RepositoryMetadata()
            for doc in metadata_json_docs:
                metadata.update_from_json(doc)
        except Exception as e:
            _LOG.exception('Exception parsing metadata for repository <%s>' % self.repo.id)
            self.progress_report.metadata_state = STATE_FAILED
            self.progress_report.metadata_error_message = _('Error parsing repository modules metadata document')
            self.progress_report.metadata_exception = e
            self.progress_report.metadata_traceback = sys.exc_info()[2]

            end_time = datetime.now()
            duration = end_time - start_time
            self.progress_report.metadata_execution_time = duration.seconds

            self.progress_report.update_progress()

            return None
Example #21
class SynchronizeWithPuppetForge(object):
    """
    Used to perform a single sync of a puppet repository. This class will
    maintain state relevant to the run and should not be reused across runs.
    """
    def __init__(self, repo, sync_conduit, config):
        self.repo = repo
        self.sync_conduit = sync_conduit
        self.config = config

        self.progress_report = SyncProgressReport(sync_conduit)
        self.downloader = None
        # Since SynchronizeWithPuppetForge creates a Nectar downloader for each unit, we cannot
        # rely on telling the current downloader to cancel. Therefore, we need another state
        # tracker to check in the download units loop.
        self._canceled = False

    def __call__(self):
        """
        Performs the sync operation according to the configured state of the
        instance. The report to be sent back to Pulp is returned from this
        call. This call will make calls into the conduit's progress update
        as appropriate.

        This call executes serially. No threads are created by this call. It
        will not return until either a step fails or the entire sync is
        completed.

        :return: the report object to return to Pulp from the sync call
        :rtype:  SyncProgressReport
        """
        _logger.info('Beginning sync for repository <%s>' % self.repo.id)

        # quit now if there is no feed URL defined
        if not self.config.get(constants.CONFIG_FEED):
            self.progress_report.metadata_state = STATE_FAILED
            self.progress_report.metadata_error_message = _(
                'Cannot perform repository sync on a repository with no feed')
            self.progress_report.update_progress()
            return self.progress_report.build_final_report()

        try:
            metadata = self._parse_metadata()
            if not metadata:
                report = self.progress_report.build_final_report()
                return report

            self._import_modules(metadata)
        finally:
            # One final progress update before finishing
            self.progress_report.update_progress()

            return self.progress_report

    def cancel(self):
        """
        Cancel an in-progress sync, if there is one.
        """
        self._canceled = True
        if self.downloader is None:
            return
        self.downloader.cancel()

    def _parse_metadata(self):
        """
        Takes the necessary actions (according to the run configuration) to
        retrieve and parse the repository's metadata. This call will return
        either the successfully parsed metadata or None if it could not
        be retrieved or parsed. The progress report will be updated with the
        appropriate description of what went wrong in the event of an error,
        so the caller should interpret a None return as an error occurring and
        not continue the sync.

        :return: object representation of the metadata
        :rtype:  RepositoryMetadata
        """
        _logger.info('Beginning metadata retrieval for repository <%s>' %
                     self.repo.id)

        self.progress_report.metadata_state = STATE_RUNNING
        self.progress_report.update_progress()

        start_time = datetime.now()

        # Retrieve the metadata from the source
        try:
            downloader = self._create_downloader()
            self.downloader = downloader
            metadata_json_docs = downloader.retrieve_metadata(
                self.progress_report)

        except Exception as e:
            if self._canceled:
                _logger.warn(
                    'Exception occurred on canceled metadata download: %s' % e)
                self.progress_report.metadata_state = STATE_CANCELED
                return None
            _logger.exception(
                'Exception while retrieving metadata for repository <%s>' %
                self.repo.id)
            self.progress_report.metadata_state = STATE_FAILED
            self.progress_report.metadata_error_message = _(
                'Error downloading metadata')
            self.progress_report.metadata_exception = e
            self.progress_report.metadata_traceback = sys.exc_info()[2]

            end_time = datetime.now()
            duration = end_time - start_time
            self.progress_report.metadata_execution_time = duration.seconds

            self.progress_report.update_progress()

            return None

        finally:
            self.downloader = None
Example #22
    def setUp(self):
        super(PuppetStatusRendererTests, self).setUp()
        self.renderer = PuppetStatusRenderer(self.context)

        self.sync_report = SyncProgressReport.from_progress_dict(IMPORTER_REPORT)
        self.publish_report = PublishProgressReport.from_progress_dict(DISTRIBUTOR_REPORT)
Example #23
class SynchronizeWithPuppetForge(object):
    """
    Used to perform a single sync of a puppet repository. This class will
    maintain state relevant to the run and should not be reused across runs.
    """
    def __init__(self, repo, sync_conduit, config):
        self.repo = repo
        self.sync_conduit = sync_conduit
        self.config = config

        self.progress_report = SyncProgressReport(sync_conduit)
        self.downloader = None
        # Since SynchronizeWithPuppetForge creates a Nectar downloader for each unit, we cannot
        # rely on telling the current downloader to cancel. Therefore, we need another state
        # tracker to check in the download units loop.
        self._canceled = False

    def __call__(self):
        """
        Sync according to the configured state of the instance and return a report.

        This function will update progress as appropriate.

        This function executes serially, and does not create any threads. It will not return until
        either a step fails or the entire sync is complete.

        :return: the report object to return to Pulp from the sync call
        :rtype: SyncProgressReport
        """
        msg = _('Beginning sync for repository <%(repo_id)s>')
        msg_dict = {'repo_id': self.repo.id}
        _logger.info(msg, msg_dict)

        # quit now if there is no feed URL defined
        if not self.config.get(constants.CONFIG_FEED):
            self.progress_report.metadata_state = STATE_FAILED
            msg = _(
                'Cannot perform repository sync on a repository with no feed')
            self.progress_report.metadata_error_message = msg
            self.progress_report.update_progress()
            return self.progress_report.build_final_report()

        try:
            metadata = self._parse_metadata()
            if not metadata:
                report = self.progress_report.build_final_report()
                return report

            self._import_modules(metadata)
        finally:
            # One final progress update before finishing
            self.progress_report.update_progress()

            return self.progress_report

    def cancel(self):
        """
        Cancel an in-progress sync, if there is one.
        """
        self._canceled = True
        if self.downloader is None:
            return
        self.downloader.cancel()

    def _parse_metadata(self):
        """
        Takes the necessary actions (according to the run configuration) to
        retrieve and parse the repository's metadata. This call will return
        either the successfully parsed metadata or None if it could not
        be retrieved or parsed. The progress report will be updated with the
        appropriate description of what went wrong in the event of an error,
        so the caller should interpret a None return as an error occurring and
        not continue the sync.

        :return: object representation of the metadata
        :rtype:  RepositoryMetadata
        """
        msg = _('Beginning metadata retrieval for repository <%(repo_id)s>')
        msg_dict = {'repo_id': self.repo.id}
        _logger.info(msg, msg_dict)

        self.progress_report.metadata_state = STATE_RUNNING
        self.progress_report.update_progress()

        start_time = datetime.now()

        # Retrieve the metadata from the source
        try:
            downloader = self._create_downloader()
            self.downloader = downloader
            metadata_json_docs = downloader.retrieve_metadata(
                self.progress_report)

        except Exception as e:
            if self._canceled:
                msg = _(
                    'Exception occurred on canceled metadata download: %(exc)s'
                )
                msg_dict = {'exc': e}
                _logger.warn(msg, msg_dict)
                self.progress_report.metadata_state = STATE_CANCELED
                return None
            msg = _(
                'Exception while retrieving metadata for repository <%(repo_id)s>'
            )
            msg_dict = {'repo_id': self.repo.id}
            _logger.exception(msg, msg_dict)
            self.progress_report.metadata_state = STATE_FAILED
            self.progress_report.metadata_error_message = _(
                'Error downloading metadata')
            self.progress_report.metadata_exception = e
            self.progress_report.metadata_traceback = sys.exc_info()[2]

            end_time = datetime.now()
            duration = end_time - start_time
            self.progress_report.metadata_execution_time = duration.seconds

            self.progress_report.update_progress()

            return None

        finally:
            self.downloader = None

        # Parse the retrieved metadata documents
        try:
            metadata = RepositoryMetadata()
            for doc in metadata_json_docs:
                metadata.update_from_json(doc)
        except Exception as e:
            msg = _('Exception parsing metadata for repository <%(repo_id)s>')
            msg_dict = {'repo_id': self.repo.id}
            _logger.exception(msg, msg_dict)
            self.progress_report.metadata_state = STATE_FAILED
            self.progress_report.metadata_error_message = _(
                'Error parsing repository modules metadata document')
            self.progress_report.metadata_exception = e
            self.progress_report.metadata_traceback = sys.exc_info()[2]

            end_time = datetime.now()
            duration = end_time - start_time
            self.progress_report.metadata_execution_time = duration.seconds

            self.progress_report.update_progress()

            return None

        # Last update to the progress report before returning
        self.progress_report.metadata_state = STATE_SUCCESS

        end_time = datetime.now()
        duration = end_time - start_time
        self.progress_report.metadata_execution_time = duration.seconds

        self.progress_report.update_progress()

        return metadata

    def _import_modules(self, metadata):
        """
        Imports each module in the repository into Pulp.

        This method is mostly just a wrapper on top of the actual logic
        of performing an import to set the stage for the progress report and
        more importantly catch any rogue exceptions that crop up.

        :param metadata: object representation of the repository metadata
               containing the modules to import
        :type  metadata: RepositoryMetadata
        """
        msg = _('Retrieving modules for repository <%(repo_id)s>')
        msg_dict = {'repo_id': self.repo.id}
        _logger.info(msg, msg_dict)

        self.progress_report.modules_state = STATE_RUNNING

        # Do not send the update about the state yet. The counts need to be
        # set later once we know how many are new, so to prevent a situation
        # where the report reflects running but does not have counts, wait
        # until they are populated before sending the update to Pulp.

        start_time = datetime.now()

        try:
            self._do_import_modules(metadata)
        except Exception as e:
            msg = _('Exception importing modules for repository <%(repo_id)s>')
            msg_dict = {'repo_id': self.repo.id}
            _logger.exception(msg, msg_dict)
            self.progress_report.modules_state = STATE_FAILED
            self.progress_report.modules_error_message = _(
                'Error retrieving modules')
            self.progress_report.modules_exception = e
            self.progress_report.modules_traceback = sys.exc_info()[2]

            end_time = datetime.now()
            duration = end_time - start_time
            self.progress_report.modules_execution_time = duration.seconds

            self.progress_report.update_progress()

            return

        # Last update to the progress report before returning
        self.progress_report.modules_state = STATE_SUCCESS

        end_time = datetime.now()
        duration = end_time - start_time
        self.progress_report.modules_execution_time = duration.seconds

        self.progress_report.update_progress()

    def _do_import_modules(self, metadata):
        """
        Actual logic of the import. This method will do a best effort per module;
        if an individual module fails it will be recorded and the import will
        continue. This method will only raise an exception in an extreme case
        where it cannot react and continue.
        """
        downloader = self._create_downloader()
        self.downloader = downloader

        # Ease module lookup
        metadata_modules_by_key = dict([(m.unit_key_str, m)
                                        for m in metadata.modules])

        # Collect information about the repository's modules before changing it
        existing_module_ids_by_key = {}
        modules = repo_controller.find_repo_content_units(
            self.repo.repo_obj,
            unit_fields=Module.unit_key_fields,
            yield_content_unit=True)
        for module in modules:
            existing_module_ids_by_key[module.unit_key_str] = module.id

        new_unit_keys = self._resolve_new_units(
            existing_module_ids_by_key.keys(), metadata_modules_by_key.keys())

        # Once we know how many things need to be processed, we can update the progress report
        self.progress_report.modules_total_count = len(new_unit_keys)
        self.progress_report.modules_finished_count = 0
        self.progress_report.modules_error_count = 0
        self.progress_report.update_progress()

        # Add new units
        for key in new_unit_keys:
            if self._canceled:
                break
            module = metadata_modules_by_key[key]
            try:
                self._add_new_module(downloader, module)
                self.progress_report.modules_finished_count += 1
            except Exception as e:
                self.progress_report.add_failed_module(module, e,
                                                       sys.exc_info()[2])

            self.progress_report.update_progress()

        # Remove missing units if the configuration indicates to do so
        if self._should_remove_missing():
            remove_unit_keys = self._resolve_remove_units(
                existing_module_ids_by_key.keys(),
                metadata_modules_by_key.keys())
            doomed_ids = [
                existing_module_ids_by_key[key] for key in remove_unit_keys
            ]
            doomed_module_iterator = Module.objects.in_bulk(
                doomed_ids).itervalues()
            repo_controller.disassociate_units(self.repo,
                                               doomed_module_iterator)

        self.downloader = None

    def _add_new_module(self, downloader, module):
        """
        Performs the tasks for downloading and saving a new unit in Pulp.

        This method entirely skips modules that are already in the repository.

        :param downloader: downloader instance to use for retrieving the unit
        :type downloader: child of pulp_puppet.plugins.importers.downloaders.base.BaseDownloader

        :param module: module to download and add
        :type  module: pulp_puppet.plugins.db.models.Module
        """
        try:
            # Download the bits
            downloaded_filename = downloader.retrieve_module(
                self.progress_report, module)

            # Extract the extra metadata into the module
            metadata = metadata_module.extract_metadata(
                downloaded_filename, self.repo.working_dir)

            # Overwrite the author and name
            metadata.update(Module.split_filename(metadata['name']))

            # Create and save the Module
            module = Module.from_metadata(metadata)
            module.set_storage_path(os.path.basename(downloaded_filename))
            try:
                module.save_and_import_content(downloaded_filename)
            except NotUniqueError:
                module = module.__class__.objects.get(**module.unit_key)
            # Associate the module with the repo
            repo_controller.associate_single_unit(self.repo.repo_obj, module)
        finally:
            downloader.cleanup_module(module)

    def _resolve_new_units(self, existing_unit_keys, metadata_unit_keys):
        """
        Returns a list of metadata keys that are new to the repository.

        :return: list of unit keys; empty list if none are new
        :rtype:  list
        """
        return list(set(metadata_unit_keys) - set(existing_unit_keys))

    def _resolve_remove_units(self, existing_unit_keys, metadata_unit_keys):
        """
        Returns a list of unit keys that are in the repository but not in
        the current repository metadata.

        :return: list of unit keys; empty list if none have been removed
        :rtype:  list
        """
        return list(set(existing_unit_keys) - set(metadata_unit_keys))

    def _create_downloader(self):
        """
        Uses the configuration to determine which downloader style to use
        for this run.

        :return: one of the *Downloader classes in the downloaders module
        """

        feed = self.config.get(constants.CONFIG_FEED)
        return downloader_factory.get_downloader(feed, self.repo,
                                                 self.sync_conduit,
                                                 self.config)

    def _should_remove_missing(self):
        """
        Returns whether or not missing units should be removed.

        :return: true if missing units should be removed; false otherwise
        :rtype:  bool
        """

        if constants.CONFIG_REMOVE_MISSING not in self.config.keys():
            return constants.DEFAULT_REMOVE_MISSING
        else:
            return self.config.get_boolean(constants.CONFIG_REMOVE_MISSING)
Example #24
class SynchronizeWithDirectory(object):
    """
    A callable object used to synchronize with a directory of packaged puppet modules.
    The source of the import is a directory containing a PULP_MANIFEST and
    multiple built puppet modules.

    :ivar conduit: Provides access to relevant Pulp functionality.
    :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
    :ivar config: Plugin configuration.
    :type config: pulp.plugins.config.PluginCallConfiguration
    :ivar report: An import report.
    :type report: SyncProgressReport
    :ivar canceled: The operation canceled flag.
    :type canceled: bool
    :ivar tmp_dir: The path to the temporary directory used to download files.
    :type tmp_dir: str
    """
    @staticmethod
    def _extract_metadata(module_path):
        """
        Extract the puppet module metadata from the tarball at the specified path.
        Search the tarball content for a file named */metadata.json and extract
        it into a temporary directory, then read the file and return the JSON-decoded content.

        :param module_path: The fully qualified path to the module.
        :type module_path: str
        :return: The puppet module metadata.
        :rtype: dict
        """
        tmp_dir = mkdtemp(dir=os.path.dirname(module_path))
        try:
            with closing(tarfile.open(module_path)) as tarball:
                for member in tarball.getmembers():
                    path = member.name.split('/')
                    if path[-1] == constants.MODULE_METADATA_FILENAME:
                        tarball.extract(member, tmp_dir)
                        with open(os.path.join(tmp_dir, member.name)) as fp:
                            return json.load(fp)
        finally:
            shutil.rmtree(tmp_dir)

    def __init__(self, conduit, config):
        """
        :param conduit: Provides access to relevant Pulp functionality.
        :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: Plugin configuration.
        :type config: pulp.plugins.config.PluginCallConfiguration
        """
        self.conduit = conduit
        self.config = config
        self.report = None
        self.canceled = False
        self.tmp_dir = None

    def feed_url(self):
        """
        Get the feed URL from the configuration and ensure it has a
        trailing '/' so urljoin will work correctly.
        :return: The feed URL.
        :rtype: str
        """
        url = self.config.get(constants.CONFIG_FEED)
        if not url.endswith('/'):
            url += '/'
        return url
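
    # Why the trailing '/' matters for urljoin (values are illustrative):
    #   urljoin('http://host/repo', 'PULP_MANIFEST')  -> 'http://host/PULP_MANIFEST'
    #   urljoin('http://host/repo/', 'PULP_MANIFEST') -> 'http://host/repo/PULP_MANIFEST'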

    def cancel(self):
        """
        Cancel puppet module import.
        """
        self.canceled = True

    def _download(self, urls):
        """
        Download files by URL.
        Encapsulates nectar details and provides a simplified method
        of downloading files.

        :param urls: A list of tuples: (url, destination).  The *url* and
            *destination* are both strings.  The *destination* is the fully
            qualified path to where the file is to be downloaded.
        :type urls: list
        :return: The nectar reports.  Tuple of: (succeeded_reports, failed_reports)
        :rtype: tuple
        """
        feed_url = self.feed_url()
        nectar_config = importer_config_to_nectar_config(self.config.flatten())
        nectar_class = URL_TO_DOWNLOADER[urlparse(feed_url).scheme]
        downloader = nectar_class(nectar_config)
        listener = DownloadListener(self, downloader)

        request_list = []
        for url, destination in urls:
            request_list.append(DownloadRequest(url, destination))
        downloader.download(request_list)
        nectar_config.finalize()

        for report in listener.succeeded_reports:
            _logger.info(FETCH_SUCCEEDED %
                         dict(url=report.url, dst=report.destination))
        for report in listener.failed_reports:
            _logger.error(FETCH_FAILED %
                          dict(url=report.url, msg=report.error_msg))

        return listener.succeeded_reports, listener.failed_reports
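
    # Hedged usage sketch (URL and path are hypothetical):
    #   ok, failed = self._download([(url, '/tmp/module.tar.gz')])
    # Each entry pairs a source URL with a fully qualified destination path; the
    # two returned lists partition the per-file nectar reports.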

    def _fetch_manifest(self):
        """
        Fetch the PULP_MANIFEST.
        After the manifest is fetched, the file is parsed into a list of tuples.

        :return: The manifest content.  List of: (name,checksum,size).
        :rtype: list
        """
        started = time()

        # report progress: started
        self.report.metadata_state = constants.STATE_RUNNING
        self.report.metadata_query_total_count = 1
        self.report.metadata_query_finished_count = 0
        self.report.update_progress()

        # download manifest
        destination = StringIO()
        feed_url = self.feed_url()
        url = urljoin(feed_url, constants.MANIFEST_FILENAME)
        succeeded_reports, failed_reports = self._download([(url, destination)])

        # report download failed
        if failed_reports:
            report = failed_reports[0]
            self.report.metadata_state = constants.STATE_FAILED
            self.report.metadata_error_message = report.error_msg
            self.report.metadata_execution_time = time() - started
            return None

        # report download succeeded
        self.report.metadata_state = constants.STATE_SUCCESS
        self.report.metadata_query_finished_count = 1
        self.report.metadata_current_query = None
        self.report.metadata_execution_time = time() - started
        self.report.update_progress()

        # return parsed manifest
        entries = destination.getvalue().split('\n')
        manifest = [tuple(e.split(',')) for e in entries if e]
        return manifest

    def _fetch_modules(self, manifest):
        """
        Fetch all of the modules referenced in the manifest.

        :param manifest: A parsed PULP_MANIFEST. List of: (name,checksum,size).
        :type  manifest: list

        :return: A list of paths to the fetched module files.
        :rtype:  list
        """
        self.started_fetch_modules = time()

        # report progress: started
        self.report.modules_state = constants.STATE_RUNNING
        self.report.modules_total_count = len(manifest)
        self.report.modules_finished_count = 0
        self.report.modules_error_count = 0
        self.report.update_progress()

        # download modules
        urls = []
        feed_url = self.feed_url()
        for path, checksum, size in manifest:
            url = urljoin(feed_url, path)
            destination = os.path.join(self.tmp_dir, os.path.basename(path))
            urls.append((url, destination))
        succeeded_reports, failed_reports = self._download(urls)

        # report failed downloads
        if failed_reports:
            self.report.modules_state = constants.STATE_FAILED
            self.report.modules_error_count = len(failed_reports)
            self.report.modules_individual_errors = []

        for report in failed_reports:
            self.report.modules_individual_errors.append(report.error_msg)
        self.report.update_progress()

        return [r.destination for r in succeeded_reports]

    def _import_modules(self, module_paths):
        """
        Import the puppet modules (tarballs) at the specified paths. This will also remove
        any modules from the local repository that are no longer present in the remote
        repository when the 'remove_missing' config value is True.

        :param module_paths: A list of paths to puppet module files.
        :type module_paths: list
        """
        criteria = UnitAssociationCriteria(
            type_ids=[constants.TYPE_PUPPET_MODULE],
            unit_fields=Module.UNIT_KEY_NAMES)
        local_units = self.conduit.get_units(criteria=criteria)
        local_unit_keys = [unit.unit_key for unit in local_units]
        remote_unit_keys = []

        for module_path in module_paths:
            if self.canceled:
                return
            puppet_manifest = self._extract_metadata(module_path)
            module = Module.from_json(puppet_manifest)
            remote_unit_keys.append(module.unit_key())

            # Skip modules that are already in the repository. Decrementing the total
            # keeps the progress report from showing Pulp re-importing every module.
            if module.unit_key() in local_unit_keys:
                self.report.modules_total_count -= 1
                continue
            _logger.debug(IMPORT_MODULE % dict(mod=module_path))
            self._add_module(module_path, module)
            self.report.modules_finished_count += 1
            self.report.update_progress()

        # Write the report, making sure we don't overwrite a failure in _fetch_modules
        if self.report.modules_state not in constants.COMPLETE_STATES:
            self.report.modules_state = constants.STATE_SUCCESS
        self.report.modules_execution_time = time() - self.started_fetch_modules
        self.report.update_progress()

        remove_missing = self.config.get_boolean(
            constants.CONFIG_REMOVE_MISSING)
        if remove_missing is None:
            remove_missing = constants.DEFAULT_REMOVE_MISSING
        if remove_missing:
            self._remove_missing(local_units, remote_unit_keys)

    def _remove_missing(self, local_units, remote_unit_keys):
        """
        Removes units from the local repository if they are missing from the remote repository.

        :param local_units:         A list of units associated with the current repository
        :type  local_units:         list of AssociatedUnit
        :param remote_unit_keys:    a list of all the unit keys in the remote repository
        :type  remote_unit_keys:    list of dict
        """
        missing_units = [unit for unit in local_units
                         if unit.unit_key not in remote_unit_keys]
        for missing in missing_units:
            if self.canceled:
                return
            self.conduit.remove_unit(missing)

    def _add_module(self, path, module):
        """
        Add the specified module to Pulp using the conduit. This will both create the module
        and associate it to a repository. The module tarball is copied to the *storage path*
        only if it does not already exist at the *storage path*.

        :param path: The path to the downloaded module tarball.
        :type path: str
        :param module: A puppet module model object.
        :type module: Module
        """
        type_id = constants.TYPE_PUPPET_MODULE
        unit_key = module.unit_key()
        unit_metadata = module.unit_metadata()
        relative_path = constants.STORAGE_MODULE_RELATIVE_PATH % module.filename()
        unit = self.conduit.init_unit(type_id, unit_key, unit_metadata,
                                      relative_path)
        if not os.path.exists(unit.storage_path):
            shutil.copy(path, unit.storage_path)
        self.conduit.save_unit(unit)

    def __call__(self, repository):
        """
        Invoke the callable object.
        All work is performed in the repository working directory and
        cleaned up after the call.

        :param repository: A Pulp repository object.
        :type repository: pulp.server.plugins.model.Repository
        :return: The final synchronization report.
        :rtype: SyncProgressReport
        """
        self.canceled = False
        self.report = SyncProgressReport(self.conduit)
        self.tmp_dir = mkdtemp(dir=repository.working_dir)
        try:
            manifest = self._fetch_manifest()
            if manifest is not None:
                module_paths = self._fetch_modules(manifest)
                self._import_modules(module_paths)
        finally:
            # Update the progress report one last time
            self.report.update_progress()

            shutil.rmtree(self.tmp_dir)
            self.tmp_dir = None

        return self.report
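
A minimal, self-contained sketch (not part of the plugin; the manifest text below is invented) of how _fetch_manifest above turns the downloaded PULP_MANIFEST into (name, checksum, size) tuples:

from StringIO import StringIO

# In the plugin, the manifest text is written into this buffer by _download().
destination = StringIO()
destination.write('modA-1.0.0.tar.gz,abc123,1024\nmodB-2.1.0.tar.gz,def456,2048\n')

# One entry per non-empty line; each line is a comma-separated triple.
entries = destination.getvalue().split('\n')
manifest = [tuple(e.split(',')) for e in entries if e]
print manifest[0]  # ('modA-1.0.0.tar.gz', 'abc123', '1024')
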
Example No. 25
class SynchronizeWithDirectory(object):
    """
    A callable object used to synchronize with a directory of packaged puppet modules.
    The source of the import is a directory containing a PULP_MANIFEST and
    multiple built puppet modules.

    :ivar conduit: Provides access to relevant Pulp functionality.
    :type conduit: pulp.plugins.conduits.unit_import.ImportUnitConduit
    :ivar config: Plugin configuration.
    :type config: pulp.plugins.config.PluginCallConfiguration
    :ivar report: An import report.
    :type report: SyncProgressReport
    :ivar canceled: The operation canceled flag.
    :type canceled: bool
    :ivar tmp_dir: The path to the temporary directory used to download files.
    :type tmp_dir: str
    """

    @staticmethod
    def _extract_metadata(module_path):
        """
        Extract the puppet module metadata from the tarball at the specified path.
        Search the tarball content for a file named */metadata.json and extract
        it into a temporary directory.  Then read the file and return the JSON-decoded content.

        :param module_path: The fully qualified path to the module.
        :type module_path: str
        :return: The puppet module metadata.
        :rtype: dict
        """
        tmp_dir = mkdtemp(dir=os.path.dirname(module_path))
        try:
            with closing(tarfile.open(module_path)) as tarball:
                for member in tarball.getmembers():
                    path = member.name.split('/')
                    if path[-1] == constants.MODULE_METADATA_FILENAME:
                        tarball.extract(member, tmp_dir)
                        with open(os.path.join(tmp_dir, member.name)) as fp:
                            return json.load(fp)
        finally:
            shutil.rmtree(tmp_dir)

    def __init__(self, conduit, config):
        """
        :param conduit: Provides access to relevant Pulp functionality.
        :type conduit: pulp.plugins.conduits.unit_import.ImportUnitConduit
        :param config: Plugin configuration.
        :type config: pulp.plugins.config.PluginCallConfiguration
        """
        self.conduit = conduit
        self.config = config
        self.report = None
        self.canceled = False
        self.tmp_dir = None

    def feed_url(self):
        """
        Get the feed URL from the configuration and ensure it has a
        trailing '/' so urljoin will work correctly.
        :return: The feed URL.
        :rtype: str
        """
        url = self.config.get(constants.CONFIG_FEED)
        if not url.endswith('/'):
            url += '/'
        return url

    def cancel(self):
        """
        Cancel puppet module import.
        """
        self.canceled = True

    def _run(self, inventory):
        """
        Perform the synchronization using the supplied inventory.

        :param inventory: An inventory object.
        :type inventory: Inventory
        """
        manifest = self._fetch_manifest()
        if manifest is None:
            # fetch manifest failed
            return
        module_paths = self._fetch_modules(manifest)
        imported_modules = self._import_modules(inventory, module_paths)
        self._purge_unwanted_modules(inventory, imported_modules)

    def _download(self, urls):
        """
        Download files by URL.
        Encapsulates nectar details and provides a simplified method
        of downloading files.

        :param urls: A list of tuples: (url, destination).  The *url* and
            *destination* are both strings.  The *destination* is the fully
            qualified path to where the file is to be downloaded.
        :type urls: list
        :return: The nectar reports.  Tuple of: (succeeded_reports, failed_reports)
        :rtype: tuple
        """
        feed_url = self.feed_url()
        nectar_config = importer_config_to_nectar_config(self.config.flatten())
        nectar_class = URL_TO_DOWNLOADER[urlparse(feed_url).scheme]
        downloader = nectar_class(nectar_config)
        listener = DownloadListener(self, downloader)

        request_list = []
        for url, destination in urls:
            request_list.append(DownloadRequest(url, destination))
        downloader.download(request_list)
        nectar_config.finalize()

        for report in listener.succeeded_reports:
            _LOG.info(FETCH_SUCCEEDED % dict(url=report.url, dst=report.destination))
        for report in listener.failed_reports:
            _LOG.error(FETCH_FAILED % dict(url=report.url, msg=report.error_msg))

        return listener.succeeded_reports, listener.failed_reports

    def _fetch_manifest(self):
        """
        Fetch the PULP_MANIFEST.
        After the manifest is fetched, the file is parsed into a list of tuples.

        :return: The manifest content.  List of: (name,checksum,size).
        :rtype: list
        """
        started = time()

        # report progress: started
        self.report.metadata_state = constants.STATE_RUNNING
        self.report.metadata_query_total_count = 1
        self.report.metadata_query_finished_count = 0
        self.report.update_progress()

        # download manifest
        destination = StringIO()
        feed_url = self.feed_url()
        url = urljoin(feed_url, constants.MANIFEST_FILENAME)
        succeeded_reports, failed_reports = self._download([(url, destination)])

        # report download failed
        if failed_reports:
            report = failed_reports[0]
            self.report.metadata_state = constants.STATE_FAILED
            self.report.metadata_error_message = report.error_msg
            self.report.metadata_execution_time = time() - started
            return None

        # report download succeeded
        self.report.metadata_state = constants.STATE_SUCCESS
        self.report.metadata_query_finished_count = 1
        self.report.metadata_current_query = None
        self.report.metadata_execution_time = time() - started
        self.report.update_progress()

        # return parsed manifest
        entries = destination.getvalue().split('\n')
        manifest = [tuple(e.split(',')) for e in entries if e]
        return manifest

    def _fetch_modules(self, manifest):
        """
        Fetch all of the modules referenced in the manifest.

        :param manifest: A parsed PULP_MANIFEST. List of: (name,checksum,size).
        :type  manifest: list

        :return: A list of paths to the fetched module files.
        :rtype:  list
        """
        self.started_fetch_modules = time()

        # report progress: started
        self.report.modules_state = constants.STATE_RUNNING
        self.report.modules_total_count = len(manifest)
        self.report.modules_finished_count = 0
        self.report.modules_error_count = 0
        self.report.update_progress()

        # download modules
        urls = []
        feed_url = self.feed_url()
        for path, checksum, size in manifest:
            url = urljoin(feed_url, path)
            destination = os.path.join(self.tmp_dir, os.path.basename(path))
            urls.append((url, destination))
        succeeded_reports, failed_reports = self._download(urls)

        # report failed downloads
        if failed_reports:
            self.report.modules_state = constants.STATE_FAILED
            self.report.modules_error_count = len(failed_reports)
            self.report.modules_individual_errors = []

        for report in failed_reports:
            self.report.modules_individual_errors.append(report.error_msg)
        self.report.update_progress()

        return [r.destination for r in succeeded_reports]

    def _import_modules(self, inventory, module_paths):
        """
        Import the puppet modules (tarballs) at the specified paths.

        :param inventory: A module inventory object.
        :type inventory: Inventory
        :param module_paths: A list of paths to puppet module files.
        :type module_paths: list
        :return: A list of the imported module unit keys.
        :rtype: list
        """
        imported_modules = []
        for module_path in module_paths:
            if self.canceled:
                return []
            puppet_manifest = self._extract_metadata(module_path)
            module = Module.from_json(puppet_manifest)
            if inventory.already_associated(module):
                # Decrement the total number of modules we're importing
                self.report.modules_total_count -= 1
                continue
            _LOG.info(IMPORT_MODULE % dict(mod=module_path))
            imported_modules.append(module.unit_key())
            self._add_module(module_path, module)
            self.report.modules_finished_count += 1
            self.report.update_progress()

        # Write the report, making sure we don't overwrite a failure in _fetch_modules
        if self.report.modules_state != constants.STATE_FAILED:
            self.report.modules_state = constants.STATE_SUCCESS
        self.report.modules_execution_time = time() - self.started_fetch_modules
        self.report.update_progress()

        return imported_modules

    def _add_module(self, path, module):
        """
        Add the specified module to Pulp using the conduit.
        This will both create the module and associate it to a repository.
        The module tarball is copied to the *storage path* only if it does
        not already exist at the *storage path*.

        :param path: The path to the downloaded module tarball.
        :type path: str
        :param module: A puppet module model object.
        :type module: Module
        """
        type_id = constants.TYPE_PUPPET_MODULE
        unit_key = module.unit_key()
        unit_metadata = module.unit_metadata()
        relative_path = constants.STORAGE_MODULE_RELATIVE_PATH % module.filename()
        unit = self.conduit.init_unit(type_id, unit_key, unit_metadata, relative_path)
        if not os.path.exists(unit.storage_path):
            shutil.copy(path, unit.storage_path)
        self.conduit.save_unit(unit)

    def _purge_unwanted_modules(self, inventory, imported_modules):
        """
        Purge unwanted puppet modules.
        Unwanted modules are those associated with the repository but not
        imported during this operation.  Skipped when the configuration does
        not request it.  The inventory is used to determine which modules
        should be removed.

        :param inventory: A module inventory object.
        :type inventory: Inventory
        :param imported_modules: The unit keys of the modules imported during this operation.
        :type imported_modules: list
        """
        purge_option = self.config.get_boolean(constants.CONFIG_REMOVE_MISSING)
        if purge_option is None:
            purge_option = constants.DEFAULT_REMOVE_MISSING
        if not purge_option:
            # no purge requested
            return
        for unit_key in inventory.unwanted_modules(imported_modules):
            if self.canceled:
                return
            self.conduit.remove_unit(unit_key)

    def __call__(self, repository):
        """
        Invoke the callable object.
        All work is performed in the repository working directory and
        cleaned up after the call.

        :param repository: A Pulp repository object.
        :type repository: pulp.server.plugins.model.Repository
        :return: The final synchronization report.
        :rtype: SyncProgressReport
        """
        self.canceled = False
        self.report = SyncProgressReport(self.conduit)
        self.tmp_dir = mkdtemp(dir=repository.working_dir)
        try:
            inventory = Inventory(self.conduit)
            self._run(inventory)
        finally:
            # Update the progress report one last time
            self.report.update_progress()

            shutil.rmtree(self.tmp_dir)
            self.tmp_dir = None

        return self.report
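
An aside on why every feed_url() above appends a trailing '/': urljoin drops the final path segment of a base URL that does not end in '/'. A quick sketch with an invented feed URL:

from urlparse import urljoin  # Python 2, matching the code above

print urljoin('http://example.com/puppet/repo', 'PULP_MANIFEST')
# -> http://example.com/puppet/PULP_MANIFEST (the 'repo' segment is lost)
print urljoin('http://example.com/puppet/repo/', 'PULP_MANIFEST')
# -> http://example.com/puppet/repo/PULP_MANIFEST
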
Example No. 26
class SynchronizeWithDirectory(object):
    """
    A callable object used to synchronize with a directory of packaged puppet modules.
    The source of the import is a directory containing a PULP_MANIFEST and
    multiple built puppet modules.

    :ivar repo: A Pulp repository object
    :type repo: pulp.plugins.model.Repository
    :ivar conduit: Provides access to relevant Pulp functionality.
    :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
    :ivar config: Plugin configuration.
    :type config: pulp.plugins.config.PluginCallConfiguration
    :ivar report: An import report.
    :type report: SyncProgressReport
    :ivar canceled: The operation canceled flag.
    :type canceled: bool
    :ivar tmp_dir: The path to the temporary directory used to download files.
    :type tmp_dir: str
    """
    @staticmethod
    def _extract_metadata(module_path):
        """
        Extract the puppet module metadata from the tarball at the specified path.
        Search the tarball content for a file named */metadata.json and extract
        it into a temporary directory. Then read the file and return the JSON-decoded content.

        :param module_path: The fully qualified path to the module.
        :type module_path: str

        :return: The puppet module metadata.
        :rtype: dict
        """
        tmp_dir = mkdtemp(dir=os.path.dirname(module_path))
        try:
            with closing(tarfile.open(module_path)) as tarball:
                for member in tarball.getmembers():
                    path = member.name.split('/')
                    if path[-1] == constants.MODULE_METADATA_FILENAME:
                        tarball.extract(member, tmp_dir)
                        with open(os.path.join(tmp_dir, member.name)) as fp:
                            return json.load(fp)
        finally:
            shutil.rmtree(tmp_dir)

    def __init__(self, repo, conduit, config):
        """
        :param repo: A Pulp repository object
        :type repo: pulp.plugins.model.Repository
        :param conduit: Provides access to relevant Pulp functionality.
        :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: Plugin configuration.
        :type config: pulp.plugins.config.PluginCallConfiguration
        """
        self.repo = repo
        self.conduit = conduit
        self.config = config
        self.report = None
        self.canceled = False
        self.tmp_dir = None

    def feed_url(self):
        """
        Get the feed URL from the configuration and ensure it has a trailing '/' so urljoin will
        work correctly.

        :return: The feed URL.
        :rtype: str
        """
        url = self.config.get(constants.CONFIG_FEED)
        if not url.endswith('/'):
            url += '/'
        return url

    def cancel(self):
        """
        Cancel puppet module import.
        """
        self.canceled = True

    def _download(self, urls):
        """
        Download files by URL.

        Encapsulates nectar details and provides a simplified method of downloading files.

        :param urls: A list of tuples: (url, destination).  The *url* and *destination* are both
                     strings.  The *destination* is the fully qualified path to where the file is
                     to be downloaded.
        :type urls: list

        :return: The nectar reports.  Tuple of: (succeeded_reports, failed_reports)
        :rtype: tuple
        """
        feed_url = self.feed_url()
        nectar_config = importer_config_to_nectar_config(self.config.flatten())
        nectar_class = URL_TO_DOWNLOADER[urlparse(feed_url).scheme]
        downloader = nectar_class(nectar_config)
        listener = DownloadListener(self, downloader)

        request_list = []
        for url, destination in urls:
            request_list.append(DownloadRequest(url, destination))
        downloader.download(request_list)
        nectar_config.finalize()

        for report in listener.succeeded_reports:
            _logger.info(FETCH_SUCCEEDED,
                         dict(url=report.url, dst=report.destination))
        for report in listener.failed_reports:
            _logger.error(FETCH_FAILED,
                          dict(url=report.url, msg=report.error_msg))

        return listener.succeeded_reports, listener.failed_reports

    def _fetch_manifest(self):
        """
        Fetch the PULP_MANIFEST.

        After the manifest is fetched, the file is parsed into a list of tuples.

        :return: The manifest content.  List of: (name,checksum,size).
        :rtype: list
        """
        started = time()

        # report progress: started
        self.report.metadata_state = constants.STATE_RUNNING
        self.report.metadata_query_total_count = 1
        self.report.metadata_query_finished_count = 0
        self.report.update_progress()

        # download manifest
        destination = StringIO()
        feed_url = self.feed_url()
        url = urljoin(feed_url, constants.MANIFEST_FILENAME)
        succeeded_reports, failed_reports = self._download([(url, destination)])

        # report download failed
        if failed_reports:
            report = failed_reports[0]
            self.report.metadata_state = constants.STATE_FAILED
            self.report.metadata_error_message = report.error_msg
            self.report.metadata_execution_time = time() - started
            return None

        # report download succeeded
        self.report.metadata_state = constants.STATE_SUCCESS
        self.report.metadata_query_finished_count = 1
        self.report.metadata_current_query = None
        self.report.metadata_execution_time = time() - started
        self.report.update_progress()

        # return parsed manifest
        entries = destination.getvalue().split('\n')
        manifest = [tuple(e.split(',')) for e in entries if e]
        return manifest

    def _fetch_modules(self, manifest):
        """
        Fetch all of the modules referenced in the manifest.

        :param manifest: A parsed PULP_MANIFEST. List of: (name,checksum,size).
        :type manifest: list

        :return: A list of paths to the fetched module files.
        :rtype: list
        """
        self.started_fetch_modules = time()

        # report progress: started
        self.report.modules_state = constants.STATE_RUNNING
        self.report.modules_total_count = len(manifest)
        self.report.modules_finished_count = 0
        self.report.modules_error_count = 0
        self.report.update_progress()

        # download modules
        urls = []
        feed_url = self.feed_url()
        for path, checksum, size in manifest:
            url = urljoin(feed_url, path)
            destination = os.path.join(self.tmp_dir, os.path.basename(path))
            urls.append((url, destination))
        succeeded_reports, failed_reports = self._download(urls)

        # report failed downloads
        if failed_reports:
            self.report.modules_state = constants.STATE_FAILED
            self.report.modules_error_count = len(failed_reports)
            self.report.modules_individual_errors = []

        for report in failed_reports:
            self.report.modules_individual_errors.append(report.error_msg)
        self.report.update_progress()

        return [r.destination for r in succeeded_reports]

    def _import_modules(self, module_paths):
        """
        Import the puppet modules (tarballs) at the specified paths. This will also remove
        any modules from the local repository that are no longer present in the remote
        repository when the 'remove_missing' config value is True.

        :param module_paths: A list of paths to puppet module files.
        :type module_paths: list
        """
        existing_module_ids_by_key = {}
        for module in Module.objects.only(*Module.unit_key_fields).all():
            existing_module_ids_by_key[module.unit_key_str] = module.id

        remote_paths = {}

        list_of_modules = []
        for module_path in module_paths:
            puppet_manifest = self._extract_metadata(module_path)
            module = Module.from_metadata(puppet_manifest)
            remote_paths[module.unit_key_str] = module_path
            list_of_modules.append(module)

        pub_step = publish_step.GetLocalUnitsStep(
            constants.IMPORTER_TYPE_ID,
            available_units=list_of_modules,
            repo=self.repo)
        pub_step.process_main()
        self.report.modules_total_count = len(pub_step.units_to_download)

        for module in pub_step.units_to_download:
            remote_path = remote_paths[module.unit_key_str]
            if self.canceled:
                return
            _logger.debug(IMPORT_MODULE, dict(mod=remote_path))

            module.set_storage_path(os.path.basename(remote_path))
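            # If another task saved this module concurrently, the unique index
            # raises NotUniqueError; fall back to the document that already exists.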
            try:
                module.save_and_import_content(remote_path)
            except NotUniqueError:
                module = module.__class__.objects.get(**module.unit_key)

            repo_controller.associate_single_unit(self.repo.repo_obj, module)

            self.report.modules_finished_count += 1
            self.report.update_progress()

        # Write the report, making sure we don't overwrite a failure in _fetch_modules
        if self.report.modules_state not in constants.COMPLETE_STATES:
            self.report.modules_state = constants.STATE_SUCCESS
        self.report.modules_execution_time = time() - self.started_fetch_modules
        self.report.update_progress()

        remove_missing = self.config.get_boolean(
            constants.CONFIG_REMOVE_MISSING)
        if remove_missing is None:
            remove_missing = constants.DEFAULT_REMOVE_MISSING
        if remove_missing:
            self._remove_missing(existing_module_ids_by_key,
                                 remote_paths.keys())

    def _remove_missing(self, existing_module_ids_by_key, remote_unit_keys):
        """
        Removes units from the local repository if they are missing from the remote repository.

        :param existing_module_ids_by_key: A dict keyed on Module unit key associated with the
            current repository. The values are the mongoengine id of the corresponding Module.
        :type existing_module_ids_by_key: dict of Module.id values keyed on unit_key_str
        :param remote_unit_keys: A list of all the Module keys in the remote repository
        :type remote_unit_keys: list of strings
        """
        keys_to_remove = list(set(existing_module_ids_by_key.keys()) - set(remote_unit_keys))
        doomed_ids = [existing_module_ids_by_key[key] for key in keys_to_remove]
        doomed_module_iterator = Module.objects.in_bulk(doomed_ids).itervalues()
        repo_controller.disassociate_units(self.repo, doomed_module_iterator)

    def __call__(self):
        """
        Invoke the callable object.

        All work is performed in the repository working directory and cleaned up after the call.

        :return: The final synchronization report.
        :rtype: SyncProgressReport
        """
        self.canceled = False
        self.report = SyncProgressReport(self.conduit)
        self.tmp_dir = mkdtemp(dir=self.repo.working_dir)
        try:
            manifest = self._fetch_manifest()
            if manifest is not None:
                module_paths = self._fetch_modules(manifest)
                self._import_modules(module_paths)
        finally:
            # Update the progress report one last time
            self.report.update_progress()

            shutil.rmtree(self.tmp_dir)
            self.tmp_dir = None

        return self.report
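
A self-contained sketch of the _extract_metadata pattern used by each class above: locate */metadata.json inside a module tarball and return its JSON-decoded content. The module name and metadata below are invented, and the tarball is built on the fly just for the demonstration:

import json
import os
import shutil
import tarfile
from contextlib import closing
from tempfile import mkdtemp

work = mkdtemp()
try:
    # Build a throwaway module tarball containing mymod/metadata.json.
    module_dir = os.path.join(work, 'mymod')
    os.mkdir(module_dir)
    with open(os.path.join(module_dir, 'metadata.json'), 'w') as fp:
        json.dump({'name': 'me-mymod', 'version': '1.0.0'}, fp)
    module_path = os.path.join(work, 'mymod.tar.gz')
    with closing(tarfile.open(module_path, 'w:gz')) as tarball:
        tarball.add(module_dir, arcname='mymod')

    # The extraction pattern from _extract_metadata.
    tmp_dir = mkdtemp(dir=os.path.dirname(module_path))
    try:
        with closing(tarfile.open(module_path)) as tarball:
            for member in tarball.getmembers():
                if member.name.split('/')[-1] == 'metadata.json':
                    tarball.extract(member, tmp_dir)
                    with open(os.path.join(tmp_dir, member.name)) as fp:
                        print json.load(fp)['name']  # me-mymod
                    break
    finally:
        shutil.rmtree(tmp_dir)
finally:
    shutil.rmtree(work)
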
Example No. 27
class SynchronizeWithDirectory(object):
    """
    A callable object used to synchronize with a directory of packaged puppet modules.
    The source of the import is a directory containing a PULP_MANIFEST and
    multiple built puppet modules.

    :ivar repo: A Pulp repository object
    :type repo: pulp.plugins.model.Repository
    :ivar conduit: Provides access to relevant Pulp functionality.
    :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
    :ivar config: Plugin configuration.
    :type config: pulp.plugins.config.PluginCallConfiguration
    :ivar report: An import report.
    :type report: SyncProgressReport
    :ivar canceled: The operation canceled flag.
    :type canceled: bool
    :ivar tmp_dir: The path to the temporary directory used to download files.
    :type tmp_dir: str
    """

    @staticmethod
    def _extract_metadata(module_path):
        """
        Extract the puppet module metadata from the tarball at the specified path.
        Search the tarball content for a file named */metadata.json and extract
        it into a temporary directory. Then read the file and return the JSON-decoded content.

        :param module_path: The fully qualified path to the module.
        :type module_path: str

        :return: The puppet module metadata.
        :rtype: dict
        """
        tmp_dir = mkdtemp(dir=os.path.dirname(module_path))
        try:
            with closing(tarfile.open(module_path)) as tarball:
                for member in tarball.getmembers():
                    path = member.name.split('/')
                    if path[-1] == constants.MODULE_METADATA_FILENAME:
                        tarball.extract(member, tmp_dir)
                        with open(os.path.join(tmp_dir, member.name)) as fp:
                            return json.load(fp)
        finally:
            shutil.rmtree(tmp_dir)

    def __init__(self, repo, conduit, config):
        """
        :param repo: A Pulp repository object
        :type repo: pulp.plugins.model.Repository
        :param conduit: Provides access to relevant Pulp functionality.
        :type conduit: pulp.plugins.conduits.repo_sync.RepoSyncConduit
        :param config: Plugin configuration.
        :type config: pulp.plugins.config.PluginCallConfiguration
        """
        self.repo = repo
        self.conduit = conduit
        self.config = config
        self.report = None
        self.canceled = False
        self.tmp_dir = None

    def feed_url(self):
        """
        Get the feed URL from the configuration and ensure it has a trailing '/' so urljoin will
        work correctly.

        :return: The feed URL.
        :rtype: str
        """
        url = self.config.get(constants.CONFIG_FEED)
        if not url.endswith('/'):
            url += '/'
        return url

    def cancel(self):
        """
        Cancel puppet module import.
        """
        self.canceled = True

    def _download(self, urls):
        """
        Download files by URL.

        Encapsulates nectar details and provides a simplified method of downloading files.

        :param urls: A list of tuples: (url, destination).  The *url* and *destination* are both
                     strings.  The *destination* is the fully qualified path to where the file is
                     to be downloaded.
        :type urls: list

        :return: The nectar reports.  Tuple of: (succeeded_reports, failed_reports)
        :rtype: tuple
        """
        feed_url = self.feed_url()
        nectar_config = importer_config_to_nectar_config(self.config.flatten())
        nectar_class = URL_TO_DOWNLOADER[urlparse(feed_url).scheme]
        downloader = nectar_class(nectar_config)
        listener = DownloadListener(self, downloader)

        request_list = []
        for url, destination in urls:
            request_list.append(DownloadRequest(url, destination))
        downloader.download(request_list)
        nectar_config.finalize()

        for report in listener.succeeded_reports:
            _logger.info(FETCH_SUCCEEDED, dict(url=report.url, dst=report.destination))
        for report in listener.failed_reports:
            _logger.error(FETCH_FAILED, dict(url=report.url, msg=report.error_msg))

        return listener.succeeded_reports, listener.failed_reports

    def _fetch_manifest(self):
        """
        Fetch the PULP_MANIFEST.

        After the manifest is fetched, the file is parsed into a list of tuples.

        :return: The manifest content.  List of: (name,checksum,size).
        :rtype: list
        """
        started = time()

        # report progress: started
        self.report.metadata_state = constants.STATE_RUNNING
        self.report.metadata_query_total_count = 1
        self.report.metadata_query_finished_count = 0
        self.report.update_progress()

        # download manifest
        destination = StringIO()
        feed_url = self.feed_url()
        url = urljoin(feed_url, constants.MANIFEST_FILENAME)
        succeeded_reports, failed_reports = self._download([(url, destination)])

        # report download failed
        if failed_reports:
            report = failed_reports[0]
            self.report.metadata_state = constants.STATE_FAILED
            self.report.metadata_error_message = report.error_msg
            self.report.metadata_execution_time = time() - started
            return None

        # report download succeeded
        self.report.metadata_state = constants.STATE_SUCCESS
        self.report.metadata_query_finished_count = 1
        self.report.metadata_current_query = None
        self.report.metadata_execution_time = time() - started
        self.report.update_progress()

        # return parsed manifest
        entries = destination.getvalue().split('\n')
        manifest = [tuple(e.split(',')) for e in entries if e]
        return manifest

    def _fetch_modules(self, manifest):
        """
        Fetch all of the modules referenced in the manifest.

        :param manifest: A parsed PULP_MANIFEST. List of: (name,checksum,size).
        :type manifest: list

        :return: A list of paths to the fetched module files.
        :rtype: list
        """
        self.started_fetch_modules = time()

        # report progress: started
        self.report.modules_state = constants.STATE_RUNNING
        self.report.modules_total_count = len(manifest)
        self.report.modules_finished_count = 0
        self.report.modules_error_count = 0
        self.report.update_progress()

        # download modules
        urls = []
        feed_url = self.feed_url()
        for path, checksum, size in manifest:
            url = urljoin(feed_url, path)
            destination = os.path.join(self.tmp_dir, os.path.basename(path))
            urls.append((url, destination))
        succeeded_reports, failed_reports = self._download(urls)

        # report failed downloads
        if failed_reports:
            self.report.modules_state = constants.STATE_FAILED
            self.report.modules_error_count = len(failed_reports)
            self.report.modules_individual_errors = []

        for report in failed_reports:
            self.report.modules_individual_errors.append(report.error_msg)
        self.report.update_progress()

        return [r.destination for r in succeeded_reports]

    def _import_modules(self, module_paths):
        """
        Import the puppet modules (tarballs) at the specified paths. This will also remove
        any modules from the local repository that are no longer present in the remote
        repository when the 'remove_missing' config value is True.

        :param module_paths: A list of paths to puppet module files.
        :type module_paths: list
        """
        existing_module_ids_by_key = {}
        for module in Module.objects.only(*Module.unit_key_fields).all():
            existing_module_ids_by_key[module.unit_key_str] = module.id

        remote_paths = {}

        list_of_modules = []
        for module_path in module_paths:
            puppet_manifest = self._extract_metadata(module_path)
            module = Module.from_metadata(puppet_manifest)
            remote_paths[module.unit_key_str] = module_path
            list_of_modules.append(module)

        pub_step = publish_step.GetLocalUnitsStep(
            constants.IMPORTER_TYPE_ID, available_units=list_of_modules, repo=self.repo)
        pub_step.process_main()
        self.report.modules_total_count = len(pub_step.units_to_download)

        for module in pub_step.units_to_download:
            remote_path = remote_paths[module.unit_key_str]
            if self.canceled:
                return
            _logger.debug(IMPORT_MODULE, dict(mod=remote_path))

            module.set_storage_path(os.path.basename(remote_path))
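            # If another task saved this module concurrently, the unique index
            # raises NotUniqueError; fall back to the document that already exists.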
            try:
                module.save_and_import_content(remote_path)
            except NotUniqueError:
                module = module.__class__.objects.get(**module.unit_key)

            repo_controller.associate_single_unit(self.repo.repo_obj, module)

            self.report.modules_finished_count += 1
            self.report.update_progress()

        # Write the report, making sure we don't overwrite a failure in _fetch_modules
        if self.report.modules_state not in constants.COMPLETE_STATES:
            self.report.modules_state = constants.STATE_SUCCESS
        self.report.modules_execution_time = time() - self.started_fetch_modules
        self.report.update_progress()

        remove_missing = self.config.get_boolean(constants.CONFIG_REMOVE_MISSING)
        if remove_missing is None:
            remove_missing = constants.DEFAULT_REMOVE_MISSING
        if remove_missing:
            self._remove_missing(existing_module_ids_by_key, remote_paths.keys())

    def _remove_missing(self, existing_module_ids_by_key, remote_unit_keys):
        """
        Removes units from the local repository if they are missing from the remote repository.

        :param existing_module_ids_by_key: A dict keyed on Module unit key associated with the
            current repository. The values are the mongoengine id of the corresponding Module.
        :type existing_module_ids_by_key: dict of Module.id values keyed on unit_key_str
        :param remote_unit_keys: A list of all the Module keys in the remote repository
        :type remote_unit_keys: list of strings
        """
        keys_to_remove = list(set(existing_module_ids_by_key.keys()) - set(remote_unit_keys))
        doomed_ids = [existing_module_ids_by_key[key] for key in keys_to_remove]
        doomed_module_iterator = Module.objects.in_bulk(doomed_ids).itervalues()
        repo_controller.disassociate_units(self.repo, doomed_module_iterator)

    def __call__(self):
        """
        Invoke the callable object.

        All work is performed in the repository working directory and cleaned up after the call.

        :return: The final synchronization report.
        :rtype: SyncProgressReport
        """
        self.canceled = False
        self.report = SyncProgressReport(self.conduit)
        self.tmp_dir = mkdtemp(dir=self.repo.working_dir)
        try:
            manifest = self._fetch_manifest()
            if manifest is not None:
                module_paths = self._fetch_modules(manifest)
                self._import_modules(module_paths)
        finally:
            # Update the progress report one last time
            self.report.update_progress()

            shutil.rmtree(self.tmp_dir)
            self.tmp_dir = None

        return self.report
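
A minimal sketch of the set arithmetic that _remove_missing relies on: any unit key associated locally but absent from the remote manifest is removed. The keys and ids below are invented stand-ins for unit_key_str values and mongoengine ids:

existing_module_ids_by_key = {
    'me-alpha-1.0.0': 'id-1',
    'me-beta-2.0.0': 'id-2',
}
remote_unit_keys = ['me-alpha-1.0.0']  # beta is gone from the remote repository

keys_to_remove = list(set(existing_module_ids_by_key.keys()) - set(remote_unit_keys))
doomed_ids = [existing_module_ids_by_key[key] for key in keys_to_remove]
print doomed_ids  # ['id-2']
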
Example No. 28
class PuppetModuleSyncRun(object):
    """
    Used to perform a single sync of a puppet repository. This class will
    maintain state relevant to the run and should not be reused across runs.
    """
    def __init__(self, repo, sync_conduit, config, is_cancelled_call):
        self.repo = repo
        self.sync_conduit = sync_conduit
        self.config = config
        self.is_cancelled_call = is_cancelled_call

        self.progress_report = SyncProgressReport(sync_conduit)

    def perform_sync(self):
        """
        Performs the sync operation according to the configured state of the
        instance. The report to be sent back to Pulp is returned from this
        call. This call will make calls into the conduit's progress update
        as appropriate.

        This call executes serially. No threads are created by this call. It
        will not return until either a step fails or the entire sync is
        completed.

        :return: the report object to return to Pulp from the sync call
        :rtype:  pulp.plugins.model.SyncReport
        """
        _LOG.info('Beginning sync for repository <%s>' % self.repo.id)

        # quit now if there is no feed URL defined
        if not self.config.get(constants.CONFIG_FEED):
            self.progress_report.metadata_state = STATE_FAILED
            self.progress_report.metadata_error_message = _(
                'Cannot perform repository sync on a repository with no feed')
            self.progress_report.update_progress()
            return self.progress_report.build_final_report()

        try:
            metadata = self._parse_metadata()
            if not metadata:
                report = self.progress_report.build_final_report()
                return report

            self._import_modules(metadata)
        finally:
            # One final progress update before finishing
            self.progress_report.update_progress()

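            # Note: returning from the finally block means a report is returned
            # even if _import_modules raised; the exception is suppressed.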
            report = self.progress_report.build_final_report()
            return report

    def _parse_metadata(self):
        """
        Takes the necessary actions (according to the run configuration) to
        retrieve and parse the repository's metadata. This call will return
        either the successfully parsed metadata or None if it could not
        be retrieved or parsed. The progress report will be updated with the
        appropriate description of what went wrong in the event of an error,
        so the caller should interpret a None return as an error occurring and
        not continue the sync.

        :return: object representation of the metadata
        :rtype:  RepositoryMetadata
        """
        _LOG.info('Beginning metadata retrieval for repository <%s>' %
                  self.repo.id)

        self.progress_report.metadata_state = STATE_RUNNING
        self.progress_report.update_progress()

        start_time = datetime.now()

        # Retrieve the metadata from the source
        try:
            downloader = self._create_downloader()
            metadata_json_docs = downloader.retrieve_metadata(
                self.progress_report)
        except Exception, e:
            _LOG.exception(
                'Exception while retrieving metadata for repository <%s>' %
                self.repo.id)
            self.progress_report.metadata_state = STATE_FAILED
            self.progress_report.metadata_error_message = _(
                'Error downloading metadata')
            self.progress_report.metadata_exception = e
            self.progress_report.metadata_traceback = sys.exc_info()[2]

            end_time = datetime.now()
            duration = end_time - start_time
            self.progress_report.metadata_execution_time = duration.seconds

            self.progress_report.update_progress()

            return None

        # Parse the retrieved metadata documents
        try:
            metadata = RepositoryMetadata()
            for doc in metadata_json_docs:
                metadata.update_from_json(doc)
        except Exception, e:
            _LOG.exception('Exception parsing metadata for repository <%s>' %
                           self.repo.id)
            self.progress_report.metadata_state = STATE_FAILED
            self.progress_report.metadata_error_message = _(
                'Error parsing repository modules metadata document')
            self.progress_report.metadata_exception = e
            self.progress_report.metadata_traceback = sys.exc_info()[2]

            end_time = datetime.now()
            duration = end_time - start_time
            self.progress_report.metadata_execution_time = duration.seconds

            self.progress_report.update_progress()

            return None