Example #1
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the Manifest data.

        If a cookbook specifier is set in the remote, cookbooks are filtered
        using this specifier.

        """
        with ProgressBar(message="Downloading Metadata", total=1) as pb:
            downloader = self.remote.get_downloader(url=urljoin(self.remote.url + "/", "universe"))
            result = await downloader.run()
            pb.increment()

        cookbook_names = self.remote.specifier_cookbook_names()

        with ProgressBar(message="Parsing Metadata") as pb:
            universe = Universe(result.path)
            for entry in universe.read():
                if cookbook_names and entry.name not in cookbook_names:
                    continue
                cookbook = CookbookPackageContent(
                    name=entry.name, version=entry.version, dependencies=entry.dependencies
                )
                artifact = Artifact()
                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=entry.download_url,
                    relative_path=cookbook.relative_path(),
                    remote=self.remote,
                    deferred_download=not self.download_artifacts,
                )
                dc = DeclarativeContent(content=cookbook, d_artifacts=[da])
                pb.increment()
                await self.put(dc)
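
Note: a first stage like this is typically handed to a DeclarativeVersion
pipeline. A minimal sketch of that wiring, assuming the pulpcore Stages API
and a hypothetical CookbookFirstStage class shaped like the stage above:

from pulpcore.plugin.stages import DeclarativeVersion

def synchronize(remote, repository, mirror=False):
    # CookbookFirstStage is a hypothetical name for a stage like Example #1.
    first_stage = CookbookFirstStage(remote)
    DeclarativeVersion(first_stage, repository, mirror=mirror).create()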
Example #2
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the Manifest data.

        Args:
            in_q (asyncio.Queue): Unused because the first stage doesn't read from an input queue.
            out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to

        """
        with ProgressBar(message='Downloading Metadata') as pb:
            parsed_url = urlparse(self.remote.url)
            root_dir = os.path.dirname(parsed_url.path)
            downloader = self.remote.get_downloader(url=self.remote.url)
            result = await downloader.run()
            pb.increment()

        with ProgressBar(message='Parsing Metadata') as pb:
            for entry in self.read_my_metadata_file_somehow(result.path):
                path = os.path.join(root_dir, entry['picture'])
                url = urlunparse(parsed_url._replace(path=path))
                unit = Animal(**entry)  # create the content unit in memory only
                artifact = Artifact()  # create the Artifact in memory only
                da = DeclarativeArtifact(artifact, url, entry['picture'], self.remote)
                dc = DeclarativeContent(content=unit, d_artifacts=[da])
                pb.increment()
                await self.put(dc)
Example #3
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the Manifest data.
        """
        deferred_download = (self.remote.policy != Remote.IMMEDIATE)  # Interpret download policy
        with ProgressBar(message='Downloading Metadata') as pb:
            parsed_url = urlparse(self.remote.url)
            root_dir = os.path.dirname(parsed_url.path)
            downloader = self.remote.get_downloader(url=self.remote.url)
            result = await downloader.run()
            pb.increment()

        with ProgressBar(message='Parsing Metadata') as pb:
            manifest = Manifest(result.path)
            for entry in manifest.read():
                path = os.path.join(root_dir, entry.relative_path)
                url = urlunparse(parsed_url._replace(path=path))
                file = FileContent(relative_path=entry.relative_path, digest=entry.digest)
                artifact = Artifact(size=entry.size, sha256=entry.digest)
                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=url,
                    relative_path=entry.relative_path,
                    remote=self.remote,
                    deferred_download=deferred_download,
                )
                dc = DeclarativeContent(content=file, d_artifacts=[da])
                pb.increment()
                await self.put(dc)
Example #4
    async def __call__(self, in_q, out_q):
        """
        Build and emit `DeclarativeContent` from the Manifest data.

        Args:
            in_q (asyncio.Queue): Unused because the first stage doesn't read from an input queue.
            out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to
        """
        with ProgressBar(message='Downloading Metadata') as pb:
            parsed_url = urlparse(self.remote.url)
            root_dir = os.path.dirname(parsed_url.path)
            downloader = self.remote.get_downloader(self.remote.url)
            result = await downloader.run()
            pb.increment()

        with ProgressBar(message='Parsing Metadata') as pb:
            manifest = Manifest(result.path)
            for entry in manifest.read():
                path = os.path.join(root_dir, entry.relative_path)
                url = urlunparse(parsed_url._replace(path=path))
                file = FileContent(relative_path=entry.relative_path,
                                   digest=entry.digest)
                artifact = Artifact(size=entry.size, sha256=entry.digest)
                da = DeclarativeArtifact(artifact, url, entry.relative_path,
                                         self.remote)
                dc = DeclarativeContent(content=file, d_artifacts=[da])
                pb.increment()
                await out_q.put(dc)
        await out_q.put(None)
Example #5
    async def run(self):
        """
        Parse Release content units.

        Update release content with information obtained from its artifact.
        """
        with ProgressBar(message="Update Release units") as pb:
            async for d_content in self.items():
                if isinstance(d_content.content, Release):
                    release = d_content.content
                    release_artifact = d_content.d_artifacts[0].artifact
                    release.sha256 = release_artifact.sha256
                    release_dict = deb822.Release(release_artifact.file)
                    release.codename = release_dict["Codename"]
                    release.suite = release_dict["Suite"]
                    # TODO: split off the extra prefix, e.g. 'updates/main' -> 'main'
                    release.components = _filter_ssl(
                        release_dict["Components"], self.components
                    )
                    release.architectures = _filter_ssl(
                        release_dict["Architectures"], self.architectures
                    )
                    log.debug("Codename: {}".format(release.codename))
                    log.debug("Components: {}".format(release.components))
                    log.debug("Architectures: {}".format(release.architectures))
                    pb.increment()
                await self.put(d_content)
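
Note: deb822.Release comes from the python-debian package. An illustration of
the fields the parsing above reads (the values are examples, not taken from
the code):

from debian import deb822

with open('Release') as fd:
    release_dict = deb822.Release(fd)

release_dict['Codename']       # e.g. 'buster'
release_dict['Suite']          # e.g. 'stable'
release_dict['Components']     # e.g. 'main contrib non-free'
release_dict['Architectures']  # e.g. 'amd64 arm64'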
Example #6
    async def run(self):
        """
        Parse PackageIndex content units.

        Ensure that an uncompressed artifact is available.
        """
        with ProgressBar(message='Update PackageIndex units') as pb:
            async for d_content in self.items():
                if isinstance(d_content.content, PackageIndex):
                    if not d_content.d_artifacts:
                        raise NoPackageIndexFile()

                    content = d_content.content
                    if not [da for da in d_content.d_artifacts
                            if da.artifact.sha256 == content.sha256]:
                        # No main artifact found; uncompress one
                        filename = _uncompress_artifact(d_content.d_artifacts[0].artifact)
                        da = DeclarativeArtifact(
                            Artifact(sha256=content.sha256),
                            filename,
                            content.relative_path,
                            d_content.d_artifacts[0].remote,
                        )
                        d_content.d_artifacts.append(da)
                        await da.download()
                        da.artifact.save()
                        log.info("*** Expected: {} *** Uncompressed: {} ***".format(
                            content.sha256, da.artifact.sha256))

                    pb.increment()
                await self.put(d_content)
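
Note: _uncompress_artifact is a private helper not shown in this example. One
plausible sketch, assuming the compressed index is gzip and that the
downloader accepts file:// URLs:

import gzip
import shutil
import tempfile

def _uncompress_artifact(artifact):
    # Hypothetical helper: gunzip the artifact's file to a temporary path and
    # return a file:// URL that a downloader can fetch.
    with tempfile.NamedTemporaryFile(delete=False) as out_file:
        with gzip.open(artifact.file) as in_file:
            shutil.copyfileobj(in_file, out_file)
    return 'file://{}'.format(out_file.name)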
Example #7
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the ansible metadata.
        """
        with ProgressBar(message='Parsing Role Metadata') as pb:
            async for metadata in self._fetch_roles():
                for version in metadata['summary_fields']['versions']:
                    url = GITHUB_URL % (
                        metadata['github_user'],
                        metadata['github_repo'],
                        version['name'],
                    )
                    role = Role(version=version['name'],
                                name=metadata['name'],
                                namespace=metadata['namespace'])
                    relative_path = "%s/%s/%s.tar.gz" % (
                        metadata['namespace'],
                        metadata['name'],
                        version['name'],
                    )
                    d_artifact = DeclarativeArtifact(
                        artifact=Artifact(),
                        url=url,
                        relative_path=relative_path,
                        remote=self.remote,
                        deferred_download=self.deferred_download,
                    )
                    d_content = DeclarativeContent(
                        content=role,
                        d_artifacts=[d_artifact],
                    )
                    pb.increment()
                    await self.put(d_content)
Example #8
async def migrate_content(content_models):
    """
    A coroutine to initiate content migration for each plugin.

    Args:
         content_models: List of Pulp 2 content models to migrate data for
    """
    pre_migrators = []
    content_migrators = []
    for content_model in content_models:
        pre_migrators.append(pre_migrate_content(content_model))

    _logger.debug('Pre-migrating Pulp 2 content')
    await asyncio.wait(pre_migrators)

    with ProgressBar(message='Migrating content to Pulp 3', total=0) as pb:
        # schedule content migration into Pulp 3 using pre-migrated Pulp 2 content
        for content_model in content_models:
            content_migrators.append(
                content_model.pulp_2to3_detail.migrate_content_to_pulp3())

            # only used for progress bar counters
            content_type = content_model.pulp_2to3_detail.type
            pulp2content_qs = Pulp2Content.objects.filter(
                pulp2_content_type_id=content_type, pulp3_content=None)
            pb.total += pulp2content_qs.count()
        pb.save()

        await asyncio.wait(content_migrators)

        pb.done = pb.total
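
Note: passing bare coroutines to asyncio.wait(), as done above, is deprecated
since Python 3.8 and rejected in 3.11. On newer interpreters the same
scheduling would look roughly like this:

# Wrap the coroutines in tasks explicitly before waiting on them ...
tasks = [asyncio.ensure_future(pre_migrate_content(m)) for m in content_models]
await asyncio.wait(tasks)
# ... or use gather, which also propagates results and exceptions:
await asyncio.gather(*(pre_migrate_content(m) for m in content_models))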
Example #9
async def migrate_repositories():
    """
    A coroutine to migrate pre-migrated repositories.
    """
    with ProgressBar(message='Creating repositories in Pulp 3', total=0) as pb:
        pulp2repos_qs = Pulp2Repository.objects.filter(
            pulp3_repository_version=None)
        pb.total += pulp2repos_qs.count()
        pb.save()

        for pulp2repo in pulp2repos_qs:
            # if pulp2 repo_id is too long, its hash is included in pulp3 repo name
            pulp3_repo_name = pulp2repo.pulp2_repo_id
            if len(pulp3_repo_name) > 255:
                repo_name_hash = hashlib.sha256(
                    pulp3_repo_name.encode()).hexdigest()
                pulp3_repo_name = '{}-{}'.format(pulp3_repo_name[:190],
                                                 repo_name_hash)

            repo, created = Repository.objects.get_or_create(
                name=pulp3_repo_name, description=pulp2repo.pulp2_description)
            if created:
                pb.increment()
            else:
                pb.total -= 1
                pb.save()
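
Note: the [:190] truncation is sized so the result still fits a 255-character
name field (assumed to be the Repository.name limit the len() check above
guards against): 190 characters of the original id, one dash, and a
64-character sha256 hexdigest.

>>> import hashlib
>>> len(hashlib.sha256(b'anything').hexdigest())
64
>>> 190 + 1 + 64
255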
Example #10
    def full_sync(self, delta):
        """
        Synchronize the repository with the remote repository and download artifacts.

        Args:
            delta (namedtuple): Set of unit keys for units to be added to the repository. Set
                of unit keys for units that should be removed from the repository. Only the
                additions are used in this method.
        """
        description = _("Dowloading artifacts and adding content to the repository.")
        current_task = Task()
        with ProgressBar(message=description, total=len(delta.additions)) as bar:
            with Batch(self.next_download(delta.additions)) as batch:
                for plan in batch():
                    try:
                        plan.result()
                    except DownloadError as e:
                        current_task.append_non_fatal_error(e)
                    else:
                        content = self.content_dict.pop(plan.download.url)
                        monitor_dict = self.monitors.pop(plan.download.url).facts()
                        monitor_dict.update({'path': plan.download.writer.path})
                        self._create_and_associate_content(content,
                                                           {plan.download.attachment: monitor_dict})
                        bar.increment()
Example #11

    async def __call__(self, in_q, out_q):
        """
        The coroutine for this stage.

        Args:
            in_q (:class:`asyncio.Queue`): Each item is a
                :class:`django.db.models.query.QuerySet` of
                :class:`~pulpcore.plugin.models.Content` subclass that are already associated
                but not included in the stream of items from `in_q`. One
                :class:`django.db.models.query.QuerySet` is put for each
                :class:`~pulpcore.plugin.models.Content` type.
            out_q (:class:`asyncio.Queue`): Each item is a
                :class:`django.db.models.query.QuerySet` of
                :class:`~pulpcore.plugin.models.Content` subclass that were unassociated. One
                :class:`django.db.models.query.QuerySet` is put for each
                :class:`~pulpcore.plugin.models.Content` type.

        Returns:
            The coroutine for this stage.
        """
        with ProgressBar(message='Un-Associating Content') as pb:
            while True:
                queryset_to_unassociate = await in_q.get()
                if queryset_to_unassociate is None:
                    break

                self.new_version.remove_content(queryset_to_unassociate)
                pb.done = pb.done + queryset_to_unassociate.count()
                pb.save()

                await out_q.put(queryset_to_unassociate)
            await out_q.put(None)
Example #12
    async def run(self):
        """
        Schedules multiple coroutines to migrate pre-migrated content to Pulp 3.
        """
        content_type = self.model.type
        pulp2content_qs = Pulp2Content.objects.filter(
            pulp2_content_type_id=content_type, pulp3_content=None)
        total_pulp2content = pulp2content_qs.count()

        # determine the batch size (at most 36 concurrent coroutines) and the
        # resulting number of batches (i.e. coroutines)
        max_coro = 36
        batch_size = 1
        if total_pulp2content > max_coro:
            batch_size = math.ceil(total_pulp2content / max_coro)
        batch_count = math.ceil(total_pulp2content / batch_size)

        with ProgressBar(message='Migrating {} content to Pulp 3'.format(
                content_type.upper()),
                         total=total_pulp2content) as pb:
            # schedule content migration
            migrators = []
            for batch_idx in range(batch_count):
                start = batch_idx * batch_size
                end = (batch_idx + 1) * batch_size
                batch = pulp2content_qs[start:end]
                migrators.append(self.migrate_to_pulp3(batch, pb=pb))

            if migrators:
                await asyncio.wait(migrators)
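
Note: a quick worked example of the batch math above. With 100 pre-migrated
units and max_coro = 36, the batch size is ceil(100 / 36) = 3, which yields
ceil(100 / 3) = 34 batches, i.e. 34 coroutines and never more than 36:

>>> import math
>>> total, max_coro = 100, 36
>>> batch_size = math.ceil(total / max_coro)
>>> batch_size, math.ceil(total / batch_size)
(3, 34)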
Example #13
    def associate_existing_content(self, content_q):
        """
        Associates existing content to the importer's repository

        Args:
            content_q (queryset): Queryset that will return content that needs to be associated
                with the importer's repository.

        Returns:
            Set of natural keys representing each piece of content associated with the repository.
        """
        added = set()
        with ProgressBar(message=_(
                "Associating units already in Pulp with the repository"),
                         total=content_q.count()) as bar:
            for content in ExampleContent.objects.paginated_qs_results(
                    content_q):
                association = RepositoryContent(repository=self.repository,
                                                content=content)
                association.save()
                bar.increment()
                # Remove it from the delta
                key = Key(path=content.path, digest=content.digest)
                added.add(key)
        return added
Example #14
    async def run(self):
        """
        The coroutine for this stage.

        Returns:
            The coroutine for this stage.
        """
        with ProgressBar(message='Associating Content') as pb:
            to_delete = set(
                self.new_version.content.values_list('pk', flat=True))
            async for batch in self.batches():
                to_add = set()
                for d_content in batch:
                    try:
                        to_delete.remove(d_content.content.pk)
                    except KeyError:
                        to_add.add(d_content.content.pk)

                if to_add:
                    self.new_version.add_content(
                        Content.objects.filter(pk__in=to_add))
                    pb.done = pb.done + len(to_add)
                    pb.save()

            if to_delete:
                await self.put(Content.objects.filter(pk__in=to_delete))
Example #15
    def deferred_sync(self, delta):
        """
        Synchronize the repository with the remote repository without downloading artifacts.

        Args:
            delta (namedtuple): Set of unit keys for units to be added to the repository. Set
                of unit keys for units that should be removed from the repository. Only the
                additions are used in this method.
        """
        description = _("Adding file content to the repository without downloading artifacts.")
        progress_bar = ProgressBar(message=description, total=len(delta.additions))

        with progress_bar:
            for remote_artifact in self.next_remote_artifact(delta.additions):
                content = self.content_dict.pop(remote_artifact.url)
                self._create_and_associate_content(content, {remote_artifact: None})
                progress_bar.increment()
Example #16
    async def __call__(self, in_q, out_q):
        """
        Build and emit `DeclarativeContent` from the remote metadata.

        Fetch and parse the remote metadata, use the Project Specifiers on the Remote
        to determine which Python packages should be synced.

        Args:
            in_q (asyncio.Queue): Unused because the first stage doesn't read from an input queue.
            out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to.

        """
        ps = ProjectSpecifier.objects.filter(remote=self.remote)

        with ProgressBar(message='Fetching Project Metadata') as pb:
            # Group multiple specifiers to the same project together, so that we only have to fetch
            # the metadata once, and can re-use it if there are multiple specifiers.
            for name, project_specifiers in groupby_unsorted(
                    ps, key=lambda x: x.name):
                # Fetch the metadata from PyPI
                pb.increment()
                try:
                    metadata = await self.get_project_metadata(name)
                except ClientResponseError as e:
                    # Project doesn't exist, log a message and move on
                    log.info(
                        _("HTTP 404 'Not Found' for url '{url}'\n"
                          "Does project '{name}' exist on the remote repository?"
                          ).format(url=e.request_info.url, name=name))
                    continue
                project_specifiers = list(project_specifiers)

                # Determine which packages from the project match the criteria in the specifiers
                packages = await self.get_relevant_packages(
                    metadata=metadata,
                    includes=[
                        specifier for specifier in project_specifiers
                        if not specifier.exclude
                    ],
                    excludes=[
                        specifier for specifier in project_specifiers
                        if specifier.exclude
                    ],
                    prereleases=self.remote.prereleases)

                # For each package, create Declarative objects to pass into the next stage
                for entry in packages:
                    url = entry.pop('url')

                    artifact = Artifact(sha256=entry.pop('sha256_digest'))
                    package = PythonPackageContent(**entry)

                    da = DeclarativeArtifact(artifact, url, entry['filename'],
                                             self.remote)
                    dc = DeclarativeContent(content=package, d_artifacts=[da])

                    await out_q.put(dc)
        await out_q.put(None)
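
Note: groupby_unsorted is a plugin helper; unlike itertools.groupby it must
not require its input to be pre-sorted by key. A minimal sketch of such a
helper, assuming that behavior:

from collections import defaultdict

def groupby_unsorted(iterable, key):
    # Group items by key without requiring sorted input; dicts preserve
    # first-seen key order on Python 3.7+.
    groups = defaultdict(list)
    for item in iterable:
        groups[key(item)].append(item)
    return groups.items()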
Example #17
async def pre_migrate_all_without_content(plan):
    """
    Pre-migrate repositories, relations to their contents, importers and distributors.

    NOTE: MongoDB and Django handle datetime fields differently. MongoDB doesn't care about
    timezones and provides "naive" time, while Django is complaining about time without a timezone.
    The problem is that naive time != time with specified timezone, that's why all the time for
    MongoDB comparisons should be naive and all the time for Django/PostgreSQL should be timezone
    aware.

    Args:
        plan(MigrationPlan): A Migration Plan
    """
    repos = plan.get_repositories()
    importers = plan.get_importers()
    distributors = plan.get_distributors()

    _logger.debug('Pre-migrating Pulp 2 repositories')

    # the latest time we have in the migration tool in Pulp2Repository table
    zero_datetime = timezone.make_aware(datetime(1970, 1, 1), timezone.utc)
    last_added = Pulp2Repository.objects.aggregate(
        Max('pulp2_last_unit_added'
            ))['pulp2_last_unit_added__max'] or zero_datetime
    last_removed = Pulp2Repository.objects.aggregate(
        Max('pulp2_last_unit_removed')
    )['pulp2_last_unit_removed__max'] or zero_datetime
    last_updated = max(last_added, last_removed)
    last_updated_naive = timezone.make_naive(last_updated,
                                             timezone=timezone.utc)

    with ProgressBar(
            message='Pre-migrating Pulp 2 repositories, importers, distributors'
    ) as pb:
        # we pre-migrate:
        #  - empty repos (last_unit_added is not set)
        #  - repos which were updated since last migration (last_unit_added/removed >= last_updated)
        mongo_repo_q = (mongo_Q(last_unit_added__exists=False)
                        | mongo_Q(last_unit_added__gte=last_updated_naive)
                        | mongo_Q(last_unit_removed__gte=last_updated_naive))

        # in case only certain repositories are specified in the migration plan
        if repos:
            mongo_repo_q &= mongo_Q(repo_id__in=repos)

        mongo_repo_qs = Repository.objects(mongo_repo_q)
        pb.total = mongo_repo_qs.count()
        pb.save()

        for repo_data in mongo_repo_qs.only('id', 'repo_id', 'last_unit_added',
                                            'last_unit_removed'):
            # await pre_migrate_one(repo_data, importers, distributors)
            with transaction.atomic():
                repo = await pre_migrate_repo(repo_data)
                await pre_migrate_importer(repo, importers)
                await pre_migrate_distributor(repo, distributors)
                await pre_migrate_repocontent(repo)
            pb.increment()
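
Note: the naive-vs-aware conversion the docstring describes, in isolation,
using the same Django timezone helpers as above:

from datetime import datetime
from django.utils import timezone

aware = timezone.make_aware(datetime(1970, 1, 1), timezone.utc)  # Django/PostgreSQL side
naive = timezone.make_naive(aware, timezone=timezone.utc)        # MongoDB comparison side
assert naive == datetime(1970, 1, 1)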
Example #18
    async def __call__(self, in_q, out_q):
        """
        Build and emit `DeclarativeContent` from the Spec data.

        Args:
            in_q (asyncio.Queue): Unused because the first stage doesn't read from an input queue.
            out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to

        """
        with ProgressBar(message='Downloading Metadata') as pb:
            parsed_url = urlparse(self.remote.url)
            root_dir = parsed_url.path
            specs_path = os.path.join(root_dir, 'specs.4.8.gz')
            specs_url = urlunparse(parsed_url._replace(path=specs_path))
            downloader = self.remote.get_downloader(url=specs_url)
            result = await downloader.run()
            pb.increment()

        with ProgressBar(message='Parsing Metadata') as pb:
            for key in read_specs(result.path):
                relative_path = os.path.join(
                    'gems', key.name + '-' + key.version + '.gem')
                path = os.path.join(root_dir, relative_path)
                url = urlunparse(parsed_url._replace(path=path))

                spec_relative_path = os.path.join(
                    'quick/Marshal.4.8',
                    key.name + '-' + key.version + '.gemspec.rz')
                spec_path = os.path.join(root_dir, spec_relative_path)
                spec_url = urlunparse(parsed_url._replace(path=spec_path))
                gem = GemContent(name=key.name, version=key.version)
                da_gem = DeclarativeArtifact(Artifact(), url, relative_path,
                                             self.remote)
                da_spec = DeclarativeArtifact(Artifact(), spec_url,
                                              spec_relative_path, self.remote)
                dc = DeclarativeContent(content=gem,
                                        d_artifacts=[da_gem, da_spec])
                pb.increment()
                await out_q.put(dc)
        await out_q.put(None)
Example #19
    def deferred_sync(self, delta):
        """
        Synchronize the repository with the remote repository without downloading artifacts.

        Args:
            delta (namedtuple): Set of unit keys for units to be added to the repository.
                Only the additions are used in this method.
        """
        description = _("Adding file content to the repository without downloading artifacts.")

        with ProgressBar(message=description, total=len(delta.additions)) as bar:
            for group in self.next_group(delta.additions):
                self._create_and_associate_content(group)
                bar.increment()
Example #20

    async def __call__(self, in_q, out_q):
        """
        The coroutine for this stage.

        Args:
            in_q (:class:`asyncio.Queue`): Each item is a
                :class:`~pulpcore.plugin.stages.DeclarativeContent` with saved `content` that needs
                to be associated.
            out_q (:class:`asyncio.Queue`): Each item is a :class:`django.db.models.query.QuerySet`
                of :class:`~pulpcore.plugin.models.Content` subclass that are already associated but
                not included in the stream of items from `in_q`. One
                :class:`django.db.models.query.QuerySet` is put for each
                :class:`~pulpcore.plugin.models.Content` type.

        Returns:
            The coroutine for this stage.
        """
        with ProgressBar(message='Associating Content') as pb:
            async for batch in self.batches(in_q):
                content_q_by_type = defaultdict(lambda: Q(pk=None))
                for declarative_content in batch:
                    try:
                        unit_key = declarative_content.content.natural_key()
                        self.unit_keys_by_type[type(
                            declarative_content.content)].remove(unit_key)
                    except KeyError:
                        model_type = type(declarative_content.content)
                        unit_key_dict = declarative_content.content.natural_key_dict(
                        )
                        unit_q = Q(**unit_key_dict)
                        content_q_by_type[model_type] = content_q_by_type[
                            model_type] | unit_q

                for model_type, q_object in content_q_by_type.items():
                    queryset = model_type.objects.filter(q_object)
                    self.new_version.add_content(queryset)
                    pb.done = pb.done + queryset.count()
                    pb.save()

            for unit_type, ids in self.unit_keys_by_type.items():
                if ids:
                    units_to_unassociate = Q()
                    for unit_key in self.unit_keys_by_type[unit_type]:
                        query_dict = {}
                        for i, key_name in enumerate(
                                unit_type.natural_key_fields()):
                            query_dict[key_name] = unit_key[i]
                        units_to_unassociate |= Q(**query_dict)
                    await out_q.put(
                        unit_type.objects.filter(units_to_unassociate))
            await out_q.put(None)
Example #21
    async def run(self):
        """
        The coroutine for this stage.

        Returns:
            The coroutine for this stage.
        """
        def _add_to_pending(coro):
            nonlocal pending
            task = asyncio.ensure_future(coro)
            pending.add(task)
            return task

        #: (set): The set of unfinished tasks.  Contains the content
        #    handler tasks and may contain `content_get_task`.
        pending = set()

        content_iterator = self.items()

        #: (:class:`asyncio.Task`): The task that gets new content from `self._in_q`.
        #    Set to None if stage is shutdown.
        content_get_task = _add_to_pending(content_iterator.__anext__())

        with ProgressBar(message='Downloading Artifacts') as pb:
            try:
                while pending:
                    done, pending = await asyncio.wait(
                        pending, return_when=asyncio.FIRST_COMPLETED)
                    for task in done:
                        if task is content_get_task:
                            try:
                                _add_to_pending(
                                    self._handle_content_unit(task.result()))
                            except StopAsyncIteration:
                                # previous stage is finished and we retrieved all
                                # content instances: shutdown
                                content_get_task = None
                        else:
                            pb.done += task.result()  # download_count
                            pb.save()

                    if content_get_task and content_get_task not in pending:  # not yet shutdown
                        if len(pending) < self.max_concurrent_content:
                            content_get_task = _add_to_pending(
                                content_iterator.__anext__())
            except asyncio.CancelledError:
                # asyncio.wait does not cancel its tasks when cancelled, we need to do this
                for future in pending:
                    future.cancel()
                raise
Example #22
    async def run(self):
        """
        The coroutine for this stage.

        Returns:
            The coroutine for this stage.
        """
        with ProgressBar(message='Un-Associating Content') as pb:
            async for queryset_to_unassociate in self.items():
                self.new_version.remove_content(queryset_to_unassociate)
                pb.done = pb.done + queryset_to_unassociate.count()
                pb.save()

                await self.put(queryset_to_unassociate)
Example #23
    async def __call__(self, in_q, out_q):
        """
        Build and emit `DeclarativeContent` from the Manifest data.

        If a cookbook specifier is set in the remote, cookbooks are filtered
        using this specifier.

        Args:
            in_q (asyncio.Queue): Unused because the first stage doesn't read
                from an input queue.
            out_q (asyncio.Queue): The out_q to send `DeclarativeContent`
                objects to

        """
        with ProgressBar(message='Downloading Metadata', total=1) as pb:
            downloader = self.remote.get_downloader(
                url=urljoin(self.remote.url + '/', 'universe'))
            result = await downloader.run()
            pb.increment()

        cookbook_names = self.remote.specifier_cookbook_names()

        with ProgressBar(message='Parsing Metadata') as pb:
            universe = Universe(result.path)
            for entry in universe.read():
                if cookbook_names and entry.name not in cookbook_names:
                    continue
                cookbook = CookbookPackageContent(
                    name=entry.name,
                    version=entry.version,
                    dependencies=entry.dependencies)
                artifact = Artifact()
                da = DeclarativeArtifact(artifact, entry.download_url,
                                         cookbook.relative_path(), self.remote)
                dc = DeclarativeContent(content=cookbook, d_artifacts=[da])
                pb.increment()
                await out_q.put(dc)
        await out_q.put(None)
Example #24
    async def run(self):
        """
        The coroutine doing the stage's work.
        """
        #: (set): The set of unfinished tasks.  Contains the content
        #    handler tasks and may contain `self._content_get_task`.
        self._pending = set()

        #: (:class:`asyncio.Task`): The task that gets new content from `in_q`.
        #    Set to None if stage is shutdown.
        self._content_get_task = self._add_to_pending(self.in_q.get())

        #: (:class:`asyncio.Semaphore`): Semaphore controlling the number of concurrent downloads
        self._download_semaphore = asyncio.Semaphore(
            value=self.max_concurrent_downloads)

        with ProgressBar(message='Downloading Artifacts') as pb:
            try:
                while self._pending:
                    done, self._pending = await asyncio.wait(
                        self._pending, return_when=asyncio.FIRST_COMPLETED)
                    for task in done:
                        if task is self._content_get_task:
                            content = task.result()
                            if content is None:
                                # previous stage is finished and we retrieved all
                                # content instances: shutdown
                                self._content_get_task = None
                            else:
                                self._add_to_pending(
                                    self._handle_content_unit(content))
                        else:
                            download_count = task.result()
                            pb.done += download_count
                            pb.save()

                    if not self.shutdown:
                        if not self.saturated and self._content_get_task not in self._pending:
                            self._content_get_task = self._add_to_pending(
                                self.in_q.get())
            except asyncio.CancelledError:
                # asyncio.wait does not cancel its tasks when cancelled, we need to do this
                for future in self._pending:
                    future.cancel()
                raise

        await self.out_q.put(None)
Example #25
    def publish(self):
        """
        Publish the repository.
        """
        with ProgressBar(message=_("Publishing repository metadata"), total=1) as bar:
            manifest_name = 'PULP_MANIFEST'
            with open(manifest_name, 'w+') as fp:
                for entry in self._publish():
                    fp.write(entry)
                    fp.write('\n')

            metadata = PublishedMetadata(
                relative_path=os.path.basename(manifest_name),
                publication=self.publication,
                file=File(open(manifest_name, 'rb')))
            metadata.save()
            bar.increment()
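
Note: the open(manifest_name, 'rb') handle above is never closed. A sketch of
the same save with deterministic cleanup, assuming PublishedMetadata reads
the file during save():

with open(manifest_name, 'rb') as fd:
    metadata = PublishedMetadata(
        relative_path=os.path.basename(manifest_name),
        publication=self.publication,
        file=File(fd))
    metadata.save()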
Example #26
    async def _fetch_galaxy_pages(self):
        """
        Fetch the roles in a remote repository.

        Returns:
            async generator: dicts that represent pages from galaxy api

        """
        page_count = 0
        remote = self.remote

        def role_page_url(url, page=1):
            parsed_url = urlparse(url)
            new_query = parse_qs(parsed_url.query)
            new_query['page'] = page
            return urlunparse(
                parsed_url._replace(query=urlencode(new_query, doseq=True)))

        def parse_metadata(download_result):
            with open(download_result.path) as fd:
                return json.load(fd)

        with ProgressBar(
                message='Parsing Pages from Galaxy Roles API') as progress_bar:
            downloader = remote.get_downloader(url=role_page_url(remote.url))
            metadata = parse_metadata(await downloader.run())

            page_count = math.ceil(float(metadata['count']) / float(PAGE_SIZE))
            progress_bar.total = page_count
            progress_bar.save()

            yield metadata
            progress_bar.increment()

            # Concurrent downloads are limited by aiohttp...
            not_done = set(
                remote.get_downloader(
                    url=role_page_url(remote.url, page)).run()
                for page in range(2, page_count + 1))

            while not_done:
                done, not_done = await asyncio.wait(
                    not_done, return_when=FIRST_COMPLETED)
                for item in done:
                    yield parse_metadata(item.result())
                    progress_bar.increment()
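
Note: role_page_url simply adds or overrides a page query parameter. An
illustration with a hypothetical URL:

>>> role_page_url('https://galaxy.example.com/api/v1/roles/?namespace=foo', page=3)
'https://galaxy.example.com/api/v1/roles/?namespace=foo&page=3'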
Example #27
    def full_sync(self, delta):
        """
        Synchronize the repository with the remote repository and download artifacts.
        """
        description = _("Dowloading artifacts and adding content to the repository.")
        downloader = GroupDownloader(self)
        downloader.schedule_from_iterator(self.next_group(delta.additions))

        with ProgressBar(message=description, total=len(delta.additions)) as bar:
            for group in downloader:
                download_error = False
                for url, result in group.downloaded_files.items():
                    if result.exception:
                        download_error = True
                if not download_error:
                    self._create_and_associate_content(group)
                    bar.increment()
                    log.warning('content_unit = {0}'.format(group.id))
Example #28
    async def run(self):
        """
        Update package content with the information obtained from its artifact.
        """
        with ProgressBar(message="Update Package units") as pb:
            async for d_content in self.items():
                if isinstance(d_content.content, Package):
                    package = d_content.content
                    package_artifact = d_content.d_artifacts[0].artifact
                    if not package_artifact._state.adding:
                        package_paragraph = debfile.DebFile(
                            fileobj=package_artifact.file
                        ).debcontrol()
                        package_dict = Package.from822(package_paragraph)
                        for key, value in package_dict.items():
                            setattr(package, key, value)
                        pb.increment()
                await self.put(d_content)
Example #29
    def sync(self):
        """
        Synchronize the repository with the remote repository.
        """
        # keys are unit keys; values are lists of deferred artifacts associated
        # with the content
        self.content_dict = {}
        self.monitors = {}
        delta = self._find_delta()

        # Find all content being added that already exists in Pulp and associate with repository.
        fields = {f for f in ExampleContent.natural_key_fields()}
        if not self.is_deferred:
            # Filter out any content that still needs to have artifacts downloaded
            ready_to_associate = ExampleContent.objects.find_by_unit_key(
                delta.additions).only(*fields)
        else:
            ready_to_associate = ExampleContent.objects.find_by_unit_key(
                delta.additions, partial=True).only(*fields)
        added = self.associate_existing_content(ready_to_associate)
        remaining_additions = delta.additions - added
        delta = Delta(additions=remaining_additions, removals=delta.removals)

        if self.is_deferred:
            self.deferred_sync(delta)
        else:
            self.full_sync(delta)

        # Remove content if there is any to remove
        if delta.removals:
            # Build a query that uniquely identifies all content that needs to be removed.
            with ProgressBar(message=_("Removing content from repository."),
                             total=len(delta.removals)) as bar:
                q = models.Q()
                for key in delta.removals:
                    q |= models.Q(examplecontent__path=key.path,
                                  examplecontent__digest=key.digest)
                q_set = self.repository.content.filter(q)
                bar.done = RepositoryContent.objects.filter(
                    repository=self.repository).filter(
                        content__in=q_set).delete()[0]
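
Note: the trailing [0] works because Django's QuerySet.delete() returns a
2-tuple of the total number of rows deleted and a per-model breakdown; the
session below shows the shape (values are illustrative):

>>> RepositoryContent.objects.filter(repository=repo).delete()
(42, {'core.RepositoryContent': 42})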
Example #30
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the ansible metadata.
        """
        with ProgressBar(message='Parsing Role Metadata') as pb:
            pending = []
            async for metadata in self._fetch_roles():
                role = AnsibleRole(name=metadata['name'],
                                   namespace=metadata['namespace'])
                d_content = DeclarativeContent(content=role,
                                               d_artifacts=[],
                                               does_batch=False)
                pending.append(
                    asyncio.ensure_future(
                        self._add_role_versions(
                            d_content.get_or_create_future(),
                            metadata,
                        )))
                await self.put(d_content)
                pb.increment()
            await asyncio.gather(*pending)
Example #31
    async def run(self):
        """
        Fetch the tag list, then download and parse tags, manifests, and blobs.
        """
        future_manifests = []
        tag_list = []
        to_download = []
        man_dcs = {}
        total_blobs = []

        with ProgressBar(message='Downloading tag list', total=1) as pb:
            repo_name = self.remote.namespaced_upstream_name
            relative_url = '/v2/{name}/tags/list'.format(name=repo_name)
            tag_list_url = urljoin(self.remote.url, relative_url)
            list_downloader = self.remote.get_downloader(url=tag_list_url)
            await list_downloader.run(extra_data={'repo_name': repo_name})

            with open(list_downloader.path) as tags_raw:
                tags_dict = json.loads(tags_raw.read())
                tag_list = tags_dict['tags']

            # check for the presence of the pagination link header
            link = list_downloader.response_headers.get('Link')
            await self.handle_pagination(link, repo_name, tag_list)
            whitelist_tags = self.remote.whitelist_tags
            if whitelist_tags:
                tag_list = list(set(tag_list) & set(whitelist_tags.split(',')))
            pb.increment()

        msg = 'Creating Download requests for v2 Tags'
        with ProgressBar(message=msg, total=len(tag_list)) as pb:
            for tag_name in tag_list:
                relative_url = '/v2/{name}/manifests/{tag}'.format(
                    name=self.remote.namespaced_upstream_name,
                    tag=tag_name,
                )
                url = urljoin(self.remote.url, relative_url)
                downloader = self.remote.get_downloader(url=url)
                to_download.append(downloader.run(extra_data={'headers': V2_ACCEPT_HEADERS}))
                pb.increment()

        pb_parsed_tags = ProgressBar(message='Processing v2 Tags', state='running')
        pb_parsed_ml_tags = ProgressBar(message='Parsing Manifest List Tags', state='running')
        pb_parsed_m_tags = ProgressBar(message='Parsing Manifests Tags', state='running')
        global pb_parsed_blobs
        pb_parsed_blobs = ProgressBar(message='Parsing Blobs', state='running')
        pb_parsed_man = ProgressBar(message='Parsing Manifests', state='running')

        for download_tag in asyncio.as_completed(to_download):
            tag = await download_tag
            with open(tag.path) as content_file:
                raw = content_file.read()
            content_data = json.loads(raw)
            mediatype = content_data.get('mediaType')
            tag.artifact_attributes['file'] = tag.path
            saved_artifact = Artifact(**tag.artifact_attributes)
            try:
                saved_artifact.save()
            except IntegrityError:
                del tag.artifact_attributes['file']
                saved_artifact = Artifact.objects.get(**tag.artifact_attributes)
            tag_dc = self.create_tag(mediatype, saved_artifact, tag.url)

            if type(tag_dc.content) is ManifestListTag:
                list_dc = self.create_tagged_manifest_list(
                    tag_dc, content_data)
                await self.put(list_dc)
                pb_parsed_ml_tags.increment()
                tag_dc.extra_data['list_relation'] = list_dc
                for manifest_data in content_data.get('manifests'):
                    man_dc = self.create_manifest(list_dc, manifest_data)
                    future_manifests.append(man_dc.get_or_create_future())
                    man_dcs[man_dc.content.digest] = man_dc
                    await self.put(man_dc)
                    pb_parsed_man.increment()
            elif type(tag_dc.content) is ManifestTag:
                man_dc = self.create_tagged_manifest(tag_dc, content_data)
                await self.put(man_dc)
                pb_parsed_m_tags.increment()
                tag_dc.extra_data['man_relation'] = man_dc
                self.handle_blobs(man_dc, content_data, total_blobs)
            await self.put(tag_dc)
            pb_parsed_tags.increment()

        pb_parsed_tags.state = 'completed'
        pb_parsed_tags.total = pb_parsed_tags.done
        pb_parsed_tags.save()
        pb_parsed_ml_tags.state = 'completed'
        pb_parsed_ml_tags.total = pb_parsed_ml_tags.done
        pb_parsed_ml_tags.save()
        pb_parsed_m_tags.state = 'completed'
        pb_parsed_m_tags.total = pb_parsed_m_tags.done
        pb_parsed_m_tags.save()
        pb_parsed_man.state = 'completed'
        pb_parsed_man.total = pb_parsed_man.done
        pb_parsed_man.save()

        for manifest_future in asyncio.as_completed(future_manifests):
            man = await manifest_future
            with man._artifacts.get().file.open() as content_file:
                raw = content_file.read()
            content_data = json.loads(raw)
            man_dc = man_dcs[man.digest]
            self.handle_blobs(man_dc, content_data, total_blobs)
        for blob in total_blobs:
            await self.put(blob)

        pb_parsed_blobs.state = 'completed'
        pb_parsed_blobs.total = pb_parsed_blobs.done
        pb_parsed_blobs.save()
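
Note: the three-line "mark completed" block repeats five times in this stage.
A small helper could collapse it (a sketch, not part of the original code):

def _finish(pb):
    # Mark a running ProgressBar as completed, pinning total to what was done.
    pb.state = 'completed'
    pb.total = pb.done
    pb.save()

for pb in (pb_parsed_tags, pb_parsed_ml_tags, pb_parsed_m_tags,
           pb_parsed_man, pb_parsed_blobs):
    _finish(pb)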