Example No. 1
 async def create_pulp3_content(self):
     """
     Create a Pulp 3 Package content unit for saving later in a bulk operation.
     """
     cr_package = await get_cr_obj(self)
     pkg_dict = Package.createrepo_to_dict(cr_package)
     return Package(**pkg_dict)
Example No. 2
 def create_pulp3_content(self):
     """
     Create a Pulp 3 Package content unit for saving later in a bulk operation.
     """
     cr_package = get_cr_obj(self)
     pkg_dict = Package.createrepo_to_dict(cr_package)
     pkg_dict['is_modular'] = self.is_modular
     return (Package(**pkg_dict), None)
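Both variants above build an unsaved Package from createrepo_c metadata so a caller can persist many units in one pass. A minimal, hypothetical sketch of such a caller (the `pulp2_units` iterable is an assumption, not part of the source):

    # Hypothetical bulk-save loop built on the method from Example No. 2.
    packages = []
    for unit in pulp2_units:                      # assumed iterable of pulp2 content units
        package, _ = unit.create_pulp3_content()  # unsaved Package instance
        packages.append(package)

    # One INSERT for the whole batch; ignore_conflicts skips rows that already exist.
    Package.objects.bulk_create(packages, ignore_conflicts=True, batch_size=1000)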
Example No. 3
    def deferred_validate(self, data):
        """
        Validate the rpm package data.

        Args:
            data (dict): Data to be validated

        Returns:
            dict: Data that has been validated

        """
        data = super().deferred_validate(data)
        # export META from rpm and prepare dict as saveable format
        try:
            new_pkg = Package.createrepo_to_dict(
                read_crpackage_from_artifact(data["artifact"]))
        except OSError:
            log.info(traceback.format_exc())
            raise NotAcceptable(
                detail="RPM file cannot be parsed for metadata")

        attrs = {key: new_pkg[key] for key in Package.natural_key_fields()}
        package = Package.objects.filter(**attrs)

        if package.exists():
            keywords = (
                "name",
                "epoch",
                "version",
                "release",
                "arch",
                "checksum_type",
                "pkgId",
            )
            error_data = ", ".join([
                "=".join(item) for item in new_pkg.items()
                if item[0] in keywords
            ])

            package.get().touch()
            raise serializers.ValidationError(
                _("There is already a package with: {values}.").format(
                    values=error_data))

        new_pkg["location_href"] = (format_nevra_short(
            new_pkg["name"],
            new_pkg["epoch"],
            new_pkg["version"],
            new_pkg["release"],
            new_pkg["arch"],
        ) + ".rpm")
        if not data.get("relative_path"):
            data["relative_path"] = new_pkg["location_href"]

        data.update(new_pkg)
        return data
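The `location_href` above comes from `format_nevra_short`, which is assumed here to render the filename-style NVRA with the epoch omitted; a rough sketch under that assumption:

    def format_nevra_short(name, epoch, version, release, arch):
        # Assumed behavior: the "short" form drops the epoch, so appending ".rpm"
        # yields a filename such as "bear-4.1-1.noarch.rpm".
        return f"{name}-{version}-{release}.{arch}"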
Example No. 4
    def publish_artifacts(self, content, prefix=""):
        """
        Publish artifacts.

        Args:
            content (pulpcore.plugin.models.Content): content set.
            prefix (str): a relative path prefix for the published artifact

        """
        published_artifacts = []

        # Special case for Packages
        contentartifact_qs = ContentArtifact.objects.filter(
            content__in=content).filter(
                content__pulp_type=Package.get_pulp_type())

        for content_artifact in contentartifact_qs.values(
                "pk", "relative_path").iterator():
            relative_path = content_artifact["relative_path"]
            relative_path = os.path.join(prefix, PACKAGES_DIRECTORY,
                                         relative_path.lower()[0],
                                         relative_path)
            published_artifacts.append(
                PublishedArtifact(
                    relative_path=relative_path,
                    publication=self.publication,
                    content_artifact_id=content_artifact["pk"],
                ))

        # Handle everything else
        is_treeinfo = Q(relative_path__in=["treeinfo", ".treeinfo"])
        unpublishable_types = Q(content__pulp_type__in=[
            RepoMetadataFile.get_pulp_type(),
            Modulemd.get_pulp_type(),
            ModulemdDefaults.get_pulp_type(),
            # already dealt with
            Package.get_pulp_type(),
        ])

        contentartifact_qs = (ContentArtifact.objects.filter(
            content__in=content).exclude(unpublishable_types).exclude(
                is_treeinfo))

        for content_artifact in contentartifact_qs.values(
                "pk", "relative_path").iterator():
            published_artifacts.append(
                PublishedArtifact(
                    relative_path=content_artifact["relative_path"],
                    publication=self.publication,
                    content_artifact_id=content_artifact["pk"],
                ))

        PublishedArtifact.objects.bulk_create(published_artifacts,
                                              batch_size=2000)
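For the Packages branch above, each RPM is published under a one-letter directory taken from the lowercased filename. A quick illustration, assuming `PACKAGES_DIRECTORY` is the string "Packages":

    import os

    PACKAGES_DIRECTORY = "Packages"  # assumed value of the constant
    relative_path = "Bear-4.1-1.noarch.rpm"
    print(os.path.join("", PACKAGES_DIRECTORY, relative_path.lower()[0], relative_path))
    # -> Packages/b/Bear-4.1-1.noarch.rpm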
Example No. 5
    async def _parse_packages(self, packages):
        progress_data = {
            "message": "Parsed Packages",
            "code": "sync.parsing.packages",
            "total": len(packages),
        }

        with ProgressReport(**progress_data) as packages_pb:
            while True:
                try:
                    (_, pkg) = packages.popitem(last=False)
                except KeyError:
                    break
                package = Package(**Package.createrepo_to_dict(pkg))
                del pkg
                artifact = Artifact(size=package.size_package)
                checksum_type = getattr(CHECKSUM_TYPES,
                                        package.checksum_type.upper())
                setattr(artifact, checksum_type, package.pkgId)
                url = urlpath_sanitize(self.data.remote_url,
                                       package.location_href)
                filename = os.path.basename(package.location_href)
                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=url,
                    relative_path=filename,
                    remote=self.remote,
                    deferred_download=self.deferred_download,
                )
                dc = DeclarativeContent(content=package, d_artifacts=[da])
                dc.extra_data = defaultdict(list)

                # find if a package relates to a modulemd
                if dc.content.nevra in self.data.nevra_to_module.keys():
                    dc.content.is_modular = True
                    for dc_modulemd in self.data.nevra_to_module[
                            dc.content.nevra]:
                        dc.extra_data["modulemd_relation"].append(dc_modulemd)
                        dc_modulemd.extra_data["package_relation"].append(dc)

                if dc.content.name in self.data.pkgname_to_groups.keys():
                    for dc_group in self.data.pkgname_to_groups[
                            dc.content.name]:
                        dc.extra_data["group_relations"].append(dc_group)
                        dc_group.extra_data["related_packages"].append(dc)

                packages_pb.increment()
                await self.put(dc)
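The `getattr`/`setattr` pair above routes the package checksum onto the matching Artifact field at runtime; a small, self-contained stand-in that shows the same dynamic assignment (the attribute-to-field mapping is an assumption for illustration only):

    from types import SimpleNamespace

    # Assumed shape of the constants container: attribute names map to artifact field names.
    CHECKSUM_TYPES = SimpleNamespace(MD5="md5", SHA1="sha1", SHA256="sha256")

    class FakeArtifact:
        """Stand-in for pulpcore's Artifact, just to show the dynamic assignment."""
        def __init__(self, size):
            self.size = size

    artifact = FakeArtifact(size=2048)
    checksum_field = getattr(CHECKSUM_TYPES, "sha256".upper())  # -> "sha256"
    setattr(artifact, checksum_field, "0123abcd" * 8)           # i.e. artifact.sha256 = pkgId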
Example No. 6
def _prepare_package(artifact, filename):
    """
    Helper function for creating a package.

    Copy the file to a temp directory under
    the user-provided filename and
    parse it into a saveable format.

    Returns: package data as a dict

    Args:
        artifact: initialized and validated artifact to save
        filename: name of the file uploaded by the user
    """
    # Copy file to a temp directory under the user provided filename
    with tempfile.TemporaryDirectory() as td:
        temp_path = os.path.join(td, filename)
        shutil.copy2(artifact.file.path, temp_path)
        cr_pkginfo = createrepo_c.package_from_rpm(temp_path)

        package = Package.createrepo_to_dict(cr_pkginfo)

    package['location_href'] = filename

    # parsing it into a saveable format
    new_pkg = {}
    for key, value in package.items():
        if isinstance(value, list):
            new_pkg[key] = json.dumps(value)
        else:
            new_pkg[key] = value

    return new_pkg
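A minimal sketch of how this helper might be driven from an upload path; the artifact lookup and the serializer name are assumptions for illustration only:

    # Hypothetical caller: parse an uploaded RPM into serializer-ready data.
    artifact = Artifact.objects.get(pk=artifact_pk)  # assumed validated artifact
    new_pkg = _prepare_package(artifact, "bear-4.1-1.noarch.rpm")

    # List-valued metadata (files, changelogs, ...) is already JSON-encoded by the helper.
    serializer = PackageSerializer(data={**new_pkg, "artifact": artifact_pk})  # hypothetical serializer
    serializer.is_valid(raise_exception=True)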
Example No. 7
    def publish_artifacts(self, content):
        """
        Publish artifacts.

        Args:
            content (pulpcore.plugin.models.Content): content set.

        """
        published_artifacts = []
        for content_artifact in ContentArtifact.objects.filter(
                content__in=content.exclude(pulp_type__in=[
                    RepoMetadataFile.get_pulp_type(),
                    Modulemd.get_pulp_type(),
                    ModulemdDefaults.get_pulp_type()
                ]).distinct()).iterator():
            relative_path = content_artifact.relative_path
            if content_artifact.content.pulp_type == Package.get_pulp_type():
                relative_path = os.path.join(PACKAGES_DIRECTORY,
                                             relative_path.lower()[0],
                                             content_artifact.relative_path)
            published_artifacts.append(
                PublishedArtifact(relative_path=relative_path,
                                  publication=self.publication,
                                  content_artifact=content_artifact))

        PublishedArtifact.objects.bulk_create(published_artifacts,
                                              batch_size=2000)
Example No. 8
    def deferred_validate(self, data):
        """
        Validate the rpm package data.

        Args:
            data (dict): Data to be validated

        Returns:
            dict: Data that has been validated

        """
        data = super().deferred_validate(data)
        # export META from rpm and prepare dict as saveable format
        try:
            new_pkg = _prepare_package(data["artifact"], data["relative_path"])
        except OSError:
            raise NotAcceptable(detail='RPM file cannot be parsed for metadata.')

        attrs = {key: new_pkg[key] for key in Package.natural_key_fields()}
        package = Package.objects.filter(**attrs)

        if package.exists():
            keywords = ('name', 'epoch', 'version', 'release', 'arch', 'checksum_type', 'pkgId')
            error_data = ", ".join(
                ["=".join(item) for item in new_pkg.items() if item[0] in keywords]
            )

            raise serializers.ValidationError(
                _(
                    "There is already a package with: {values}."
                ).format(values=error_data)
            )

        data.update(new_pkg)
        return data
Example No. 9
def _prepare_package(artifact, filename):
    """
    Helper function for creating a package.

    Copy the file to a temp directory under
    the user-provided filename.

    Returns: package data as a dict

    Args:
        artifact: initialized and validated artifact to save
        filename: name of the file uploaded by the user
    """
    artifact_file = storage.open(artifact.file.name)
    with tempfile.NamedTemporaryFile("wb", dir=".",
                                     suffix=filename) as temp_file:
        shutil.copyfileobj(artifact_file, temp_file)
        temp_file.flush()
        cr_pkginfo = createrepo_c.package_from_rpm(
            temp_file.name, changelog_limit=settings.KEEP_CHANGELOG_LIMIT)

    package = Package.createrepo_to_dict(cr_pkginfo)

    package["location_href"] = filename
    artifact_file.close()
    return package
Example No. 10
    def _apply_retention_policy(self, new_version):
        """Apply the repository's "retain_package_versions" settings to the new version.

        Remove all non-modular packages that are older than the retention policy. A value of 0
        for the package retention policy represents disabled. A value of 3 would mean that the
        3 most recent versions of each package would be kept while older versions are discarded.

        Args:
            new_version (models.RepositoryVersion): Repository version to filter
        """
        assert not new_version.complete, \
            "Cannot apply retention policy to completed repository versions"

        if self.retain_package_versions > 0:
            # It would be more ideal if, instead of annotating with an age and filtering manually,
            # we could use Django to filter the particular Package content we want to delete.
            # Something like ".filter(F('age') > self.retain_package_versions)" would be better
            # however this is not currently possible with Django. It would be possible with raw
            # SQL but the repository version content membership subquery is currently
            # django-managed and would be difficult to share.
            #
            # Instead we have to do the filtering manually.
            nonmodular_packages = Package.objects.with_age().filter(
                pk__in=new_version.content.filter(pulp_type=Package.get_pulp_type()),
                is_modular=False,  # don't want to filter out modular RPMs
            ).only('pk')

            old_packages = []
            for package in nonmodular_packages:
                if package.age > self.retain_package_versions:
                    old_packages.append(package.pk)

            new_version.remove_content(Content.objects.filter(pk__in=old_packages))
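`Package.objects.with_age()` is assumed to annotate every package with its rank among packages of the same name and arch, newest first, so `age == 1` marks the most recent version. A rough sketch of such an annotation with Django window functions:

    from django.db.models import F, Window
    from django.db.models.functions import RowNumber

    def with_age(queryset):
        # Assumed shape: partition by (name, arch), newest EVR first,
        # so the latest version of each package gets age == 1.
        return queryset.annotate(
            age=Window(
                expression=RowNumber(),
                partition_by=[F("name"), F("arch")],
                order_by=F("evr").desc(),
            )
        )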
Example No. 11
    async def _parse_packages(self, packages):
        progress_data = {
            'message': 'Parsed Packages',
            'code': 'parsing.packages',
            'total': len(packages),
        }

        with ProgressReport(**progress_data) as packages_pb:
            for pkg in packages.values():
                package = Package(**Package.createrepo_to_dict(pkg))
                artifact = Artifact(size=package.size_package)
                checksum_type = getattr(CHECKSUM_TYPES,
                                        package.checksum_type.upper())
                setattr(artifact, checksum_type, package.pkgId)
                url = urljoin(self.data.remote_url, package.location_href)
                filename = os.path.basename(package.location_href)
                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=url,
                    relative_path=filename,
                    remote=self.remote,
                    deferred_download=self.deferred_download)
                dc = DeclarativeContent(content=package, d_artifacts=[da])
                dc.extra_data = defaultdict(list)

                # find if a package relates to a modulemd
                if dc.content.nevra in self.data.nevra_to_module.keys():
                    dc.content.is_modular = True
                    for dc_modulemd in self.data.nevra_to_module[
                            dc.content.nevra]:
                        dc.extra_data['modulemd_relation'].append(dc_modulemd)
                        dc_modulemd.extra_data['package_relation'].append(dc)

                if dc.content.name in self.data.pkgname_to_groups.keys():
                    for dc_group in self.data.pkgname_to_groups[
                            dc.content.name]:
                        dc.extra_data['group_relations'].append(dc_group)
                        dc_group.extra_data['related_packages'].append(dc)

                packages_pb.increment()
                await self.put(dc)
Example No. 12
    def create(self, request):
        """
        Create a new Package from a request.
        """
        try:
            artifact = self.get_resource(request.data['_artifact'], Artifact)
        except KeyError:
            raise serializers.ValidationError(
                detail={'_artifact': _('This field is required')})

        try:
            filename = request.data['filename']
        except KeyError:
            raise serializers.ValidationError(
                detail={'filename': _('This field is required')})

        # Copy file to a temp directory under the user provided filename
        with tempfile.TemporaryDirectory() as td:
            temp_path = os.path.join(td, filename)
            shutil.copy2(artifact.file.path, temp_path)
            cr_pkginfo = createrepo_c.package_from_rpm(temp_path)
            package = Package.createrepo_to_dict(cr_pkginfo)

        package['location_href'] = filename

        # TODO: Clean this up, maybe make a new function for the purpose of parsing it into
        # a saveable format
        new_pkg = {}
        new_pkg['_artifact'] = request.data['_artifact']

        for key, value in package.items():
            if isinstance(value, list):
                new_pkg[key] = json.dumps(value)
            else:
                new_pkg[key] = value

        serializer = self.get_serializer(data=new_pkg)
        serializer.is_valid(raise_exception=True)
        serializer.validated_data.pop('_artifact')
        package = serializer.save()
        if package.pk:
            ContentArtifact.objects.create(artifact=artifact,
                                           content=package,
                                           relative_path=package.filename)

        headers = self.get_success_headers(request.data)
        return Response(serializer.data,
                        status=status.HTTP_201_CREATED,
                        headers=headers)
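The list-flattening loop above turns list-valued metadata into JSON strings so the serializer can store them; a tiny illustration with made-up values:

    import json

    package = {"name": "bear", "files": [["", "/usr/bin/", "bear"]]}
    new_pkg = {k: json.dumps(v) if isinstance(v, list) else v for k, v in package.items()}
    # new_pkg["files"] == '[["", "/usr/bin/", "bear"]]'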
Example No. 13
def _prepare_package(artifact, filename):
    """
    Helper function for creating a package.

    Copy the file to a temp directory under
    the user-provided filename.

    Returns: package data as a dict

    Args:
        artifact: initialized and validated artifact to save
        filename: name of the file uploaded by the user
    """
    artifact_file = storage.open(artifact.file.name)
    with tempfile.NamedTemporaryFile('wb', suffix=filename) as temp_file:
        shutil.copyfileobj(artifact_file, temp_file)
        temp_file.flush()
        cr_pkginfo = createrepo_c.package_from_rpm(temp_file.name)

    package = Package.createrepo_to_dict(cr_pkginfo)

    package['location_href'] = filename
    return package
Example No. 14
def _prepare_package(artifact, filename):
    """
    Helper function for creating a package.

    Copy the file to a temp directory under
    the user-provided filename.

    Returns: package data as a dict

    Args:
        artifact: initialized and validated artifact to save
        filename: name of the file uploaded by the user
    """
    # Copy file to a temp directory under the user provided filename
    with tempfile.TemporaryDirectory() as td:
        temp_path = os.path.join(td, filename)
        shutil.copy2(artifact.file.path, temp_path)
        cr_pkginfo = createrepo_c.package_from_rpm(temp_path)

        package = Package.createrepo_to_dict(cr_pkginfo)

    package['location_href'] = filename
    return package
Example No. 15
def find_children_of_content(content, repository_version):
    """Finds the content referenced directly by other content and returns it all together.

    Finds RPMs referenced by Advisory/Errata content.

    Args:
        content (iterable): Content for which to resolve children
        repository_version (pulpcore.models.RepositoryVersion): Source repo version

    Returns: Queryset of Content objects that are children of the initial set of content
    """
    # Advisories that were selected to be copied
    advisory_ids = content.filter(
        pulp_type=UpdateRecord.get_pulp_type()).only('pk')
    # All packages in the source repository version
    package_ids = repository_version.content.filter(
        pulp_type=Package.get_pulp_type()).only('pk')
    # All modules in the source repository version
    module_ids = repository_version.content.filter(
        pulp_type=Modulemd.get_pulp_type()).only('pk')

    advisories = UpdateRecord.objects.filter(pk__in=advisory_ids)
    packages = Package.objects.filter(pk__in=package_ids)
    modules = Modulemd.objects.filter(pk__in=module_ids)

    children = set()

    for advisory in advisories:
        # Find rpms referenced by Advisories/Errata
        package_nevras = advisory.get_pkglist()
        for nevra in package_nevras:
            (name, epoch, version, release, arch) = nevra
            try:
                package = packages.get(name=name,
                                       epoch=epoch,
                                       version=version,
                                       release=release,
                                       arch=arch)
                children.add(package.pk)
            except Package.DoesNotExist:
                raise
            except MultipleObjectsReturned:
                raise

        module_nsvcas = advisory.get_module_list()
        for nsvca in module_nsvcas:
            (name, stream, version, context, arch) = nsvca
            try:
                module = modules.get(name=name,
                                     stream=stream,
                                     version=version,
                                     context=context,
                                     arch=arch)
                children.add(module.pk)
            except Modulemd.DoesNotExist:
                raise
            except MultipleObjectsReturned:
                raise

    # TODO: Find rpms referenced by PackageGroups,
    # PackageGroups referenced by PackageCategories, etc.

    return Content.objects.filter(pk__in=children)
Example No. 16
def find_children_of_content(content, src_repo_version):
    """Finds the content referenced directly by other content and returns it all together.

    Finds RPMs referenced by Advisory/Errata content.

    Args:
        content (iterable): Content for which to resolve children
        src_repo_version (pulpcore.models.RepositoryVersion): Source repo version

    Returns: Queryset of Content objects that are children of the initial set of content
    """
    # Content that was selected to be copied
    advisory_ids = content.filter(
        pulp_type=UpdateRecord.get_pulp_type()).only('pk')
    packagecategory_ids = content.filter(
        pulp_type=PackageCategory.get_pulp_type()).only('pk')
    packageenvironment_ids = content.filter(
        pulp_type=PackageEnvironment.get_pulp_type()).only('pk')
    packagegroup_ids = content.filter(
        pulp_type=PackageGroup.get_pulp_type()).only('pk')

    # Content in the source repository version
    package_ids = src_repo_version.content.filter(
        pulp_type=Package.get_pulp_type()).only('pk')
    module_ids = src_repo_version.content.filter(
        pulp_type=Modulemd.get_pulp_type()).only('pk')

    advisories = UpdateRecord.objects.filter(pk__in=advisory_ids)
    packages = Package.objects.filter(pk__in=package_ids)
    packagecategories = PackageCategory.objects.filter(
        pk__in=packagecategory_ids)
    packageenvironments = PackageEnvironment.objects.filter(
        pk__in=packageenvironment_ids)
    packagegroups = PackageGroup.objects.filter(pk__in=packagegroup_ids)
    modules = Modulemd.objects.filter(pk__in=module_ids)

    children = set()

    for advisory in advisories:
        # Find rpms referenced by Advisories/Errata
        package_nevras = advisory.get_pkglist()
        for nevra in package_nevras:
            (name, epoch, version, release, arch) = nevra
            try:
                package = packages.get(name=name,
                                       epoch=epoch,
                                       version=version,
                                       release=release,
                                       arch=arch)
                children.add(package.pk)
            except Package.DoesNotExist:
                raise
            except MultipleObjectsReturned:
                raise

        module_nsvcas = advisory.get_module_list()
        for nsvca in module_nsvcas:
            (name, stream, version, context, arch) = nsvca
            try:
                module = modules.get(name=name,
                                     stream=stream,
                                     version=version,
                                     context=context,
                                     arch=arch)
                children.add(module.pk)
            except Modulemd.DoesNotExist:
                raise
            except MultipleObjectsReturned:
                raise

    # PackageCategories & PackageEnvironments resolution must go before PackageGroups
    # TODO: refactor to be more efficient (lower number of queries)
    for packagecategory in packagecategories.iterator():
        for category_package_group in packagecategory.group_ids:
            category_package_groups = PackageGroup.objects.filter(
                name=category_package_group['name'],
                pk__in=src_repo_version.content)
            children.update(
                [pkggroup.pk for pkggroup in category_package_groups])
            packagegroups = packagegroups.union(category_package_groups)

    for packageenvironment in packageenvironments.iterator():
        for env_package_group in packageenvironment.group_ids:
            env_package_groups = PackageGroup.objects.filter(
                name=env_package_group['name'],
                pk__in=src_repo_version.content)
            children.update([envgroup.pk for envgroup in env_package_groups])
            packagegroups = packagegroups.union(env_package_groups)
        for optional_env_package_group in packageenvironment.option_ids:
            opt_env_package_groups = PackageGroup.objects.filter(
                name=optional_env_package_group['name'],
                pk__in=src_repo_version.content)
            children.update(
                [optpkggroup.pk for optpkggroup in opt_env_package_groups])
            packagegroups = packagegroups.union(opt_env_package_groups)

    # Find rpms referenced by PackageGroups
    for packagegroup in packagegroups.iterator():
        group_package_names = [pkg['name'] for pkg in packagegroup.packages]
        for pkg_name in group_package_names:
            packages_by_name = [
                pkg for pkg in Package.objects.with_age().filter(
                    name=pkg_name, pk__in=src_repo_version.content) if pkg.age == 1
            ]
            for pkg in packages_by_name:
                children.add(pkg.pk)

    return Content.objects.filter(pk__in=children)
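The per-NEVRA `packages.get(...)` lookups above issue one query per advisory package; Example No. 20 below collapses them into a single OR'd query, roughly like this:

    from django.db.models import Q

    # Start from an always-false Q so an advisory with no packages still builds a valid query.
    advisory_package_q = Q(pk__in=[])
    for name, epoch, version, release, arch in advisory.get_pkglist():
        advisory_package_q |= Q(name=name, epoch=epoch, version=version,
                                release=release, arch=arch)
    children.update(packages.filter(advisory_package_q).values_list("pk", flat=True))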
Example No. 17
    async def run(self):
        """
        Build `DeclarativeContent` from the repodata.
        """
        remote_url = self.new_url or self.remote.url
        remote_url = remote_url if remote_url[-1] == "/" else f"{remote_url}/"
        optimize_sync = self.optimize

        progress_data = dict(message='Downloading Metadata Files',
                             code='downloading.metadata')
        with ProgressReport(**progress_data) as metadata_pb:
            downloader = self.remote.get_downloader(
                url=urljoin(remote_url, 'repodata/repomd.xml'))
            # TODO: decide how to distinguish between a mirror list and a normal repo
            result = await downloader.run()
            metadata_pb.increment()

            repomd_path = result.path
            repomd = cr.Repomd(repomd_path)

            # Caution: we do not store when the remote was last updated, so these checks must
            # stay in this order: compare the repo version number first, because changes other
            # than a sync could have taken place, leaving the date or repo version different
            # from the last sync.
            if (optimize_sync and self.repository.last_sync_remote
                    and self.remote.pk == self.repository.last_sync_remote.pk
                    and (self.repository.last_sync_repo_version
                         == self.repository.latest_version().number)
                    and (self.remote.pulp_last_updated <=
                         self.repository.latest_version().pulp_created)
                    and is_previous_version(
                        repomd.revision,
                        self.repository.last_sync_revision_number)):
                optimize_data = dict(message='Optimizing Sync',
                                     code='optimizing.sync')
                with ProgressReport(**optimize_data) as optimize_pb:
                    optimize_pb.done = 1
                    optimize_pb.save()
                    return

            self.repository.last_sync_revision_number = repomd.revision

            if self.treeinfo:
                d_artifacts = [
                    DeclarativeArtifact(
                        artifact=Artifact(),
                        url=urljoin(remote_url, self.treeinfo["filename"]),
                        relative_path=".treeinfo",
                        remote=self.remote,
                        deferred_download=False,
                    )
                ]
                for path, checksum in self.treeinfo["download"][
                        "images"].items():
                    artifact = Artifact(**checksum)
                    da = DeclarativeArtifact(
                        artifact=artifact,
                        url=urljoin(remote_url, path),
                        relative_path=path,
                        remote=self.remote,
                        deferred_download=self.deferred_download)
                    d_artifacts.append(da)

                distribution_tree = DistributionTree(
                    **self.treeinfo["distribution_tree"])
                dc = DeclarativeContent(content=distribution_tree,
                                        d_artifacts=d_artifacts)
                dc.extra_data = self.treeinfo
                await self.put(dc)

            package_repodata_urls = {}
            downloaders = []
            modulemd_list = list()
            dc_groups = []
            dc_categories = []
            dc_environments = []
            nevra_to_module = defaultdict(dict)
            pkgname_to_groups = defaultdict(list)
            group_to_categories = defaultdict(list)
            group_to_environments = defaultdict(list)
            optionalgroup_to_environments = defaultdict(list)
            modulemd_results = None
            comps_downloader = None
            main_types = set()
            checksums = {}

            for record in repomd.records:
                checksums[record.type] = record.checksum_type.upper()
                if record.type in PACKAGE_REPODATA:
                    main_types.update([record.type])
                    package_repodata_urls[record.type] = urljoin(
                        remote_url, record.location_href)

                elif record.type in UPDATE_REPODATA:
                    updateinfo_url = urljoin(remote_url, record.location_href)
                    downloader = self.remote.get_downloader(url=updateinfo_url)
                    downloaders.append([downloader.run()])

                elif record.type in COMPS_REPODATA:
                    comps_url = urljoin(remote_url, record.location_href)
                    comps_downloader = self.remote.get_downloader(
                        url=comps_url)

                elif record.type in SKIP_REPODATA:
                    continue

                elif '_zck' in record.type:
                    continue

                elif record.type in MODULAR_REPODATA:
                    modules_url = urljoin(remote_url, record.location_href)
                    modulemd_downloader = self.remote.get_downloader(
                        url=modules_url)
                    modulemd_results = await modulemd_downloader.run()

                elif record.type not in PACKAGE_DB_REPODATA:
                    file_data = {
                        record.checksum_type: record.checksum,
                        "size": record.size
                    }
                    da = DeclarativeArtifact(
                        artifact=Artifact(**file_data),
                        url=urljoin(remote_url, record.location_href),
                        relative_path=record.location_href,
                        remote=self.remote,
                        deferred_download=False)
                    repo_metadata_file = RepoMetadataFile(
                        data_type=record.type,
                        checksum_type=record.checksum_type,
                        checksum=record.checksum,
                    )
                    dc = DeclarativeContent(content=repo_metadata_file,
                                            d_artifacts=[da])
                    await self.put(dc)

            missing_type = set(PACKAGE_REPODATA) - main_types
            if missing_type:
                raise FileNotFoundError(
                    _("XML file(s): {filename} not found").format(
                        filename=", ".join(missing_type)))

            self.repository.original_checksum_types = checksums

            # we have to sync module.yaml first if it exists, to make relations to packages
            if modulemd_results:
                modulemd_index = mmdlib.ModuleIndex.new()
                open_func = gzip.open if modulemd_results.url.endswith(
                    '.gz') else open
                with open_func(modulemd_results.path, 'r') as moduleyaml:
                    content = moduleyaml.read()
                    module_content = content if isinstance(
                        content, str) else content.decode()
                    modulemd_index.update_from_string(module_content, True)

                modulemd_names = modulemd_index.get_module_names() or []
                modulemd_all = parse_modulemd(modulemd_names, modulemd_index)

                # Parsing modules happens all at one time, and from here on no useful work happens.
                # So just report that it finished this stage.
                modulemd_pb_data = {
                    'message': 'Parsed Modulemd',
                    'code': 'parsing.modulemds'
                }
                with ProgressReport(**modulemd_pb_data) as modulemd_pb:
                    modulemd_total = len(modulemd_all)
                    modulemd_pb.total = modulemd_total
                    modulemd_pb.done = modulemd_total

                for modulemd in modulemd_all:
                    artifact = modulemd.pop('artifact')
                    relative_path = '{}{}{}{}{}snippet'.format(
                        modulemd[PULP_MODULE_ATTR.NAME],
                        modulemd[PULP_MODULE_ATTR.STREAM],
                        modulemd[PULP_MODULE_ATTR.VERSION],
                        modulemd[PULP_MODULE_ATTR.CONTEXT],
                        modulemd[PULP_MODULE_ATTR.ARCH])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    modulemd_content = Modulemd(**modulemd)
                    dc = DeclarativeContent(content=modulemd_content,
                                            d_artifacts=[da])
                    dc.extra_data = defaultdict(list)

                    # dc.content.artifacts are Modulemd artifacts
                    for artifact in dc.content.artifacts:
                        nevra_to_module.setdefault(artifact, set()).add(dc)
                    modulemd_list.append(dc)

                # delete list now that we're done with it for memory savings
                del modulemd_all

                modulemd_default_names = parse_defaults(modulemd_index)

                # Parsing module-defaults happens all at one time, and from here on no useful
                # work happens. So just report that it finished this stage.
                modulemd_defaults_pb_data = {
                    'message': 'Parsed Modulemd-defaults',
                    'code': 'parsing.modulemd_defaults'
                }
                with ProgressReport(
                        **modulemd_defaults_pb_data) as modulemd_defaults_pb:
                    modulemd_defaults_total = len(modulemd_default_names)
                    modulemd_defaults_pb.total = modulemd_defaults_total
                    modulemd_defaults_pb.done = modulemd_defaults_total

                for default in modulemd_default_names:
                    artifact = default.pop('artifact')
                    relative_path = '{}{}snippet'.format(
                        default[PULP_MODULEDEFAULTS_ATTR.MODULE],
                        default[PULP_MODULEDEFAULTS_ATTR.STREAM])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    default_content = ModulemdDefaults(**default)
                    dc = DeclarativeContent(content=default_content,
                                            d_artifacts=[da])
                    await self.put(dc)

                # delete list now that we're done with it for memory savings
                del modulemd_default_names

            if comps_downloader:
                comps_result = await comps_downloader.run()

                comps = libcomps.Comps()
                comps.fromxml_f(comps_result.path)

                with ProgressReport(message='Parsed Comps',
                                    code='parsing.comps') as comps_pb:
                    comps_total = (len(comps.groups) + len(comps.categories) +
                                   len(comps.environments))
                    comps_pb.total = comps_total
                    comps_pb.done = comps_total

                if comps.langpacks:
                    langpack_dict = PackageLangpacks.libcomps_to_dict(
                        comps.langpacks)
                    packagelangpack = PackageLangpacks(
                        matches=strdict_to_dict(comps.langpacks),
                        digest=dict_digest(langpack_dict))
                    dc = DeclarativeContent(content=packagelangpack)
                    dc.extra_data = defaultdict(list)
                    await self.put(dc)

                if comps.categories:
                    for category in comps.categories:
                        category_dict = PackageCategory.libcomps_to_dict(
                            category)
                        category_dict['digest'] = dict_digest(category_dict)
                        packagecategory = PackageCategory(**category_dict)
                        dc = DeclarativeContent(content=packagecategory)
                        dc.extra_data = defaultdict(list)

                        if packagecategory.group_ids:
                            for group_id in packagecategory.group_ids:
                                group_to_categories[group_id['name']].append(
                                    dc)
                        dc_categories.append(dc)

                if comps.environments:
                    for environment in comps.environments:
                        environment_dict = PackageEnvironment.libcomps_to_dict(
                            environment)
                        environment_dict['digest'] = dict_digest(
                            environment_dict)
                        packageenvironment = PackageEnvironment(
                            **environment_dict)
                        dc = DeclarativeContent(content=packageenvironment)
                        dc.extra_data = defaultdict(list)

                        if packageenvironment.option_ids:
                            for option_id in packageenvironment.option_ids:
                                optionalgroup_to_environments[
                                    option_id['name']].append(dc)

                        if packageenvironment.group_ids:
                            for group_id in packageenvironment.group_ids:
                                group_to_environments[group_id['name']].append(
                                    dc)

                        dc_environments.append(dc)

                if comps.groups:
                    for group in comps.groups:
                        group_dict = PackageGroup.libcomps_to_dict(group)
                        group_dict['digest'] = dict_digest(group_dict)
                        packagegroup = PackageGroup(**group_dict)
                        dc = DeclarativeContent(content=packagegroup)
                        dc.extra_data = defaultdict(list)

                        if packagegroup.packages:
                            for package in packagegroup.packages:
                                pkgname_to_groups[package['name']].append(dc)

                        if dc.content.id in group_to_categories.keys():
                            for dc_category in group_to_categories[
                                    dc.content.id]:
                                dc.extra_data['category_relations'].append(
                                    dc_category)
                                dc_category.extra_data['packagegroups'].append(
                                    dc)

                        if dc.content.id in group_to_environments.keys():
                            for dc_environment in group_to_environments[
                                    dc.content.id]:
                                dc.extra_data['environment_relations'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'packagegroups'].append(dc)

                        if dc.content.id in optionalgroup_to_environments.keys(
                        ):
                            for dc_environment in optionalgroup_to_environments[
                                    dc.content.id]:
                                dc.extra_data['env_relations_optional'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'optionalgroups'].append(dc)

                        dc_groups.append(dc)

                for dc_category in dc_categories:
                    await self.put(dc_category)

                for dc_environment in dc_environments:
                    await self.put(dc_environment)

            # delete lists now that we're done with them for memory savings
            del dc_environments
            del dc_categories

            # to preserve order, downloaders are created after all repodata urls are identified
            package_repodata_downloaders = []
            for repodata_type in PACKAGE_REPODATA:
                downloader = self.remote.get_downloader(
                    url=package_repodata_urls[repodata_type])
                package_repodata_downloaders.append(downloader.run())

            downloaders.append(package_repodata_downloaders)

            # asyncio.gather is used to preserve the order of results for package repodata
            pending = [
                asyncio.gather(*downloaders_group)
                for downloaders_group in downloaders
            ]

            while pending:
                done, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED)
                for downloader in done:
                    try:
                        results = downloader.result()
                    except ClientResponseError as exc:
                        raise HTTPNotFound(
                            reason=_("File not found: {filename}").format(
                                filename=exc.request_info.url))
                    if results[0].url == package_repodata_urls['primary']:
                        primary_xml_path = results[0].path
                        filelists_xml_path = results[1].path
                        other_xml_path = results[2].path
                        metadata_pb.done += 3
                        metadata_pb.save()

                        packages = await RpmFirstStage.parse_repodata(
                            primary_xml_path, filelists_xml_path,
                            other_xml_path)
                        # skip SRPM if defined
                        if 'srpm' in self.skip_types:
                            packages = {
                                pkgId: pkg
                                for pkgId, pkg in packages.items()
                                if pkg.arch != 'src'
                            }

                        progress_data = {
                            'message': 'Parsed Packages',
                            'code': 'parsing.packages',
                            'total': len(packages),
                        }
                        with ProgressReport(**progress_data) as packages_pb:
                            for pkg in packages.values():
                                package = Package(
                                    **Package.createrepo_to_dict(pkg))
                                artifact = Artifact(size=package.size_package)
                                checksum_type = getattr(
                                    CHECKSUM_TYPES,
                                    package.checksum_type.upper())
                                setattr(artifact, checksum_type, package.pkgId)
                                url = urljoin(remote_url,
                                              package.location_href)
                                filename = os.path.basename(
                                    package.location_href)
                                da = DeclarativeArtifact(
                                    artifact=artifact,
                                    url=url,
                                    relative_path=filename,
                                    remote=self.remote,
                                    deferred_download=self.deferred_download)
                                dc = DeclarativeContent(content=package,
                                                        d_artifacts=[da])
                                dc.extra_data = defaultdict(list)

                                # find if a package relates to a modulemd
                                if dc.content.nevra in nevra_to_module.keys():
                                    dc.content.is_modular = True
                                    for dc_modulemd in nevra_to_module[
                                            dc.content.nevra]:
                                        dc.extra_data[
                                            'modulemd_relation'].append(
                                                dc_modulemd)
                                        dc_modulemd.extra_data[
                                            'package_relation'].append(dc)

                                if dc.content.name in pkgname_to_groups.keys():
                                    for dc_group in pkgname_to_groups[
                                            dc.content.name]:
                                        dc.extra_data[
                                            'group_relations'].append(dc_group)
                                        dc_group.extra_data[
                                            'related_packages'].append(dc)

                                packages_pb.increment()
                                await self.put(dc)

                    elif results[0].url == updateinfo_url:
                        updateinfo_xml_path = results[0].path
                        metadata_pb.increment()

                        updates = await RpmFirstStage.parse_updateinfo(
                            updateinfo_xml_path)

                        progress_data = {
                            'message': 'Parsed Advisories',
                            'code': 'parsing.advisories',
                            'total': len(updates),
                        }
                        with ProgressReport(**progress_data) as advisories_pb:
                            for update in updates:
                                update_record = UpdateRecord(
                                    **UpdateRecord.createrepo_to_dict(update))
                                update_record.digest = hash_update_record(
                                    update)
                                future_relations = {
                                    'collections': defaultdict(list),
                                    'references': []
                                }

                                for collection in update.collections:
                                    coll_dict = UpdateCollection.createrepo_to_dict(
                                        collection)
                                    coll = UpdateCollection(**coll_dict)

                                    for package in collection.packages:
                                        pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                            package)
                                        pkg = UpdateCollectionPackage(
                                            **pkg_dict)
                                        future_relations['collections'][
                                            coll].append(pkg)

                                for reference in update.references:
                                    reference_dict = UpdateReference.createrepo_to_dict(
                                        reference)
                                    ref = UpdateReference(**reference_dict)
                                    future_relations['references'].append(ref)

                                advisories_pb.increment()
                                dc = DeclarativeContent(content=update_record)
                                dc.extra_data = future_relations
                                await self.put(dc)

            # now send modules down the pipeline since all relations have been set up
            for modulemd in modulemd_list:
                await self.put(modulemd)

            for dc_group in dc_groups:
                await self.put(dc_group)
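The trailing-slash normalization at the top of `run()` matters because `urljoin` replaces the last path segment of a base URL that lacks a trailing slash; a quick illustration with a made-up mirror URL:

    from urllib.parse import urljoin

    print(urljoin("https://mirror.example/repo", "repodata/repomd.xml"))
    # -> https://mirror.example/repodata/repomd.xml  (the "repo" segment is dropped)

    print(urljoin("https://mirror.example/repo/", "repodata/repomd.xml"))
    # -> https://mirror.example/repo/repodata/repomd.xml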
Example No. 18
def publish(
    repository_version_pk,
    gpgcheck_options=None,
    metadata_signing_service=None,
    checksum_types=None,
    sqlite_metadata=False,
):
    """
    Create a Publication based on a RepositoryVersion.

    Args:
        repository_version_pk (str): Create a publication from this repository version.
        gpgcheck_options (dict): GPG signature check options.
        metadata_signing_service (pulpcore.app.models.AsciiArmoredDetachedSigningService):
            A reference to an associated signing service.
        checksum_types (dict): Checksum types for metadata and packages.
        sqlite_metadata (bool): Whether to generate metadata files in sqlite format.

    """
    repository_version = RepositoryVersion.objects.get(pk=repository_version_pk)
    repository = repository_version.repository.cast()
    checksum_types = checksum_types or {}

    if metadata_signing_service:
        metadata_signing_service = AsciiArmoredDetachedSigningService.objects.get(
            pk=metadata_signing_service
        )

    checksum_types["original"] = repository.original_checksum_types

    log.info(
        _("Publishing: repository={repo}, version={version}").format(
            repo=repository.name,
            version=repository_version.number,
        )
    )
    with tempfile.TemporaryDirectory("."):
        with RpmPublication.create(repository_version) as publication:
            kwargs = {}
            first_package = repository_version.content.filter(
                pulp_type=Package.get_pulp_type()
            ).first()
            if first_package:
                kwargs["default"] = first_package.cast().checksum_type
            publication.metadata_checksum_type = get_checksum_type(
                "primary", checksum_types, **kwargs
            )
            publication.package_checksum_type = (
                checksum_types.get("package") or publication.metadata_checksum_type
            )

            if gpgcheck_options is not None:
                publication.gpgcheck = gpgcheck_options.get("gpgcheck")
                publication.repo_gpgcheck = gpgcheck_options.get("repo_gpgcheck")

            if sqlite_metadata:
                publication.sqlite_metadata = True

            publication_data = PublicationData(publication)
            publication_data.populate()

            total_repos = 1 + len(publication_data.sub_repos)
            pb_data = dict(
                message="Generating repository metadata",
                code="publish.generating_metadata",
                total=total_repos,
            )
            with ProgressReport(**pb_data) as publish_pb:

                content = publication.repository_version.content

                # Main repo
                generate_repo_metadata(
                    content,
                    publication,
                    checksum_types,
                    publication_data.repomdrecords,
                    metadata_signing_service=metadata_signing_service,
                )
                publish_pb.increment()

                for sub_repo in publication_data.sub_repos:
                    name = sub_repo[0]
                    checksum_types["original"] = getattr(publication_data, f"{name}_checksums")
                    content = getattr(publication_data, f"{name}_content")
                    extra_repomdrecords = getattr(publication_data, f"{name}_repomdrecords")
                    generate_repo_metadata(
                        content,
                        publication,
                        checksum_types,
                        extra_repomdrecords,
                        name,
                        metadata_signing_service=metadata_signing_service,
                    )
                    publish_pb.increment()

            log.info(_("Publication: {publication} created").format(publication=publication.pk))

            return publication
Example No. 19
    def publish_artifacts(self, content, prefix=""):
        """
        Publish artifacts.

        Args:
            content (pulpcore.plugin.models.Content): content set.
            prefix (str): a relative path prefix for the published artifact

        """
        published_artifacts = []

        # Special case for Packages
        contentartifact_qs = ContentArtifact.objects.filter(content__in=content).filter(
            content__pulp_type=Package.get_pulp_type()
        )

        paths = set()
        duplicated_paths = []
        for content_artifact in contentartifact_qs.values("pk", "relative_path").iterator():
            relative_path = content_artifact["relative_path"]
            relative_path = os.path.join(
                prefix, PACKAGES_DIRECTORY, relative_path.lower()[0], relative_path
            )
            #
            # Some Suboptimal Repos have the 'same' artifact living in multiple places.
            # Specifically, the same NEVRA, in more than once place, **with different checksums**
            # (since if all that was different was location_href there would be only one
            # ContentArtifact in the first place).
            #
            # pulp_rpm wants to publish a 'canonical' repository-layout, under which an RPM
            # "name-version-release-arch" appears at "Packages/n/name-version-release-arch.rpm".
            # Because the assumption is that Packages don't "own" their path, only the filename
            # is kept as relative_path.
            #
            # In this case, we have to pick one - which is essentially what the rest of the RPM
            # Ecosystem does when faced with the impossible. This code takes the first-found. We
            # could implement something more complicated, if there are better options
            # (choose by last-created maybe?)
            #
            # Note that this only impacts user-created publications, which produce the "standard"
            # RPM layout of repo/Packages/f/foo.rpm. A publication created by mirror-sync retains
            # whatever layout their "upstream" repo-metadata dictates.
            #
            if relative_path in paths:
                duplicated_paths.append(f'{relative_path}:{content_artifact["pk"]}')
                continue
            else:
                paths.add(relative_path)
            published_artifacts.append(
                PublishedArtifact(
                    relative_path=relative_path,
                    publication=self.publication,
                    content_artifact_id=content_artifact["pk"],
                )
            )
        if duplicated_paths:
            log.warning(
                _("Duplicate paths found at publish : {problems} ").format(
                    problems="; ".join(duplicated_paths)
                )
            )

        # Handle everything else
        is_treeinfo = Q(relative_path__in=["treeinfo", ".treeinfo"])
        unpublishable_types = Q(
            content__pulp_type__in=[
                RepoMetadataFile.get_pulp_type(),
                Modulemd.get_pulp_type(),
                ModulemdDefaults.get_pulp_type(),
                # already dealt with
                Package.get_pulp_type(),
            ]
        )

        contentartifact_qs = (
            ContentArtifact.objects.filter(content__in=content)
            .exclude(unpublishable_types)
            .exclude(is_treeinfo)
        )

        for content_artifact in contentartifact_qs.values("pk", "relative_path").iterator():
            published_artifacts.append(
                PublishedArtifact(
                    relative_path=content_artifact["relative_path"],
                    publication=self.publication,
                    content_artifact_id=content_artifact["pk"],
                )
            )

        PublishedArtifact.objects.bulk_create(published_artifacts, batch_size=2000)
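The comment block inside this example describes a first-found policy for colliding package paths. A minimal standalone sketch of that policy, with plain (pk, relative_path) tuples standing in for the ContentArtifact rows (the sample data is hypothetical):

import os

PACKAGES_DIRECTORY = "Packages"

# (pk, relative_path) pairs standing in for contentartifact_qs.values(...)
rows = [
    ("pk-1", "Foo-1.0-1.x86_64.rpm"),
    ("pk-2", "Foo-1.0-1.x86_64.rpm"),  # same filename, different checksum/pk
    ("pk-3", "bar-2.0-3.noarch.rpm"),
]

paths = set()
published = []
duplicated_paths = []
for pk, rel in rows:
    path = os.path.join(PACKAGES_DIRECTORY, rel.lower()[0], rel)
    if path in paths:
        # later duplicates are skipped and only reported
        duplicated_paths.append(f"{path}:{pk}")
        continue
    paths.add(path)
    published.append((path, pk))

print(published)         # first occurrence of each path wins
print(duplicated_paths)  # what the real code logs as a warning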
Example No. 20
def find_children_of_content(content, src_repo_version):
    """Finds the content referenced directly by other content and returns it all together.

    Finds RPMs referenced by Advisory/Errata content.

    Args:
        content (Queryset): Content for which to resolve children
        src_repo_version (pulpcore.models.RepositoryVersion): Source repo version

    Returns: Queryset of Content objects that are children of the initial set of content
    """
    # Content that were selected to be copied
    advisory_ids = content.filter(
        pulp_type=UpdateRecord.get_pulp_type()).only("pk")
    packagecategory_ids = content.filter(
        pulp_type=PackageCategory.get_pulp_type()).only("pk")
    packageenvironment_ids = content.filter(
        pulp_type=PackageEnvironment.get_pulp_type()).only("pk")
    packagegroup_ids = content.filter(
        pulp_type=PackageGroup.get_pulp_type()).only("pk")

    # Content in the source repository version
    package_ids = src_repo_version.content.filter(
        pulp_type=Package.get_pulp_type()).only("pk")
    module_ids = src_repo_version.content.filter(
        pulp_type=Modulemd.get_pulp_type()).only("pk")

    advisories = UpdateRecord.objects.filter(pk__in=advisory_ids)
    packages = Package.objects.filter(pk__in=package_ids)
    packagecategories = PackageCategory.objects.filter(
        pk__in=packagecategory_ids)
    packageenvironments = PackageEnvironment.objects.filter(
        pk__in=packageenvironment_ids)
    packagegroups = PackageGroup.objects.filter(pk__in=packagegroup_ids)
    modules = Modulemd.objects.filter(pk__in=module_ids)

    children = set()

    for advisory in advisories.iterator():
        # Find rpms referenced by Advisories/Errata
        package_nevras = advisory.get_pkglist()
        advisory_package_q = Q(pk__in=[])
        for nevra in package_nevras:
            (name, epoch, version, release, arch) = nevra
            advisory_package_q |= Q(name=name,
                                    epoch=epoch,
                                    version=version,
                                    release=release,
                                    arch=arch)
        children.update(
            packages.filter(advisory_package_q).values_list("pk", flat=True))

        module_nsvcas = advisory.get_module_list()
        advisory_module_q = Q(pk__in=[])
        for nsvca in module_nsvcas:
            (name, stream, version, context, arch) = nsvca
            advisory_module_q |= Q(name=name,
                                   stream=stream,
                                   version=version,
                                   context=context,
                                   arch=arch)
        children.update(
            modules.filter(advisory_module_q).values_list("pk", flat=True))

    # PackageCategories & PackageEnvironments resolution must go before PackageGroups
    packagegroup_names = set()
    for packagecategory in packagecategories.iterator():
        for group_id in packagecategory.group_ids:
            packagegroup_names.add(group_id["name"])

    for packageenvironment in packageenvironments.iterator():
        for group_id in packageenvironment.group_ids:
            packagegroup_names.add(group_id["name"])
        for group_id in packageenvironment.option_ids:
            packagegroup_names.add(group_id["name"])

    child_package_groups = PackageGroup.objects.filter(
        name__in=packagegroup_names, pk__in=src_repo_version.content)
    children.update([pkggroup.pk for pkggroup in child_package_groups])
    packagegroups = packagegroups.union(child_package_groups)

    # Find rpms referenced by PackageGroups
    packagegroup_package_names = set()
    for packagegroup in packagegroups.iterator():
        packagegroup_package_names |= set(pkg["name"]
                                          for pkg in packagegroup.packages)

    # TODO: do modular/nonmodular need to be taken into account?
    existing_package_names = (Package.objects.filter(
        name__in=packagegroup_package_names,
        pk__in=content,
    ).values_list("name", flat=True).distinct())

    missing_package_names = packagegroup_package_names - set(
        existing_package_names)

    needed_packages = Package.objects.with_age().filter(
        name__in=missing_package_names, pk__in=src_repo_version.content)

    # Pick the latest version of each package available which isn't already present
    # in the content set.
    for pkg in needed_packages.iterator():
        if pkg.age == 1:
            children.add(pkg.pk)

    return Content.objects.filter(pk__in=children)
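The advisory handling above builds one OR-ed Q object per advisory out of its package list, starting from an always-false Q(pk__in=[]) so an empty pkglist matches nothing. A reduced sketch of that pattern (requires Django on the path; the NEVRA tuples are made up and the final filter call is left commented out because it needs a configured database):

from django.db.models import Q

# hypothetical pkglist as returned by advisory.get_pkglist()
package_nevras = [
    ("bash", "0", "5.1.8", "2.el9", "x86_64"),
    ("curl", "0", "7.76.1", "14.el9", "x86_64"),
]

# start from a Q that matches nothing, so an empty pkglist selects no packages
advisory_package_q = Q(pk__in=[])
for name, epoch, version, release, arch in package_nevras:
    advisory_package_q |= Q(
        name=name, epoch=epoch, version=version, release=release, arch=arch
    )

# children = packages.filter(advisory_package_q).values_list("pk", flat=True)
print(advisory_package_q)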
Example No. 21
    async def run(self):
        """
        Build `DeclarativeContent` from the repodata.
        """
        packages_pb = ProgressBar(message='Parsed Packages')
        erratum_pb = ProgressBar(message='Parsed Erratum')

        packages_pb.save()
        erratum_pb.save()

        with ProgressBar(message='Downloading Metadata Files') as metadata_pb:
            downloader = self.remote.get_downloader(
                url=urljoin(self.remote.url, 'repodata/repomd.xml'))
            # TODO: decide how to distinguish between a mirror list and a normal repo
            result = await downloader.run()
            metadata_pb.increment()

            repomd_path = result.path
            repomd = cr.Repomd(repomd_path)
            package_repodata_urls = {}
            downloaders = []

            for record in repomd.records:
                if record.type in PACKAGE_REPODATA:
                    package_repodata_urls[record.type] = urljoin(
                        self.remote.url, record.location_href)
                elif record.type in UPDATE_REPODATA:
                    updateinfo_url = urljoin(self.remote.url,
                                             record.location_href)
                    downloader = self.remote.get_downloader(url=updateinfo_url)
                    downloaders.append([downloader.run()])
                else:
                    log.info(
                        _('Unknown repodata type: {t}. Skipped.').format(
                            t=record.type))
                    # TODO: skip databases, save unknown types to publish them as-is

            # to preserve order, downloaders are created after all repodata urls are identified
            package_repodata_downloaders = []
            for repodata_type in PACKAGE_REPODATA:
                downloader = self.remote.get_downloader(
                    url=package_repodata_urls[repodata_type])
                package_repodata_downloaders.append(downloader.run())

            downloaders.append(package_repodata_downloaders)

            # asyncio.gather is used to preserve the order of results for package repodata
            pending = [
                asyncio.gather(*downloaders_group)
                for downloaders_group in downloaders
            ]

            while pending:
                done, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED)
                for downloader in done:
                    results = downloader.result()
                    if results[0].url == package_repodata_urls['primary']:
                        primary_xml_path = results[0].path
                        filelists_xml_path = results[1].path
                        other_xml_path = results[2].path
                        metadata_pb.done += 3
                        metadata_pb.save()

                        packages = await RpmFirstStage.parse_repodata(
                            primary_xml_path, filelists_xml_path,
                            other_xml_path)
                        packages_pb.total = len(packages)
                        packages_pb.state = 'running'
                        packages_pb.save()

                        for pkg in packages.values():
                            package = Package(
                                **Package.createrepo_to_dict(pkg))
                            artifact = Artifact(size=package.size_package)
                            checksum_type = getattr(
                                CHECKSUM_TYPES, package.checksum_type.upper())
                            setattr(artifact, checksum_type, package.pkgId)
                            url = urljoin(self.remote.url,
                                          package.location_href)
                            filename = os.path.basename(package.location_href)
                            da = DeclarativeArtifact(
                                artifact=artifact,
                                url=url,
                                relative_path=filename,
                                remote=self.remote,
                                deferred_download=self.deferred_download)
                            dc = DeclarativeContent(content=package,
                                                    d_artifacts=[da])
                            packages_pb.increment()
                            await self.put(dc)

                    elif results[0].url == updateinfo_url:
                        updateinfo_xml_path = results[0].path
                        metadata_pb.increment()

                        updates = await RpmFirstStage.parse_updateinfo(
                            updateinfo_xml_path)

                        erratum_pb.total = len(updates)
                        erratum_pb.state = 'running'
                        erratum_pb.save()

                        for update in updates:
                            update_record = UpdateRecord(
                                **UpdateRecord.createrepo_to_dict(update))
                            update_record.digest = RpmFirstStage.hash_update_record(
                                update)
                            future_relations = {
                                'collections': defaultdict(list),
                                'references': []
                            }

                            for collection in update.collections:
                                coll_dict = UpdateCollection.createrepo_to_dict(
                                    collection)
                                coll = UpdateCollection(**coll_dict)

                                for package in collection.packages:
                                    pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                        package)
                                    pkg = UpdateCollectionPackage(**pkg_dict)
                                    future_relations['collections'][
                                        coll].append(pkg)

                            for reference in update.references:
                                reference_dict = UpdateReference.createrepo_to_dict(
                                    reference)
                                ref = UpdateReference(**reference_dict)
                                future_relations['references'].append(ref)

                            erratum_pb.increment()
                            dc = DeclarativeContent(content=update_record)
                            dc.extra_data = future_relations
                            await self.put(dc)

        packages_pb.state = 'completed'
        erratum_pb.state = 'completed'
        packages_pb.save()
        erratum_pb.save()
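The download loop above leans on asyncio.gather to keep the primary/filelists/other results in a fixed order, while asyncio.wait drains whole groups as they finish. A self-contained sketch of that pattern with dummy coroutines instead of downloaders:

import asyncio


async def fake_download(name, delay):
    await asyncio.sleep(delay)
    return name


async def main():
    # one group per repodata kind; gather preserves order inside each group
    groups = [
        [fake_download("updateinfo", 0.01)],
        [
            fake_download("primary", 0.03),
            fake_download("filelists", 0.02),
            fake_download("other", 0.01),
        ],
    ]
    pending = [asyncio.gather(*group) for group in groups]
    while pending:
        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
        for finished in done:
            # results arrive group by group, but stay ordered within a group
            print(finished.result())


asyncio.run(main())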
Example No. 22
    def publish_artifacts(self, content, prefix=""):
        """
        Publish artifacts.

        Args:
            content (pulpcore.plugin.models.Content): content set.
            prefix (str): a relative path prefix for the published artifact

        """
        published_artifacts = []

        # Special case for Packages
        contentartifact_qs = (ContentArtifact.objects.filter(
            content__in=content).filter(
                content__pulp_type=Package.get_pulp_type()).select_related(
                    "content__rpm_package__time_build"))

        rel_path_mapping = defaultdict(list)
        # Some Suboptimal Repos have the 'same' artifact living in multiple places.
        # Specifically, the same NEVRA, in more than one place, **with different checksums**
        # (since if all that was different was location_href there would be only one
        # ContentArtifact in the first place).
        #
        # pulp_rpm wants to publish a 'canonical' repository-layout, under which an RPM
        # "name-version-release-arch" appears at "Packages/n/name-version-release-arch.rpm".
        # Because the assumption is that Packages don't "own" their path, only the filename
        # is kept as relative_path.
        #
        # In this case, we have to pick one - which is essentially what the rest of the RPM
        # Ecosystem does when faced with the impossible. This code takes the one with the
        # most recent build time which is the same heuristic used by Yum/DNF/Zypper.
        #
        # Note that this only impacts user-created publications, which produce the "standard"
        # RPM layout of repo/Packages/f/foo.rpm. A publication created by mirror-sync retains
        # whatever layout its "upstream" repo-metadata dictates.
        fields = ["pk", "relative_path", "content__rpm_package__time_build"]
        for content_artifact in contentartifact_qs.values(*fields).iterator():
            relative_path = content_artifact["relative_path"]
            time_build = content_artifact["content__rpm_package__time_build"]

            relative_path = os.path.join(prefix, PACKAGES_DIRECTORY,
                                         relative_path.lower()[0],
                                         relative_path)
            rel_path_mapping[relative_path].append(
                (content_artifact["pk"], time_build))

        for rel_path, content_artifacts in rel_path_mapping.items():
            # sort the content artifacts by when the package was built
            if len(content_artifacts) > 1:
                content_artifacts.sort(key=lambda p: p[1], reverse=True)
                log.warning(
                    "Duplicate packages found competing for {path}, selected the one with "
                    "the most recent build time, excluding {others} others.".
                    format(path=rel_path, others=len(content_artifacts[1:])))

            # Only add the first one (the one with the highest build time)
            published_artifacts.append(
                PublishedArtifact(
                    relative_path=rel_path,
                    publication=self.publication,
                    content_artifact_id=content_artifacts[0][0],
                ))

        # Handle everything else
        is_treeinfo = Q(relative_path__in=["treeinfo", ".treeinfo"])
        unpublishable_types = Q(content__pulp_type__in=[
            RepoMetadataFile.get_pulp_type(),
            Modulemd.get_pulp_type(),
            ModulemdDefaults.get_pulp_type(),
            # already dealt with
            Package.get_pulp_type(),
        ])

        contentartifact_qs = (ContentArtifact.objects.filter(
            content__in=content).exclude(unpublishable_types).exclude(
                is_treeinfo))

        for content_artifact in contentartifact_qs.values(
                "pk", "relative_path").iterator():
            published_artifacts.append(
                PublishedArtifact(
                    relative_path=content_artifact["relative_path"],
                    publication=self.publication,
                    content_artifact_id=content_artifact["pk"],
                ))

        PublishedArtifact.objects.bulk_create(published_artifacts,
                                              batch_size=2000)
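This later variant resolves the same path collisions by package build time rather than first-found. A standalone sketch of just the selection step, with hypothetical (pk, time_build) pairs in place of queryset rows:

from collections import defaultdict

# relative_path -> [(content_artifact_pk, time_build), ...]
rel_path_mapping = defaultdict(list)
rel_path_mapping["Packages/f/foo-1.0-1.x86_64.rpm"].extend(
    [("pk-old", 1600000000), ("pk-new", 1700000000)]
)
rel_path_mapping["Packages/b/bar-2.0-3.noarch.rpm"].append(("pk-only", 1650000000))

selected = {}
for rel_path, content_artifacts in rel_path_mapping.items():
    if len(content_artifacts) > 1:
        # newest build wins, mirroring the Yum/DNF/Zypper heuristic
        content_artifacts.sort(key=lambda p: p[1], reverse=True)
    selected[rel_path] = content_artifacts[0][0]

print(selected)  # foo resolves to "pk-new", bar keeps its only candidate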
Example No. 23
    async def __call__(self, in_q, out_q):
        """
        Build `DeclarativeContent` from the repodata.

        Args:
            in_q (asyncio.Queue): Unused because the first stage doesn't read from an input queue.
            out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to

        """
        with ProgressBar(message='Downloading and Parsing Metadata') as pb:
            downloader = self.remote.get_downloader(
                urljoin(self.remote.url, 'repodata/repomd.xml'))
            # TODO: decide how to distinguish between a mirror list and a normal repo
            result = await downloader.run()
            pb.increment()

            repomd_path = result.path
            repomd = cr.Repomd(repomd_path)
            package_repodata_urls = {}
            downloaders = []

            for record in repomd.records:
                if record.type in PACKAGE_REPODATA:
                    package_repodata_urls[record.type] = urljoin(
                        self.remote.url, record.location_href)
                elif record.type in UPDATE_REPODATA:
                    updateinfo_url = urljoin(self.remote.url,
                                             record.location_href)
                    downloader = self.remote.get_downloader(updateinfo_url)
                    downloaders.append([downloader.run()])
                else:
                    log.info(
                        _('Unknown repodata type: {t}. Skipped.').format(
                            t=record.type))
                    # TODO: skip databases, save unknown types to publish them as-is

            # to preserve order, downloaders are created after all repodata urls are identified
            package_repodata_downloaders = []
            for repodata_type in PACKAGE_REPODATA:
                downloader = self.remote.get_downloader(
                    package_repodata_urls[repodata_type])
                package_repodata_downloaders.append(downloader.run())

            downloaders.append(package_repodata_downloaders)

            # asyncio.gather is used to preserve the order of results for package repodata
            pending = [
                asyncio.gather(*downloaders_group)
                for downloaders_group in downloaders
            ]

            while pending:
                done, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED)
                for downloader in done:
                    results = downloader.result()
                    if results[0].url == package_repodata_urls['primary']:
                        primary_xml_path = results[0].path
                        filelists_xml_path = results[1].path
                        other_xml_path = results[2].path
                        pb.done += 3
                        pb.save()

                        packages = await RpmFirstStage.parse_repodata(
                            primary_xml_path, filelists_xml_path,
                            other_xml_path)
                        for pkg in packages.values():
                            package = Package(
                                **Package.createrepo_to_dict(pkg))
                            artifact = Artifact(size=package.size_package)
                            checksum_type = getattr(
                                CHECKSUM_TYPES, package.checksum_type.upper())
                            setattr(artifact, checksum_type, package.pkgId)
                            url = urljoin(self.remote.url,
                                          package.location_href)
                            da = DeclarativeArtifact(artifact, url,
                                                     package.location_href,
                                                     self.remote)
                            dc = DeclarativeContent(content=package,
                                                    d_artifacts=[da])
                            await out_q.put(dc)

                    elif results[0].url == updateinfo_url:
                        updateinfo_xml_path = results[0].path
                        pb.increment()

                        updates = await RpmFirstStage.parse_updateinfo(
                            updateinfo_xml_path)
                        for update in updates:
                            update_record = UpdateRecord(
                                **UpdateRecord.createrepo_to_dict(update))
                            update_record.digest = RpmFirstStage.hash_update_record(
                                update)

                            for collection in update.collections:
                                coll_dict = UpdateCollection.createrepo_to_dict(
                                    collection)
                                coll = UpdateCollection(**coll_dict)

                                for package in collection.packages:
                                    pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                        package)
                                    pkg = UpdateCollectionPackage(**pkg_dict)
                                    coll._packages.append(pkg)

                                update_record._collections.append(coll)

                            for reference in update.references:
                                reference_dict = UpdateReference.createrepo_to_dict(
                                    reference)
                                update_record._references.append(
                                    UpdateReference(**reference_dict))

                            dc = DeclarativeContent(content=update_record)
                            await out_q.put(dc)

        await out_q.put(None)
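This example uses the older queue-based stage API: the first stage writes DeclarativeContent to out_q and terminates the stream with None. A minimal sketch of that producer/consumer contract, with strings standing in for content objects:

import asyncio


async def first_stage(out_q):
    # produce items, then signal the end of the stream with None
    for item in ("pkg-a", "pkg-b", "erratum-1"):
        await out_q.put(item)
    await out_q.put(None)


async def next_stage(in_q):
    while True:
        item = await in_q.get()
        if item is None:
            break
        print("received", item)


async def main():
    queue = asyncio.Queue()
    await asyncio.gather(first_stage(queue), next_stage(queue))


asyncio.run(main())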
Example No. 24
    async def run(self):
        """
        Build `DeclarativeContent` from the repodata.
        """
        with ProgressBar(message='Downloading and Parsing Metadata') as pb:
            downloader = self.remote.get_downloader(
                url=urljoin(self.remote.url, 'repodata/repomd.xml')
            )
            # TODO: decide how to distinguish between a mirror list and a normal repo
            result = await downloader.run()
            pb.increment()

            repomd_path = result.path
            repomd = cr.Repomd(repomd_path)
            package_repodata_urls = {}
            downloaders = []

            for record in repomd.records:
                if record.type in PACKAGE_REPODATA:
                    package_repodata_urls[record.type] = urljoin(self.remote.url,
                                                                 record.location_href)
                elif record.type in UPDATE_REPODATA:
                    updateinfo_url = urljoin(self.remote.url, record.location_href)
                    downloader = self.remote.get_downloader(url=updateinfo_url)
                    downloaders.append([downloader.run()])
                else:
                    log.info(_('Unknown repodata type: {t}. Skipped.').format(t=record.type))
                    # TODO: skip databases, save unknown types to publish them as-is

            # to preserve order, downloaders are created after all repodata urls are identified
            package_repodata_downloaders = []
            for repodata_type in PACKAGE_REPODATA:
                downloader = self.remote.get_downloader(url=package_repodata_urls[repodata_type])
                package_repodata_downloaders.append(downloader.run())

            downloaders.append(package_repodata_downloaders)

            # asyncio.gather is used to preserve the order of results for package repodata
            pending = [asyncio.gather(*downloaders_group) for downloaders_group in downloaders]

            while pending:
                done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
                for downloader in done:
                    results = downloader.result()
                    if results[0].url == package_repodata_urls['primary']:
                        primary_xml_path = results[0].path
                        filelists_xml_path = results[1].path
                        other_xml_path = results[2].path
                        pb.done += 3
                        pb.save()

                        packages = await RpmFirstStage.parse_repodata(primary_xml_path,
                                                                      filelists_xml_path,
                                                                      other_xml_path)
                        for pkg in packages.values():
                            package = Package(**Package.createrepo_to_dict(pkg))
                            artifact = Artifact(size=package.size_package)
                            checksum_type = getattr(CHECKSUM_TYPES, package.checksum_type.upper())
                            setattr(artifact, checksum_type, package.pkgId)
                            url = urljoin(self.remote.url, package.location_href)
                            filename = os.path.basename(package.location_href)
                            da = DeclarativeArtifact(
                                artifact=artifact,
                                url=url,
                                relative_path=filename,
                                remote=self.remote,
                                deferred_download=self.deferred_download
                            )
                            dc = DeclarativeContent(content=package, d_artifacts=[da])
                            await self.put(dc)

                    elif results[0].url == updateinfo_url:
                        updateinfo_xml_path = results[0].path
                        pb.increment()

                        updates = await RpmFirstStage.parse_updateinfo(updateinfo_xml_path)
                        for update in updates:
                            update_record = UpdateRecord(**UpdateRecord.createrepo_to_dict(update))
                            update_record.digest = RpmFirstStage.hash_update_record(update)

                            for collection in update.collections:
                                coll_dict = UpdateCollection.createrepo_to_dict(collection)
                                coll = UpdateCollection(**coll_dict)

                                for package in collection.packages:
                                    pkg_dict = UpdateCollectionPackage.createrepo_to_dict(package)
                                    pkg = UpdateCollectionPackage(**pkg_dict)
                                    coll._packages.append(pkg)

                                update_record._collections.append(coll)

                            for reference in update.references:
                                reference_dict = UpdateReference.createrepo_to_dict(reference)
                                update_record._references.append(UpdateReference(**reference_dict))

                            dc = DeclarativeContent(content=update_record)
                            await self.put(dc)
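All of these sync stages copy the repodata checksum onto the Artifact by resolving the digest field name at runtime. A reduced sketch of that getattr/setattr pattern with a plain class instead of the Artifact model (field names and values are illustrative):

class FakeArtifact:
    """Stand-in for pulpcore's Artifact; only the digest fields used here."""

    def __init__(self, size=None):
        self.size = size
        self.sha256 = None
        self.sha512 = None


class CHECKSUM_TYPES:
    SHA256 = "sha256"
    SHA512 = "sha512"


checksum_type = "sha256"  # e.g. package.checksum_type from primary.xml
pkg_id = "0123abcd..."    # e.g. package.pkgId (the package's own checksum)

artifact = FakeArtifact(size=12345)
field_name = getattr(CHECKSUM_TYPES, checksum_type.upper())
setattr(artifact, field_name, pkg_id)  # equivalent to artifact.sha256 = pkg_id

print(artifact.sha256)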
Example No. 25
    async def run(self):
        """
        Build `DeclarativeContent` from the repodata.
        """
        packages_pb = ProgressReport(message='Parsed Packages',
                                     code='parsing.packages')
        errata_pb = ProgressReport(message='Parsed Erratum',
                                   code='parsing.errata')
        modulemd_pb = ProgressReport(message='Parsed Modulemd',
                                     code='parsing.modulemds')
        modulemd_defaults_pb = ProgressReport(
            message='Parsed Modulemd-defaults', code='parsing.modulemddefaults')
        comps_pb = ProgressReport(message='Parsed Comps', code='parsing.comps')

        packages_pb.save()
        errata_pb.save()
        comps_pb.save()

        remote_url = self.new_url or self.remote.url
        remote_url = remote_url if remote_url[-1] == "/" else f"{remote_url}/"

        progress_data = dict(message='Downloading Metadata Files',
                             code='downloading.metadata')
        with ProgressReport(**progress_data) as metadata_pb:
            downloader = self.remote.get_downloader(
                url=urljoin(remote_url, 'repodata/repomd.xml'))
            # TODO: decide how to distinguish between a mirror list and a normal repo
            result = await downloader.run()
            metadata_pb.increment()

            if self.kickstart:
                d_artifacts = []
                for path, checksum in self.kickstart["download"][
                        "images"].items():
                    artifact = Artifact(**checksum)

                    da = DeclarativeArtifact(
                        artifact=artifact,
                        url=urljoin(remote_url, path),
                        relative_path=path,
                        remote=self.remote,
                        deferred_download=self.deferred_download)

                    d_artifacts.append(da)

                distribution_tree = DistributionTree(
                    **self.kickstart["distribution_tree"])
                dc = DeclarativeContent(content=distribution_tree,
                                        d_artifacts=d_artifacts)
                dc.extra_data = self.kickstart
                await self.put(dc)

            repomd_path = result.path
            repomd = cr.Repomd(repomd_path)
            package_repodata_urls = {}
            downloaders = []
            modulemd_list = list()
            dc_groups = []
            dc_categories = []
            dc_environments = []
            nevra_to_module = defaultdict(dict)
            pkgname_to_groups = defaultdict(list)
            group_to_categories = defaultdict(list)
            group_to_environments = defaultdict(list)
            optionalgroup_to_environments = defaultdict(list)
            modulemd_results = None
            comps_downloader = None

            for record in repomd.records:
                if record.type in PACKAGE_REPODATA:
                    package_repodata_urls[record.type] = urljoin(
                        remote_url, record.location_href)
                elif record.type in UPDATE_REPODATA:
                    updateinfo_url = urljoin(remote_url, record.location_href)
                    downloader = self.remote.get_downloader(url=updateinfo_url)
                    downloaders.append([downloader.run()])

                elif record.type in COMPS_REPODATA:
                    comps_url = urljoin(remote_url, record.location_href)
                    comps_downloader = self.remote.get_downloader(
                        url=comps_url)

                elif record.type in SKIP_REPODATA:
                    continue

                elif record.type in MODULAR_REPODATA:
                    modules_url = urljoin(remote_url, record.location_href)
                    modulemd_downloader = self.remote.get_downloader(
                        url=modules_url)
                    modulemd_results = await modulemd_downloader.run()

                elif record.type not in PACKAGE_DB_REPODATA:
                    file_data = {
                        record.checksum_type: record.checksum,
                        "size": record.size
                    }
                    da = DeclarativeArtifact(
                        artifact=Artifact(**file_data),
                        url=urljoin(remote_url, record.location_href),
                        relative_path=record.location_href,
                        remote=self.remote,
                        deferred_download=False)
                    repo_metadata_file = RepoMetadataFile(
                        data_type=record.type,
                        checksum_type=record.checksum_type,
                        checksum=record.checksum,
                    )
                    dc = DeclarativeContent(content=repo_metadata_file,
                                            d_artifacts=[da])
                    await self.put(dc)

            # we have to sync module.yaml first if it exists, to make relations to packages
            if modulemd_results:
                modulemd_index = mmdlib.ModuleIndex.new()
                open_func = gzip.open if modulemd_results.url.endswith(
                    '.gz') else open
                with open_func(modulemd_results.path, 'r') as moduleyaml:
                    modulemd_index.update_from_string(
                        moduleyaml.read().decode(), True)

                modulemd_names = modulemd_index.get_module_names() or []
                modulemd_all = parse_modulemd(modulemd_names, modulemd_index)

                modulemd_pb.total = len(modulemd_all)
                modulemd_pb.state = 'running'
                modulemd_pb.save()

                for modulemd in modulemd_all:
                    artifact = modulemd.pop('artifact')
                    relative_path = '{}{}{}{}{}snippet'.format(
                        modulemd[PULP_MODULE_ATTR.NAME],
                        modulemd[PULP_MODULE_ATTR.STREAM],
                        modulemd[PULP_MODULE_ATTR.VERSION],
                        modulemd[PULP_MODULE_ATTR.CONTEXT],
                        modulemd[PULP_MODULE_ATTR.ARCH])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    modulemd_content = Modulemd(**modulemd)
                    dc = DeclarativeContent(content=modulemd_content,
                                            d_artifacts=[da])
                    dc.extra_data = defaultdict(list)

                    # dc.content.artifacts are Modulemd artifacts
                    for artifact in json.loads(dc.content.artifacts):
                        nevra_to_module.setdefault(artifact, set()).add(dc)
                    modulemd_list.append(dc)

                modulemd_default_names = parse_defaults(modulemd_index)

                modulemd_defaults_pb.total = len(modulemd_default_names)
                modulemd_defaults_pb.state = 'running'
                modulemd_defaults_pb.save()

                for default in modulemd_default_names:
                    artifact = default.pop('artifact')
                    relative_path = '{}{}snippet'.format(
                        default[PULP_MODULEDEFAULTS_ATTR.MODULE],
                        default[PULP_MODULEDEFAULTS_ATTR.STREAM])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    default_content = ModulemdDefaults(**default)
                    modulemd_defaults_pb.increment()
                    dc = DeclarativeContent(content=default_content,
                                            d_artifacts=[da])
                    await self.put(dc)

            if comps_downloader:
                comps_result = await comps_downloader.run()

                comps = libcomps.Comps()
                comps.fromxml_f(comps_result.path)

                comps_pb.total = (len(comps.groups) + len(comps.categories) +
                                  len(comps.environments))
                comps_pb.state = 'running'
                comps_pb.save()

                if comps.langpacks:
                    langpack_dict = PackageLangpacks.libcomps_to_dict(
                        comps.langpacks)
                    packagelangpack = PackageLangpacks(
                        matches=strdict_to_dict(comps.langpacks),
                        digest=dict_digest(langpack_dict))
                    dc = DeclarativeContent(content=packagelangpack)
                    dc.extra_data = defaultdict(list)
                    await self.put(dc)

                if comps.categories:
                    for category in comps.categories:
                        category_dict = PackageCategory.libcomps_to_dict(
                            category)
                        category_dict['digest'] = dict_digest(category_dict)
                        packagecategory = PackageCategory(**category_dict)
                        dc = DeclarativeContent(content=packagecategory)
                        dc.extra_data = defaultdict(list)

                        if packagecategory.group_ids:
                            for group_id in packagecategory.group_ids:
                                group_to_categories[group_id['name']].append(
                                    dc)
                        dc_categories.append(dc)

                if comps.environments:
                    for environment in comps.environments:
                        environment_dict = PackageEnvironment.libcomps_to_dict(
                            environment)
                        environment_dict['digest'] = dict_digest(
                            environment_dict)
                        packageenvironment = PackageEnvironment(
                            **environment_dict)
                        dc = DeclarativeContent(content=packageenvironment)
                        dc.extra_data = defaultdict(list)

                        if packageenvironment.option_ids:
                            for option_id in packageenvironment.option_ids:
                                optionalgroup_to_environments[
                                    option_id['name']].append(dc)

                        if packageenvironment.group_ids:
                            for group_id in packageenvironment.group_ids:
                                group_to_environments[group_id['name']].append(
                                    dc)

                        dc_environments.append(dc)

                if comps.groups:
                    for group in comps.groups:
                        group_dict = PackageGroup.libcomps_to_dict(group)
                        group_dict['digest'] = dict_digest(group_dict)
                        packagegroup = PackageGroup(**group_dict)
                        dc = DeclarativeContent(content=packagegroup)
                        dc.extra_data = defaultdict(list)

                        if packagegroup.packages:
                            for package in packagegroup.packages:
                                pkgname_to_groups[package['name']].append(dc)

                        if dc.content.id in group_to_categories.keys():
                            for dc_category in group_to_categories[
                                    dc.content.id]:
                                dc.extra_data['category_relations'].append(
                                    dc_category)
                                dc_category.extra_data['packagegroups'].append(
                                    dc)

                        if dc.content.id in group_to_environments.keys():
                            for dc_environment in group_to_environments[
                                    dc.content.id]:
                                dc.extra_data['environment_relations'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'packagegroups'].append(dc)

                        if dc.content.id in optionalgroup_to_environments:
                            for dc_environment in optionalgroup_to_environments[
                                    dc.content.id]:
                                dc.extra_data['env_relations_optional'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'optionalgroups'].append(dc)

                        dc_groups.append(dc)

                for dc_category in dc_categories:
                    comps_pb.increment()
                    await self.put(dc_category)

                for dc_environment in dc_environments:
                    comps_pb.increment()
                    await self.put(dc_environment)

            # to preserve order, downloaders are created after all repodata urls are identified
            package_repodata_downloaders = []
            for repodata_type in PACKAGE_REPODATA:
                downloader = self.remote.get_downloader(
                    url=package_repodata_urls[repodata_type])
                package_repodata_downloaders.append(downloader.run())

            downloaders.append(package_repodata_downloaders)

            # asyncio.gather is used to preserve the order of results for package repodata
            pending = [
                asyncio.gather(*downloaders_group)
                for downloaders_group in downloaders
            ]

            while pending:
                done, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED)
                for downloader in done:
                    results = downloader.result()
                    if results[0].url == package_repodata_urls['primary']:
                        primary_xml_path = results[0].path
                        filelists_xml_path = results[1].path
                        other_xml_path = results[2].path
                        metadata_pb.done += 3
                        metadata_pb.save()

                        packages = await RpmFirstStage.parse_repodata(
                            primary_xml_path, filelists_xml_path,
                            other_xml_path)
                        packages_pb.total = len(packages)
                        packages_pb.state = 'running'
                        packages_pb.save()

                        for pkg in packages.values():
                            package = Package(
                                **Package.createrepo_to_dict(pkg))
                            artifact = Artifact(size=package.size_package)
                            checksum_type = getattr(
                                CHECKSUM_TYPES, package.checksum_type.upper())
                            setattr(artifact, checksum_type, package.pkgId)
                            url = urljoin(remote_url, package.location_href)
                            filename = os.path.basename(package.location_href)
                            da = DeclarativeArtifact(
                                artifact=artifact,
                                url=url,
                                relative_path=filename,
                                remote=self.remote,
                                deferred_download=self.deferred_download)
                            dc = DeclarativeContent(content=package,
                                                    d_artifacts=[da])
                            dc.extra_data = defaultdict(list)

                            # find if a package relates to a modulemd
                            if dc.content.nevra in nevra_to_module.keys():
                                dc.content.is_modular = True
                                for dc_modulemd in nevra_to_module[
                                        dc.content.nevra]:
                                    dc.extra_data['modulemd_relation'].append(
                                        dc_modulemd)
                                    dc_modulemd.extra_data[
                                        'package_relation'].append(dc)

                            if dc.content.name in pkgname_to_groups.keys():
                                for dc_group in pkgname_to_groups[
                                        dc.content.name]:
                                    dc.extra_data['group_relations'].append(
                                        dc_group)
                                    dc_group.extra_data[
                                        'related_packages'].append(dc)

                            packages_pb.increment()
                            await self.put(dc)

                    elif results[0].url == updateinfo_url:
                        updateinfo_xml_path = results[0].path
                        metadata_pb.increment()

                        updates = await RpmFirstStage.parse_updateinfo(
                            updateinfo_xml_path)

                        errata_pb.total = len(updates)
                        errata_pb.state = 'running'
                        errata_pb.save()

                        for update in updates:
                            update_record = UpdateRecord(
                                **UpdateRecord.createrepo_to_dict(update))
                            update_record.digest = RpmFirstStage.hash_update_record(
                                update)
                            future_relations = {
                                'collections': defaultdict(list),
                                'references': []
                            }

                            for collection in update.collections:
                                coll_dict = UpdateCollection.createrepo_to_dict(
                                    collection)
                                coll = UpdateCollection(**coll_dict)

                                for package in collection.packages:
                                    pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                        package)
                                    pkg = UpdateCollectionPackage(**pkg_dict)
                                    future_relations['collections'][
                                        coll].append(pkg)

                            for reference in update.references:
                                reference_dict = UpdateReference.createrepo_to_dict(
                                    reference)
                                ref = UpdateReference(**reference_dict)
                                future_relations['references'].append(ref)

                            errata_pb.increment()
                            dc = DeclarativeContent(content=update_record)
                            dc.extra_data = future_relations
                            await self.put(dc)

            # now send modules down the pipeline since all relations have been set up
            for modulemd in modulemd_list:
                modulemd_pb.increment()
                await self.put(modulemd)

            for dc_group in dc_groups:
                comps_pb.increment()
                await self.put(dc_group)

        packages_pb.state = 'completed'
        errata_pb.state = 'completed'
        modulemd_pb.state = 'completed'
        modulemd_defaults_pb.state = 'completed'
        comps_pb.state = 'completed'
        packages_pb.save()
        errata_pb.save()
        modulemd_pb.save()
        modulemd_defaults_pb.save()
        comps_pb.save()
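The largest example above links packages to modulemds by first indexing every NEVRA listed in the modulemd artifacts, then looking packages up in that index as they are parsed. A reduced sketch of the two-way bookkeeping, with a small stand-in class for DeclarativeContent and made-up NEVRAs:

from collections import defaultdict


class FakeDC:
    """Stand-in for DeclarativeContent; only the extra_data mapping is used."""

    def __init__(self, name):
        self.name = name
        self.extra_data = defaultdict(list)


# modulemd side: index every artifact NEVRA the module ships
nevra_to_module = defaultdict(set)
perl_module = FakeDC("perl:5.32 modulemd")
for nevra in ("perl-0:5.32.1-471.module.x86_64",):
    nevra_to_module[nevra].add(perl_module)

# package side: look the package's NEVRA up and record the relation both ways
pkg = FakeDC("perl package")
pkg_nevra = "perl-0:5.32.1-471.module.x86_64"
if pkg_nevra in nevra_to_module:
    for dc_modulemd in nevra_to_module[pkg_nevra]:
        pkg.extra_data["modulemd_relation"].append(dc_modulemd)
        dc_modulemd.extra_data["package_relation"].append(pkg)

print(len(pkg.extra_data["modulemd_relation"]))         # 1
print(len(perl_module.extra_data["package_relation"]))  # 1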