Пример #1
0
    def content_handler(self, path):
        """Serve config.repo and repomd.xml.key."""
        # Anything other than the repo-config file falls through
        # (implicit None) to the default content handling.
        if path != self.repository_config_file_name:
            return

        repository, publication = self.get_repository_and_publication()
        if not publication:
            return

        base_url = "{}/".format(
            urlpath_sanitize(
                settings.CONTENT_ORIGIN, settings.CONTENT_PATH_PREFIX, self.base_path
            )
        )
        config_body = textwrap.dedent(f"""\
            [{self.name}]
            name={self.name}
            enabled=1
            baseurl={base_url}
            gpgcheck={publication.gpgcheck}
            repo_gpgcheck={publication.repo_gpgcheck}
            """)

        # Advertise the signing key only when the repository signs its metadata.
        if repository.metadata_signing_service:
            key_url = urlpath_sanitize(
                base_url,
                "/repodata/repomd.xml.key",
            )
            config_body += f"gpgkey={key_url}\n"

        return Response(body=config_body)
Пример #2
0
    def parse(self):
        """Parse repository metadata.

        Walks the repomd records, normalizing each record's checksum type to
        the matching ``CHECKSUM_TYPES`` constant and routing each record's
        URL to the appropriate attribute on ``self.data``.

        Raises:
            FileNotFoundError: If any mandatory package metadata file
                (PACKAGE_REPODATA) is missing from the repository.
        """
        required_metadata_found = set()

        for record in self.data.repomd.records:
            # Hoisted: the original computed this getattr twice per record.
            checksum_type = getattr(CHECKSUM_TYPES, record.checksum_type.upper())
            self.checksum_types[record.type] = checksum_type
            record.checksum_type = checksum_type

            if record.type in PACKAGE_REPODATA:
                required_metadata_found.add(record.type)
                self.data.package_repodata_urls[record.type] = urlpath_sanitize(
                    self.data.remote_url, record.location_href)
            elif record.type in UPDATE_REPODATA:
                self.data.updateinfo_url = urlpath_sanitize(
                    self.data.remote_url, record.location_href)
            elif record.type in COMPS_REPODATA:
                self.data.comps_url = urlpath_sanitize(
                    self.data.remote_url, record.location_href)
            elif record.type in MODULAR_REPODATA:
                self.data.modules_url = urlpath_sanitize(
                    self.data.remote_url, record.location_href)
            elif record.type in SKIP_REPODATA:
                pass
            else:
                # Unrecognized metadata types are preserved verbatim.
                self._set_repomd_file(record)

        missing_types = set(PACKAGE_REPODATA) - required_metadata_found
        if missing_types:
            raise FileNotFoundError(
                _("XML file(s): {filenames} not found").format(
                    filenames=", ".join(missing_types)))
Пример #3
0
    async def parse_distribution_tree(self):
        """Parse content from the file treeinfo if present."""
        if not self.treeinfo:
            return

        artifacts = [
            DeclarativeArtifact(
                artifact=Artifact(),
                url=urlpath_sanitize(self.data.remote_url, self.treeinfo["filename"]),
                relative_path=".treeinfo",
                remote=self.remote,
                deferred_download=False,
            )
        ]
        # One artifact per installer/boot image listed in the treeinfo.
        for image_path, image_checksum in self.treeinfo["download"]["images"].items():
            artifacts.append(
                DeclarativeArtifact(
                    artifact=Artifact(**image_checksum),
                    url=urlpath_sanitize(self.data.remote_url, image_path),
                    relative_path=image_path,
                    remote=self.remote,
                    deferred_download=self.deferred_download,
                )
            )

        tree_dc = DeclarativeContent(
            content=DistributionTree(**self.treeinfo["distribution_tree"]),
            d_artifacts=artifacts,
        )
        tree_dc.extra_data = self.treeinfo
        await self.put(tree_dc)
Пример #4
0
    def content_handler(self, path):
        """Serve config.repo and public.key.

        Args:
            path (str): Requested path relative to this distribution.

        Returns:
            Response or None: The rendered config.repo body when *path*
            matches the repository config file name, otherwise None.
        """
        if path == self.repository_config_file_name:
            base_url = f"{settings.CONTENT_ORIGIN}{settings.CONTENT_PATH_PREFIX}{self.base_path}/"
            publication = self.publication.cast()
            val = textwrap.dedent(
                f"""\
                [{self.name}]
                enabled=1
                baseurl={base_url}
                gpgcheck={publication.gpgcheck}
                repo_gpgcheck={publication.repo_gpgcheck}
                """
            )

            repository_pk = self.publication.repository.pk
            repository = RpmRepository.objects.get(pk=repository_pk)
            signing_service = repository.metadata_signing_service
            if signing_service:
                # BUG FIX: the previous urljoin() call replaced the last
                # segment of CONTENT_PATH_PREFIX with base_path whenever the
                # base URL lacked a trailing slash (urljoin resolves a
                # relative reference against the base's parent). Join all
                # pieces explicitly instead.
                gpgkey_path = urlpath_sanitize(
                    settings.CONTENT_ORIGIN,
                    settings.CONTENT_PATH_PREFIX,
                    self.base_path,
                )
                gpgkey_path += "/repodata/public.key"

                val += f"gpgkey={gpgkey_path}\n"

            return Response(body=val)
Пример #5
0
def get_treeinfo_data(remote, remote_url):
    """
    Get Treeinfo data from remote.

    """
    serialized = {}
    # Both spellings of the treeinfo file exist in the wild; first hit wins.
    for candidate in (".treeinfo", "treeinfo"):
        downloader = remote.get_downloader(
            url=urlpath_sanitize(remote_url, candidate),
            silence_errors_for_response_status_codes={403, 404},
        )

        try:
            result = downloader.fetch()
        except FileNotFoundError:
            continue

        tree_info = PulpTreeInfo()
        tree_info.load(f=result.path)
        serialized = TreeinfoData(tree_info.parsed_sections()).to_dict(
            hash=result.artifact_attributes["sha256"], filename=candidate
        )
        break

    return serialized
Пример #6
0
    async def _run(self, extra_data=None):
        """
        Download, validate, and compute digests on the `url`. This is a coroutine.

        This method provides the same return object type and documented in
        :meth:`~pulpcore.plugin.download.BaseDownloader._run`.
        """
        # The SLES auth token is appended as a bare query string; sanitize
        # the URL path first so the token lands on a clean URL.
        if self.sles_auth_token:
            auth_param = f"?{self.sles_auth_token}"
            url = urlpath_sanitize(self.url) + auth_param
        else:
            url = self.url

        async with self.session.get(url,
                                    proxy=self.proxy,
                                    proxy_auth=self.proxy_auth,
                                    auth=self.auth) as response:
            self.raise_for_status(response)
            # Consumes the body; produces the downloader's result object.
            to_return = await self._handle_response(response)
            await response.release()
            # Saved so callers can inspect the headers after the download.
            self.response_headers = response.headers

        if self._close_session_on_finalize:
            # NOTE(review): aiohttp's ClientSession.close() is a coroutine;
            # calling it without await only creates the coroutine object and
            # never runs it -- presumably this should be awaited. Confirm
            # against the aiohttp version in use.
            self.session.close()
        return to_return
Пример #7
0
    async def run(self):
        """Build `DeclarativeContent` from the repodata."""
        progress_data = dict(message="Downloading Metadata Files",
                             code="sync.downloading.metadata")
        with ProgressReport(**progress_data) as metadata_pb:
            # Shared via self.data so the parse_* stages can report progress.
            self.data.metadata_pb = metadata_pb

            # repomd.xml is the index that lists every other metadata file.
            downloader = self.remote.get_downloader(url=urlpath_sanitize(
                self.data.remote_url, "repodata/repomd.xml"))
            result = await downloader.run()
            metadata_pb.increment()

            repomd_path = result.path
            self.data.repomd = cr.Repomd(repomd_path)

            # Recorded so a later sync can detect an unchanged repo and skip.
            self.repository.last_sync_revision_number = self.data.repomd.revision
            self.repository.last_sync_repomd_checksum = get_sha256(repomd_path)

            # Stages share state via self.data; keep this order intact
            # (e.g. module/group maps must exist before packages are parsed).
            await self.parse_distribution_tree()
            await self.parse_repository_metadata()
            await self.parse_modules_metadata()
            await self.parse_packages_components()
            await self.parse_content()

            # now send modules down the pipeline since all relations have been set up
            for modulemd in self.data.modulemd_list:
                await self.put(modulemd)

            for dc_group in self.data.dc_groups:
                await self.put(dc_group)
Пример #8
0
    async def _parse_packages(self, packages):
        """Emit DeclarativeContent for each package, wiring module/group links.

        Args:
            packages (OrderedDict): Mapping of nevra -> createrepo_c package
                records; consumed destructively (popped front-first) so parsed
                entries can be freed as we go.
        """
        progress_data = {
            "message": "Parsed Packages",
            "code": "sync.parsing.packages",
            "total": len(packages),
        }

        with ProgressReport(**progress_data) as packages_pb:
            while True:
                try:
                    # Named binding instead of "_", which shadows the gettext
                    # alias used elsewhere in this module.
                    (nevra, pkg) = packages.popitem(last=False)
                except KeyError:
                    break
                package = Package(**Package.createrepo_to_dict(pkg))
                # Release the createrepo_c object promptly to bound memory.
                del pkg
                artifact = Artifact(size=package.size_package)
                checksum_type = getattr(CHECKSUM_TYPES,
                                        package.checksum_type.upper())
                setattr(artifact, checksum_type, package.pkgId)
                url = urlpath_sanitize(self.data.remote_url,
                                       package.location_href)
                filename = os.path.basename(package.location_href)
                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=url,
                    relative_path=filename,
                    remote=self.remote,
                    deferred_download=self.deferred_download,
                )
                dc = DeclarativeContent(content=package, d_artifacts=[da])
                dc.extra_data = defaultdict(list)

                # find if a package relates to a modulemd
                # (membership test directly on the dict; ".keys()" was redundant)
                if dc.content.nevra in self.data.nevra_to_module:
                    dc.content.is_modular = True
                    for dc_modulemd in self.data.nevra_to_module[
                            dc.content.nevra]:
                        dc.extra_data["modulemd_relation"].append(dc_modulemd)
                        dc_modulemd.extra_data["package_relation"].append(dc)

                if dc.content.name in self.data.pkgname_to_groups:
                    for dc_group in self.data.pkgname_to_groups[
                            dc.content.name]:
                        dc.extra_data["group_relations"].append(dc_group)
                        dc_group.extra_data["related_packages"].append(dc)

                packages_pb.increment()
                await self.put(dc)
Пример #9
0
    def __init__(
        self,
        *args,
        silence_errors_for_response_status_codes=None,
        sles_auth_token=None,
        urlencode=True,
        **kwargs,
    ):
        """
        Initialize the downloader.

        Args:
            silence_errors_for_response_status_codes (set): HTTP status codes
                to treat as non-fatal; defaults to an empty set.
            sles_auth_token (str): SLES auth token; when set it is appended
                to the URL as a bare query string.
            urlencode (bool): Whether to percent-encode the URL's path
                component (see the workaround comments below).
        """
        self.sles_auth_token = sles_auth_token

        if silence_errors_for_response_status_codes is None:
            silence_errors_for_response_status_codes = set()
        self.silence_errors_for_response_status_codes = silence_errors_for_response_status_codes

        # super().__init__ is expected to set self.url, which we then rewrite.
        super().__init__(*args, **kwargs)

        new_url = self.url
        if urlencode:
            # Some upstream-repos (eg, Amazon) require url-encoded paths for things like "libc++"
            # Let's make them happy.
            # We can't urlencode the whole url, because BasicAuth is still A Thing and we would
            #   break username/passwords in the url.
            # So we need to urlencode **only the path** and **nothing else** .
            # We can't use _replace() and urlunparse(), because urlunparse() "helpfully" undoes
            #   the urlencode we just did in the path.
            # We can't use urljoin(), because urljoin() "helpfully" treats a number of schemes
            #  (like, say, uln:) as "can't take relative paths", and throws away everything
            #  **except** the path-portion
            # So, we have a pretty ugly workaround.
            parsed = urlparse(self.url)
            # two pieces of the URL: pre- and post-path
            # NOTE(review): str.split(parsed.path) raises ValueError when the
            # path is empty (e.g. "http://host") and splits at the wrong spot
            # if the path text also occurs in the netloc or query -- confirm
            # callers always pass URLs with a unique, non-empty path.
            (before_path, after_path) = self.url.split(parsed.path)
            new_path = quote(unquote(parsed.path), safe=":/")  # fix the path
            new_url = "{}{}{}".format(before_path, new_path,
                                      after_path)  # rebuild
        if self.sles_auth_token:
            auth_param = f"?{self.sles_auth_token}"
            self.url = urlpath_sanitize(new_url) + auth_param
        else:
            self.url = new_url
Пример #10
0
 def _set_repomd_file(self, record):
     """Queue a repomd record as a RepoMetadataFile DeclarativeContent."""
     # Skip zchunk variants and the package-database files.
     if "_zck" in record.type or record.type in PACKAGE_DB_REPODATA:
         return

     declarative_artifact = DeclarativeArtifact(
         artifact=Artifact(**{
             record.checksum_type: record.checksum,
             "size": record.size,
         }),
         url=urlpath_sanitize(self.data.remote_url,
                              record.location_href),
         relative_path=record.location_href,
         remote=self.remote,
         deferred_download=False,
     )
     metadata_file = RepoMetadataFile(
         data_type=record.type,
         checksum_type=record.checksum_type,
         checksum=record.checksum,
         relative_path=record.location_href,
     )
     self.repomd_dcs.append(
         DeclarativeContent(content=metadata_file,
                            d_artifacts=[declarative_artifact])
     )
Пример #11
0
def get_repomd_file(remote, url):
    """
    Check if repodata exists.

    Args:
        remote(RpmRemote or UlnRemote): An RpmRemote or UlnRemote to download with.
        url(str): A remote repository URL

    Returns:
        pulpcore.plugin.download.DownloadResult: downloaded repomd.xml, or
        None when the remote has no repodata (HTTP 404 / missing file).

    Raises:
        ClientResponseError: On any HTTP error other than 404.

    """
    downloader = remote.get_downloader(
        url=urlpath_sanitize(url, "repodata/repomd.xml"))

    try:
        return downloader.fetch()
    except ClientResponseError as exc:
        # BUG FIX: previously any non-404 HTTP error was silently swallowed
        # here, and control fell through to "return result" with `result`
        # unbound, raising a misleading NameError. Re-raise instead.
        if exc.status == 404:
            return None
        raise
    except FileNotFoundError:
        return None
Пример #12
0
    def test_urlpath_sanitize(self):
        """Test urljoin-replacement."""
        # Any number of args collapses into one single-slash-separated
        # string with no leading or trailing slashes.
        cases = [
            # "a" variants: a /a a/ /a/
            ("a", ("a",)),
            ("a", ("/a",)),
            ("a", ("a/",)),
            ("a", ("/a/",)),
            # "a/b" variants: a b : a/ b : /a b : a b/ : a /b : a /b/ : a/ /b
            ("a/b", ("a", "b")),
            ("a/b", ("a/", "b")),
            ("a/b", ("/a", "b")),
            ("a/b", ("a", "b/")),
            ("a/b", ("a", "/b")),
            ("a/b", ("a", "/b/")),
            ("a/b", ("a/", "/b")),
            ("a/b", ("a/", "", "/b")),
            ("a/b", ("a/", "/", "/b")),
            # "a/b/c" variants: a b c : a /b/ /c : /a/ /b/ /c/
            ("a/b/c", ("a", "b", "c")),
            ("a/b/c", ("a", "/b/", "/c")),
            ("a/b/c", ("/a/", "/b/", "/c/")),
        ]
        for expected, pieces in cases:
            self.assertEqual(expected, urlpath_sanitize(*pieces))
Пример #13
0
def synchronize(remote_pk, repository_pk, mirror, skip_types, optimize):
    """
    Sync content from the remote repository.

    Create a new version of the repository that is synchronized with the remote.

    Args:
        remote_pk (str): The remote PK.
        repository_pk (str): The repository PK.
        mirror (bool): Mirror mode.
        skip_types (list): List of content to skip.
        optimize(bool): Optimize mode.

    Raises:
        ValueError: If the remote does not specify a url to sync.

    """
    # The remote may be RPM- or ULN-flavored; try RPM first.
    try:
        remote = RpmRemote.objects.get(pk=remote_pk)
    except ObjectDoesNotExist:
        remote = UlnRemote.objects.get(pk=remote_pk)
    repository = RpmRepository.objects.get(pk=repository_pk)

    if not remote.url:
        raise ValueError(
            _("A remote must have a url specified to synchronize."))

    log.info(
        _("Synchronizing: repository={r} remote={p}").format(r=repository.name,
                                                             p=remote.name))

    deferred_download = remote.policy != Remote.IMMEDIATE  # Interpret download policy

    # NOTE(review): these TemporaryDirectory(".") contexts pass "." as the
    # *suffix* argument and never use the created directory -- presumably
    # the called helpers rely on the process working directory instead.
    # Confirm the intent; as written the temp dirs look vestigial.
    with tempfile.TemporaryDirectory("."):
        remote_url = fetch_remote_url(remote)

    # Skip the whole sync if nothing changed upstream since the last run.
    if optimize and is_optimized_sync(repository, remote, remote_url):
        return

    with tempfile.TemporaryDirectory("."):
        treeinfo = get_treeinfo_data(remote, remote_url)

    if treeinfo:
        # A distribution tree carries addon/variant repos; each one is synced
        # into its own hidden sub-repository keyed by the treeinfo hash.
        treeinfo["repositories"] = {}
        for repodata in set(treeinfo["download"]["repodatas"]):
            if repodata == DIST_TREE_MAIN_REPO_PATH:
                # The main repo is handled by the top-level sync below.
                treeinfo["repositories"].update({repodata: None})
                continue
            name = f"{repodata}-{treeinfo['hash']}"
            sub_repo, created = RpmRepository.objects.get_or_create(
                name=name, sub_repo=True)
            if created:
                sub_repo.save()
            directory = treeinfo["repo_map"][repodata]
            treeinfo["repositories"].update({directory: str(sub_repo.pk)})
            path = f"{repodata}/"
            new_url = urlpath_sanitize(remote_url, path)
            with tempfile.TemporaryDirectory("."):
                repodata_exists = get_repomd_file(remote, new_url)
            if repodata_exists:
                # Sub-repos honor the same optimize short-circuit as the parent.
                if optimize and is_optimized_sync(sub_repo, remote, new_url):
                    continue
                stage = RpmFirstStage(
                    remote,
                    sub_repo,
                    deferred_download,
                    skip_types=skip_types,
                    new_url=new_url,
                )
                dv = RpmDeclarativeVersion(first_stage=stage,
                                           repository=sub_repo)
                dv.create()
                # Record sync provenance so is_optimized_sync can compare next time.
                sub_repo.last_sync_remote = remote
                sub_repo.last_sync_repo_version = sub_repo.latest_version(
                ).number
                sub_repo.save()

    # Sync the main repository (with treeinfo metadata, if any was found).
    first_stage = RpmFirstStage(
        remote,
        repository,
        deferred_download,
        skip_types=skip_types,
        treeinfo=treeinfo,
        new_url=remote_url,
    )
    dv = RpmDeclarativeVersion(first_stage=first_stage,
                               repository=repository,
                               mirror=mirror)
    version = dv.create()
    # dv.create() may yield no version (e.g. no content change); only then
    # is the sync provenance left untouched.
    if version:
        repository.last_sync_remote = remote
        repository.last_sync_repo_version = version.number
        repository.save()
    return version