Example #1
    def deferred_sync(self, delta):
        """
        Synchronize the repository with the remote repository without downloading artifacts.

        Args:
            delta (namedtuple): A namedtuple with two sets of unit keys: units to be
                added to the repository and units to be removed from it. Only the
                ``additions`` field is used by this method.
        """
        description = _("Adding file content to the repository without downloading artifacts.")
        progress_bar = ProgressBar(message=description, total=len(delta.additions))

        with progress_bar:
            for remote_artifact in self.next_remote_artifact(delta.additions):
                content = self.content_dict.pop(remote_artifact.url)
                self._create_and_associate_content(content, {remote_artifact: None})
                progress_bar.increment()
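
A minimal sketch of the ``delta`` argument this method expects. The namedtuple name ``SyncDelta``, the field name ``removals``, and the example unit key are assumptions for illustration; only the ``additions`` field is confirmed by the code above.

from collections import namedtuple

# field names follow the docstring: units to add, units to remove
SyncDelta = namedtuple('SyncDelta', ['additions', 'removals'])

delta = SyncDelta(additions={('file', 'example.iso')}, removals=set())
# synchronizer.deferred_sync(delta)  # hypothetical caller object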
Example #2
def fetch_roles(remote):
    """
    Fetch the roles in a remote repository.

    Args:
        remote (AnsibleRemote): A remote.

    Returns:
        list: a list of dicts that represent roles
    """

    def role_page_url(remote, page=1):
        parsed = urlparse(remote.url)
        new_query = parse_qs(parsed.query)
        new_query['page'] = page
        return parsed.scheme + '://' + parsed.netloc + parsed.path + '?' + urlencode(
            new_query, doseq=True)

    def parse_metadata(path):
        # parse one JSON page of the Galaxy roles API
        with open(path) as json_file:
            metadata = json.load(json_file)
        return metadata['num_pages'], parse_roles(metadata)

    downloader = remote.get_downloader(role_page_url(remote))
    downloader.fetch()

    page_count, roles = parse_metadata(downloader.path)

    progress_bar = ProgressBar(message='Parsing Pages from Galaxy Roles API',
                               total=page_count,
                               done=1,
                               state='running')
    progress_bar.save()

    def downloader_coroutines():
        for page in range(2, page_count + 1):
            downloader = remote.get_downloader(role_page_url(remote, page))
            yield downloader.run()

    loop = asyncio.get_event_loop()
    downloaders = downloader_coroutines()

    # seed up to 20 concurrent page downloads; the loop below refills the
    # pool as each one completes
    not_done = set()
    with suppress(StopIteration):
        for _ in range(20):
            not_done.add(next(downloaders))

    while not_done:
        done, not_done = loop.run_until_complete(
            asyncio.wait(not_done, return_when=FIRST_COMPLETED))
        for item in done:
            download_result = item.result()
            new_page_count, new_roles = parse_metadata(download_result.path)
            roles.extend(new_roles)
            progress_bar.increment()
            with suppress(StopIteration):
                not_done.add(next(downloaders))

    progress_bar.state = 'completed'
    progress_bar.save()

    return roles
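
The heart of fetch_roles is a bounded-concurrency loop: seed up to 20 page downloads, then start one new download for each one that finishes. A self-contained sketch of that pattern, assuming nothing from the Pulp codebase beyond plain asyncio:

import asyncio
from contextlib import suppress

def run_bounded(coroutine_gen, limit=20):
    """Drain ``coroutine_gen`` with at most ``limit`` coroutines in flight."""
    loop = asyncio.get_event_loop()
    not_done = set()
    # seed the initial batch of tasks
    with suppress(StopIteration):
        for _ in range(limit):
            not_done.add(loop.create_task(next(coroutine_gen)))
    results = []
    while not_done:
        done, not_done = loop.run_until_complete(
            asyncio.wait(not_done, return_when=asyncio.FIRST_COMPLETED))
        for task in done:
            results.append(task.result())
            # top the pool back up, one new task per completed one
            with suppress(StopIteration):
                not_done.add(loop.create_task(next(coroutine_gen)))
    return results

# e.g. results = run_bounded(downloader_coroutines(), limit=20)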
Example #3
def sync(remote_pk, repository_pk):
    """
    Sync Collections with ``remote_pk``, and save a new RepositoryVersion for ``repository_pk``.

    Args:
        remote_pk (str): The remote PK.
        repository_pk (str): The repository PK.

    Raises:
        ValueError: If the remote does not specify a URL to sync or a ``whitelist`` of Collections
            to sync.

    """
    remote = CollectionRemote.objects.get(pk=remote_pk)
    repository = Repository.objects.get(pk=repository_pk)

    if not remote.url:
        raise ValueError(
            _("A CollectionRemote must have a 'url' specified to synchronize.")
        )

    if not remote.whitelist:
        raise ValueError(
            _("A CollectionRemote must have a 'whitelist' specified to synchronize.")
        )

    # the whitelist is a whitespace-separated list of collection specs
    repository_spec_strings = remote.whitelist.split()

    def nowhere(*args, **kwargs):
        """Discard mazer's display output."""
        pass

    collections_pks = []
    download_pb = ProgressBar(message='Downloading Collections',
                              total=len(repository_spec_strings))
    import_pb = ProgressBar(message='Importing Collections',
                            total=len(repository_spec_strings))

    with RepositoryVersion.create(repository) as new_version:
        with tempfile.TemporaryDirectory() as temp_ansible_path:
            with download_pb:
                # workaround: mazer logs errors without this dir https://pulp.plan.io/issues/4999
                os.mkdir(os.path.join(temp_ansible_path,
                                      'ansible_collections'))

                galaxy_context = GalaxyContext(
                    collections_path=temp_ansible_path,
                    server={
                        'url': remote.url,
                        'ignore_certs': False,
                    },
                )

                install_repository_specs_loop(
                    display_callback=nowhere,
                    galaxy_context=galaxy_context,
                    repository_spec_strings=repository_spec_strings,
                )

                download_pb.done = len(repository_spec_strings)

            with import_pb:
                content_walk_generator = os.walk(temp_ansible_path)
                for dirpath, dirnames, filenames in content_walk_generator:
                    if 'MANIFEST.json' in filenames:
                        manifest_path = os.path.join(dirpath, 'MANIFEST.json')
                        with open(manifest_path) as manifest_file:
                            manifest_data = json.load(manifest_file)
                        info = manifest_data['collection_info']
                        filename = '{namespace}-{name}-{version}'.format(
                            namespace=info['namespace'],
                            name=info['name'],
                            version=info['version'],
                        )
                        tarfile_path = os.path.join(temp_ansible_path,
                                                    filename + '.tar.gz')
                        with tarfile.open(name=tarfile_path,
                                          mode='w|gz') as newtar:
                            newtar.add(dirpath, arcname=filename)

                        with transaction.atomic():
                            collection, created = Collection.objects.get_or_create(
                                namespace=info['namespace'],
                                name=info['name'],
                                version=info['version'])

                            if created:
                                artifact = Artifact.init_and_validate(
                                    newtar.name)
                                artifact.save()

                                ContentArtifact.objects.create(
                                    artifact=artifact,
                                    content=collection,
                                    relative_path=collection.relative_path,
                                )

                            collections_pks.append(collection.pk)
                        import_pb.increment()

        collections = Collection.objects.filter(pk__in=collections_pks)
        new_version.add_content(collections)
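
For reference, the import loop above relies on only a few keys of each MANIFEST.json. A minimal sketch of that shape with hypothetical values (real Galaxy collection manifests carry additional fields):

manifest_data = {
    'collection_info': {
        'namespace': 'my_namespace',
        'name': 'my_collection',
        'version': '1.0.0',
    },
}
info = manifest_data['collection_info']
filename = '{namespace}-{name}-{version}'.format(**info)
# -> 'my_namespace-my_collection-1.0.0', used both as the tar arcname and,
#    with '.tar.gz' appended, as the tarball written next to the directory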
Example #4
    async def run(self):
        """
        DockerFirstStage.
        """
        future_manifests = []
        tag_list = []
        to_download = []
        man_dcs = {}
        total_blobs = []

        with ProgressBar(message='Downloading tag list', total=1) as pb:
            repo_name = self.remote.namespaced_upstream_name
            relative_url = '/v2/{name}/tags/list'.format(name=repo_name)
            tag_list_url = urljoin(self.remote.url, relative_url)
            list_downloader = self.remote.get_downloader(url=tag_list_url)
            await list_downloader.run(extra_data={'repo_name': repo_name})

            with open(list_downloader.path) as tags_raw:
                tags_dict = json.loads(tags_raw.read())
                tag_list = tags_dict['tags']

            # check for the presence of the pagination link header
            link = list_downloader.response_headers.get('Link')
            await self.handle_pagination(link, repo_name, tag_list)
            whitelist_tags = self.remote.whitelist_tags
            if whitelist_tags:
                tag_list = list(set(tag_list) & set(whitelist_tags.split(',')))
            pb.increment()

        msg = 'Creating Download requests for v2 Tags'
        with ProgressBar(message=msg, total=len(tag_list)) as pb:
            for tag_name in tag_list:
                relative_url = '/v2/{name}/manifests/{tag}'.format(
                    name=self.remote.namespaced_upstream_name,
                    tag=tag_name,
                )
                url = urljoin(self.remote.url, relative_url)
                downloader = self.remote.get_downloader(url=url)
                to_download.append(downloader.run(extra_data={'headers': V2_ACCEPT_HEADERS}))
                pb.increment()

        pb_parsed_tags = ProgressBar(message='Processing v2 Tags', state='running')
        pb_parsed_ml_tags = ProgressBar(message='Parsing Manifest List Tags', state='running')
        pb_parsed_m_tags = ProgressBar(message='Parsing Manifests Tags', state='running')
        # module-level global so helpers outside this coroutine (e.g.
        # handle_blobs) can update the same bar
        global pb_parsed_blobs
        pb_parsed_blobs = ProgressBar(message='Parsing Blobs', state='running')
        pb_parsed_man = ProgressBar(message='Parsing Manifests', state='running')

        for download_tag in asyncio.as_completed(to_download):
            tag = await download_tag
            with open(tag.path) as content_file:
                raw = content_file.read()
            content_data = json.loads(raw)
            mediatype = content_data.get('mediaType')
            tag.artifact_attributes['file'] = tag.path
            saved_artifact = Artifact(**tag.artifact_attributes)
            try:
                saved_artifact.save()
            except IntegrityError:
                del tag.artifact_attributes['file']
                saved_artifact = Artifact.objects.get(**tag.artifact_attributes)
            tag_dc = self.create_tag(mediatype, saved_artifact, tag.url)

            if type(tag_dc.content) is ManifestListTag:
                list_dc = self.create_tagged_manifest_list(
                    tag_dc, content_data)
                await self.put(list_dc)
                pb_parsed_ml_tags.increment()
                tag_dc.extra_data['list_relation'] = list_dc
                for manifest_data in content_data.get('manifests'):
                    man_dc = self.create_manifest(list_dc, manifest_data)
                    future_manifests.append(man_dc.get_or_create_future())
                    man_dcs[man_dc.content.digest] = man_dc
                    await self.put(man_dc)
                    pb_parsed_man.increment()
            elif type(tag_dc.content) is ManifestTag:
                man_dc = self.create_tagged_manifest(tag_dc, content_data)
                await self.put(man_dc)
                pb_parsed_m_tags.increment()
                tag_dc.extra_data['man_relation'] = man_dc
                self.handle_blobs(man_dc, content_data, total_blobs)
            await self.put(tag_dc)
            pb_parsed_tags.increment()

        for pb in (pb_parsed_tags, pb_parsed_ml_tags, pb_parsed_m_tags,
                   pb_parsed_man):
            pb.state = 'completed'
            pb.total = pb.done
            pb.save()

        for manifest_future in asyncio.as_completed(future_manifests):
            man = await manifest_future
            with man._artifacts.get().file.open() as content_file:
                raw = content_file.read()
            content_data = json.loads(raw)
            man_dc = man_dcs[man.digest]
            self.handle_blobs(man_dc, content_data, total_blobs)
        for blob in total_blobs:
            await self.put(blob)

        pb_parsed_blobs.state = 'completed'
        pb_parsed_blobs.total = pb_parsed_blobs.done
        pb_parsed_blobs.save()
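
For context, the two payloads this stage parses look roughly like the following (abridged sketches; the media type is the standard Docker distribution one, and the digest and platform values are placeholders):

# GET /v2/<name>/tags/list
tags_dict = {'name': 'library/busybox', 'tags': ['latest', '1.31']}

# GET /v2/<name>/manifests/<tag>, when the tag points at a manifest list
content_data = {
    'mediaType': 'application/vnd.docker.distribution.manifest.list.v2+json',
    'manifests': [
        {'digest': 'sha256:0123abcd', 'platform': {'architecture': 'amd64'}},
    ],
}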
Example #5
    async def run(self):
        """
        Build `DeclarativeContent` from the repodata.
        """
        packages_pb = ProgressBar(message='Parsed Packages')
        erratum_pb = ProgressBar(message='Parsed Errata')

        packages_pb.save()
        erratum_pb.save()

        with ProgressBar(message='Downloading Metadata Files') as metadata_pb:
            downloader = self.remote.get_downloader(
                url=urljoin(self.remote.url, 'repodata/repomd.xml'))
            # TODO: decide how to distinguish between a mirror list and a normal repo
            result = await downloader.run()
            metadata_pb.increment()

            repomd_path = result.path
            repomd = cr.Repomd(repomd_path)
            package_repodata_urls = {}
            updateinfo_url = None  # stays None when the repo has no updateinfo record
            downloaders = []

            for record in repomd.records:
                if record.type in PACKAGE_REPODATA:
                    package_repodata_urls[record.type] = urljoin(
                        self.remote.url, record.location_href)
                elif record.type in UPDATE_REPODATA:
                    updateinfo_url = urljoin(self.remote.url,
                                             record.location_href)
                    downloader = self.remote.get_downloader(url=updateinfo_url)
                    downloaders.append([downloader.run()])
                else:
                    log.info(
                        _('Unknown repodata type: {t}. Skipped.').format(
                            t=record.type))
                    # TODO: skip databases, save unknown types to publish them as-is

            # to preserve order, downloaders are created after all repodata urls are identified
            package_repodata_downloaders = []
            for repodata_type in PACKAGE_REPODATA:
                downloader = self.remote.get_downloader(
                    url=package_repodata_urls[repodata_type])
                package_repodata_downloaders.append(downloader.run())

            downloaders.append(package_repodata_downloaders)

            # asyncio.gather is used to preserve the order of results for package repodata
            pending = [
                asyncio.gather(*downloaders_group)
                for downloaders_group in downloaders
            ]

            while pending:
                done, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED)
                for finished_gather in done:
                    results = finished_gather.result()
                    if results[0].url == package_repodata_urls['primary']:
                        primary_xml_path = results[0].path
                        filelists_xml_path = results[1].path
                        other_xml_path = results[2].path
                        metadata_pb.done += 3
                        metadata_pb.save()

                        packages = await RpmFirstStage.parse_repodata(
                            primary_xml_path, filelists_xml_path,
                            other_xml_path)
                        packages_pb.total = len(packages)
                        packages_pb.state = 'running'
                        packages_pb.save()

                        for pkg in packages.values():
                            package = Package(
                                **Package.createrepo_to_dict(pkg))
                            artifact = Artifact(size=package.size_package)
                            checksum_type = getattr(
                                CHECKSUM_TYPES, package.checksum_type.upper())
                            setattr(artifact, checksum_type, package.pkgId)
                            url = urljoin(self.remote.url,
                                          package.location_href)
                            filename = os.path.basename(package.location_href)
                            da = DeclarativeArtifact(
                                artifact=artifact,
                                url=url,
                                relative_path=filename,
                                remote=self.remote,
                                deferred_download=self.deferred_download)
                            dc = DeclarativeContent(content=package,
                                                    d_artifacts=[da])
                            packages_pb.increment()
                            await self.put(dc)

                    elif results[0].url == updateinfo_url:
                        updateinfo_xml_path = results[0].path
                        metadata_pb.increment()

                        updates = await RpmFirstStage.parse_updateinfo(
                            updateinfo_xml_path)

                        erratum_pb.total = len(updates)
                        erratum_pb.state = 'running'
                        erratum_pb.save()

                        for update in updates:
                            update_record = UpdateRecord(
                                **UpdateRecord.createrepo_to_dict(update))
                            update_record.digest = RpmFirstStage.hash_update_record(
                                update)
                            future_relations = {
                                'collections': defaultdict(list),
                                'references': []
                            }

                            for collection in update.collections:
                                coll_dict = UpdateCollection.createrepo_to_dict(
                                    collection)
                                coll = UpdateCollection(**coll_dict)

                                for package in collection.packages:
                                    pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                        package)
                                    pkg = UpdateCollectionPackage(**pkg_dict)
                                    future_relations['collections'][
                                        coll].append(pkg)

                            for reference in update.references:
                                reference_dict = UpdateReference.createrepo_to_dict(
                                    reference)
                                ref = UpdateReference(**reference_dict)
                                future_relations['references'].append(ref)

                            erratum_pb.increment()
                            dc = DeclarativeContent(content=update_record)
                            dc.extra_data = future_relations
                            await self.put(dc)

        packages_pb.state = 'completed'
        erratum_pb.state = 'completed'
        packages_pb.save()
        erratum_pb.save()
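
For context, the record-type constants this stage dispatches on are, in pulp_rpm, along these lines (a sketch; the authoritative values live in the plugin's constants module):

PACKAGE_REPODATA = ['primary', 'filelists', 'other']  # fetched in this order
UPDATE_REPODATA = ['updateinfo']                       # errata metadata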
Example #6
    async def run(self):
        """
        DockerFirstStage.
        """
        future_manifests = []
        tag_list = []
        to_download = []
        man_dcs = {}
        total_blobs = []

        with ProgressBar(message='Downloading tag list', total=1) as pb:
            repo_name = self.remote.namespaced_upstream_name
            relative_url = '/v2/{name}/tags/list'.format(name=repo_name)
            tag_list_url = urljoin(self.remote.url, relative_url)
            list_downloader = self.remote.get_downloader(url=tag_list_url)
            await list_downloader.run(extra_data={'repo_name': repo_name})

            with open(list_downloader.path) as tags_raw:
                tags_dict = json.loads(tags_raw.read())
                tag_list = tags_dict['tags']

            # check for the presence of the pagination link header
            link = list_downloader.response_headers.get('Link')
            await self.handle_pagination(link, repo_name, tag_list)
            whitelist_tags = self.remote.whitelist_tags
            if whitelist_tags:
                tag_list = list(set(tag_list) & set(whitelist_tags.split(',')))
            pb.increment()

        for tag_name in tag_list:
            relative_url = '/v2/{name}/manifests/{tag}'.format(
                name=self.remote.namespaced_upstream_name,
                tag=tag_name,
            )
            url = urljoin(self.remote.url, relative_url)
            downloader = self.remote.get_downloader(url=url)
            to_download.append(
                downloader.run(extra_data={'headers': V2_ACCEPT_HEADERS}))

        pb_parsed_tags = ProgressBar(message='Processing Tags',
                                     state='running')

        for download_tag in asyncio.as_completed(to_download):
            tag = await download_tag
            with open(tag.path, 'rb') as content_file:
                raw_data = content_file.read()
            content_data = json.loads(raw_data)
            media_type = content_data.get('mediaType')
            tag.artifact_attributes['file'] = tag.path
            saved_artifact = Artifact(**tag.artifact_attributes)
            try:
                saved_artifact.save()
            except IntegrityError:
                del tag.artifact_attributes['file']
                saved_artifact = Artifact.objects.get(
                    **tag.artifact_attributes)
            tag_dc = self.create_tag(saved_artifact, tag.url)

            if media_type == MEDIA_TYPE.MANIFEST_LIST:
                list_dc = self.create_tagged_manifest_list(
                    tag_dc, content_data)
                await self.put(list_dc)
                tag_dc.extra_data['man_relation'] = list_dc
                for manifest_data in content_data.get('manifests'):
                    man_dc = self.create_manifest(list_dc, manifest_data)
                    future_manifests.append(man_dc.get_or_create_future())
                    man_dcs[man_dc.content.digest] = man_dc
                    await self.put(man_dc)
            else:
                man_dc = self.create_tagged_manifest(tag_dc, content_data,
                                                     raw_data)
                await self.put(man_dc)
                tag_dc.extra_data['man_relation'] = man_dc
                self.handle_blobs(man_dc, content_data, total_blobs)
            await self.put(tag_dc)
            pb_parsed_tags.increment()

        pb_parsed_tags.state = 'completed'
        pb_parsed_tags.total = pb_parsed_tags.done
        pb_parsed_tags.save()

        for manifest_future in asyncio.as_completed(future_manifests):
            man = await manifest_future
            with man._artifacts.get().file.open() as content_file:
                raw = content_file.read()
            content_data = json.loads(raw)
            man_dc = man_dcs[man.digest]
            self.handle_blobs(man_dc, content_data, total_blobs)
        for blob in total_blobs:
            await self.put(blob)
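
For reference, the V2_ACCEPT_HEADERS sent with each manifest request advertise both schema-2 media types, roughly as below (a sketch; the exact constant lives in the plugin's constants module):

V2_ACCEPT_HEADERS = {
    'Accept': ','.join([
        'application/vnd.docker.distribution.manifest.v2+json',
        'application/vnd.docker.distribution.manifest.list.v2+json',
    ])
}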