Example #1
# Imports assumed by this snippet; ProgressBar, parse_roles and the
# remote/downloader API are Pulp-specific and not shown here.
import asyncio
import json
from asyncio import FIRST_COMPLETED
from contextlib import suppress
from urllib.parse import parse_qs, urlencode, urlparse


def fetch_roles(remote):
    """
    Fetch the roles in a remote repository.

    Args:
        remote (AnsibleRemote): A remote.

    Returns:
        list: a list of dicts that represent roles
    """

    def role_page_url(remote, page=1):
        parsed = urlparse(remote.url)
        new_query = parse_qs(parsed.query)
        new_query['page'] = page
        return parsed.scheme + '://' + parsed.netloc + parsed.path + '?' + urlencode(
            new_query, doseq=True)

    def parse_metadata(path):
        # Load the JSON page with a context manager so the file handle
        # is closed promptly, instead of json.load(open(path)).
        with open(path) as fd:
            metadata = json.load(fd)
        return metadata['num_pages'], parse_roles(metadata)

    downloader = remote.get_downloader(role_page_url(remote))
    downloader.fetch()

    page_count, roles = parse_metadata(downloader.path)

    progress_bar = ProgressBar(message='Parsing Pages from Galaxy Roles API',
                               total=page_count,
                               done=1,
                               state='running')
    progress_bar.save()

    def downloader_coroutines():
        for page in range(2, page_count + 1):
            downloader = remote.get_downloader(role_page_url(remote, page))
            yield downloader.run()

    loop = asyncio.get_event_loop()
    downloaders = downloader_coroutines()

    not_done = set()
    with suppress(StopIteration):
        for i in range(20):
            not_done.add(next(downloaders))

    while not_done:
        done, not_done = loop.run_until_complete(
            asyncio.wait(not_done, return_when=FIRST_COMPLETED))
        for item in done:
            download_result = item.result()
            new_page_count, new_roles = parse_metadata(download_result.path)
            roles.extend(new_roles)
            progress_bar.increment()
            with suppress(StopIteration):
                not_done.add(next(downloaders))

    progress_bar.state = 'completed'
    progress_bar.save()

    return roles
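
The loop above keeps at most 20 downloads in flight, starting a new one each
time a previous one completes. Below is a minimal, self-contained sketch of
the same sliding-window pattern; fake_download is a hypothetical stand-in for
the Pulp downloader, and the coroutines are wrapped in Tasks because newer
asyncio versions reject bare coroutines in asyncio.wait:

import asyncio
from contextlib import suppress

async def fake_download(page):
    # Hypothetical stand-in for remote.get_downloader(...).run().
    await asyncio.sleep(0.01)
    return page

def download_all(page_count, window=20):
    loop = asyncio.new_event_loop()
    coroutines = (fake_download(page) for page in range(2, page_count + 1))
    results = []

    # Prime the window with up to `window` in-flight tasks.
    not_done = set()
    with suppress(StopIteration):
        for _ in range(window):
            not_done.add(loop.create_task(next(coroutines)))

    # Each completed task makes room for the next one, so at most
    # `window` downloads run concurrently.
    while not_done:
        done, not_done = loop.run_until_complete(
            asyncio.wait(not_done, return_when=asyncio.FIRST_COMPLETED))
        for task in done:
            results.append(task.result())
            with suppress(StopIteration):
                not_done.add(loop.create_task(next(coroutines)))
    loop.close()
    return results

print(download_all(10))  # pages 2..10, in completion order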
Example #2
    async def run(self):
        """
        DockerFirstStage.
        """
        future_manifests = []
        tag_list = []
        to_download = []
        man_dcs = {}
        total_blobs = []

        with ProgressBar(message='Downloading tag list', total=1) as pb:
            repo_name = self.remote.namespaced_upstream_name
            relative_url = '/v2/{name}/tags/list'.format(name=repo_name)
            tag_list_url = urljoin(self.remote.url, relative_url)
            list_downloader = self.remote.get_downloader(url=tag_list_url)
            await list_downloader.run(extra_data={'repo_name': repo_name})

            with open(list_downloader.path) as tags_raw:
                tags_dict = json.loads(tags_raw.read())
                tag_list = tags_dict['tags']

            # check for the presence of the pagination link header
            link = list_downloader.response_headers.get('Link')
            await self.handle_pagination(link, repo_name, tag_list)
            whitelist_tags = self.remote.whitelist_tags
            if whitelist_tags:
                tag_list = list(set(tag_list) & set(whitelist_tags.split(',')))
            pb.increment()

        msg = 'Creating Download requests for v2 Tags'
        with ProgressBar(message=msg, total=len(tag_list)) as pb:
            for tag_name in tag_list:
                relative_url = '/v2/{name}/manifests/{tag}'.format(
                    name=self.remote.namespaced_upstream_name,
                    tag=tag_name,
                )
                url = urljoin(self.remote.url, relative_url)
                downloader = self.remote.get_downloader(url=url)
                to_download.append(downloader.run(extra_data={'headers': V2_ACCEPT_HEADERS}))
                pb.increment()

        pb_parsed_tags = ProgressBar(message='Processing v2 Tags', state='running')
        pb_parsed_ml_tags = ProgressBar(message='Parsing Manifest List Tags', state='running')
        pb_parsed_m_tags = ProgressBar(message='Parsing Manifests Tags', state='running')
        # Declared global so helper code that parses blobs (presumably
        # handle_blobs) can update this bar as well.
        global pb_parsed_blobs
        pb_parsed_blobs = ProgressBar(message='Parsing Blobs', state='running')
        pb_parsed_man = ProgressBar(message='Parsing Manifests', state='running')

        for download_tag in asyncio.as_completed(to_download):
            tag = await download_tag
            with open(tag.path) as content_file:
                raw = content_file.read()
            content_data = json.loads(raw)
            mediatype = content_data.get('mediaType')
            tag.artifact_attributes['file'] = tag.path
            saved_artifact = Artifact(**tag.artifact_attributes)
            try:
                saved_artifact.save()
            except IntegrityError:
                del tag.artifact_attributes['file']
                saved_artifact = Artifact.objects.get(**tag.artifact_attributes)
            tag_dc = self.create_tag(mediatype, saved_artifact, tag.url)

            if type(tag_dc.content) is ManifestListTag:
                list_dc = self.create_tagged_manifest_list(
                    tag_dc, content_data)
                await self.put(list_dc)
                pb_parsed_ml_tags.increment()
                tag_dc.extra_data['list_relation'] = list_dc
                for manifest_data in content_data.get('manifests'):
                    man_dc = self.create_manifest(list_dc, manifest_data)
                    future_manifests.append(man_dc.get_or_create_future())
                    man_dcs[man_dc.content.digest] = man_dc
                    await self.put(man_dc)
                    pb_parsed_man.increment()
            elif type(tag_dc.content) is ManifestTag:
                man_dc = self.create_tagged_manifest(tag_dc, content_data)
                await self.put(man_dc)
                pb_parsed_m_tags.increment()
                tag_dc.extra_data['man_relation'] = man_dc
                self.handle_blobs(man_dc, content_data, total_blobs)
            await self.put(tag_dc)
            pb_parsed_tags.increment()

        pb_parsed_tags.state = 'completed'
        pb_parsed_tags.total = pb_parsed_tags.done
        pb_parsed_tags.save()
        pb_parsed_ml_tags.state = 'completed'
        pb_parsed_ml_tags.total = pb_parsed_ml_tags.done
        pb_parsed_ml_tags.save()
        pb_parsed_m_tags.state = 'completed'
        pb_parsed_m_tags.total = pb_parsed_m_tags.done
        pb_parsed_m_tags.save()
        pb_parsed_man.state = 'completed'
        pb_parsed_man.total = pb_parsed_man.done
        pb_parsed_man.save()

        for manifest_future in asyncio.as_completed(future_manifests):
            man = await manifest_future
            with man._artifacts.get().file.open() as content_file:
                raw = content_file.read()
            content_data = json.loads(raw)
            man_dc = man_dcs[man.digest]
            self.handle_blobs(man_dc, content_data, total_blobs)
        for blob in total_blobs:
            await self.put(blob)

        pb_parsed_blobs.state = 'completed'
        pb_parsed_blobs.total = pb_parsed_blobs.done
        pb_parsed_blobs.save()
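
This stage schedules every manifest download up front and then consumes them
with asyncio.as_completed, handling each tag as soon as its download lands.
A stripped-down sketch of that consumption pattern follows; fetch_tag is a
hypothetical stand-in for remote.get_downloader(url).run():

import asyncio

async def fetch_tag(name):
    # Hypothetical stand-in for a Pulp downloader coroutine.
    await asyncio.sleep(0.01)
    return {'tag': name}

async def process_tags(tag_list):
    # Schedule all downloads first, then handle each one in the order
    # it completes rather than the order it was submitted.
    to_download = [fetch_tag(name) for name in tag_list]
    for pending in asyncio.as_completed(to_download):
        result = await pending
        print(result['tag'])

asyncio.run(process_tags(['latest', '1.0', '2.0']))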
Example #3
    async def run(self):
        """
        DockerFirstStage.
        """
        future_manifests = []
        tag_list = []
        to_download = []
        man_dcs = {}
        total_blobs = []

        with ProgressBar(message='Downloading tag list', total=1) as pb:
            repo_name = self.remote.namespaced_upstream_name
            relative_url = '/v2/{name}/tags/list'.format(name=repo_name)
            tag_list_url = urljoin(self.remote.url, relative_url)
            list_downloader = self.remote.get_downloader(url=tag_list_url)
            await list_downloader.run(extra_data={'repo_name': repo_name})

            with open(list_downloader.path) as tags_raw:
                tags_dict = json.loads(tags_raw.read())
                tag_list = tags_dict['tags']

            # check for the presence of the pagination link header
            link = list_downloader.response_headers.get('Link')
            await self.handle_pagination(link, repo_name, tag_list)
            whitelist_tags = self.remote.whitelist_tags
            if whitelist_tags:
                tag_list = list(set(tag_list) & set(whitelist_tags.split(',')))
            pb.increment()

        for tag_name in tag_list:
            relative_url = '/v2/{name}/manifests/{tag}'.format(
                name=self.remote.namespaced_upstream_name,
                tag=tag_name,
            )
            url = urljoin(self.remote.url, relative_url)
            downloader = self.remote.get_downloader(url=url)
            to_download.append(
                downloader.run(extra_data={'headers': V2_ACCEPT_HEADERS}))

        pb_parsed_tags = ProgressBar(message='Processing Tags',
                                     state='running')

        for download_tag in asyncio.as_completed(to_download):
            tag = await download_tag
            with open(tag.path, 'rb') as content_file:
                raw_data = content_file.read()
            content_data = json.loads(raw_data)
            media_type = content_data.get('mediaType')
            tag.artifact_attributes['file'] = tag.path
            saved_artifact = Artifact(**tag.artifact_attributes)
            try:
                saved_artifact.save()
            except IntegrityError:
                del tag.artifact_attributes['file']
                saved_artifact = Artifact.objects.get(
                    **tag.artifact_attributes)
            tag_dc = self.create_tag(saved_artifact, tag.url)

            if media_type == MEDIA_TYPE.MANIFEST_LIST:
                list_dc = self.create_tagged_manifest_list(
                    tag_dc, content_data)
                await self.put(list_dc)
                tag_dc.extra_data['man_relation'] = list_dc
                for manifest_data in content_data.get('manifests'):
                    man_dc = self.create_manifest(list_dc, manifest_data)
                    future_manifests.append(man_dc.get_or_create_future())
                    man_dcs[man_dc.content.digest] = man_dc
                    await self.put(man_dc)
            else:
                man_dc = self.create_tagged_manifest(tag_dc, content_data,
                                                     raw_data)
                await self.put(man_dc)
                tag_dc.extra_data['man_relation'] = man_dc
                self.handle_blobs(man_dc, content_data, total_blobs)
            await self.put(tag_dc)
            pb_parsed_tags.increment()

        pb_parsed_tags.state = 'completed'
        pb_parsed_tags.total = pb_parsed_tags.done
        pb_parsed_tags.save()

        for manifest_future in asyncio.as_completed(future_manifests):
            man = await manifest_future
            with man._artifacts.get().file.open() as content_file:
                raw = content_file.read()
            content_data = json.loads(raw)
            man_dc = man_dcs[man.digest]
            self.handle_blobs(man_dc, content_data, total_blobs)
        for blob in total_blobs:
            await self.put(blob)
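
Both versions of the stage save an Artifact optimistically and fall back to
fetching the existing row when a uniqueness constraint fires. Here is a
generic Django sketch of that save-or-fetch pattern; the transaction.atomic()
wrapper is an assumption needed on PostgreSQL, where an IntegrityError
otherwise poisons the surrounding transaction:

from django.db import IntegrityError, transaction

def save_or_fetch(model, **attrs):
    # Try to insert; on a uniqueness conflict, return the row that
    # already holds these attributes instead.
    instance = model(**attrs)
    try:
        with transaction.atomic():
            instance.save()
    except IntegrityError:
        attrs.pop('file', None)  # 'file' is not a lookup field above
        instance = model.objects.get(**attrs)
    return instance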
Example #4
    async def run(self):
        """
        Build `DeclarativeContent` from the repodata.
        """
        packages_pb = ProgressBar(message='Parsed Packages')
        erratum_pb = ProgressBar(message='Parsed Erratum')

        packages_pb.save()
        erratum_pb.save()

        with ProgressBar(message='Downloading Metadata Files') as metadata_pb:
            downloader = self.remote.get_downloader(
                url=urljoin(self.remote.url, 'repodata/repomd.xml'))
            # TODO: decide how to distinguish between a mirror list and a normal repo
            result = await downloader.run()
            metadata_pb.increment()

            repomd_path = result.path
            repomd = cr.Repomd(repomd_path)
            package_repodata_urls = {}
            downloaders = []
            # set only when an updateinfo record exists; compared against below
            updateinfo_url = None

            for record in repomd.records:
                if record.type in PACKAGE_REPODATA:
                    package_repodata_urls[record.type] = urljoin(
                        self.remote.url, record.location_href)
                elif record.type in UPDATE_REPODATA:
                    updateinfo_url = urljoin(self.remote.url,
                                             record.location_href)
                    downloader = self.remote.get_downloader(url=updateinfo_url)
                    downloaders.append([downloader.run()])
                else:
                    log.info(
                        _('Unknown repodata type: {t}. Skipped.').format(
                            t=record.type))
                    # TODO: skip databases, save unknown types to publish them as-is

            # to preserve order, downloaders are created after all repodata urls are identified
            package_repodata_downloaders = []
            for repodata_type in PACKAGE_REPODATA:
                downloader = self.remote.get_downloader(
                    url=package_repodata_urls[repodata_type])
                package_repodata_downloaders.append(downloader.run())

            downloaders.append(package_repodata_downloaders)

            # asyncio.gather is used to preserve the order of results for package repodata
            pending = [
                asyncio.gather(*downloaders_group)
                for downloaders_group in downloaders
            ]

            while pending:
                done, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED)
                for downloader in done:
                    results = downloader.result()
                    if results[0].url == package_repodata_urls['primary']:
                        primary_xml_path = results[0].path
                        filelists_xml_path = results[1].path
                        other_xml_path = results[2].path
                        metadata_pb.done += 3
                        metadata_pb.save()

                        packages = await RpmFirstStage.parse_repodata(
                            primary_xml_path, filelists_xml_path,
                            other_xml_path)
                        packages_pb.total = len(packages)
                        packages_pb.state = 'running'
                        packages_pb.save()

                        for pkg in packages.values():
                            package = Package(
                                **Package.createrepo_to_dict(pkg))
                            artifact = Artifact(size=package.size_package)
                            checksum_type = getattr(
                                CHECKSUM_TYPES, package.checksum_type.upper())
                            setattr(artifact, checksum_type, package.pkgId)
                            url = urljoin(self.remote.url,
                                          package.location_href)
                            filename = os.path.basename(package.location_href)
                            da = DeclarativeArtifact(
                                artifact=artifact,
                                url=url,
                                relative_path=filename,
                                remote=self.remote,
                                deferred_download=self.deferred_download)
                            dc = DeclarativeContent(content=package,
                                                    d_artifacts=[da])
                            packages_pb.increment()
                            await self.put(dc)

                    elif results[0].url == updateinfo_url:
                        updateinfo_xml_path = results[0].path
                        metadata_pb.increment()

                        updates = await RpmFirstStage.parse_updateinfo(
                            updateinfo_xml_path)

                        erratum_pb.total = len(updates)
                        erratum_pb.state = 'running'
                        erratum_pb.save()

                        for update in updates:
                            update_record = UpdateRecord(
                                **UpdateRecord.createrepo_to_dict(update))
                            update_record.digest = RpmFirstStage.hash_update_record(
                                update)
                            future_relations = {
                                'collections': defaultdict(list),
                                'references': []
                            }

                            for collection in update.collections:
                                coll_dict = UpdateCollection.createrepo_to_dict(
                                    collection)
                                coll = UpdateCollection(**coll_dict)

                                for package in collection.packages:
                                    pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                        package)
                                    pkg = UpdateCollectionPackage(**pkg_dict)
                                    future_relations['collections'][
                                        coll].append(pkg)

                            for reference in update.references:
                                reference_dict = UpdateReference.createrepo_to_dict(
                                    reference)
                                ref = UpdateReference(**reference_dict)
                                future_relations['references'].append(ref)

                            erratum_pb.increment()
                            dc = DeclarativeContent(content=update_record)
                            dc.extra_data = future_relations
                            await self.put(dc)

        packages_pb.state = 'completed'
        erratum_pb.state = 'completed'
        packages_pb.save()
        erratum_pb.save()
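
The repodata stage wraps each group of related downloads in asyncio.gather so
that the primary/filelists/other results come back in a fixed order, while
asyncio.wait lets whole groups finish in any order. A self-contained sketch of
that grouping trick follows; fetch is a hypothetical stand-in for a downloader:

import asyncio

async def fetch(url):
    # Hypothetical stand-in for remote.get_downloader(url).run().
    await asyncio.sleep(0.01)
    return url

async def grouped_downloads():
    # Each inner list is gathered, so its results keep their order
    # (e.g. primary, filelists, other); the groups themselves are
    # handled in completion order.
    groups = [
        [fetch('primary.xml'), fetch('filelists.xml'), fetch('other.xml')],
        [fetch('updateinfo.xml')],
    ]
    pending = {asyncio.gather(*group) for group in groups}
    while pending:
        done, pending = await asyncio.wait(
            pending, return_when=asyncio.FIRST_COMPLETED)
        for group_future in done:
            print(group_future.result())  # one group's ordered results

asyncio.run(grouped_downloads())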
Example #5
async def pre_migrate_content(content_model):
    """
    A coroutine to pre-migrate Pulp 2 content.

    Args:
        content_model: Models for content which is being migrated.
    """
    batch_size = 10000
    content_type = content_model.pulp2.type
    pulp2content = []

    # the latest timestamp we have in the migration tool Pulp2Content table for this content type
    content_qs = Pulp2Content.objects.filter(
        pulp2_content_type_id=content_type)
    last_updated = content_qs.aggregate(
        Max('pulp2_last_updated'))['pulp2_last_updated__max'] or 0
    _logger.debug(
        'The latest migrated {type} content has {timestamp} timestamp.'.format(
            type=content_type, timestamp=last_updated))

    # query only newly created/updated items
    mongo_content_qs = content_model.pulp2.objects(
        _last_updated__gte=last_updated)
    total_content = mongo_content_qs.count()
    _logger.debug('Total count for {type} content to migrate: {total}'.format(
        type=content_type, total=total_content))

    pulp2content_pb = ProgressBar(
        message='Pre-migrating Pulp 2 {} content (general info)'.format(
            content_type.upper()),
        total=total_content,
        state=TASK_STATES.RUNNING)
    pulp2content_pb.save()
    pulp2detail_pb = ProgressBar(
        message='Pre-migrating Pulp 2 {} content (detail info)'.format(
            content_type.upper()),
        total=total_content,
        state=TASK_STATES.RUNNING)
    pulp2detail_pb.save()

    existing_count = 0
    for i, record in enumerate(
            mongo_content_qs.only('id', '_storage_path', '_last_updated',
                                  '_content_type_id',
                                  'downloaded').batch_size(batch_size)):
        if record['_last_updated'] == last_updated:
            # corner case - content with the latest ``last_updated`` timestamp
            # might already be pre-migrated; check before saving it again
            migrated = Pulp2Content.objects.filter(
                pulp2_last_updated=last_updated, pulp2_id=record['id'])
            if migrated:
                existing_count += 1

                # it has to be updated here and not later, in case all items were migrated before
                # and no new content will be saved.
                pulp2content_pb.total -= 1
                pulp2content_pb.save()
                pulp2detail_pb.total -= 1
                pulp2detail_pb.save()
                continue

        item = Pulp2Content(pulp2_id=record['id'],
                            pulp2_content_type_id=record['_content_type_id'],
                            pulp2_last_updated=record['_last_updated'],
                            pulp2_storage_path=record['_storage_path'],
                            downloaded=record['downloaded'])
        _logger.debug('Add content item to the list to migrate: {item}'.format(
            item=item))
        pulp2content.append(item)

        save_batch = ((i and (i + 1) % batch_size == 0)
                      or i == total_content - 1)
        if save_batch:
            _logger.debug(
                'Bulk save for generic content info, saved so far: {index}'.
                format(index=i + 1))
            pulp2content_batch = Pulp2Content.objects.bulk_create(
                pulp2content, ignore_conflicts=True)
            content_saved = len(pulp2content_batch) - existing_count
            pulp2content_pb.done += content_saved
            pulp2content_pb.save()

            await content_model.pulp_2to3_detail.pre_migrate_content_detail(
                pulp2content_batch)

            pulp2detail_pb.done += content_saved
            pulp2detail_pb.save()

            pulp2content = []
            existing_count = 0

    pulp2content_pb.state = TASK_STATES.COMPLETED
    pulp2content_pb.save()
    pulp2detail_pb.state = TASK_STATES.COMPLETED
    pulp2detail_pb.save()
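
The pre-migration loop trades per-row INSERTs for one bulk_create per 10,000
records, flushing whenever the batch fills or the input is exhausted. A small
sketch of that batching logic in isolation; batched and the commented usage
are illustrative, not part of the migration tool:

def batched(iterable, batch_size):
    # Yield lists of at most batch_size items; the final, shorter
    # batch is flushed too, mirroring the `i == total_content - 1` check.
    batch = []
    for item in iterable:
        batch.append(item)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch:
        yield batch

# Hypothetical usage mirroring the loop above:
# for batch in batched(records, batch_size=10000):
#     Pulp2Content.objects.bulk_create(batch, ignore_conflicts=True)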