def test_tar_experiment_download(self):
    self.assertTrue(all(df.verified for df in self.dfs))
    response = self.client.get(reverse(
        'tardis.tardis_portal.download.streaming_download_experiment',
        args=(self.exp.id, 'tar')))
    with NamedTemporaryFile('w') as tarfile:
        for c in response.streaming_content:
            tarfile.write(c)
        tarfile.flush()
        self.assertEqual(int(response['Content-Length']),
                         os.stat(tarfile.name).st_size)
        tf = TarFile(tarfile.name)
        if settings.EXP_SPACES_TO_UNDERSCORES:
            exp_title = self.exp.title.replace(' ', '_')
        else:
            exp_title = self.exp.title
        exp_title = quote(exp_title,
                          safe=settings.SAFE_FILESYSTEM_CHARACTERS)
        for df in self.dfs:
            full_path = os.path.join(
                exp_title,
                quote(self.ds.description,
                      safe=settings.SAFE_FILESYSTEM_CHARACTERS),
                df.directory, df.filename)
            # docker has a file path limit of ~240 characters
            if os.environ.get('DOCKER_BUILD', 'false') != 'true':
                tf.extract(full_path, '/tmp')
                self.assertEqual(
                    os.stat(os.path.join('/tmp', full_path)).st_size,
                    int(df.size))

def test_tar_experiment_download(self):
    self.assertTrue(all(df.verified for df in self.dfs))
    response = self.client.get(reverse(
        'tardis.tardis_portal.download.streaming_download_experiment',
        args=(self.exp.id, 'tar')))
    with NamedTemporaryFile('w') as tarfile:
        for c in response.streaming_content:
            tarfile.write(c)
        tarfile.flush()
        self.assertEqual(int(response['Content-Length']),
                         os.stat(tarfile.name).st_size)
        tf = TarFile(tarfile.name)
        if settings.EXP_SPACES_TO_UNDERSCORES:
            exp_title = self.exp.title.replace(' ', '_')
        else:
            exp_title = self.exp.title
        exp_title = urllib.parse.quote(
            exp_title, safe=settings.SAFE_FILESYSTEM_CHARACTERS)
        for df in self.dfs:
            full_path = os.path.join(
                exp_title,
                urllib.parse.quote(
                    self.ds.description,
                    safe=settings.SAFE_FILESYSTEM_CHARACTERS),
                df.directory, df.filename)
            # docker has a file path limit of ~240 characters
            if os.environ.get('DOCKER_BUILD', 'false') != 'true':
                tf.extract(full_path, '/tmp')
                self.assertEqual(
                    os.stat(os.path.join('/tmp', full_path)).st_size,
                    int(df.size))

def _extract_files_by_provided_names_tar(t_file: tarfile.TarFile,
                                         names: List[str],
                                         output_path: str) -> None:
    """Extract the members with the given names from the tar archive into output_path."""
    for name in names:
        member = t_file.getmember(name)
        extracting_path = output_path
        t_file.extract(member, path=extracting_path)

def extract(self, path, cb=None):
    tarfile = TarFile(fileobj=self.source.extractfile('./DATA'))
    for member in tarfile.getmembers():
        if member.name in ('/', ''):
            # extract can't handle making '/' when installing '/'
            continue
        tarfile.extract(member, path)
        if member.isfile() and cb:
            cb(self.name, os.path.join(path, member.name))

def _extract_tar_junk_path(tarfile_obj: tarfile.TarFile,
                           archive_extract_dir: Path):
    """
    Extract a tarfile while flattening any directory hierarchy
    in the archive.
    """
    for member in tarfile_obj.getmembers():
        if member.isdir():
            # Skip directories
            continue
        # Remove the directory hierarchy from the file
        member.name = Path(member.name).name
        output_file = archive_extract_dir / member.name
        LOGGER.debug(f"Extracting member '{member.name}' to '{output_file}'")
        tarfile_obj.extract(member, path=archive_extract_dir)

def _untar_layers(dir, layers):
    output = {}
    # Untar layer filesystem bundle
    for layer in layers:
        tarfile = TarFile(dir + "/" + layer)
        for member in tarfile.getmembers():
            output[member.name] = member
    for member_name in output:
        try:
            tarfile.extract(output[member_name], path=dir, set_attrs=False)
        except (ValueError, ReadError):
            pass
    # Clean up
    for layer in layers:
        clean_up(dir + "/" + layer[:-10])

def unpack_archive(archive_staging_dir, archive, external_id, target_path,
                   filelist=None):
    """Unpack a tar file containing the files that are in the
    MigrationArchive object"""
    # create the name of the archive
    archive_path = archive.get_archive_name(archive_staging_dir)
    # create the target directory if it doesn't exist
    try:
        os.makedirs(target_path)
    except:
        pass
    try:
        tar_file = TarFile(archive_path, 'r')
        # check that the tar_file digest matches the digest in the database
        digest = calculate_digest(archive_path)
        if digest != archive.digest:
            error_string = (
                "Digest does not match for archive: {}").format(archive_path)
            raise Exception(error_string)
    except:
        error_string = ("Could not find archive path: {}").format(archive_path)
        raise Exception(error_string)
    # untar each file
    for tar_info in tar_file.getmembers():
        try:
            # if filelist only extract those in the filelist
            if filelist:
                if tar_info.name in filelist:
                    tar_file.extract(tar_info, path=target_path)
            else:
                tar_file.extract(tar_info, path=target_path)
            logging.debug((
                "    Extracting file: {} from archive: {} to directory: {}"
            ).format(tar_info.name, archive.get_id(), target_path))
        except Exception as e:
            error_string = (
                "Could not extract file: {} from archive {} to path: {}, exception: {}"
            ).format(tar_info.name, archive.get_id(), target_path, str(e))
            logging.error(error_string)
            raise Exception(error_string)
    tar_file.close()

def test_tar_experiment_download(self):
    self.assertTrue(all(df.verified for df in self.dfs))
    response = self.client.get(
        reverse("tardis.tardis_portal.download.streaming_download_experiment",
                args=(self.exp.id, "tar"))
    )
    with NamedTemporaryFile("w") as tarfile:
        for c in response.streaming_content:
            tarfile.write(c)
        tarfile.flush()
        self.assertEqual(int(response["Content-Length"]),
                         os.stat(tarfile.name).st_size)
        tf = TarFile(tarfile.name)
        for df in self.dfs:
            full_path = os.path.join(
                self.exp.title.replace(" ", "_"),
                self.ds.description,
                df.directory,
                df.filename
            )
            tf.extract(full_path, "/tmp")
            self.assertEqual(os.stat(os.path.join("/tmp", full_path)).st_size,
                             int(df.size))

def extract(self, path, cb=None):
    file_path_list = []
    tarfile = TarFile(fileobj=self.source.extractfile('./DATA'))
    for member in tarfile.getmembers():
        if member.name in ('/', ''):
            # extract can't handle making '/' when installing '/'
            continue
        file_path = os.path.join(path, member.name)
        if cb is not None:
            cb(file_path)
        tarfile.extract(member, path)
        if member.isfile():
            file_path_list.append(file_path)
    return file_path_list

class FileSeekerTar(FileSeekerBase):
    def __init__(self, tar_file_path, temp_folder):
        FileSeekerBase.__init__(self)
        self.tar_file = TarFile(tar_file_path)
        self.temp_folder = temp_folder

    def search(self, filepattern):
        pathlist = []
        for member in self.tar_file.getmembers():
            if fnmatch.fnmatch(member.name, filepattern):
                try:
                    self.tar_file.extract(member.name, path=self.temp_folder)
                    pathlist.append(
                        os.path.join(self.temp_folder, Path(member.name)))
                except:
                    logfunc('Could not write file to filesystem')
        return pathlist

    def cleanup(self):
        self.tar_file.close()

def _untar_layers(dir, layers):
    output = {}
    # Untar layer filesystem bundle
    for layer in layers:
        tarfile = TarFile(dir + "/" + layer)
        for member in tarfile.getmembers():
            try:
                tarfile.extract(member, path=dir, set_attrs=False)
            except (ValueError, ReadError) as ex:
                if InternalServer.is_debug_logging_enabled():
                    message = "Unexpected exception of type {0} occurred while untaring the docker image: {1!r}" \
                        .format(type(ex).__name__,
                                ex.get_message() if type(ex).__name__ == 'DagdaError'
                                else ex.args)
                    DagdaLogger.get_logger().debug(message)
            except PermissionError as ex:
                message = "Unexpected error occurred while untaring the docker image: " + \
                          "Operation not permitted on {0!r}".format(member.name)
                DagdaLogger.get_logger().warn(message)
    # Clean up
    for layer in layers:
        clean_up(dir + "/" + layer[:-10])

def _extract_tar(archivefile: tarfile.TarFile, name: str, rep: str) -> bool:
    """Extract a tar archive"""
    mkdir(rep, mode=0o711)
    if not check_archive(archivefile, rep):
        raise BadArchive("malicious archive")
    try:
        for member in archivefile:
            archivefile.extract(member, rep, set_attrs=False)
            member_location = joinpath(rep, member.name)
            # python has no option to use umask while extracting, so…
            if isdir(member_location):
                chmod(member_location, 0o711)
            else:
                chmod(member_location, 0o644)
    except:
        # extraction failed, remove leftover files
        log.info('Extraction of %s failed, falling back to single-file upload',
                 name, exc_info=True)
        rmtree(rep)
        return False
    else:
        # remove old tar file
        remove(name)
        log.info('Successfully extracted tarfile %s into %s', name, rep)
        return True

def extract_tar_stream(tar: tarfile.TarFile, src: Text, dst: Text) -> None:
    for member in tar:
        if os.path.isdir(dst):
            if posixpath.join('/', member.path) == src:
                member.path = posixpath.basename(member.path)
                tar.extract(member, dst)
                if member.isdir():
                    dst = os.path.join(dst, member.path)
            else:
                member.path = posixpath.relpath(
                    posixpath.join('/', member.path), src)
                tar.extract(member, dst)
        elif member.isfile():
            with tar.extractfile(member) as inputfile:
                with open(dst, 'wb') as outputfile:
                    outputfile.write(inputfile.read())
        else:
            parent_dir = str(Path(dst).parent)
            member.path = posixpath.basename(member.path)
            tar.extract(member, parent_dir)

def extract(self, package_filename, location, path):
    tarfile = TarFile(package_filename, "r")
    tarfile.extract(location, path)

def _tar_extract(archive: TarFile, member: Union[str, TarInfo],
                 outdir: PathType) -> Path:
    archive.extract(member, outdir)
    member_name = member.name if isinstance(member, TarInfo) else member
    return Path(outdir) / member_name

def process_rpm(
    cfg: Config,
    s3session: s3.S3ServiceResource,
    tf: tarfile.TarFile,
    metadata: dict[str, Any],
    temp_dir: pathlib.Path,
    local_dir: pathlib.Path,
) -> None:
    bucket = s3session.Bucket(BUCKET)
    incoming_dir = temp_dir / "incoming"
    incoming_dir.mkdir()
    local_rpm_dir = local_dir / "rpm"
    local_rpm_dir.mkdir(parents=True, exist_ok=True)
    index_dir = local_rpm_dir / ".jsonindexes"
    index_dir.mkdir(exist_ok=True)

    rpms = []
    for member in tf.getmembers():
        if member.name in {".", "build-metadata.json"}:
            continue
        tf.extract(member, incoming_dir)
        fn = pathlib.Path(member.name)
        if fn.suffix == ".rpm":
            rpms.append(fn)

    dist = metadata["dist"]
    channel = metadata["channel"]
    arch = metadata["architecture"]

    idx = dist
    if channel != "stable":
        idx += f".{channel}"

    dist_dir = pathlib.Path(dist) / channel / arch
    local_dist_dir = local_rpm_dir / dist_dir
    local_dist_dir.mkdir(parents=True, exist_ok=True)

    sync_to_local(
        bucket,
        pathlib.Path("/rpm") / dist_dir,
        local_dist_dir,
        exact_timestamps=True,
    )

    sync_to_local(
        bucket,
        pathlib.Path("/rpm") / ".jsonindexes",
        index_dir,
        exact_timestamps=True,
    )

    repomd = local_dist_dir / "repodata" / "repomd.xml"
    if not repomd.exists():
        subprocess.run(
            [
                "createrepo_c",
                "--database",
                local_dist_dir,
            ],
            cwd=incoming_dir,
            check=True,
        )

    for rpm in rpms:
        subprocess.run(
            [
                "rpm",
                "--resign",
                rpm,
            ],
            input=b"\n",
            cwd=incoming_dir,
            check=True,
        )
        shutil.copy(incoming_dir / rpm, local_dist_dir / rpm)

    subprocess.run(
        [
            "createrepo_c",
            "--update",
            local_dist_dir,
        ],
        check=True,
    )

    gpg_detach_sign(repomd)

    existing: dict[tuple[str, str], Package] = {}
    packages: dict[tuple[str, str], Package] = {}

    idxfile = index_dir / f"{idx}.json"
    if idxfile.exists():
        with open(idxfile, "r") as f:
            data = json.load(f)
            if isinstance(data, dict) and (pkglist := data.get("packages")):
                for pkg in pkglist:
                    index_key = (pkg["basename"], pkg["version_key"])
                    existing[index_key] = Package(**pkg)

def process_generic(
    cfg: Config,
    s3session: s3.S3ServiceResource,
    tf: tarfile.TarFile,
    metadata: dict[str, Any],
    temp_dir: pathlib.Path,
    local_dir: pathlib.Path,
) -> None:
    bucket = s3session.Bucket(BUCKET)
    pkg_directories = set()
    rrules = {}

    basename = metadata["name"]
    slot = metadata.get("version_slot")
    slot_suf = f"-{slot}" if slot else ""
    channel = metadata["channel"]
    channel_suf = f".{channel}" if channel and channel != "stable" else ""
    target = metadata["target"]
    contents = metadata["contents"]

    pkg_dir = f"{target}{channel_suf}"
    pkg_directories.add(pkg_dir)

    staging_dir = temp_dir / pkg_dir
    os.makedirs(staging_dir)

    for member in tf.getmembers():
        if member.name in {".", "build-metadata.json"}:
            continue
        leaf = pathlib.Path(member.name)
        tf.extract(member, staging_dir)
        desc = contents[member.name]
        ext = desc["suffix"]
        asc_path = gpg_detach_sign(staging_dir / leaf)
        sha256_path = sha256(staging_dir / leaf)
        blake2b_path = blake2b(staging_dir / leaf)
        metadata_path = staging_dir / f"{leaf}.metadata.json"
        with open(metadata_path, "w") as f:
            json.dump(metadata, f)

        print(f"metadata={metadata}")
        print(f"target={target} leaf={leaf}")
        print(f"basename={basename} slot={slot}")
        print(f"channel={channel} pkg_dir={pkg_dir}")
        print(f"ext={ext}")

        # Store the fully-qualified artifact to archive/
        archive_dir = ARCHIVE / pkg_dir
        put(bucket, staging_dir / leaf, archive_dir, cache=True)
        put(bucket, asc_path, archive_dir, cache=True)
        put(bucket, sha256_path, archive_dir, cache=True)
        put(bucket, blake2b_path, archive_dir, cache=True)
        put(bucket, metadata_path, archive_dir, cache=True)

        if metadata.get("publish_link_to_latest"):
            # And record a copy of it in the dist/ directory as an
            # unversioned key for ease of reference in download
            # scripts.  Note: the archive/ entry is cached, but the
            # dist/ entry MUST NOT be cached for obvious reasons.
            # However, we still want the benefit of CDN for it, so
            # we generate a bucket-wide redirect policy for the
            # dist/ object to point to the archive/ object.  See
            # below for details.
            target_dir = DIST / pkg_dir
            dist_name = f"{basename}{slot_suf}{ext}"
            put(bucket, b"", target_dir, name=dist_name)
            asc_name = f"{dist_name}.asc"
            put(bucket, b"", target_dir, name=asc_name)
            sha_name = f"{dist_name}.sha256"
            put(bucket, b"", target_dir, name=sha_name)
            sha_name = f"{dist_name}.blake2b"
            put(bucket, b"", target_dir, name=sha_name)
            rrules[target_dir / dist_name] = archive_dir / leaf

    for pkg_dir in pkg_directories:
        remove_old(bucket, ARCHIVE / pkg_dir, keep=1, channel="nightly")
        make_generic_index(bucket, ARCHIVE, pkg_dir)

    if rrules:
        # We can't use per-object redirects, because in that case S3
        # generates the `301 Moved Permanently` response, and, adding
        # insult to injury, forgets to send the `Cache-Control` header,
        # which makes the response cacheable and useless for the purpose.
        # Luckily the "website" functionality of the bucket allows setting
        # redirection rules centrally, so that's what we do.
        #
        # The redirection rules are key prefix-based, and so we can use just
        # one redirect rule to handle both the main artifact and its
        # accompanying signature and checksum files.
        #
        # NOTE: Amazon S3 has a limitation of 50 routing rules per
        # website configuration.
        website = s3session.BucketWebsite(BUCKET)
        existing_rrules = list(website.routing_rules)
        for src, tgt in rrules.items():
            src_key = str(src)
            tgt_key = str(tgt)
            for rule in existing_rrules:
                condition = rule.get("Condition")
                if not condition:
                    continue
                if condition.get("KeyPrefixEquals") == src_key:
                    try:
                        redirect = rule["Redirect"]
                    except KeyError:
                        redirect = rule["Redirect"] = {}
                    redirect["ReplaceKeyPrefixWith"] = tgt_key
                    redirect["HttpRedirectCode"] = "307"
                    break
            else:
                existing_rrules.append({
                    "Condition": {
                        "KeyPrefixEquals": src_key,
                    },
                    "Redirect": {
                        "HttpRedirectCode": "307",
                        "Protocol": "https",
                        "HostName": "packages.edgedb.com",
                        "ReplaceKeyPrefixWith": tgt_key,
                    },
                })

        website_config: s3types.WebsiteConfigurationTypeDef = {
            "RoutingRules": existing_rrules,
        }
        if website.error_document is not None:
            website_config["ErrorDocument"] = cast(
                s3types.ErrorDocumentTypeDef,
                website.error_document,
            )
        if website.index_document is not None:
            website_config["IndexDocument"] = cast(
                s3types.IndexDocumentTypeDef,
                website.index_document,
            )
        if website.redirect_all_requests_to is not None:
            website_config["RedirectAllRequestsTo"] = cast(
                s3types.RedirectAllRequestsToTypeDef,
                website.redirect_all_requests_to,
            )

        print("updating bucket website config:")
        pprint.pprint(website_config)
        website.put(WebsiteConfiguration=website_config)

def process_apt(
    cfg: Config,
    s3session: s3.S3ServiceResource,
    tf: tarfile.TarFile,
    metadata: dict[str, Any],
    temp_dir: pathlib.Path,
    local_dir: pathlib.Path,
) -> None:
    bucket = s3session.Bucket(BUCKET)
    changes = None
    incoming_dir = temp_dir / "incoming"
    incoming_dir.mkdir()
    reprepro_logs = temp_dir / "reprepro-logs"
    reprepro_logs.mkdir()
    reprepro_tmp = temp_dir / "reprepro-tmp"
    reprepro_tmp.mkdir()
    reprepro_conf = temp_dir / "reprepro-conf"
    reprepro_conf.mkdir()
    local_apt_dir = local_dir / "apt"
    local_apt_dir.mkdir(parents=True, exist_ok=True)
    index_dir = local_apt_dir / ".jsonindexes"
    index_dir.mkdir(exist_ok=True)

    with open(reprepro_conf / "incoming", "wt") as f:
        dists = " ".join(d["codename"] for d in cfg["apt"]["distributions"])
        incoming = textwrap.dedent(f"""\
            Name: default
            IncomingDir: {str(incoming_dir)}
            TempDir: {str(reprepro_tmp)}
            Allow: {dists}
        """)
        f.write(incoming)

    with open(reprepro_conf / "distributions", "wt") as f:
        distributions = generate_reprepro_distributions(cfg)
        f.write(distributions)

    for member in tf.getmembers():
        if member.name in {".", "build-metadata.json"}:
            continue
        tf.extract(member, incoming_dir)
        fn = pathlib.Path(member.name)
        if fn.suffix == ".changes":
            if changes is not None:
                print("Multiple .changes files in apt tarball")
                return
            changes = fn

    for sub in [".jsonindexes", "db", "dists"]:
        sync_to_local(
            bucket,
            pathlib.Path("/apt") / sub,
            local_apt_dir / sub,
            exact_timestamps=True,
        )

    sync_to_local(
        bucket,
        pathlib.Path("/apt") / "pool",
        local_apt_dir / "pool",
    )

    subprocess.run(
        [
            "reprepro",
            "-V",
            "-V",
            f"--confdir={str(reprepro_conf)}",
            f"--basedir={str(local_apt_dir)}",
            f"--logdir={str(reprepro_logs)}",
            "processincoming",
            "default",
            str(changes),
        ],
        cwd=incoming_dir,
        check=True,
    )

    result = subprocess.run(
        [
            "reprepro",
            f"--confdir={str(reprepro_conf)}",
            f"--basedir={str(local_apt_dir)}",
            f"--logdir={str(reprepro_logs)}",
            "dumpreferences",
        ],
        text=True,
        check=True,
        stdout=subprocess.PIPE,
        stderr=None,
    )

    repo_dists = set()
    for line in result.stdout.split("\n"):
        if not line.strip():
            continue
        dist, _, _ = line.partition("|")
        repo_dists.add(dist)

    list_format = (r"\0".join((
        r"${$architecture}",
        r"${$component}",
        r"${package}",
        r"${version}",
        r"${$fullfilename}",
        r"${Installed-Size}",
        r"${Metapkg-Metadata}",
    )) + r"\n")

    existing: dict[str, dict[tuple[str, str], Package]] = {}
    packages: dict[str, dict[tuple[str, str], Package]] = {}

    for dist in repo_dists:
        result = subprocess.run(
            [
                "reprepro",
                f"--confdir={str(reprepro_conf)}",
                f"--basedir={str(local_apt_dir)}",
                f"--logdir={str(reprepro_logs)}",
                f"--list-format={list_format}",
                "list",
                dist,
            ],
            text=True,
            check=True,
            stdout=subprocess.PIPE,
            stderr=None,
        )

        for line in result.stdout.split("\n"):
            if not line.strip():
                continue
            (
                arch,
                component,
                pkgname,
                pkgver,
                pkgfile,
                size,
                pkgmetadata_json,
            ) = line.split("\0")

            if component != "main" and not dist.endswith(component):
                index_dist = f"{dist}.{component}"
            else:
                index_dist = dist

            prev_dist_packages = existing.get(index_dist)
            if prev_dist_packages is None:
                idxfile = index_dir / f"{index_dist}.json"
                prev_dist_packages = {}
                if idxfile.exists():
                    with open(idxfile, "r") as f:
                        data = json.load(f)
                        if isinstance(data, dict) and (
                            pkglist := data.get("packages")
                        ):
                            for pkg in pkglist:
                                index_key = (
                                    pkg["basename"],
                                    pkg["version_key"],
                                )
                                prev_dist_packages[index_key] = Package(**pkg)
                existing[index_dist] = prev_dist_packages

            dist_packages = packages.get(index_dist)
            if dist_packages is None:
                packages[index_dist] = dist_packages = {}

            if arch == "amd64":
                arch = "x86_64"

            is_metapackage = int(size) < 20

            relver, _, revver = pkgver.rpartition("-")

            m = slot_regexp.match(pkgname)
            if not m:
                print("cannot parse package name: {}".format(pkgname))
                basename = pkgname
                slot = None
            else:
                basename = m.group(1)
                slot = m.group(2)

            if pkgmetadata_json:
                pkgmetadata = json.loads(pkgmetadata_json)
                if is_metapackage:
                    pkgmetadata["name"] = basename
                parsed_ver = pkgmetadata["version_details"]
            else:
                parsed_ver = parse_version(relver)
                pkgmetadata = {
                    "name": basename,
                    "version": relver,
                    "version_slot": slot,
                    "version_details": parsed_ver,
                    "architecture": arch,
                    "revision": revver,
                }

            version_key = format_version_key(parsed_ver, revver)
            ver_metadata = pkgmetadata["version_details"]["metadata"]
            index_key = (pkgmetadata["name"], version_key)

            if index_key in prev_dist_packages:
                dist_packages[index_key] = prev_dist_packages[index_key]
                dist_packages[index_key]["architecture"] = arch
            else:
                if basename == "edgedb-server" and not ver_metadata.get(
                    "catalog_version"
                ):
                    if not pathlib.Path(pkgfile).exists():
                        print(f"package file does not exist: {pkgfile}")
                    else:
                        catver = extract_catver_from_deb(pkgfile)
                        if catver is None:
                            print(
                                f"cannot extract catalog version from {pkgfile}"
                            )
                        else:
                            ver_metadata["catalog_version"] = str(catver)
                            print(f"extracted catver {catver} from {pkgfile}")

                installref = InstallRef(
                    ref="{}={}-{}".format(pkgname, relver, revver),
                    type=None,
                    encoding=None,
                    verification={},
                )

                append_artifact(dist_packages, pkgmetadata, installref)
                print("makeindex: noted {}".format(installref["ref"]))