Пример #1
0
def get_staging_area_file_descriptors(
        storage_client: Client,
        staging_areas: set[str]) -> dict[str, set[PathWithCrc]]:
    """
    Collect the file descriptors found under each GS staging area.

    For every staging area URL and every member of ``FileMetadataTypes``, the
    blobs listed under ``<area path>/descriptors/<file type value>`` are
    downloaded and parsed as JSON. Each descriptor is turned into a
    ``PathWithCrc`` built from ``target_path_from_descriptor(descriptor)`` and
    the descriptor's ``"crc32c"`` entry.

    :param storage_client: client used to list the descriptor blobs
    :param staging_areas: staging area URLs; the URL netloc is passed as the
        bucket and the URL path is used as the root of the listing prefix
    :return: mapping of staging area URL (exactly as passed in) to the set of
        ``PathWithCrc`` entries found there. Areas without any descriptor
        blobs get no key of their own; the mapping is a ``defaultdict``, so
        looking one up yields an empty set.
    :raises KeyError: if a descriptor has no ``"crc32c"`` entry
    :raises json.JSONDecodeError: if a descriptor blob is not valid JSON
    """

    expected: dict[str, set[PathWithCrc]] = defaultdict(set)
    for staging_area in staging_areas:
        url = urlparse(staging_area)

        # Strip slashes on both ends so "gs://bucket/area/" and
        # "gs://bucket/area" produce the same prefix instead of
        # "area//descriptors/...".
        area_root = url.path.strip('/')
        # An area at the bucket root must not yield a prefix with a leading
        # slash.
        descriptors_root = f"{area_root}/descriptors" if area_root else "descriptors"

        for file_type in FileMetadataTypes:
            prefix = f"{descriptors_root}/{file_type.value}"
            # Consume the listing directly; there is no need to materialise
            # it into a list before downloading.
            for blob in storage_client.list_blobs(url.netloc, prefix=prefix):
                parsed = json.loads(blob.download_as_text())
                expected[staging_area].add(
                    PathWithCrc(target_path_from_descriptor(parsed), parsed["crc32c"]))

    return expected
Пример #2
0
def path_has_any_data(bucket: str, prefix: str, gcs: Client) -> bool:
    """
    Check whether any blob listed under the given path has a non-zero size.

    :param bucket: bucket passed to ``gcs.list_blobs``
    :param prefix: prefix passed to ``gcs.list_blobs``
    :param gcs: client used to list the blobs
    :return: ``True`` if at least one listed blob has ``size > 0``; ``False``
        otherwise, including when nothing is listed
    """
    # A generator lets any() stop at the first non-empty blob, so the rest of
    # the listing is never consumed or copied into intermediate lists.
    return any(blob.size > 0 for blob in gcs.list_blobs(bucket, prefix=prefix))