Example #1
def publish_v3_location_subsets(
    location_key_from: str = None, location_key_until: str = None
) -> Response:
    location_key_from = _get_request_param("location_key_from", location_key_from)
    location_key_until = _get_request_param("location_key_until", location_key_until)

    with temporary_directory() as workdir:
        input_folder = workdir / "input"
        intermediate_folder = workdir / "temp"
        output_folder = workdir / "output"
        input_folder.mkdir(parents=True, exist_ok=True)
        output_folder.mkdir(parents=True, exist_ok=True)

        location_keys = list(table_read_column(SRC / "data" / "metadata.csv", "key"))
        if location_key_from is not None:
            location_keys = [key for key in location_keys if key >= location_key_from]
        if location_key_until is not None:
            location_keys = [key for key in location_keys if key <= location_key_until]
        logger.log_info(
            f"Publishing {len(location_keys)} location subsets "
            f"from {location_keys[0]} until {location_keys[-1]}"
        )

        # Download all the global tables into our local storage
        forbid_tokens = ("/", "main.", "aggregated.")
        download_folder(
            GCS_BUCKET_PROD,
            "v3",
            input_folder,
            lambda x: x.suffix == ".csv" and all(token not in str(x) for token in forbid_tokens),
        )
        logger.log_info(f"Downloaded {sum(1 for _ in input_folder.glob('**/*.csv'))} CSV files")

        # Break out each table into separate folders based on the location key
        publish_location_breakouts(input_folder, intermediate_folder, use_table_names=V3_TABLE_LIST)
        logger.log_info("Created all table location breakouts")

        # Create a folder which will host all the location aggregates
        location_aggregates_folder = output_folder / "location"
        location_aggregates_folder.mkdir(parents=True, exist_ok=True)

        # Aggregate the tables for each location independently
        publish_location_aggregates(
            intermediate_folder,
            location_aggregates_folder,
            location_keys,
            use_table_names=V3_TABLE_LIST,
        )
        logger.log_info("Aggregated all table breakouts by location")

        # Upload the results to the prod bucket
        upload_folder(GCS_BUCKET_PROD, "v3", output_folder)

    return Response("OK", status=200)
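The handler above returns a Flask-style Response and falls back to request parameters via _get_request_param when the keyword arguments are None. Below is a minimal sketch of how it might be served and triggered; the Flask wiring and the _get_request_param fallback shown here are assumptions for illustration, not part of the example itself.

from flask import Flask, request

app = Flask(__name__)


def _get_request_param(name: str, value: str = None) -> str:
    # Assumption: prefer the explicit argument, otherwise read it from the query string.
    return value if value is not None else request.args.get(name)


@app.route("/publish_v3_location_subsets")
def handle_publish_v3_location_subsets():
    # Delegates to the function from Example #1; both parameters are optional.
    return publish_v3_location_subsets()


# Once the app is running, the job could be triggered for a key range, e.g.:
#   curl "http://localhost:8080/publish_v3_location_subsets?location_key_from=US&location_key_until=US_TX"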
Example #2
def publish_v3_location_subsets(location_key_from: str = None,
                                location_key_until: str = None) -> Response:
    location_key_from = _get_request_param("location_key_from",
                                           location_key_from)
    location_key_until = _get_request_param("location_key_until",
                                            location_key_until)

    with temporary_directory() as workdir:
        input_folder = workdir / "input"
        intermediate_folder = workdir / "temp"
        output_folder = workdir / "output"
        input_folder.mkdir(parents=True, exist_ok=True)
        output_folder.mkdir(parents=True, exist_ok=True)

        location_keys = list(
            table_read_column(SRC / "data" / "metadata.csv", "key"))
        if location_key_from is not None:
            location_keys = [
                key for key in location_keys if key >= location_key_from
            ]
        if location_key_until is not None:
            location_keys = [
                key for key in location_keys if key <= location_key_until
            ]
        logger.log_info(f"Publishing {len(location_keys)} location subsets "
                        f"from {location_keys[0]} until {location_keys[-1]}")

        # Download all the global tables into our local storage
        download_folder(GCS_BUCKET_PROD, "v3", input_folder,
                        lambda x: "/" not in str(x))

        # Break out each table into separate folders based on the location key
        publish_location_breakouts(input_folder,
                                   intermediate_folder,
                                   use_table_names=V3_TABLE_LIST)

        # Aggregate the tables for each location independently
        publish_location_aggregates(intermediate_folder,
                                    output_folder,
                                    location_keys,
                                    use_table_names=V3_TABLE_LIST)

        # Upload the results to the prod bucket
        upload_folder(GCS_BUCKET_PROD, "v3", output_folder)

    return Response("OK", status=200)
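The two examples differ mainly in their download filter and output layout: Example #1 keeps only top-level CSVs that are not the combined main/aggregated tables and writes per-location aggregates under an output/location folder, while Example #2 keeps any top-level blob and writes aggregates directly to the output folder. A small illustrative check of how each filter treats typical relative paths (the paths below are hypothetical):

from pathlib import PurePosixPath

# Hypothetical relative paths as they might appear under the "v3" prefix.
paths = [
    PurePosixPath("epidemiology.csv"),         # global table at the top level
    PurePosixPath("main.csv"),                 # combined "main" table
    PurePosixPath("location/US_TX/main.csv"),  # per-location breakout
]

forbid_tokens = ("/", "main.", "aggregated.")
filter_example_1 = lambda x: x.suffix == ".csv" and all(t not in str(x) for t in forbid_tokens)
filter_example_2 = lambda x: "/" not in str(x)

for path in paths:
    print(path, filter_example_1(path), filter_example_2(path))
# Example #1's filter keeps only "epidemiology.csv"; Example #2's filter also
# keeps "main.csv" (and any other top-level file, CSV or not).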