Пример #1
0
def main():
    """Publish the results of a query as JSON to GCS.

    Returns without publishing when no metadata file is found for the query
    (a notice is printed) or when the metadata does not mark the query as
    public JSON. Exits with status 1 when the metadata fails public data
    validation.
    """
    # arguments the parser does not recognise are not needed here
    args, _ = parser.parse_known_args()
    sql_path = args.query_file

    try:
        query_metadata = Metadata.of_sql_file(sql_path)
    except FileNotFoundError:
        print("No metadata file for: {}".format(sql_path))
        return

    # only queries flagged as public JSON get published
    if not query_metadata.is_public_json():
        return

    if not validate_public_data(query_metadata, sql_path):
        sys.exit(1)

    gcs_client = storage.Client()
    bq_client = bigquery.Client(args.public_project_id)

    JsonPublisher(
        bq_client,
        gcs_client,
        args.public_project_id,
        sql_path,
        args.api_version,
        args.target_bucket,
        args.parameter,
    ).publish_json()
Пример #2
0
    def test_is_valid_public_data(self):
        """Check validate_public_data against public and non-public metadata.

        Metadata without any public flag validates, metadata with a public
        flag and a ``review_bug`` validates, and metadata with a public flag
        but no ``review_bug`` yields ``False``.
        """
        metadata_path = "test/path/metadata.yaml"

        # nothing is marked public, so there is nothing to reject
        not_public = Metadata("No public data", "No public data", {}, {})
        assert validate_public_data(not_public, metadata_path)

        # public flag together with a review bug is accepted
        reviewed_cases = (
            ("Public json data", {"public_json": True, "review_bug": 123456}),
            (
                "Public BigQuery data",
                {"public_bigquery": True, "review_bug": 123456},
            ),
        )
        for title, public_flags in reviewed_cases:
            reviewed = Metadata(title, title, [], public_flags, {})
            assert validate_public_data(reviewed, metadata_path)

        # public flag without a review bug must be rejected
        unreviewed = Metadata(
            "Public BigQuery data",
            "Public BigQuery data",
            [],
            {"public_bigquery": True},
            {},
        )
        assert validate_public_data(unreviewed, metadata_path) is False
Пример #3
0
def run(
    query_file,
    dataset_id,
    destination_table,
    query_arguments,
    public_project_id=PUBLIC_PROJECT_ID,
):
    """Execute bq to run a query.

    The content of ``query_file`` is piped to ``bq`` on stdin. When the
    query's metadata marks it as public BigQuery data, the destination table
    is rewritten to ``<public_project_id>:<dataset_id>.<destination_table>``.

    Args:
        query_file: Path of the file containing the query; also used to look
            up the query's metadata.
        dataset_id: Dataset forwarded as ``--dataset_id``, or ``None``.
        destination_table: Table forwarded as ``--destination_table``, or
            ``None``.
        query_arguments: Additional arguments forwarded to ``bq``. The list
            is copied and never modified.
        public_project_id: Project that receives the results of public
            queries.

    Raises:
        SystemExit: With status 1 if the metadata cannot be parsed as YAML,
            fails public data validation, or a public query is run without
            a dataset ID and a destination table matching
            ``DESTINATION_TABLE_RE``.
        subprocess.CalledProcessError: If ``bq`` exits with a non-zero status.
    """
    # work on a copy so the caller's argument list is left untouched and can
    # be reused without accumulating duplicate flags
    bq_arguments = list(query_arguments)

    if dataset_id is not None:
        # dataset ID was parsed by argparse but needs to be passed as parameter
        # when running the query
        bq_arguments.append("--dataset_id={}".format(dataset_id))

    use_public_table = False

    try:
        metadata = Metadata.of_query_file(query_file)
        if metadata.is_public_bigquery():
            if not validate_public_data(metadata, query_file):
                sys.exit(1)

            # change the destination table to write results to the public
            # dataset; a view to the public table in the internal dataset is
            # created when CI runs
            if (
                dataset_id is not None
                and destination_table is not None
                and re.match(DESTINATION_TABLE_RE, destination_table)
            ):
                destination_table = "{}:{}.{}".format(
                    public_project_id, dataset_id, destination_table
                )
                bq_arguments.append(
                    "--destination_table={}".format(destination_table)
                )
                use_public_table = True
            else:
                print("ERROR: Cannot run public dataset query. Parameters"
                      " --destination_table=<table without dataset ID> and"
                      " --dataset_id=<dataset> required")
                sys.exit(1)
    except yaml.YAMLError as e:
        print(e)
        sys.exit(1)
    except FileNotFoundError:
        # missing metadata is not an error: the query runs as a non-public one
        print("INFO: No metadata.yaml found for {}".format(query_file))

    if not use_public_table and destination_table is not None:
        # destination table was parsed by argparse, however if it wasn't
        # modified to point to a public table it needs to be passed as
        # parameter for the query
        bq_arguments.append(
            "--destination_table={}".format(destination_table)
        )

    with open(query_file) as query_stream:
        # pass the arguments to bq as a list (no shell involved) so that they
        # are used as is
        subprocess.check_call(["bq"] + bq_arguments, stdin=query_stream)