# Example #1
def test_list_rows_page_size(bigquery_client: bigquery.Client, table_id: str):
    """Verify that list_rows paginates results.

    Loads 7 rows and reads them back with a page size of 3, expecting two
    full pages followed by one remainder page of a single row.
    """
    total_rows = 7
    rows_per_page = 3
    full_pages, remainder = divmod(total_rows, rows_per_page)

    rows = [{"string_col": "item%d" % idx, "rowindex": idx}
            for idx in range(total_rows)]
    bigquery_client.load_table_from_json(rows, table_id).result()

    row_iterator = bigquery_client.list_rows(
        table_id,
        selected_fields=[
            bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING)
        ],
        page_size=rows_per_page,
    )
    page_iter = row_iterator.pages

    # Every full page must hold exactly rows_per_page items.
    for _ in range(full_pages):
        assert next(page_iter).num_items == rows_per_page
    # The final page carries the leftover rows (7 % 3 == 1, so it exists).
    assert next(page_iter).num_items == remainder
def write_data_to_bigquery(
    client: bigquery.Client,
    table_id: str,
    rows_to_insert: List[Dict[str, str]],
    json_schema: Dict[str, str],
):
    """Batch-append JSON rows to a BigQuery table.

    Args:
        client: BigQuery client used to run the load job.
        table_id: Table identifier, expanded via ``get_complete_table_id``
            (presumably to project.dataset.table form — confirm with that
            helper's definition).
        rows_to_insert: Rows to load; when empty, nothing is written.
        json_schema: JSON schema mapping, converted to a BigQuery schema
            by ``convert_json_schema_to_bigquery_schema``.

    Raises:
        google.cloud.exceptions.GoogleCloudError: if the load job fails
            (surfaced by ``load_job.result()``).
    """
    bigquery_schema: List[
        bigquery.SchemaField] = convert_json_schema_to_bigquery_schema(
            json_schema)
    complete_table_id = get_complete_table_id(table_id)

    job_config = bigquery.LoadJobConfig(
        schema=bigquery_schema,
        # WRITE_APPEND adds rows without truncating existing table data.
        write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
        source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
        # Allow the load job to add columns that are new to the table.
        schema_update_options=[
            bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION
        ],
    )
    # Batch write; idiomatic truthiness check instead of len(...) > 0.
    if rows_to_insert:
        load_job = client.load_table_from_json(
            rows_to_insert,
            complete_table_id,
            job_config=job_config,
        )
        # Block until the load job finishes so failures surface here.
        load_job.result()
    else:
        print("uploading data row is empty")