Exemplo n.º 1
0
def test_to_arrow_max_results_no_progress_bar():
    """``to_arrow(max_results=...)`` passes the limit straight to ``result()``.

    No ``progress_bar_type`` is given, so ``QueryJob.result`` is expected to be
    called exactly once, with only the ``max_results`` keyword.
    """
    from google.cloud.bigquery import table
    from google.cloud.bigquery.job import QueryJob as target_class
    from google.cloud.bigquery.schema import SchemaField

    # Job under test, rebuilt from a canned "query" job resource.
    client = _make_client(connection=_make_connection({}))
    job = target_class.from_api_repr(_make_job_resource(job_type="query"), client)

    # Two-row result set served by a fake API request callable.
    fields = [
        SchemaField("name", "STRING", mode="REQUIRED"),
        SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    people = (("Bharney Rhubble", "33"), ("Wylma Phlyntstone", "29"))
    payload = {"rows": [{"f": [{"v": who}, {"v": years}]} for who, years in people]}
    fake_request = mock.Mock(return_value=payload)
    row_iterator = table.RowIterator(client, fake_request, "/foo", fields)

    # Replace QueryJob.result so it hands back the prepared iterator.
    with mock.patch(
        "google.cloud.bigquery.job.QueryJob.result", return_value=row_iterator,
    ) as mocked_result:
        arrow_table = job.to_arrow(create_bqstorage_client=False, max_results=123)

    mocked_result.assert_called_once_with(max_results=123)

    assert isinstance(arrow_table, pyarrow.Table)
    assert arrow_table.num_rows == 2
Exemplo n.º 2
0
def test_to_dataframe_w_tqdm():
    """``to_dataframe`` with a tqdm progress bar retries ``result()`` until rows arrive.

    ``QueryJob.result`` is patched to raise ``TimeoutError`` twice before
    returning a four-row iterator. The test checks that ``result()`` was
    called three times with the progress-bar polling timeout, and that the
    resulting DataFrame has the expected row count and column names.
    """
    from google.cloud.bigquery import table
    from google.cloud.bigquery.job import QueryJob as target_class
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL

    begun_resource = _make_job_resource(job_type="query")
    schema = [
        SchemaField("name", "STRING", mode="NULLABLE"),
        SchemaField("age", "INTEGER", mode="NULLABLE"),
    ]
    rows = [
        {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
        {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
        {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
        {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
    ]

    connection = _make_connection({})
    client = _make_client(connection=connection)
    job = target_class.from_api_repr(begun_resource, client)

    path = "/foo"
    api_request = mock.Mock(return_value={"rows": rows})
    row_iterator = table.RowIterator(client, api_request, path, schema)

    # Give the job a two-stage query plan, both stages already COMPLETE.
    job._properties["statistics"] = {
        "query": {
            "queryPlan": [
                {"name": "S00: Input", "id": "0", "status": "COMPLETE"},
                {"name": "S01: Output", "id": "1", "status": "COMPLETE"},
            ]
        },
    }
    # reload() is patched out; presumably the polling loop calls it between
    # attempts and it would otherwise hit the fake connection -- confirm.
    reload_patch = mock.patch(
        "google.cloud.bigquery.job._AsyncJob.reload", autospec=True
    )
    # Two timeouts, then the real rows: forces three result() calls.
    result_patch = mock.patch(
        "google.cloud.bigquery.job.QueryJob.result",
        side_effect=[
            concurrent.futures.TimeoutError,
            concurrent.futures.TimeoutError,
            row_iterator,
        ],
    )

    with result_patch as result_patch_tqdm, reload_patch:
        df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False)

    assert result_patch_tqdm.call_count == 3
    assert isinstance(df, pandas.DataFrame)
    assert len(df) == 4  # verify the number of rows
    # Compare with ``==``: the ``assert expr, msg`` form only checked that the
    # column list was non-empty, so the names were never verified.
    assert list(df) == ["name", "age"]  # verify the column names
    result_patch_tqdm.assert_called_with(
        timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None
    )
Exemplo n.º 3
0
def test_to_arrow_w_tqdm_w_query_plan():
    """``to_arrow`` with a tqdm progress bar keeps polling ``result()`` until rows arrive.

    The job carries a two-stage query plan, and ``QueryJob.result`` is patched
    to time out twice before yielding a two-row iterator. The test expects
    three ``result()`` calls, the last one made with the polling timeout.
    """
    from google.cloud.bigquery import table
    from google.cloud.bigquery.job import QueryJob as target_class
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL

    # Job under test, rebuilt from a canned "query" job resource.
    job_resource = _make_job_resource(job_type="query")
    client = _make_client(connection=_make_connection({}))
    job = target_class.from_api_repr(job_resource, client)

    # Fake two-row result set.
    fields = [
        SchemaField("name", "STRING", mode="REQUIRED"),
        SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    payload = {
        "rows": [
            {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
            {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
        ]
    }
    row_iterator = table.RowIterator(
        client, mock.Mock(return_value=payload), "/foo", fields
    )

    # Attach a query plan whose stages are both already COMPLETE.
    plan_stages = [
        {"name": "S00: Input", "id": "0", "status": "COMPLETE"},
        {"name": "S01: Output", "id": "1", "status": "COMPLETE"},
    ]
    job._properties["statistics"] = {"query": {"queryPlan": plan_stages}}

    # result() times out twice, then returns the rows.
    outcomes = [
        concurrent.futures.TimeoutError,
        concurrent.futures.TimeoutError,
        row_iterator,
    ]
    patched_reload = mock.patch(
        "google.cloud.bigquery.job._AsyncJob.reload", autospec=True
    )
    patched_result = mock.patch(
        "google.cloud.bigquery.job.QueryJob.result", side_effect=outcomes
    )

    with patched_result as mocked_result, patched_reload:
        arrow_table = job.to_arrow(
            progress_bar_type="tqdm", create_bqstorage_client=False
        )

    assert mocked_result.call_count == 3
    assert isinstance(arrow_table, pyarrow.Table)
    assert arrow_table.num_rows == 2
    mocked_result.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL)