Example #1
def test_to_dataframe_no_schema_set_raises_type_error(
    mut, class_under_test, mock_client, monkeypatch
):
    reader = class_under_test(
        [], mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {}
    )
    read_session = bigquery_storage_v1beta1.types.ReadSession()

    with pytest.raises(TypeError):
        reader.to_dataframe(read_session)
def test_to_dataframe_no_fastavro_raises_import_error(
    mut, class_under_test, mock_client, monkeypatch
):
    monkeypatch.setattr(mut, "fastavro", None)
    reader = class_under_test(
        [], mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {}
    )
    read_session = bigquery_storage_v1beta1.types.ReadSession()

    with pytest.raises(ImportError):
        reader.to_dataframe(read_session)
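All of these examples rely on module-level pytest fixtures (mut, class_under_test, mock_client) defined elsewhere in the test file. A minimal sketch of what they could look like, assuming the module under test is google.cloud.bigquery_storage_v1beta1.reader and the class is its ReadRowsStream (both assumptions, not shown in the examples):

from unittest import mock

import pytest


@pytest.fixture()
def mut():
    # "Module under test": the reader module that implements to_dataframe().
    from google.cloud.bigquery_storage_v1beta1 import reader

    return reader


@pytest.fixture()
def class_under_test(mut):
    # The stream wrapper whose to_dataframe() behavior these tests exercise.
    return mut.ReadRowsStream


@pytest.fixture()
def mock_client():
    # A client double; none of these tests issue real API calls.
    from google.cloud.bigquery_storage_v1beta1.gapic import big_query_storage_client

    return mock.create_autospec(big_query_storage_client.BigQueryStorageClient)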
Example #4
def test_to_dataframe_w_scalars(class_under_test, mock_client):
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    read_session = _generate_avro_read_session(avro_schema)
    avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)

    reader = class_under_test(
        avro_blocks, mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {}
    )
    got = reader.to_dataframe(read_session)

    expected = pandas.DataFrame(
        list(itertools.chain.from_iterable(SCALAR_BLOCKS)), columns=SCALAR_COLUMN_NAMES
    )
    # fastavro provides its own UTC definition, so
    # compare the timestamp columns separately.
    got_ts = got["ts_col"]
    got = got.drop(columns=["ts_col"])
    expected_ts = expected["ts_col"]
    expected = expected.drop(columns=["ts_col"])

    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
    pandas.testing.assert_series_equal(
        got_ts.reset_index(drop=True),
        expected_ts.reset_index(drop=True),
        check_dtype=False,  # fastavro's UTC means different dtype
        check_datetimelike_compat=True,
    )
def test_to_dataframe_w_dtypes(class_under_test, mock_client):
    avro_schema = _bq_to_avro_schema(
        [
            {"name": "bigfloat", "type": "float64"},
            {"name": "lilfloat", "type": "float64"},
        ]
    )
    read_session = _generate_avro_read_session(avro_schema)
    blocks = [
        [{"bigfloat": 1.25, "lilfloat": 30.5}, {"bigfloat": 2.5, "lilfloat": 21.125}],
        [{"bigfloat": 3.75, "lilfloat": 11.0}],
    ]
    avro_blocks = _bq_to_avro_blocks(blocks, avro_schema)

    reader = class_under_test(
        avro_blocks, mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {}
    )
    got = reader.to_dataframe(read_session, dtypes={"lilfloat": "float16"})

    expected = pandas.DataFrame(
        {
            "bigfloat": [1.25, 2.5, 3.75],
            "lilfloat": pandas.Series([30.5, 21.125, 11.0], dtype="float16"),
        },
        columns=["bigfloat", "lilfloat"],
    )
    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
def test_to_dataframe_w_dtypes_arrow(class_under_test, mock_client):
    arrow_schema = _bq_to_arrow_schema(
        [
            {"name": "bigfloat", "type": "float64"},
            {"name": "lilfloat", "type": "float64"},
        ]
    )
    read_session = _generate_arrow_read_session(arrow_schema)
    blocks = [
        [{"bigfloat": 1.25, "lilfloat": 30.5}, {"bigfloat": 2.5, "lilfloat": 21.125}],
        [{"bigfloat": 3.75, "lilfloat": 11.0}],
    ]
    arrow_batches = _bq_to_arrow_batches(blocks, arrow_schema)

    reader = class_under_test(
        arrow_batches, mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {}
    )
    got = reader.to_dataframe(read_session, dtypes={"lilfloat": "float16"})

    expected = pandas.DataFrame(
        {
            "bigfloat": [1.25, 2.5, 3.75],
            "lilfloat": pandas.Series([30.5, 21.125, 11.0], dtype="float16"),
        },
        columns=["bigfloat", "lilfloat"],
    )
    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
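The Avro-side helpers and sample data used above (and again in Example #8) — SCALAR_COLUMNS, SCALAR_COLUMN_NAMES, SCALAR_BLOCKS, _bq_to_avro_schema, _generate_avro_read_session, _bq_to_avro_blocks — are defined outside these examples. A rough sketch of the shape they would need for the tests to run; the column list, type mapping, and sample rows here are illustrative assumptions, not the originals:

import datetime
import io
import json

import fastavro

from google.cloud import bigquery_storage_v1beta1

# Trimmed-down stand-ins; the real test module covers many more BigQuery types.
BQ_TO_AVRO_TYPES = {
    "int64": "long",
    "float64": "double",
    "string": "string",
    "timestamp": {"type": "long", "logicalType": "timestamp-micros"},
}

SCALAR_COLUMNS = [
    {"name": "int_col", "type": "int64"},
    {"name": "str_col", "type": "string"},
    {"name": "ts_col", "type": "timestamp"},  # compared separately in the tests
]
SCALAR_COLUMN_NAMES = [col["name"] for col in SCALAR_COLUMNS]
SCALAR_BLOCKS = [
    [
        {
            "int_col": 123,
            "str_col": "hello",
            "ts_col": datetime.datetime(2019, 1, 1, tzinfo=datetime.timezone.utc),
        }
    ],
    [
        {
            "int_col": 456,
            "str_col": "world",
            "ts_col": datetime.datetime(2019, 1, 2, tzinfo=datetime.timezone.utc),
        }
    ],
]


def _bq_to_avro_schema(bq_columns):
    # Build an Avro record schema from BigQuery-style column descriptions.
    return {
        "type": "record",
        "name": "__root__",
        "fields": [
            {"name": col["name"], "type": BQ_TO_AVRO_TYPES[col["type"]]}
            for col in bq_columns
        ],
    }


def _generate_avro_read_session(avro_schema):
    # Embed the JSON-encoded Avro schema in a ReadSession, as the API would.
    return bigquery_storage_v1beta1.types.ReadSession(
        avro_schema={"schema": json.dumps(avro_schema)}
    )


def _bq_to_avro_blocks(bq_blocks, avro_schema_json):
    # Serialize each block of rows into a ReadRowsResponse carrying Avro bytes.
    parsed_schema = fastavro.parse_schema(avro_schema_json)
    responses = []
    for block in bq_blocks:
        buf = io.BytesIO()
        for row in block:
            fastavro.schemaless_writer(buf, parsed_schema, row)
        responses.append(
            bigquery_storage_v1beta1.types.ReadRowsResponse(
                avro_rows={
                    "serialized_binary_rows": buf.getvalue(),
                    "row_count": len(block),
                }
            )
        )
    return responses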
Example #8
def test_to_dataframe_no_pandas_raises_import_error(
    mut, class_under_test, mock_client, monkeypatch
):
    monkeypatch.setattr(mut, "pandas", None)
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    read_session = _generate_avro_read_session(avro_schema)
    avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)

    reader = class_under_test(
        avro_blocks, mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {}
    )

    with pytest.raises(ImportError):
        reader.to_dataframe(read_session)

    with pytest.raises(ImportError):
        reader.rows(read_session).to_dataframe()

    with pytest.raises(ImportError):
        next(reader.rows(read_session).pages).to_dataframe()
Example #10
def test_to_dataframe_w_scalars_arrow(class_under_test, mock_client):
    arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS)
    read_session = _generate_arrow_read_session(arrow_schema)
    arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema)

    reader = class_under_test(
        arrow_batches, mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {}
    )
    got = reader.to_dataframe(read_session)

    expected = pandas.DataFrame(
        list(itertools.chain.from_iterable(SCALAR_BLOCKS)), columns=SCALAR_COLUMN_NAMES
    )

    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
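Similarly, the Arrow-side helpers (_bq_to_arrow_schema, _generate_arrow_read_session, _bq_to_arrow_batches) used in Examples #4 and #10 are defined outside these examples. A hedged sketch of one possible shape, with an illustrative (assumed) BigQuery-to-Arrow type mapping:

import pyarrow

from google.cloud import bigquery_storage_v1beta1

# Trimmed-down mapping; the real test module covers many more BigQuery types.
BQ_TO_ARROW_TYPES = {
    "int64": pyarrow.int64(),
    "float64": pyarrow.float64(),
    "string": pyarrow.utf8(),
    "timestamp": pyarrow.timestamp("us", tz="UTC"),
}


def _bq_to_arrow_schema(bq_columns):
    # Build a pyarrow schema from BigQuery-style column descriptions.
    return pyarrow.schema(
        (col["name"], BQ_TO_ARROW_TYPES[col["type"]]) for col in bq_columns
    )


def _generate_arrow_read_session(arrow_schema):
    # Embed the serialized Arrow schema in a ReadSession, as the API would.
    return bigquery_storage_v1beta1.types.ReadSession(
        arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}
    )


def _bq_to_arrow_batches(bq_blocks, arrow_schema):
    # Serialize each block of rows into a ReadRowsResponse carrying an Arrow batch.
    responses = []
    for block in bq_blocks:
        arrays = [
            pyarrow.array([row[field.name] for row in block], type=field.type)
            for field in arrow_schema
        ]
        record_batch = pyarrow.RecordBatch.from_arrays(arrays, names=arrow_schema.names)
        responses.append(
            bigquery_storage_v1beta1.types.ReadRowsResponse(
                arrow_record_batch={
                    "serialized_record_batch": record_batch.serialize().to_pybytes()
                }
            )
        )
    return responses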