def test_to_dataframe_no_schema_set_raises_type_error(mut, class_under_test,
                                                      mock_gapic_client,
                                                      monkeypatch):
    """``to_dataframe`` raises TypeError for a session built with no schema."""
    # The session is constructed with no arguments, so no schema is set on it.
    session_without_schema = types.ReadSession()
    empty_reader = class_under_test([], mock_gapic_client, "", 0, {})

    with pytest.raises(TypeError):
        empty_reader.to_dataframe(session_without_schema)
def test_to_dataframe_empty_w_dtypes_arrow(class_under_test,
                                           mock_gapic_client):
    """An empty Arrow stream still applies the requested ``dtypes``."""
    column_names = ["bigfloat", "lilfloat"]
    schema = _bq_to_arrow_schema(
        [{"name": column, "type": "float64"} for column in column_names])
    session = _generate_arrow_read_session(schema)
    no_batches = _bq_to_arrow_batches([], schema)
    empty_reader = class_under_test(no_batches, mock_gapic_client, "", 0, {})

    actual = empty_reader.to_dataframe(session, dtypes={"lilfloat": "float16"})

    # Build the zero-row frame we expect, then cast each column: "lilfloat"
    # follows the override passed above, "bigfloat" keeps the schema type.
    wanted = pandas.DataFrame([], columns=column_names)
    for column, dtype in (("bigfloat", "float64"), ("lilfloat", "float16")):
        wanted[column] = wanted[column].astype(dtype)

    # Row labels are irrelevant here, so normalise the index on both sides.
    pandas.testing.assert_frame_equal(
        actual.reset_index(drop=True),
        wanted.reset_index(drop=True),
    )
def test_to_dataframe_w_scalars(class_under_test, mock_gapic_client):
    """Avro blocks of scalar columns convert to the expected DataFrame.

    ``mock_gapic_client`` is requested as a fixture so that the reader is
    constructed with the fixture's value rather than with whatever
    module-level name (if any) happens to be in scope.
    """
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    read_session = _generate_avro_read_session(avro_schema)
    avro_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)

    reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {})
    got = reader.to_dataframe(read_session)

    # Flatten the per-block row lists into a single list of rows.
    expected = pandas.DataFrame(
        list(itertools.chain.from_iterable(SCALAR_BLOCKS)),
        columns=SCALAR_COLUMN_NAMES,
    )
    # fastavro provides its own UTC definition, so
    # compare the timestamp columns separately.
    got_ts = got["ts_col"]
    got = got.drop(columns=["ts_col"])
    expected_ts = expected["ts_col"]
    expected = expected.drop(columns=["ts_col"])

    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
    pandas.testing.assert_series_equal(
        got_ts.reset_index(drop=True),
        expected_ts.reset_index(drop=True),
        check_dtype=False,  # fastavro's UTC means different dtype
        check_datetimelike_compat=True,
    )
Пример #4
0
def test_to_dataframe_w_dtypes(class_under_test, mock_gapic_client):
    """The ``dtypes`` mapping overrides the dtype of the named column only.

    ``mock_gapic_client`` is requested as a fixture so that the reader is
    constructed with the fixture's value rather than with whatever
    module-level name (if any) happens to be in scope.
    """
    avro_schema = _bq_to_avro_schema(
        [
            {"name": "bigfloat", "type": "float64"},
            {"name": "lilfloat", "type": "float64"},
        ]
    )
    # Two blocks, so the result must concatenate rows across block boundaries.
    blocks = [
        [{"bigfloat": 1.25, "lilfloat": 30.5}, {"bigfloat": 2.5, "lilfloat": 21.125}],
        [{"bigfloat": 3.75, "lilfloat": 11.0}],
    ]
    avro_blocks = _bq_to_avro_blocks(blocks, avro_schema)

    reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {})
    got = reader.to_dataframe(dtypes={"lilfloat": "float16"})

    expected = pandas.DataFrame(
        {
            "bigfloat": [1.25, 2.5, 3.75],
            "lilfloat": pandas.Series([30.5, 21.125, 11.0], dtype="float16"),
        },
        columns=["bigfloat", "lilfloat"],
    )
    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
def test_to_dataframe_no_pandas_raises_import_error(mut, class_under_test,
                                                    mock_client, monkeypatch):
    """Each ``to_dataframe`` entry point raises ImportError without pandas."""
    # Replace the ``pandas`` attribute of the module under test with None.
    monkeypatch.setattr(mut, "pandas", None)
    schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    session = _generate_avro_read_session(schema)
    blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, schema)
    stream_reader = class_under_test(blocks, mock_client, "", 0, {})

    # Checked in order: the reader, its row iterator, then a single page.
    entry_points = (
        lambda: stream_reader.to_dataframe(session),
        lambda: stream_reader.rows(session).to_dataframe(),
        lambda: next(stream_reader.rows(session).pages).to_dataframe(),
    )
    for convert in entry_points:
        with pytest.raises(ImportError):
            convert()
def test_to_dataframe_w_scalars_arrow(class_under_test, mock_gapic_client):
    """Arrow batches of scalar columns convert to the expected DataFrame.

    ``mock_gapic_client`` is requested as a fixture so that the reader is
    constructed with the fixture's value rather than with whatever
    module-level name (if any) happens to be in scope.
    """
    arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS)
    read_session = _generate_arrow_read_session(arrow_schema)
    arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema)

    reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {})
    got = reader.to_dataframe(read_session)

    # Flatten the per-block row lists into a single list of rows.
    expected = pandas.DataFrame(
        list(itertools.chain.from_iterable(SCALAR_BLOCKS)),
        columns=SCALAR_COLUMN_NAMES,
    )

    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
def test_to_dataframe_empty_w_scalars_arrow(class_under_test,
                                            mock_gapic_client):
    """An empty Arrow stream yields a zero-row frame with typed columns.

    ``mock_gapic_client`` is requested as a fixture so that the reader is
    constructed with the fixture's value rather than with whatever
    module-level name (if any) happens to be in scope.
    """
    arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS)
    read_session = _generate_arrow_read_session(arrow_schema)
    arrow_batches = _bq_to_arrow_batches([], arrow_schema)
    reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {})

    got = reader.to_dataframe(read_session)

    # Cast the empty columns to the dtypes the reader is expected to produce.
    expected = pandas.DataFrame([], columns=SCALAR_COLUMN_NAMES)
    expected["int_col"] = expected["int_col"].astype("int64")
    expected["float_col"] = expected["float_col"].astype("float64")
    expected["bool_col"] = expected["bool_col"].astype("bool")
    expected["ts_col"] = expected["ts_col"].astype("datetime64[ns, UTC]")

    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
Пример #8
0
def test_to_dataframe_empty_w_scalars_avro(class_under_test,
                                           mock_gapic_client):
    """An empty Avro stream yields a zero-row frame with typed columns.

    ``mock_gapic_client`` is requested as a fixture so that the reader is
    constructed with the fixture's value rather than with whatever
    module-level name (if any) happens to be in scope.
    """
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    read_session = _generate_avro_read_session(avro_schema)
    avro_blocks = _bq_to_avro_blocks([], avro_schema)
    reader = class_under_test(avro_blocks, mock_gapic_client, "", 0, {})

    # Read session is needed to get a schema for empty streams.
    got = reader.to_dataframe(read_session)

    # Cast the empty columns to the dtypes the reader is expected to produce.
    expected = pandas.DataFrame(columns=SCALAR_COLUMN_NAMES)
    expected["int_col"] = expected["int_col"].astype("int64")
    expected["float_col"] = expected["float_col"].astype("float64")
    expected["bool_col"] = expected["bool_col"].astype("bool")
    expected["ts_col"] = (
        expected["ts_col"].astype("datetime64[ns]").dt.tz_localize("UTC"))

    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )