def test_to_dataframe_no_schema_set_raises_type_error(
    mut, class_under_test, mock_client, monkeypatch
):
    """A session with no schema set cannot be converted to a DataFrame."""
    reader = class_under_test(
        [], mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {}
    )
    # Deliberately leave avro_schema/arrow_schema unset on the session.
    empty_session = bigquery_storage_v1beta1.types.ReadSession()

    with pytest.raises(TypeError):
        reader.to_dataframe(empty_session)
def test_to_dataframe_no_fastavro_raises_import_error(
    mut, class_under_test, mock_client, monkeypatch
):
    """to_dataframe must fail with ImportError when fastavro is unavailable.

    NOTE(review): an identical definition of this test appears again later
    in the file and shadows this one — only the last definition is collected
    by pytest; the duplicate should probably be removed.
    """
    # Simulate the optional fastavro dependency being absent.
    monkeypatch.setattr(mut, "fastavro", None)
    reader = class_under_test(
        [], mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {}
    )
    session = bigquery_storage_v1beta1.types.ReadSession()

    with pytest.raises(ImportError):
        reader.to_dataframe(session)
def test_to_dataframe_no_fastavro_raises_import_error(
    mut, class_under_test, mock_client, monkeypatch
):
    """to_dataframe must fail with ImportError when fastavro is unavailable.

    NOTE(review): this is a byte-identical duplicate of an earlier definition
    with the same name; Python keeps only this (later) one.
    """
    monkeypatch.setattr(mut, "fastavro", None)  # fastavro "not installed"
    reader = class_under_test(
        [], mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {}
    )
    session = bigquery_storage_v1beta1.types.ReadSession()

    with pytest.raises(ImportError):
        reader.to_dataframe(session)
def test_to_dataframe_w_scalars(class_under_test):
    """Scalar Avro blocks round-trip into a DataFrame with expected values.

    NOTE(review): a second definition of this test (using
    ``_generate_read_session`` instead of ``_generate_avro_read_session``)
    appears later in the file and shadows this one.
    """
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    session = _generate_avro_read_session(avro_schema)
    blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)
    reader = class_under_test(
        blocks, mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {}
    )

    got = reader.to_dataframe(session)

    all_rows = list(itertools.chain.from_iterable(SCALAR_BLOCKS))
    expected = pandas.DataFrame(all_rows, columns=SCALAR_COLUMN_NAMES)

    # fastavro supplies its own UTC tzinfo, so the timestamp column is
    # compared separately with relaxed dtype checking.
    got_ts, expected_ts = got["ts_col"], expected["ts_col"]
    got = got.drop(columns=["ts_col"])
    expected = expected.drop(columns=["ts_col"])

    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
    pandas.testing.assert_series_equal(
        got_ts.reset_index(drop=True),
        expected_ts.reset_index(drop=True),
        check_dtype=False,  # fastavro's UTC means different dtype
        check_datetimelike_compat=True,
    )
def test_to_dataframe_w_dtypes(class_under_test):
    """The ``dtypes`` mapping overrides the inferred dtype per column."""
    avro_schema = _bq_to_avro_schema(
        [
            {"name": "bigfloat", "type": "float64"},
            {"name": "lilfloat", "type": "float64"},
        ]
    )
    session = _generate_read_session(avro_schema)
    rows = [
        [{"bigfloat": 1.25, "lilfloat": 30.5}, {"bigfloat": 2.5, "lilfloat": 21.125}],
        [{"bigfloat": 3.75, "lilfloat": 11.0}],
    ]
    reader = class_under_test(
        _bq_to_avro_blocks(rows, avro_schema),
        mock_client,
        bigquery_storage_v1beta1.types.StreamPosition(),
        {},
    )

    # Only "lilfloat" is overridden; "bigfloat" keeps its default dtype.
    got = reader.to_dataframe(session, dtypes={"lilfloat": "float16"})

    expected = pandas.DataFrame(
        {
            "bigfloat": [1.25, 2.5, 3.75],
            "lilfloat": pandas.Series([30.5, 21.125, 11.0], dtype="float16"),
        },
        columns=["bigfloat", "lilfloat"],
    )
    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
def test_to_dataframe_w_scalars(class_under_test):
    """Scalar Avro blocks round-trip into a DataFrame with expected values.

    NOTE(review): this redefines an earlier test of the same name (that one
    calls ``_generate_avro_read_session``); Python keeps only this version.
    """
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    session = _generate_read_session(avro_schema)
    reader = class_under_test(
        _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema),
        mock_client,
        bigquery_storage_v1beta1.types.StreamPosition(),
        {},
    )

    got = reader.to_dataframe(session)

    flattened = list(itertools.chain.from_iterable(SCALAR_BLOCKS))
    expected = pandas.DataFrame(flattened, columns=SCALAR_COLUMN_NAMES)

    # fastavro provides its own UTC definition, so compare the
    # timestamp column separately from the rest of the frame.
    got_ts = got.pop("ts_col")
    expected_ts = expected.pop("ts_col")

    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
    pandas.testing.assert_series_equal(
        got_ts.reset_index(drop=True),
        expected_ts.reset_index(drop=True),
        check_dtype=False,  # fastavro's UTC means different dtype
        check_datetimelike_compat=True,
    )
def test_to_dataframe_w_dtypes_arrow(class_under_test):
    """The ``dtypes`` override is honored on the Arrow deserialization path."""
    arrow_schema = _bq_to_arrow_schema(
        [
            {"name": "bigfloat", "type": "float64"},
            {"name": "lilfloat", "type": "float64"},
        ]
    )
    session = _generate_arrow_read_session(arrow_schema)
    rows = [
        [{"bigfloat": 1.25, "lilfloat": 30.5}, {"bigfloat": 2.5, "lilfloat": 21.125}],
        [{"bigfloat": 3.75, "lilfloat": 11.0}],
    ]
    reader = class_under_test(
        _bq_to_arrow_batches(rows, arrow_schema),
        mock_client,
        bigquery_storage_v1beta1.types.StreamPosition(),
        {},
    )

    # "lilfloat" is downcast via the dtypes argument; "bigfloat" is untouched.
    got = reader.to_dataframe(session, dtypes={"lilfloat": "float16"})

    expected = pandas.DataFrame(
        {
            "bigfloat": [1.25, 2.5, 3.75],
            "lilfloat": pandas.Series([30.5, 21.125, 11.0], dtype="float16"),
        },
        columns=["bigfloat", "lilfloat"],
    )
    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )
def test_to_dataframe_no_pandas_raises_import_error(
    mut, class_under_test, mock_client, monkeypatch
):
    """All DataFrame entry points must raise ImportError without pandas.

    NOTE(review): a near-identical redefinition of this test (using
    ``_generate_read_session``) appears later in the file and shadows this one.
    """
    # Simulate the optional pandas dependency being absent.
    monkeypatch.setattr(mut, "pandas", None)
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    session = _generate_avro_read_session(avro_schema)
    reader = class_under_test(
        _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema),
        mock_client,
        bigquery_storage_v1beta1.types.StreamPosition(),
        {},
    )

    # Every conversion surface must surface the missing dependency.
    with pytest.raises(ImportError):
        reader.to_dataframe(session)
    with pytest.raises(ImportError):
        reader.rows(session).to_dataframe()
    with pytest.raises(ImportError):
        next(reader.rows(session).pages).to_dataframe()
def test_to_dataframe_no_pandas_raises_import_error(
    mut, class_under_test, mock_client, monkeypatch
):
    """All DataFrame entry points must raise ImportError without pandas.

    NOTE(review): this redefines an earlier test of the same name (that one
    calls ``_generate_avro_read_session``); Python keeps only this version.
    """
    monkeypatch.setattr(mut, "pandas", None)  # pandas "not installed"
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    session = _generate_read_session(avro_schema)
    reader = class_under_test(
        _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema),
        mock_client,
        bigquery_storage_v1beta1.types.StreamPosition(),
        {},
    )

    # reader-level, rows-level, and page-level conversions must all fail.
    with pytest.raises(ImportError):
        reader.to_dataframe(session)
    with pytest.raises(ImportError):
        reader.rows(session).to_dataframe()
    with pytest.raises(ImportError):
        next(reader.rows(session).pages).to_dataframe()
def test_to_dataframe_w_scalars_arrow(class_under_test):
    """Scalar Arrow batches round-trip into a DataFrame with expected values."""
    arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS)
    session = _generate_arrow_read_session(arrow_schema)
    batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema)
    reader = class_under_test(
        batches, mock_client, bigquery_storage_v1beta1.types.StreamPosition(), {}
    )

    got = reader.to_dataframe(session)

    flattened = list(itertools.chain.from_iterable(SCALAR_BLOCKS))
    expected = pandas.DataFrame(flattened, columns=SCALAR_COLUMN_NAMES)
    pandas.testing.assert_frame_equal(
        got.reset_index(drop=True),  # reset_index to ignore row labels
        expected.reset_index(drop=True),
    )