示例#1
0
def test_rows_no_schema_set_raises_type_error(mut, class_under_test,
                                              mock_client, monkeypatch):
    """``rows()`` raises TypeError for a default-constructed ReadSession."""
    position = bigquery_storage_v1beta1.types.StreamPosition()
    stream_reader = class_under_test([], mock_client, position, {})
    # A ReadSession built with no arguments, i.e. nothing set on it.
    blank_session = bigquery_storage_v1beta1.types.ReadSession()

    with pytest.raises(TypeError):
        stream_reader.rows(blank_session)
def test_rows_raises_import_error(mut, class_under_test, mock_client, monkeypatch):
    """``rows()`` raises ImportError when ``mut.fastavro`` has been set to None."""
    # Swap the ``fastavro`` attribute on ``mut`` for None for this test only.
    monkeypatch.setattr(mut, "fastavro", None)
    position = bigquery_storage_v1beta1.types.StreamPosition()
    stream_reader = class_under_test([], mock_client, position, {})
    session = bigquery_storage_v1beta1.types.ReadSession()

    with pytest.raises(ImportError):
        stream_reader.rows(session)
def test_rows_raises_import_error(mut, class_under_test, mock_client, monkeypatch):
    """With ``fastavro`` patched to None on ``mut``, ``rows()`` raises ImportError."""
    monkeypatch.setattr(mut, "fastavro", None)

    initial_position = bigquery_storage_v1beta1.types.StreamPosition()
    empty_stream_reader = class_under_test([], mock_client, initial_position, {})
    default_session = bigquery_storage_v1beta1.types.ReadSession()

    # The call itself (not iteration over its result) is expected to raise.
    with pytest.raises(ImportError):
        empty_stream_reader.rows(default_session)
def test_avro_rows_raises_import_error(mut, class_under_test, mock_client, monkeypatch):
    """``rows()`` on an Avro read session raises ImportError without ``fastavro``."""
    monkeypatch.setattr(mut, "fastavro", None)
    position = bigquery_storage_v1beta1.types.StreamPosition()
    stream_reader = class_under_test([], mock_client, position, {})

    # Build a read session from a single-column Avro schema.
    columns = [{"name": "int_col", "type": "int64"}]
    session = _generate_avro_read_session(_bq_to_avro_schema(columns))

    with pytest.raises(ImportError):
        stream_reader.rows(session)
def test_rows_w_timeout(class_under_test, mock_client):
    """DeadlineExceeded propagates out of ``rows()`` and no reconnect is attempted."""
    columns = [{"name": "int_col", "type": "int64"}]
    schema = _bq_to_avro_schema(columns)
    session = _generate_avro_read_session(schema)

    first_rows = [
        [{"int_col": 123}, {"int_col": 234}],
        [{"int_col": 345}, {"int_col": 456}],
    ]
    first_stream = _avro_blocks_w_deadline(_bq_to_avro_blocks(first_rows, schema))
    second_rows = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]]
    second_stream = _bq_to_avro_blocks(second_rows, schema)

    # Blocks the client would hand back if the reader did re-open the stream.
    mock_client.read_rows.return_value = second_stream
    position = bigquery_storage_v1beta1.types.StreamPosition(stream={"name": "test"})

    read_rows_kwargs = {"metadata": {"test-key": "test-value"}}
    stream_reader = class_under_test(
        first_stream, mock_client, position, read_rows_kwargs
    )

    with pytest.raises(google.api_core.exceptions.DeadlineExceeded):
        list(stream_reader.rows(session))

    # A DeadlineExceeded must not cause a reconnect, so that timeouts chosen
    # by the user are honoured.
    mock_client.read_rows.assert_not_called()
def test_rows_w_timeout(class_under_test, mock_client):
    """Consuming ``rows()`` raises DeadlineExceeded without calling ``read_rows``."""
    schema = _bq_to_avro_schema([{"name": "int_col", "type": "int64"}])
    session = _generate_read_session(schema)

    initial_rows = [
        [{"int_col": 123}, {"int_col": 234}],
        [{"int_col": 345}, {"int_col": 456}],
    ]
    initial_blocks = _avro_blocks_w_deadline(
        _bq_to_avro_blocks(initial_rows, schema)
    )
    followup_rows = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]]
    followup_blocks = _bq_to_avro_blocks(followup_rows, schema)

    # Only reachable if the reader reconnects, which this test forbids.
    mock_client.read_rows.return_value = followup_blocks
    start = bigquery_storage_v1beta1.types.StreamPosition(stream={"name": "test"})

    stream_reader = class_under_test(
        initial_blocks,
        mock_client,
        start,
        {"metadata": {"test-key": "test-value"}},
    )

    deadline_error = google.api_core.exceptions.DeadlineExceeded
    with pytest.raises(deadline_error):
        list(stream_reader.rows(session))

    # No reconnect on DeadlineExceeded: user-specified timeouts are respected.
    mock_client.read_rows.assert_not_called()
def test_rows_w_reconnect_by_page(class_under_test, mock_client):
    """Step through ``rows().pages`` one page at a time across two streams.

    Verifies that ``total_rows`` matches the ``estimated_row_count`` of the
    most recently fetched block, and that ``num_items`` / ``remaining`` on
    each page change as expected while its rows are consumed.
    """
    columns = [{"name": "int_col", "type": "int64"}]
    schema = _bq_to_avro_schema(columns)
    session = _generate_read_session(schema)

    rows_1 = [
        [{"int_col": 123}, {"int_col": 234}],
        [{"int_col": 345}, {"int_col": 456}],
    ]
    blocks_1 = _bq_to_avro_blocks(rows_1, schema)
    rows_2 = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]]
    blocks_2 = _bq_to_avro_blocks(rows_2, schema)

    # Distinct estimates per block so each page's effect on ``total_rows``
    # can be told apart.
    blocks_1[0].status.estimated_row_count = 8
    blocks_1[1].status.estimated_row_count = 6
    blocks_2[0].status.estimated_row_count = 9
    blocks_2[1].status.estimated_row_count = 7

    # The second group of blocks is what the mocked client returns.
    mock_client.read_rows.return_value = blocks_2
    position = bigquery_storage_v1beta1.types.StreamPosition(stream={"name": "test"})

    stream_reader = class_under_test(
        _avro_blocks_w_unavailable(blocks_1),
        mock_client,
        position,
        {"metadata": {"test-key": "test-value"}},
    )
    row_iterator = stream_reader.rows(session)
    page_iter = iter(row_iterator.pages)

    # No estimate is available before the first page has been fetched.
    assert row_iterator.total_rows is None

    # Page 1: fully consumed via tuple().
    first = next(page_iter)
    assert row_iterator.total_rows == 8
    assert first.num_items == 2
    assert first.remaining == 2
    assert tuple(first) == tuple(rows_1[0])
    assert first.num_items == 2
    assert first.remaining == 0

    # Page 2: consumed row by row, checking the counters in between.
    second = next(page_iter)
    assert row_iterator.total_rows == 6
    assert next(second) == rows_1[1][0]
    assert second.num_items == 2
    assert second.remaining == 1
    assert next(second) == rows_1[1][1]

    # Page 3: first page from the second group of blocks.
    third = next(page_iter)
    assert tuple(third) == tuple(rows_2[0])
    assert third.num_items == 2
    assert third.remaining == 0
    assert row_iterator.total_rows == 9

    # Page 4: the single-row block.
    fourth = next(page_iter)
    assert row_iterator.total_rows == 7
    assert tuple(fourth) == tuple(rows_2[1])
    assert fourth.num_items == 1
    assert fourth.remaining == 0
def test_rows_w_reconnect_by_page(class_under_test, mock_client):
    """Iterate pages across two groups of Avro blocks, checking page counters.

    ``total_rows`` is expected to equal the ``estimated_row_count`` of the
    block behind the latest page; ``num_items`` stays fixed per page while
    ``remaining`` drops as rows are read.
    """
    schema = _bq_to_avro_schema([{"name": "int_col", "type": "int64"}])
    session = _generate_avro_read_session(schema)

    group_1_rows = [
        [{"int_col": 123}, {"int_col": 234}],
        [{"int_col": 345}, {"int_col": 456}],
    ]
    group_1 = _bq_to_avro_blocks(group_1_rows, schema)
    group_2_rows = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]]
    group_2 = _bq_to_avro_blocks(group_2_rows, schema)

    # One unique row-count estimate per block.
    group_1[0].status.estimated_row_count = 8
    group_1[1].status.estimated_row_count = 6
    group_2[0].status.estimated_row_count = 9
    group_2[1].status.estimated_row_count = 7

    mock_client.read_rows.return_value = group_2
    start = bigquery_storage_v1beta1.types.StreamPosition(stream={"name": "test"})

    stream_reader = class_under_test(
        _pages_w_unavailable(group_1),
        mock_client,
        start,
        {"metadata": {"test-key": "test-value"}},
    )
    result = stream_reader.rows(session)
    page_iter = iter(result.pages)

    # Nothing fetched yet, so there is no estimate.
    assert result.total_rows is None

    page_a = next(page_iter)
    assert result.total_rows == 8
    assert page_a.num_items == 2
    assert page_a.remaining == 2
    # Draining the page leaves num_items unchanged and remaining at zero.
    assert tuple(page_a) == tuple(group_1_rows[0])
    assert page_a.num_items == 2
    assert page_a.remaining == 0

    page_b = next(page_iter)
    assert result.total_rows == 6
    assert next(page_b) == group_1_rows[1][0]
    assert page_b.num_items == 2
    assert page_b.remaining == 1
    assert next(page_b) == group_1_rows[1][1]

    # From here on the pages come from the second group of blocks.
    page_c = next(page_iter)
    assert tuple(page_c) == tuple(group_2_rows[0])
    assert page_c.num_items == 2
    assert page_c.remaining == 0
    assert result.total_rows == 9

    page_d = next(page_iter)
    assert result.total_rows == 7
    assert tuple(page_d) == tuple(group_2_rows[1])
    assert page_d.num_items == 1
    assert page_d.remaining == 0
示例#9
0
def test_to_arrow_no_pyarrow_raises_import_error(mut, class_under_test,
                                                 mock_client, monkeypatch):
    """Each ``to_arrow`` entry point raises ImportError once ``pyarrow`` is None.

    Covers the reader, the object returned by ``rows()``, and a single page.
    """
    monkeypatch.setattr(mut, "pyarrow", None)
    schema = _bq_to_arrow_schema(SCALAR_COLUMNS)
    session = _generate_arrow_read_session(schema)
    batches = _bq_to_arrow_batches(SCALAR_BLOCKS, schema)
    position = bigquery_storage_v1beta1.types.StreamPosition()
    stream_reader = class_under_test(batches, mock_client, position, {})

    # Reader-level conversion.
    with pytest.raises(ImportError):
        stream_reader.to_arrow(session)

    # Conversion from the rows() result.
    with pytest.raises(ImportError):
        row_iterator = stream_reader.rows(session)
        row_iterator.to_arrow()

    # Conversion from the first page.
    with pytest.raises(ImportError):
        first_page = next(stream_reader.rows(session).pages)
        first_page.to_arrow()
示例#10
0
def test_to_dataframe_no_pandas_raises_import_error(mut, class_under_test,
                                                    mock_client, monkeypatch):
    """Each ``to_dataframe`` entry point raises ImportError once ``pandas`` is None.

    Covers the reader, the object returned by ``rows()``, and a single page.
    """
    monkeypatch.setattr(mut, "pandas", None)
    schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    session = _generate_read_session(schema)
    blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, schema)

    position = bigquery_storage_v1beta1.types.StreamPosition()
    stream_reader = class_under_test(blocks, mock_client, position, {})

    # Reader-level conversion.
    with pytest.raises(ImportError):
        stream_reader.to_dataframe(session)

    # Conversion from the rows() result.
    with pytest.raises(ImportError):
        row_iterator = stream_reader.rows(session)
        row_iterator.to_dataframe()

    # Conversion from the first page.
    with pytest.raises(ImportError):
        first_page = next(stream_reader.rows(session).pages)
        first_page.to_dataframe()
示例#11
0
def test_rows_w_empty_stream(class_under_test, mock_client):
    """A reader built over an empty block list yields no rows."""
    columns = [{"name": "int_col", "type": "int64"}]
    session = _generate_read_session(_bq_to_avro_schema(columns))
    position = bigquery_storage_v1beta1.types.StreamPosition()
    stream_reader = class_under_test([], mock_client, position, {})

    collected = tuple(stream_reader.rows(session))
    assert collected == ()
示例#12
0
def test_rows_w_empty_stream_arrow(class_under_test, mock_client):
    """With an Arrow session and no batches, ``rows()`` is empty with no estimate."""
    columns = [{"name": "int_col", "type": "int64"}]
    session = _generate_arrow_read_session(_bq_to_arrow_schema(columns))
    position = bigquery_storage_v1beta1.types.StreamPosition()
    stream_reader = class_under_test([], mock_client, position, {})

    row_iterator = stream_reader.rows(session)
    # Checked before iterating: no row-count estimate is available.
    assert row_iterator.total_rows is None
    assert tuple(row_iterator) == ()
def test_to_dataframe_no_pandas_raises_import_error(
    mut, class_under_test, mock_client, monkeypatch
):
    """``to_dataframe`` raises ImportError at every level when ``pandas`` is None.

    The three levels exercised are the reader, the ``rows()`` result, and a
    page taken from ``rows().pages``.
    """
    monkeypatch.setattr(mut, "pandas", None)
    avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    session = _generate_read_session(avro_schema)
    scalar_blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, avro_schema)

    start = bigquery_storage_v1beta1.types.StreamPosition()
    stream_reader = class_under_test(scalar_blocks, mock_client, start, {})

    with pytest.raises(ImportError):
        stream_reader.to_dataframe(session)

    with pytest.raises(ImportError):
        rows = stream_reader.rows(session)
        rows.to_dataframe()

    with pytest.raises(ImportError):
        page = next(stream_reader.rows(session).pages)
        page.to_dataframe()
def test_rows_w_empty_stream(class_under_test, mock_client):
    """An empty block list gives ``total_rows`` of None and zero rows."""
    avro_schema = _bq_to_avro_schema([{"name": "int_col", "type": "int64"}])
    session = _generate_read_session(avro_schema)
    start = bigquery_storage_v1beta1.types.StreamPosition()
    stream_reader = class_under_test([], mock_client, start, {})

    result = stream_reader.rows(session)
    # The estimate is inspected first, then the iterator is drained.
    assert result.total_rows is None
    assert tuple(result) == ()
示例#15
0
def test_rows_w_scalars_arrow(class_under_test, mock_client):
    """Rows read from Arrow batches equal the flattened ``SCALAR_BLOCKS``."""
    schema = _bq_to_arrow_schema(SCALAR_COLUMNS)
    session = _generate_arrow_read_session(schema)
    batches = _bq_to_arrow_batches(SCALAR_BLOCKS, schema)

    position = bigquery_storage_v1beta1.types.StreamPosition()
    stream_reader = class_under_test(batches, mock_client, position, {})
    actual_rows = tuple(stream_reader.rows(session))

    # Flatten the list of blocks into one sequence of rows.
    flattened = itertools.chain.from_iterable(SCALAR_BLOCKS)
    assert actual_rows == tuple(flattened)
def test_rows_w_scalars(class_under_test, mock_client):
    """Rows read from Avro blocks equal the flattened ``SCALAR_BLOCKS``."""
    schema = _bq_to_avro_schema(SCALAR_COLUMNS)
    session = _generate_read_session(schema)
    blocks = _bq_to_avro_blocks(SCALAR_BLOCKS, schema)

    position = bigquery_storage_v1beta1.types.StreamPosition()
    stream_reader = class_under_test(blocks, mock_client, position, {})
    actual_rows = tuple(stream_reader.rows(session))

    # Every row of every block, in block order.
    flattened = itertools.chain.from_iterable(SCALAR_BLOCKS)
    assert actual_rows == tuple(flattened)
示例#17
0
def test_rows_w_reconnect(class_under_test, mock_client):
    """Rows from both groups of blocks are returned after a single reconnect.

    The reader is expected to call ``read_rows`` exactly once, at offset 4
    (the number of rows in the first group) and with the original metadata.
    """
    columns = [{"name": "int_col", "type": "int64"}]
    schema = _bq_to_avro_schema(columns)
    session = _generate_read_session(schema)

    rows_1 = [
        [{"int_col": 123}, {"int_col": 234}],
        [{"int_col": 345}, {"int_col": 456}],
    ]
    blocks_1 = _avro_blocks_w_unavailable(_bq_to_avro_blocks(rows_1, schema))
    rows_2 = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]]
    blocks_2 = _bq_to_avro_blocks(rows_2, schema)

    # Same estimate on every block of the second group.
    for avro_block in blocks_2:
        avro_block.status.estimated_row_count = 7

    mock_client.read_rows.return_value = blocks_2
    position = bigquery_storage_v1beta1.types.StreamPosition(stream={"name": "test"})

    stream_reader = class_under_test(
        blocks_1,
        mock_client,
        position,
        {"metadata": {"test-key": "test-value"}},
    )
    row_iterator = stream_reader.rows(session)

    # All rows of the first group followed by all rows of the second.
    all_rows = tuple(itertools.chain(*rows_1, *rows_2))

    assert tuple(row_iterator) == all_rows
    assert row_iterator.total_rows == 7

    resumed_at = bigquery_storage_v1beta1.types.StreamPosition(
        stream={"name": "test"}, offset=4
    )
    mock_client.read_rows.assert_called_once_with(
        resumed_at, metadata={"test-key": "test-value"}
    )
def test_rows_w_reconnect(class_under_test, mock_client):
    """Iteration spans the initial blocks and those returned by ``read_rows``.

    Asserts the combined rows, the final ``total_rows`` value, and that the
    single ``read_rows`` call used offset 4 with the configured metadata.
    """
    avro_schema = _bq_to_avro_schema([{"name": "int_col", "type": "int64"}])
    session = _generate_read_session(avro_schema)

    initial_rows = [
        [{"int_col": 123}, {"int_col": 234}],
        [{"int_col": 345}, {"int_col": 456}],
    ]
    initial_blocks = _avro_blocks_w_unavailable(
        _bq_to_avro_blocks(initial_rows, avro_schema)
    )
    resumed_rows = [[{"int_col": 567}, {"int_col": 789}], [{"int_col": 890}]]
    resumed_blocks = _bq_to_avro_blocks(resumed_rows, avro_schema)

    for resumed_block in resumed_blocks:
        resumed_block.status.estimated_row_count = 7

    # What the mocked client hands back when the reader calls read_rows.
    mock_client.read_rows.return_value = resumed_blocks
    start = bigquery_storage_v1beta1.types.StreamPosition(stream={"name": "test"})

    call_kwargs = {"metadata": {"test-key": "test-value"}}
    stream_reader = class_under_test(initial_blocks, mock_client, start, call_kwargs)
    result = stream_reader.rows(session)

    want = tuple(itertools.chain.from_iterable(initial_rows + resumed_rows))

    assert tuple(result) == want
    assert result.total_rows == 7

    # Four rows were read before the reconnect, hence offset=4.
    expected_position = bigquery_storage_v1beta1.types.StreamPosition(
        stream={"name": "test"}, offset=4
    )
    mock_client.read_rows.assert_called_once_with(
        expected_position,
        metadata={"test-key": "test-value"},
    )
示例#19
0
def test_rows_w_nonresumable_internal_error(class_under_test, mock_client):
    """An InternalServerError matching "nonresumable error" is raised, not retried."""
    columns = [{"name": "int_col", "type": "int64"}]
    schema = _bq_to_avro_schema(columns)
    session = _generate_avro_read_session(schema)

    rows_by_block = [[{"int_col": 1024}, {"int_col": 512}], [{"int_col": 256}]]
    failing_stream = _pages_w_nonresumable_internal_error(
        _bq_to_avro_blocks(rows_by_block, schema)
    )

    position = bigquery_storage_v1beta1.types.StreamPosition(stream={"name": "test"})
    stream_reader = class_under_test(failing_stream, mock_client, position, {})

    expected_error = google.api_core.exceptions.InternalServerError
    with pytest.raises(expected_error, match="nonresumable error"):
        list(stream_reader.rows(session))

    # The reader must not have tried to re-open the stream.
    mock_client.read_rows.assert_not_called()
示例#20
0
def test_to_dataframe_by_page_arrow(class_under_test, mock_client):
    """Convert Arrow pages to DataFrames one page at a time.

    The blocks are split into two groups so that page iteration is checked
    across the switch from the initial batches to those returned by the
    mocked ``read_rows``. Only the first page is converted with explicit
    ``dtypes``.
    """
    columns = [
        {"name": "int_col", "type": "int64"},
        {"name": "bool_col", "type": "bool"},
    ]
    schema = _bq_to_arrow_schema(columns)
    session = _generate_arrow_read_session(schema)

    rows_1 = [{"int_col": 123, "bool_col": True}, {"int_col": 234, "bool_col": False}]
    rows_2 = [{"int_col": 345, "bool_col": True}, {"int_col": 456, "bool_col": False}]
    rows_3 = [{"int_col": 567, "bool_col": True}, {"int_col": 789, "bool_col": False}]
    rows_4 = [{"int_col": 890, "bool_col": True}]

    # Two groups of blocks: the reader starts on the first and obtains the
    # second through the mocked client.
    first_batches = _bq_to_arrow_batches([rows_1, rows_2], schema)
    second_batches = _bq_to_arrow_batches([rows_3, rows_4], schema)

    mock_client.read_rows.return_value = second_batches

    stream_reader = class_under_test(
        _pages_w_unavailable(first_batches),
        mock_client,
        bigquery_storage_v1beta1.types.StreamPosition(),
        {},
    )
    row_iterator = stream_reader.rows(session)
    page_iter = iter(row_iterator.pages)

    def expected_frame(rows):
        # Frame the page is compared against, with a fresh 0..n-1 index.
        frame = pandas.DataFrame(rows, columns=["int_col", "bool_col"])
        return frame.reset_index(drop=True)

    # First page: converted with explicit dtypes.
    first_page = next(page_iter)
    first_frame = first_page.to_dataframe(
        dtypes={"int_col": "int64", "bool_col": "bool"}
    )
    pandas.testing.assert_frame_equal(
        first_frame.reset_index(drop=True),
        expected_frame(rows_1),
    )

    # Remaining pages: default conversion.
    for rows in (rows_2, rows_3, rows_4):
        page = next(page_iter)
        pandas.testing.assert_frame_equal(
            page.to_dataframe().reset_index(drop=True),
            expected_frame(rows),
        )
示例#21
0
def test_to_dataframe_by_page(class_under_test, mock_client):
    """Convert Avro pages to DataFrames one page at a time.

    The blocks are split into two groups so that page iteration is checked
    across the switch from the initial blocks to those returned by the
    mocked ``read_rows``.
    """
    columns = [
        {"name": "int_col", "type": "int64"},
        {"name": "bool_col", "type": "bool"},
    ]
    schema = _bq_to_avro_schema(columns)
    session = _generate_read_session(schema)

    rows_1 = [{"int_col": 123, "bool_col": True}, {"int_col": 234, "bool_col": False}]
    rows_2 = [{"int_col": 345, "bool_col": True}, {"int_col": 456, "bool_col": False}]
    rows_3 = [{"int_col": 567, "bool_col": True}, {"int_col": 789, "bool_col": False}]
    rows_4 = [{"int_col": 890, "bool_col": True}]

    # Two groups of blocks: the reader starts on the first and obtains the
    # second through the mocked client.
    first_blocks = _bq_to_avro_blocks([rows_1, rows_2], schema)
    second_blocks = _bq_to_avro_blocks([rows_3, rows_4], schema)

    mock_client.read_rows.return_value = second_blocks
    position = bigquery_storage_v1beta1.types.StreamPosition(stream={"name": "test"})

    # NOTE(review): the first group is wrapped with the *deadline* helper even
    # though the scenario described is a reconnection — confirm this is the
    # intended wrapper for the reader version under test.
    stream_reader = class_under_test(
        _avro_blocks_w_deadline(first_blocks),
        mock_client,
        position,
        {"metadata": {"test-key": "test-value"}},
    )
    row_iterator = stream_reader.rows(session)
    page_iter = iter(row_iterator.pages)

    # One page per block, in order; each is compared as a DataFrame.
    for rows in (rows_1, rows_2, rows_3, rows_4):
        page = next(page_iter)
        actual = page.to_dataframe().reset_index(drop=True)
        expected = pandas.DataFrame(rows, columns=["int_col", "bool_col"])
        pandas.testing.assert_frame_equal(actual, expected.reset_index(drop=True))
def test_to_dataframe_by_page(class_under_test, mock_client):
    """Check ``page.to_dataframe()`` for every page across two groups of blocks.

    Splitting the blocks into two groups exercises page iteration across the
    switch from the initial blocks to those returned by the mocked
    ``read_rows``.
    """
    avro_schema = _bq_to_avro_schema(
        [
            {"name": "int_col", "type": "int64"},
            {"name": "bool_col", "type": "bool"},
        ]
    )
    session = _generate_read_session(avro_schema)

    page_rows = (
        [{"int_col": 123, "bool_col": True}, {"int_col": 234, "bool_col": False}],
        [{"int_col": 345, "bool_col": True}, {"int_col": 456, "bool_col": False}],
        [{"int_col": 567, "bool_col": True}, {"int_col": 789, "bool_col": False}],
        [{"int_col": 890, "bool_col": True}],
    )

    # First two blocks seed the reader; the last two come from the client.
    initial_blocks = _bq_to_avro_blocks(list(page_rows[:2]), avro_schema)
    resumed_blocks = _bq_to_avro_blocks(list(page_rows[2:]), avro_schema)

    mock_client.read_rows.return_value = resumed_blocks
    start = bigquery_storage_v1beta1.types.StreamPosition(stream={"name": "test"})

    stream_reader = class_under_test(
        _avro_blocks_w_unavailable(initial_blocks),
        mock_client,
        start,
        {"metadata": {"test-key": "test-value"}},
    )
    result = stream_reader.rows(session)
    page_iter = iter(result.pages)

    # Each page must convert to a frame equal to the rows of its block.
    for expected_rows in page_rows:
        current_page = next(page_iter)
        got_frame = current_page.to_dataframe().reset_index(drop=True)
        want_frame = pandas.DataFrame(
            expected_rows, columns=["int_col", "bool_col"]
        ).reset_index(drop=True)
        pandas.testing.assert_frame_equal(got_frame, want_frame)