def test_get_batch_with_split_on_whole_table(test_df):
    """Splitting on the whole table should return the entire fixture frame untouched."""
    batch_spec = RuntimeDataBatchSpec(
        batch_data=test_df,
        splitter_method="_split_on_whole_table",
    )
    batch = PandasExecutionEngine().get_batch_data(batch_spec)
    # Whole-table split: all 120 rows and 10 columns survive.
    assert batch.dataframe.shape == (120, 10)
def test_sample_using_random(test_df):
    """Random sampling with a fixed seed yields a deterministic subset of rows."""
    # Seed the RNG so the sampled row count is reproducible across runs.
    random.seed(1)
    batch_spec = RuntimeDataBatchSpec(
        batch_data=test_df,
        sampling_method="_sample_using_random",
    )
    batch = PandasExecutionEngine().get_batch_data(batch_spec)
    # With seed(1) the sampler keeps exactly 13 of the 120 rows; columns untouched.
    assert batch.dataframe.shape == (13, 10)
def test_get_batch_data(test_sparkdf, basic_spark_df_execution_engine):
    """A runtime batch spec with no splitter/sampler returns the full Spark frame."""
    batch_spec = RuntimeDataBatchSpec(
        batch_data=test_sparkdf, data_asset_name="DATA_ASSET"
    )
    result_df = basic_spark_df_execution_engine.get_batch_data(batch_spec).dataframe
    # Fixture dimensions pass through unchanged: 120 rows x 10 columns.
    assert result_df.count() == 120
    assert len(result_df.columns) == 10