def test_with_original_granularity_multiple_aggregations(sample_df):
    """Check min/max aggregations on 'Group' when original rows are kept.

    With ``keepOriginalGranularity=True`` the expected frame still has one
    row per label, and each aggregated column repeats its per-group value on
    every row of that group.
    """
    min_of_value1 = Aggregation(
        aggfunction='min',
        columns=['Value1'],
        newcolumns=['min_Value1'],
    )
    max_of_value2 = Aggregation(
        aggfunction='max',
        columns=['Value2'],
        newcolumns=['max_Value2'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        keepOriginalGranularity=True,
        on=['Group'],
        aggregations=[min_of_value1, max_of_value2],
    )

    actual = execute_aggregate(aggregate_step, sample_df)

    expected = DataFrame(
        {
            'Label': ['Label 1', 'Label 2', 'Label 3', 'Label 4', 'Label 5', 'Label 6'],
            'Group': ['Group 1', 'Group 1', 'Group 1', 'Group 2', 'Group 2', 'Group 2'],
            'Value1': [13, 7, 20, 1, 10, 5],
            'min_Value1': [7, 7, 7, 1, 1, 1],
            'Value2': [10, 21, 4, 17, 12, 2],
            'max_Value2': [21, 21, 21, 17, 17, 17],
        }
    )
    assert_dataframes_equals(actual, expected)
def test_count_with_null():
    """Check 'count distinct including empty' when the grouping key has nulls.

    Half of the 'Group' values are ``None``; the expected result holds a
    count of 3 both for 'Group 1' and for the null group.
    """
    source = DataFrame(
        {
            'Label': ['Label 1', 'Label 2', 'Label 3', 'Label 4', 'Label 5', 'Label 6'],
            'Group': ['Group 1'] * 3 + [None] * 3,  # type: ignore
            'Value1': [13, 7, 20, 1, 10, 5],
        }
    )
    count_including_empty = Aggregation(
        aggfunction='count distinct including empty',
        columns=['Group'],
        newcolumns=['__VQB_COUNT'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        on=['Group'],
        aggregations=[count_including_empty],
    )

    actual = execute_aggregate(aggregate_step, source)

    # Both frames are sorted on the grouping key before being compared.
    sort_key = ['Group']
    expected = DataFrame(
        {
            'Group': ['Group 1', np.nan],
            '__VQB_COUNT': [3, 3],
        }
    )
    assert_dataframes_equals(
        actual.sort_values(by=sort_key),
        expected.sort_values(by=sort_key),
    )
def test_simple_aggregate(sample_df):
    """Check a grouped sum over two columns combined with a grouped average.

    The step groups on 'Group', sums 'Value1' and 'Value2', and averages
    'Value1'; the expected frame has one row per group.
    """
    sums = Aggregation(
        aggfunction='sum',
        columns=['Value1', 'Value2'],
        newcolumns=['Sum-Value1', 'Sum-Value2'],
    )
    average = Aggregation(
        aggfunction='avg',
        columns=['Value1'],
        newcolumns=['Avg-Value1'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        on=['Group'],
        aggregations=[sums, average],
    )

    actual = execute_aggregate(aggregate_step, sample_df)

    # Expected averages are computed from the literal per-group values.
    group_1_average = np.average([13, 7, 20])
    group_2_average = np.average([1, 10, 5])
    sort_key = ['Group']
    expected = DataFrame(
        {
            'Group': ['Group 1', 'Group 2'],
            'Sum-Value1': [40, 16],
            'Sum-Value2': [35, 31],
            'Avg-Value1': [group_1_average, group_2_average],
        }
    )
    assert_dataframes_equals(
        actual.sort_values(by=sort_key),
        expected.sort_values(by=sort_key),
    )
def test_simple_aggregate_with_null():
    """Check a grouped sum when half of the grouping keys are ``None``.

    The expected result keeps a row for the null group, with the sum of the
    'Value1' entries whose 'Group' is ``None``.
    """
    source = DataFrame(
        {
            'Label': ['Label 1', 'Label 2', 'Label 3', 'Label 4', 'Label 5', 'Label 6'],
            'Group': ['Group 1'] * 3 + [None] * 3,  # type: ignore
            'Value1': [13, 7, 20, 1, 10, 5],
        }
    )
    sum_of_value1 = Aggregation(
        aggfunction='sum',
        columns=['Value1'],
        newcolumns=['Sum-Value1'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        on=['Group'],
        aggregations=[sum_of_value1],
    )

    actual = execute_aggregate(aggregate_step, source)

    # Both frames are sorted on the grouping key before being compared.
    sort_key = ['Group']
    expected = DataFrame(
        {
            'Group': ['Group 1', None],
            'Sum-Value1': [40, 16],
        }
    )
    assert_dataframes_equals(
        actual.sort_values(by=sort_key),
        expected.sort_values(by=sort_key),
    )
def test_duplicate_aggregation_columns():
    """Check that naming the same column twice in one aggregation is rejected.

    Building the step and executing it both sit inside the ``pytest.raises``
    block, so a ``DuplicateColumnError`` raised by either one satisfies the
    test.
    """
    source = DataFrame(
        {
            'Label': ['Label 1', 'Label 2', 'Label 3'],
            'Group': ['Group 1'] * 3,  # type: ignore
            'Value1': [13, 7, 20],
        }
    )

    with pytest.raises(DuplicateColumnError):
        duplicated_columns = Aggregation(
            aggfunction='count distinct including empty',
            columns=['Group', 'Group'],
            newcolumns=['__VQB_COUNT'],
        )
        aggregate_step = AggregateStep(
            name='aggregate',
            on=['Group'],
            aggregations=[duplicated_columns],
        )
        execute_aggregate(aggregate_step, source)
def test_keep_original_granularity_empty_on(sample_df):
    """Check a 'count' with no grouping columns while keeping original rows.

    The expected frame is ``sample_df`` with an extra '__vqb_count__' column
    set to 6 on every row.
    """
    count_of_group = Aggregation(
        aggfunction='count',
        columns=['Group'],
        newcolumns=['__vqb_count__'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        on=[],
        keepOriginalGranularity=True,
        aggregations=[count_of_group],
    )

    actual = execute_aggregate(aggregate_step, sample_df)

    # Built after execution, in the same order as the original assertion.
    expected = sample_df.assign(__vqb_count__=6)
    assert_dataframes_equals(actual, expected)
def test_count_distinct(sample_df):
    """Check 'count distinct' on 'Group' with no grouping columns.

    The expected result is a single-row frame whose 'Group_CD' value is 2.
    """
    distinct_groups = Aggregation(
        aggfunction='count distinct',
        columns=['Group'],
        newcolumns=['Group_CD'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        keepOriginalGranularity=False,
        on=[],
        aggregations=[distinct_groups],
    )

    actual = execute_aggregate(aggregate_step, sample_df)

    expected = DataFrame({'Group_CD': [2]})
    assert_dataframes_equals(actual, expected)
def test_without_on(sample_df):
    """Check a 'sum' of 'Value1' when no grouping columns are given.

    The expected result is a single-row frame whose 'sum_value' is 56.
    """
    sum_of_value1 = Aggregation(
        aggfunction='sum',
        columns=['Value1'],
        newcolumns=['sum_value'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        keepOriginalGranularity=False,
        on=[],
        aggregations=[sum_of_value1],
    )

    actual = execute_aggregate(aggregate_step, sample_df)

    expected = DataFrame({'sum_value': [56]})
    assert_dataframes_equals(actual, expected)
def test_count(sample_df):
    """Check a 'count' of 'Label' grouped on 'Group'.

    The expected frame has one row per group, each with a count of 3.
    """
    count_of_label = Aggregation(
        aggfunction='count',
        columns=['Label'],
        newcolumns=['count'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        keepOriginalGranularity=False,
        on=['Group'],
        aggregations=[count_of_label],
    )

    actual = execute_aggregate(aggregate_step, sample_df)

    expected = DataFrame(
        {
            'Group': ['Group 1', 'Group 2'],
            'count': [3, 3],
        }
    )
    assert_dataframes_equals(actual, expected)
def test_legacy_syntax(sample_df):
    """Check an aggregation declared with singular 'column'/'newcolumn' keys.

    The aggregation is built by unpacking a dict that uses 'column' and
    'newcolumn' rather than the list-valued 'columns'/'newcolumns'; the
    expected result is a single-row frame whose 'sum_value' is 56.
    """
    legacy_fields = {
        'aggfunction': 'sum',
        'column': 'Value1',
        'newcolumn': 'sum_value',
    }
    aggregate_step = AggregateStep(
        name='aggregate',
        keepOriginalGranularity=False,
        on=[],
        aggregations=[Aggregation(**legacy_fields)],
    )

    actual = execute_aggregate(aggregate_step, sample_df)

    expected = DataFrame({'sum_value': [56]})
    assert_dataframes_equals(actual, expected)