示例#1
0
def test_with_original_granularity_multiple_aggregations(sample_df):
    """Aggregate min/max per group while keeping the original granularity.

    The expected frame has six rows and carries, next to the source columns,
    the per-group ``min_Value1`` and ``max_Value2`` values.
    """
    min_of_value1 = Aggregation(
        aggfunction='min',
        columns=['Value1'],
        newcolumns=['min_Value1'],
    )
    max_of_value2 = Aggregation(
        aggfunction='max',
        columns=['Value2'],
        newcolumns=['max_Value2'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        keepOriginalGranularity=True,
        on=['Group'],
        aggregations=[min_of_value1, max_of_value2],
    )

    actual = execute_aggregate(aggregate_step, sample_df)

    # Column order mirrors the expected output: each aggregated column sits
    # right after the column it was computed from.
    expected = DataFrame({
        'Label': [
            'Label 1', 'Label 2', 'Label 3',
            'Label 4', 'Label 5', 'Label 6',
        ],
        'Group': ['Group 1', 'Group 1', 'Group 1',
                  'Group 2', 'Group 2', 'Group 2'],
        'Value1': [13, 7, 20, 1, 10, 5],
        'min_Value1': [7, 7, 7, 1, 1, 1],
        'Value2': [10, 21, 4, 17, 12, 2],
        'max_Value2': [21, 21, 21, 17, 17, 17],
    })
    assert_dataframes_equals(actual, expected)
示例#2
0
def test_count_with_null():
    """Count per group when half of the ``Group`` values are missing.

    Uses the 'count distinct including empty' aggregation on the grouping
    column itself; the expected output has one row for 'Group 1' and one for
    the missing group, each with a count of 3.
    """
    labels = ['Label 1', 'Label 2', 'Label 3', 'Label 4', 'Label 5', 'Label 6']
    source = DataFrame({
        'Label': labels,
        'Group': ['Group 1'] * 3 + [None] * 3,  # type: ignore
        'Value1': [13, 7, 20, 1, 10, 5],
    })
    count_aggregation = Aggregation(
        aggfunction='count distinct including empty',
        columns=['Group'],
        newcolumns=['__VQB_COUNT'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        on=['Group'],
        aggregations=[count_aggregation],
    )

    # Both frames are sorted the same way so row order does not matter.
    actual = execute_aggregate(aggregate_step, source).sort_values(by=['Group'])
    expected = DataFrame({
        'Group': ['Group 1', np.nan],
        '__VQB_COUNT': [3, 3],
    }).sort_values(by=['Group'])

    assert_dataframes_equals(actual, expected)
示例#3
0
def test_simple_aggregate(sample_df):
    """Sum two columns and average one of them, grouped by ``Group``.

    The expected frame has one row per group with the two sums and the
    average of ``Value1``.
    """
    sums = Aggregation(
        aggfunction='sum',
        columns=['Value1', 'Value2'],
        newcolumns=['Sum-Value1', 'Sum-Value2'],
    )
    average = Aggregation(
        aggfunction='avg',
        columns=['Value1'],
        newcolumns=['Avg-Value1'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        on=['Group'],
        aggregations=[sums, average],
    )

    # Both frames are sorted the same way so row order does not matter.
    actual = execute_aggregate(aggregate_step, sample_df).sort_values(
        by=['Group'])

    expected_averages = [
        np.average([13, 7, 20]),
        np.average([1, 10, 5]),
    ]
    expected = DataFrame({
        'Group': ['Group 1', 'Group 2'],
        'Sum-Value1': [40, 16],
        'Sum-Value2': [35, 31],
        'Avg-Value1': expected_averages,
    }).sort_values(by=['Group'])

    assert_dataframes_equals(actual, expected)
示例#4
0
def test_simple_aggregate_with_null():
    """Sum ``Value1`` per group when half of the ``Group`` values are None.

    The expected output keeps a row for the missing group, with its own sum.
    """
    labels = ['Label 1', 'Label 2', 'Label 3', 'Label 4', 'Label 5', 'Label 6']
    source = DataFrame({
        'Label': labels,
        'Group': ['Group 1'] * 3 + [None] * 3,  # type: ignore
        'Value1': [13, 7, 20, 1, 10, 5],
    })
    sum_aggregation = Aggregation(
        aggfunction='sum',
        columns=['Value1'],
        newcolumns=['Sum-Value1'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        on=['Group'],
        aggregations=[sum_aggregation],
    )

    # Both frames are sorted the same way so row order does not matter.
    actual = execute_aggregate(aggregate_step, source).sort_values(by=['Group'])
    expected = DataFrame({
        'Group': ['Group 1', None],
        'Sum-Value1': [40, 16],
    }).sort_values(by=['Group'])

    assert_dataframes_equals(actual, expected)
示例#5
0
def test_duplicate_aggregation_columns():
    """Listing the same column twice in an aggregation must be rejected.

    Both building the step and executing it happen inside the
    ``pytest.raises`` context, so the error may come from either stage.
    """
    source = DataFrame({
        'Label': ['Label 1', 'Label 2', 'Label 3'],
        'Group': ['Group 1', 'Group 1', 'Group 1'],  # type: ignore
        'Value1': [13, 7, 20],
    })

    with pytest.raises(DuplicateColumnError):
        duplicated = Aggregation(
            aggfunction='count distinct including empty',
            columns=['Group', 'Group'],
            newcolumns=['__VQB_COUNT'],
        )
        aggregate_step = AggregateStep(
            name='aggregate',
            on=['Group'],
            aggregations=[duplicated],
        )
        execute_aggregate(aggregate_step, source)
示例#6
0
def test_keep_original_granularity_empty_on(sample_df):
    """Count ``Group`` with no grouping columns, keeping original granularity.

    The expected result is the input frame with an extra ``__vqb_count__``
    column set to 6.
    """
    count_aggregation = Aggregation(
        aggfunction='count',
        columns=['Group'],
        newcolumns=['__vqb_count__'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        on=[],
        keepOriginalGranularity=True,
        aggregations=[count_aggregation],
    )

    actual = execute_aggregate(aggregate_step, sample_df)

    # Built after the step has run, matching the original evaluation order.
    expected = sample_df.assign(__vqb_count__=6)
    assert_dataframes_equals(actual, expected)
示例#7
0
def test_count_distinct(sample_df):
    """Count distinct ``Group`` values over the whole frame (no grouping).

    The expected output is a single-row frame whose ``Group_CD`` is 2.
    """
    distinct_groups = Aggregation(
        aggfunction='count distinct',
        columns=['Group'],
        newcolumns=['Group_CD'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        keepOriginalGranularity=False,
        on=[],
        aggregations=[distinct_groups],
    )

    actual = execute_aggregate(aggregate_step, sample_df)
    expected = DataFrame({'Group_CD': [2]})

    assert_dataframes_equals(actual, expected)
示例#8
0
def test_without_on(sample_df):
    """Sum ``Value1`` over the whole frame when ``on`` is empty.

    The expected output is a single-row frame whose ``sum_value`` is 56.
    """
    total = Aggregation(
        aggfunction='sum',
        columns=['Value1'],
        newcolumns=['sum_value'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        keepOriginalGranularity=False,
        on=[],
        aggregations=[total],
    )

    actual = execute_aggregate(aggregate_step, sample_df)
    expected = DataFrame({'sum_value': [56]})

    assert_dataframes_equals(actual, expected)
示例#9
0
def test_count(sample_df):
    """Count ``Label`` entries per ``Group``.

    The expected output has one row per group, each with a count of 3.
    """
    label_count = Aggregation(
        aggfunction='count',
        columns=['Label'],
        newcolumns=['count'],
    )
    aggregate_step = AggregateStep(
        name='aggregate',
        keepOriginalGranularity=False,
        on=['Group'],
        aggregations=[label_count],
    )

    actual = execute_aggregate(aggregate_step, sample_df)
    expected = DataFrame({
        'Group': ['Group 1', 'Group 2'],
        'count': [3, 3],
    })

    assert_dataframes_equals(actual, expected)
示例#10
0
def test_legacy_syntax(sample_df):
    """Build an aggregation from singular ``column`` / ``newcolumn`` keys.

    The aggregation is created from a dict using the singular keys instead of
    ``columns`` / ``newcolumns``; the expected result is a single-row frame
    whose ``sum_value`` is 56.
    """
    legacy_kwargs = {
        'aggfunction': 'sum',
        'column': 'Value1',
        'newcolumn': 'sum_value',
    }
    aggregate_step = AggregateStep(
        name='aggregate',
        keepOriginalGranularity=False,
        on=[],
        aggregations=[Aggregation(**legacy_kwargs)],
    )

    actual = execute_aggregate(aggregate_step, sample_df)
    expected = DataFrame({'sum_value': [56]})

    assert_dataframes_equals(actual, expected)