Пример #1
0
def test_join_match_case_param_success():
    """
    Inner join on ``name`` with ``match_case`` set to False.

    The joined frame is expected to expose the left-hand columns with the
    ``_l`` suffix and the remaining right-hand column with the ``_r`` suffix.

    Note carried over from the original author: match case lower-cases a
    column, appends ``_lower`` to its name and finally drops that column
    (which seems redundant).
    """
    left = util.titanic(['name', 'embarked'], size=10)
    right = util.titanic(['homedest', 'name'], size=10)
    expected = util.titanic(['name', 'embarked', 'homedest'], size=10)

    params = {
        'keep_right_keys': False,
        'match_case': False,
        'join_type': 'inner',
        'left_attributes': ['name'],
        'right_attributes': ['name'],
    }
    operation = JoinOperation(
        parameters=params,
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )

    # The generated code looks its inputs up by the names 'df1' / 'df2'.
    generated = operation.generate_code()
    outputs = util.execute(generated, {'df1': left, 'df2': right})

    expected.columns = ['name_l', 'embarked_l', 'homedest_r']
    assert outputs['out'].equals(expected)
Пример #2
0
def test_join_krk_param_success():
    """
    Inner join on ``name`` with ``keep_right_keys`` enabled.

    The flags are deliberately passed as truthy non-boolean values (``1`` and
    ``'1'``). The expected output keeps the right-hand key as ``name_r``
    alongside the suffixed left-hand columns.
    """
    left = util.titanic(['name', 'homedest'], size=10)
    right = util.titanic(['embarked', 'name'], size=10)
    expected = util.titanic(['name', 'homedest', 'embarked', 'name'], size=10)

    params = {
        'keep_right_keys': 1,
        'match_case': '1',
        'join_type': 'inner',
        'left_attributes': ['name'],
        'right_attributes': ['name'],
    }
    operation = JoinOperation(
        parameters=params,
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )

    generated = operation.generate_code()
    outputs = util.execute(generated, {'df1': left, 'df2': right})

    expected.columns = ['name_l', 'homedest_l', 'embarked_r', 'name_r']
    assert outputs['out'].equals(expected)
Пример #3
0
def test_join_outer_replace_success():
    """
    Executing a join whose ``join_type`` is ``'_outer'`` raises ``KeyError``.

    Per the original author's note, this only happens when the literal value
    ``'_outer'`` is passed as the join type.
    """
    left = util.titanic(['name', 'homedest'], size=10)
    right = util.titanic(['embarked', 'name'], size=10)

    params = {
        'keep_right_keys': False,
        'match_case': True,
        'join_type': '_outer',
        'left_attributes': ['homedest'],
        'right_attributes': ['embarked'],
    }
    operation = JoinOperation(
        parameters=params,
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )

    with pytest.raises(KeyError) as excinfo:
        util.execute(operation.generate_code(), {'df1': left, 'df2': right})

    # NOTE(review): the empty string is a substring of every string, so this
    # assertion can never fail; only the KeyError type is really checked.
    # Confirm the intended message before tightening it.
    assert '' in str(excinfo.value)
Пример #4
0
def test_join_custom_suffixes_success():
    """
    Inner join on ``name`` with custom suffixes supplied through ``aliases``.

    The comma-separated ``'_esquerdo,_direito'`` value is expected to be used
    as the left/right column suffixes of the joined output.
    """
    left = util.titanic(['name', 'homedest'], size=10)
    right = util.titanic(['embarked', 'name'], size=10)
    expected = util.titanic(['name', 'homedest', 'embarked'], size=10)

    params = {
        'keep_right_keys': False,
        'match_case': True,
        'join_type': 'inner',
        'left_attributes': ['name'],
        'right_attributes': ['name'],
        'aliases': '_esquerdo,_direito',
    }
    operation = JoinOperation(
        parameters=params,
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )

    generated = operation.generate_code()
    outputs = util.execute(generated, {'df1': left, 'df2': right})

    expected.columns = ['name_esquerdo', 'homedest_esquerdo', 'embarked_direito']
    assert outputs['out'].equals(expected)
Пример #5
0
def test_aggregation_non_numeric_attributes_success():
    """
    Aggregate the non-numeric ``homedest`` column, grouped by itself.

    The function list comes from ``return_funcs('homedest', drop='avg')`` and
    the operation output is compared against a hand-written pandas named
    aggregation over the same data.
    """
    df = util.titanic(['homedest'], size=150)
    # Snapshot taken before the operation runs, as in the original test.
    snapshot = df.copy()

    operation = AggregationOperation(
        parameters={
            'attributes': ['homedest'],
            'function': return_funcs('homedest', drop='avg'),
        },
        named_inputs={'input data': 'df'},
        named_outputs={'output data': 'out'},
    )
    outputs = util.execute(operation.generate_code(), {'df': df})

    # Output column name -> (source column, aggregation), in output order.
    named_aggregations = {
        'home_collect_list': ('homedest', _collect_list),
        'home_collect_set': ('homedest', _collect_set),
        'home_count': ('homedest', 'count'),
        'home_first': ('homedest', 'first'),
        'home_last': ('homedest', 'last'),
        'home_max': ('homedest', 'max'),
        'home_min': ('homedest', 'min'),
        'home_sum': ('homedest', 'sum'),
        'home_size': ('homedest', 'size'),
    }
    grouped = snapshot.groupby(['homedest'])
    expected = grouped.agg(**named_aggregations).reset_index()
    assert outputs['out'].equals(expected)
Пример #6
0
def test_join_merge_outer_parameter_success():
    """
    Outer join on differently named keys matches a hand-built pandas merge.

    Note carried over from the original author: the operation contains a line
    of code that replaces ``'_outer'`` with ``''``.
    """
    left = util.titanic(['name', 'homedest'], size=10)
    right = util.titanic(['embarked', 'name'], size=10)
    # Copies are taken before execution so the expectation is built from
    # untouched data.
    left_expected = left.copy()
    right_expected = right.copy()

    params = {
        'keep_right_keys': False,
        'match_case': True,
        'join_type': 'outer',
        'left_attributes': ['homedest'],
        'right_attributes': ['embarked'],
    }
    operation = JoinOperation(
        parameters=params,
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )
    outputs = util.execute(operation.generate_code(),
                           {'df1': left, 'df2': right})

    # Suffix every column the same way the expected join output names them.
    left_expected.columns = [col + '_l' for col in left_expected.columns]
    right_expected.columns = [col + '_r' for col in right_expected.columns]

    left_keys = ['homedest_l']
    right_keys = ['embarked_r']

    merged = left_expected.merge(right_expected,
                                 how='outer',
                                 suffixes=['_l', '_r'],
                                 left_on=left_keys,
                                 right_on=right_keys)

    # keep_right_keys is False, so the right-hand key column is dropped.
    expected = merged.drop(right_keys, axis=1)
    assert outputs['out'].equals(expected)
Пример #7
0
def test_join_invalid_right_attributes_param_fail():
    """
    A plain string passed as ``right_attributes`` makes execution fail.

    Running the generated code is expected to raise ``NameError`` whose
    message mentions ``invalid``.
    """
    left = util.titanic(['name', 'homedest'], size=10)
    right = util.titanic(['embarked', 'name'], size=10)

    params = {
        'keep_right_keys': False,
        'match_case': False,
        'join_type': 'inner',
        'left_attributes': ['homedest'],
        'right_attributes': 'invalid',
    }
    operation = JoinOperation(
        parameters=params,
        named_inputs={'input data 1': 'df1', 'input data 2': 'df2'},
        named_outputs={'output data': 'out'},
    )

    with pytest.raises(NameError) as excinfo:
        util.execute(operation.generate_code(), {'df1': left, 'df2': right})

    message = str(excinfo.value)
    assert "invalid" in message
Пример #8
0
def test_aggregation_non_numeric_attributes_fail():
    """
    Aggregating the non-numeric ``homedest`` column with the full function
    list from ``return_funcs('homedest')`` must fail.

    Execution is expected to raise pandas' ``DataError`` with the message
    "No numeric types to aggregate".
    """
    df = util.titanic(['homedest'], size=150)

    operation = AggregationOperation(
        parameters={
            'attributes': ['homedest'],
            'function': return_funcs('homedest'),
        },
        named_inputs={'input data': 'df'},
        named_outputs={'output data': 'out'},
    )

    with pytest.raises(pd.core.base.DataError) as excinfo:
        util.execute(operation.generate_code(), {'df': df})

    message = str(excinfo.value)
    assert "No numeric types to aggregate" in message