Пример #1
0
def test_create_col_names_numeric(data_raw):
    """Verify ``_create_col_names`` output with ``dummy_na='expanded'``.

    ``pid`` and ``fruits`` are expanded, ``djinn_type`` and ``animal`` are
    dropped. Both values returned by ``_create_col_names`` are compared
    against hard-coded expectations.
    """
    expected_names = [
        'pid_a', 'pid_b', 'pid_c', 'pid_NaN',
        'fruits_1.0', 'fruits_3.0', 'fruits_NaN',
        'age',
    ]
    expected_unexpanded = ['pid', 'fruits', 'age']

    etl = DataFrameETL(
        cols_to_expand=['pid', 'fruits'],
        cols_to_drop=['djinn_type', 'animal'],
        dummy_na='expanded',
    )

    # Set the private attributes directly, since the private helpers are
    # called below without going through the public API.
    etl._nan_sentinel = NAN_STRING
    etl._cols_to_drop = etl.cols_to_drop
    etl._cols_to_expand = etl.cols_to_expand
    etl._dummy_na = 'expanded'

    # Levels are computed first, then the unexpanded-NaN flags, and only
    # then the column names.
    etl.levels_ = etl._create_levels(data_raw)
    etl._unexpanded_nans = etl._flag_unexpanded_nans(data_raw)
    col_names, unexpanded_cols = etl._create_col_names(data_raw)

    assert col_names == expected_names
    assert unexpanded_cols == expected_unexpanded
Пример #2
0
def test_create_col_names_no_dummy(data_raw):
    """Verify ``_create_col_names`` output with ``dummy_na=False``.

    ``pid``, ``djinn_type`` and ``animal`` are expanded and ``fruits`` is
    dropped. The expected names contain no ``*_NaN`` columns, and both
    values returned by ``_create_col_names`` are checked.
    """
    expected_names = [
        'pid_a', 'pid_b', 'pid_c',
        'djinn_type_effrit', 'djinn_type_marid', 'djinn_type_sila',
        'age',
        'animal_cat', 'animal_dog',
    ]
    expected_unexpanded = ['pid', 'djinn_type', 'age', 'animal']

    etl = DataFrameETL(
        cols_to_expand=['pid', 'djinn_type', 'animal'],
        cols_to_drop=['fruits'],
        dummy_na=False,
    )

    # Set the private attributes directly, since the private helpers are
    # called below without going through the public API.
    etl._nan_sentinel = NAN_STRING
    etl._cols_to_drop = etl.cols_to_drop
    etl._cols_to_expand = etl.cols_to_expand
    etl._dummy_na = False

    # Levels are computed first, then the unexpanded-NaN flags, and only
    # then the column names.
    etl.levels_ = etl._create_levels(data_raw)
    etl._unexpanded_nans = etl._flag_unexpanded_nans(data_raw)
    col_names, unexpanded_cols = etl._create_col_names(data_raw)

    assert col_names == expected_names
    assert unexpanded_cols == expected_unexpanded