Пример #1
0
def test_expand_col_no_dummy(data_raw):
    """Check ``_expand_col`` when NaN dummy columns are switched off.

    'pid' is expected to expand to a 3x3 one-hot matrix. For 'animal',
    the last expected row equals ``fill_value`` (-1.0) in both level
    columns -- presumably the row whose animal is missing; confirm
    against the ``data_raw`` fixture.
    """
    # ``False`` (not ``None``) is the "no dummy" spelling used by the
    # other no-dummy tests in this file.
    expander = DataFrameETL(cols_to_drop=['fruits'],
                            dummy_na=False,
                            fill_value=-1.0)
    expander.fit(data_raw)

    # One column per level, no extra NaN indicator column.
    arr = expander._expand_col(data_raw, 'pid')
    arr_exp = np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
    assert_almost_equal(arr, arr_exp)

    # Two level columns; the last row carries the fill value.
    arr = expander._expand_col(data_raw, 'animal')
    arr_exp = np.array([[1., 0.], [0., 1.], [-1., -1.]])
    assert_almost_equal(arr, arr_exp)
Пример #2
0
def test_expand_col(data_raw):
    """Check ``_expand_col`` output when ``dummy_na='expanded'``.

    Each expanded column is expected to gain one trailing column on top
    of its level columns: all zeros for 'pid', and 1 in the last row for
    'animal', whose level columns hold ``fill_value`` (-1.0) in that row.
    """
    etl = DataFrameETL(
        cols_to_drop=['fruits'],
        dummy_na='expanded',
        fill_value=-1.0,
    )
    etl.fit(data_raw)

    expectations = (
        # should expand even if there are no NaNs
        ('pid', np.eye(3, 4)),
        ('animal', np.array([
            [1., 0., 0.],
            [0., 1., 0.],
            [-1., -1., 1.],
        ])),
    )
    for column, wanted in expectations:
        got = etl._expand_col(data_raw, column)
        assert_almost_equal(got, wanted)
Пример #3
0
def test_expand_col_few_levels_no_dummy(data_few_levels, few_levels_expected):
    """Check ``_expand_col`` on few-level columns with NaN dummies disabled.

    For each of 'pid', 'fruits' and 'animal', the expanded array is
    compared with the matching level column selected from the
    ``few_levels_expected`` fixture.
    """
    expander = DataFrameETL(cols_to_expand=['pid', 'fruits', 'animal'],
                            dummy_na=False,
                            fill_value=99.)
    expander.fit(data_few_levels)

    # With dummy_na=False no NaN indicator column is expected, so only
    # the level column is selected (as for 'fruits' and 'animal' below).
    arr = expander._expand_col(data_few_levels, 'pid')
    expected_array = np.asarray(few_levels_expected[['pid_a']])
    assert_almost_equal(arr, expected_array)

    arr = expander._expand_col(data_few_levels, 'fruits')
    expected_array = np.asarray(few_levels_expected[['fruits_1.0']])
    assert_almost_equal(arr, expected_array)

    arr = expander._expand_col(data_few_levels, 'animal')
    expected_array = np.asarray(few_levels_expected[['animal_cat']])
    assert_almost_equal(arr, expected_array)
Пример #4
0
def test_expand_col_numeric_no_dummy(data_raw):
    """Check ``_expand_col`` on 'fruits' with no NaN dummy and NaN fill.

    Only 'fruits' is expanded; the other columns are dropped. The middle
    expected row is NaN in both level columns, matching ``fill_value``.
    """
    nan = np.nan
    etl = DataFrameETL(
        cols_to_drop=['pid', 'djinn_type', 'animal'],
        cols_to_expand=['fruits'],
        dummy_na=False,
        fill_value=nan,
    )
    etl.fit(data_raw)

    actual = etl._expand_col(data_raw, 'fruits')
    expected = np.array([
        [1., 0.],
        [nan, nan],
        [0., 1.],
    ])
    assert_almost_equal(actual, expected)