Exemplo n.º 1
0
def test_check_sentinels(data_raw):
    """Verify that ``_check_sentinels`` swaps out a colliding NaN sentinel.

    The expander is configured by hand (instead of being fitted) with the
    sentinel ``'effrit'``.  After ``_check_sentinels`` runs on ``data_raw``,
    the sentinel must be a different value that appears nowhere in the
    non-numeric columns being expanded.

    NOTE(review): presumably the ``data_raw`` fixture contains ``'effrit'``
    in one of the expanded columns, which is what forces the replacement --
    confirm against the fixture definition.
    """
    expand_cols = ['pid', 'djinn_type', 'animal', 'fruits']
    etl = DataFrameETL(cols_to_expand=expand_cols)

    # Manually set the attributes ``_check_sentinels`` needs.
    etl._nan_sentinel = 'effrit'
    etl.levels_ = {}
    etl._cols_to_drop = etl.cols_to_drop
    etl._cols_to_expand = etl.cols_to_expand

    etl._check_sentinels(data_raw)

    # The seeded sentinel must have been replaced ...
    new_sentinel = etl._nan_sentinel
    assert new_sentinel != 'effrit'

    # ... by a value that is absent from every non-numeric expanded column.
    collisions = data_raw[['pid', 'djinn_type', 'animal']] == new_sentinel
    assert not collisions.any().any()

    # "fruits" needs no collision check: the column is numeric while the
    # sentinel is not, so the sentinel cannot occur in it.
    fruits_dtype = data_raw['fruits'].dtype
    assert np.issubdtype(fruits_dtype, np.number)
    sentinel_type = type(etl._nan_sentinel)
    assert not np.issubdtype(sentinel_type, np.number)
def test_check_sentinels(data_raw):
    """Verify that ``_check_sentinels`` replaces colliding NaN sentinels.

    The expander is configured by hand (instead of being fitted) with a
    string sentinel of ``'effrit'`` and a numeric sentinel of ``1.0``.
    After ``_check_sentinels`` runs on ``data_raw``, both sentinels must
    have changed value, and neither new sentinel may occur in the columns
    it is compared against below.

    NOTE(review): presumably the ``data_raw`` fixture contains ``'effrit'``
    and ``1.0`` in the expanded columns, which is what forces the
    replacement -- confirm against the fixture definition.
    """
    expander = DataFrameETL(cols_to_expand=['pid', 'djinn_type',
                                            'animal', 'fruits'])
    # fill in necessary parameters
    expander._nan_string = 'effrit'
    expander._nan_numeric = 1.0
    expander._is_numeric = {}
    expander.levels_ = {}
    expander._cols_to_drop = expander.cols_to_drop
    expander._cols_to_expand = expander.cols_to_expand
    # Record, per expanded column, the flag computed by ``_flag_numeric``
    # from that column's unique values.
    for col in expander.cols_to_expand:
        expander._is_numeric[col] = expander._flag_numeric(
            pd.unique(data_raw[col]))
    expander._check_sentinels(data_raw)
    # Compare by value, not identity: ``is not`` against a str/float literal
    # depends on object identity and could pass even if the sentinel value
    # were left unchanged.
    assert expander._nan_string != 'effrit'
    assert expander._nan_numeric != 1.0
    # The new string sentinel must not occur in the non-numeric columns.
    assert not (data_raw[['pid', 'djinn_type', 'animal']] ==
                expander._nan_string).any().any()
    # A Series comparison reduces to a scalar with a single ``.any()``.
    assert not (data_raw['fruits'] == expander._nan_numeric).any()