def test_categorical_mixed_type_levels():
    """Expand a categorical column whose categories mix ints and strings.

    Fits a ``DataFrameETL`` with ``cols_to_expand='auto'`` and
    ``dummy_na=False`` on a two-column frame, then checks both the
    transformed values and the resulting ``columns_`` names.
    """
    # Use a category with both strings and integers in its categories.
    # ``np.nan`` is spelled in lowercase: the ``np.NaN`` alias was removed
    # in NumPy 2.0.
    raw = pd.concat([
        pd.Series([1.0, np.nan, 3.0], dtype='float', name='fruits'),
        pd.Series([500, np.nan, 'cat'], dtype='category', name='mixed'),
    ], axis=1)
    expander = DataFrameETL(cols_to_expand='auto', dummy_na=False)
    tfm = expander.fit_transform(raw)

    # Expected column order: fruits, mixed_500, mixed_cat. The row whose
    # 'mixed' value is missing has zeros in both indicator columns.
    exp = np.array([[1, 1, 0],
                    [np.nan, 0, 0],
                    [3, 0, 1]])
    assert_almost_equal(tfm, exp)
    assert expander.columns_ == ['fruits', 'mixed_500', 'mixed_cat']
def test_categorical_looks_like_int():
    """Expand a categorical column whose categories are all integers.

    Fits a ``DataFrameETL`` with ``cols_to_expand='auto'`` and
    ``dummy_na='expanded'``, then checks the transformed values and the
    resulting ``columns_`` names, including the extra ``intcat_NaN`` column.
    """
    # Verify that the right thing happens if the fit DataFrame has a
    # categorical with integer categories.
    # ``np.nan`` is spelled in lowercase: the ``np.NaN`` alias was removed
    # in NumPy 2.0.
    raw = pd.concat([
        pd.Series([1.0, np.nan, 3.0], dtype='float', name='fruits'),
        pd.Series([500, 1000, 1000], dtype='category', name='intcat'),
    ], axis=1)
    expander = DataFrameETL(cols_to_expand='auto', dummy_na='expanded')
    tfm = expander.fit_transform(raw)

    # Expected column order: fruits, intcat_500, intcat_1000, intcat_NaN.
    # 'intcat' has no missing values, so the NaN indicator column is all 0.
    exp = np.array([[1, 1, 0, 0],
                    [np.nan, 0, 1, 0],
                    [3, 0, 1, 0]])
    assert_almost_equal(tfm, exp)
    assert expander.columns_ == (
        ['fruits', 'intcat_500', 'intcat_1000', 'intcat_NaN']
    )
def test_pickle(data_raw):
    """Check that a fitted ``DataFrameETL`` survives a pickle round trip.

    The transformer is fitted on ``data_raw``, pickled and unpickled, and
    the restored copy must produce an array with the same shape and values
    as the one returned by the original ``fit_transform`` call.
    """
    etl = DataFrameETL(
        cols_to_drop=['pid'],
        cols_to_expand=['djinn_type', 'fruits', 'animal'],
        dummy_na='all',
    )
    reference = etl.fit_transform(data_raw)

    # Serialize the fitted transformer to bytes and restore it again.
    restored = pickle.loads(pickle.dumps(etl))

    # The restored transformer must reproduce the original output.
    result = restored.transform(data_raw)
    assert result.shape == reference.shape
    assert_almost_equal(result, reference)