def test_encode_nans(self, X, expected, columns):
    # Fit and transform a single 'cat' column, then verify both the encoded
    # output and the mapping table the encoder keeps for that column.
    encoder = LabelEncoder(cols=['cat'])
    frame = pd.DataFrame(X, columns=['cat'])
    encoded = encoder.fit_transform(frame)
    assert_array_equal(encoded, pd.DataFrame(expected))

    # The mapping must exist for 'cat' and be stored as a DataFrame.
    ok_('cat' in encoder._mapping)
    cat_mapping = encoder._mapping['cat']
    ok_(isinstance(cat_mapping, pd.DataFrame))

    # The second index label is expected to be -99999 (presumably the
    # stand-in used for missing values -- confirm against LabelEncoder).
    eq_(cat_mapping.index[1], -99999)

    # The full index must match the parametrised labels, and the table
    # must expose exactly one column named 'value'.
    assert_array_equal(cat_mapping.index, pd.Series(columns))
    assert_array_equal(cat_mapping.columns, ['value'])
def test_encode_multiple_cols(self, X, expected):
    # Encode two explicitly requested columns and verify the transformed
    # output plus the per-column mapping tables stored on the encoder.
    encoder = LabelEncoder(cols=['cat1', 'cat2'])
    frame = pd.DataFrame(X, columns=['cat1', 'cat2'])
    encoded = encoder.fit_transform(frame)
    assert_array_equal(encoded, pd.DataFrame(expected))

    expected_labels = (
        ('cat1', ['a', 'b']),
        ('cat2', ['foo', 'bar']),
    )

    # Each requested column must have an entry in the mapping ...
    for name, _ in expected_labels:
        ok_(name in encoder._mapping)
    # ... stored as a DataFrame ...
    for name, _ in expected_labels:
        ok_(isinstance(encoder._mapping[name], pd.DataFrame))
    # ... indexed by that column's labels in the expected order ...
    for name, labels in expected_labels:
        assert_array_equal(encoder._mapping[name].index, labels)
    # ... with exactly one column named 'value'.
    for name, _ in expected_labels:
        assert_array_equal(encoder._mapping[name].columns, ['value'])
def test_encode_all(self, X, expected):
    # all columns are encoded if no cols arg passed
    encoder = LabelEncoder()
    frame = pd.DataFrame(X, columns=['cat1', 'cat2'])
    encoded = encoder.fit_transform(frame)
    assert_array_equal(encoded, pd.DataFrame(expected))

    # After fitting, the encoder should report both frame columns as `cols`.
    assert_array_equal(encoder.cols, ['cat1', 'cat2'])

    expected_labels = (
        ('cat1', ['a', 'b']),
        ('cat2', ['foo', 'bar']),
    )

    # Each column must have an entry in the mapping ...
    for name, _ in expected_labels:
        ok_(name in encoder._mapping)
    # ... stored as a DataFrame ...
    for name, _ in expected_labels:
        ok_(isinstance(encoder._mapping[name], pd.DataFrame))
    # ... indexed by that column's labels in the expected order ...
    for name, labels in expected_labels:
        assert_array_equal(encoder._mapping[name].index, labels)
    # ... with exactly one column named 'value'.
    for name, _ in expected_labels:
        assert_array_equal(encoder._mapping[name].columns, ['value'])