Exemplo n.º 1
0
 def test_transform_unseen(self, X, handle_unseen, expected):
     """Check ``transform`` output when a value appears that was not fitted.

     The encoder is fitted on the frame, then the first cell is overwritten
     with ``'foo'`` before transforming, and the result is compared with
     ``expected``.

     Args:
         X: Raw data for the single ``'cat'`` column.
         handle_unseen: Forwarded to ``LabelEncoder(handle_unseen=...)``.
         expected: Data the transformed frame must equal.

     NOTE(review): the arguments are presumably supplied by a parametrizing
     decorator outside this view -- confirm.
     """
     encoder = LabelEncoder(cols=['cat'], handle_unseen=handle_unseen)
     frame = pd.DataFrame(X, columns=['cat'])
     encoder.fit(frame)

     # Change the frame only after fitting so the encoder sees a new value.
     frame.iloc[0, 0] = 'foo'

     assert_array_equal(encoder.transform(frame), pd.DataFrame(expected))
 def test_encode_nans(self, X, expected, columns):
     """Check ``fit_transform`` output and the mapping table for ``'cat'``.

     Args:
         X: Raw data for the single ``'cat'`` column.
         expected: Data the encoded frame must equal.
         columns: Values the mapping table's index must equal.

     NOTE(review): judging by the test name, ``X`` presumably contains
     missing values and -99999 is their placeholder -- confirm against the
     parametrization, which is outside this view.
     """
     encoder = LabelEncoder(cols=['cat'])
     encoded = encoder.fit_transform(pd.DataFrame(X, columns=['cat']))
     assert_array_equal(encoded, pd.DataFrame(expected))

     ok_('cat' in encoder._mapping)
     cat_table = encoder._mapping['cat']
     ok_(isinstance(cat_table, pd.DataFrame))

     # The second index entry of the mapping must be -99999.
     eq_(cat_table.index[1], -99999)
     assert_array_equal(cat_table.index, pd.Series(columns))
     assert_array_equal(cat_table.columns, ['value'])
 def test_encode_multiple_cols(self, X, expected):
     """Check encoding of two explicitly named columns.

     Compares the ``fit_transform`` output with ``expected`` and then
     checks, for both ``'cat1'`` and ``'cat2'``, that a DataFrame mapping
     exists with the expected index labels and a single ``'value'`` column.

     Args:
         X: Raw data for the ``'cat1'`` and ``'cat2'`` columns.
         expected: Data the encoded frame must equal.
     """
     encoder = LabelEncoder(cols=['cat1', 'cat2'])
     frame = pd.DataFrame(X, columns=['cat1', 'cat2'])
     encoded = encoder.fit_transform(frame)
     assert_array_equal(encoded, pd.DataFrame(expected))

     # (column name, expected index labels of that column's mapping table)
     index_by_col = (('cat1', ['a', 'b']), ('cat2', ['foo', 'bar']))

     for col, _ in index_by_col:
         ok_(col in encoder._mapping)
     for col, _ in index_by_col:
         ok_(isinstance(encoder._mapping[col], pd.DataFrame))
     for col, labels in index_by_col:
         assert_array_equal(encoder._mapping[col].index, labels)
     for col, _ in index_by_col:
         assert_array_equal(encoder._mapping[col].columns, ['value'])
 def test_encode_all(self, X, expected):
     """Check that every column is encoded when no ``cols`` argument is given.

     After ``fit_transform`` the encoder's ``cols`` must equal
     ``['cat1', 'cat2']`` and each column must have a DataFrame mapping with
     the expected index labels and a single ``'value'`` column.

     Args:
         X: Raw data for the ``'cat1'`` and ``'cat2'`` columns.
         expected: Data the encoded frame must equal.
     """
     # No cols argument: the encoder is expected to pick up all columns.
     encoder = LabelEncoder()
     frame = pd.DataFrame(X, columns=['cat1', 'cat2'])
     encoded = encoder.fit_transform(frame)
     assert_array_equal(encoded, pd.DataFrame(expected))
     assert_array_equal(encoder.cols, ['cat1', 'cat2'])

     # (column name, expected index labels of that column's mapping table)
     index_by_col = (('cat1', ['a', 'b']), ('cat2', ['foo', 'bar']))

     for col, _ in index_by_col:
         ok_(col in encoder._mapping)
     for col, _ in index_by_col:
         ok_(isinstance(encoder._mapping[col], pd.DataFrame))
     for col, labels in index_by_col:
         assert_array_equal(encoder._mapping[col].index, labels)
     for col, _ in index_by_col:
         assert_array_equal(encoder._mapping[col].columns, ['value'])
 def test_transform_error(self, X, expected):
     """Check that ``handle_unseen='error'`` raises on a value not fitted.

     The encoder is fitted on the frame, the first cell is then replaced
     with ``'foo'``, and ``transform`` must raise ``ValueError``.

     Args:
         X: Raw data for the single ``'cat'`` column.
         expected: Unused here; kept so the signature stays unchanged
             (presumably it is filled by a parametrizing decorator outside
             this view -- confirm).
     """
     encoder = LabelEncoder(cols=['cat'], handle_unseen='error')
     frame = pd.DataFrame(X, columns=['cat'])
     encoder.fit(frame)

     # Change the frame only after fitting so the encoder sees a new value.
     frame.iloc[0, 0] = 'foo'

     assert_raises(ValueError, encoder.transform, frame)
 def test_transform_before_fit(self):
     """Check that ``transform`` on a never-fitted encoder raises ``ValueError``."""
     unfitted = LabelEncoder()
     # The argument 1 is an arbitrary input; the call is expected to raise.
     assert_raises(ValueError, unfitted.transform, 1)
 def test_init(self, kwargs, cols, handle_unseen):
     """Check the attributes of a freshly constructed encoder.

     Args:
         kwargs: Keyword arguments passed to ``LabelEncoder``.
         cols: Expected value of ``enc.cols``.
         handle_unseen: Expected value of ``enc.handle_unseen``.

     Also checks that ``_imputed`` is -99999 and ``_mapping`` is an empty
     dict right after construction.
     """
     encoder = LabelEncoder(**kwargs)

     # (attribute name, expected value), checked in this order.
     expectations = (
         ('cols', cols),
         ('handle_unseen', handle_unseen),
         ('_imputed', -99999),
         ('_mapping', {}),
     )
     for attr, wanted in expectations:
         eq_(getattr(encoder, attr), wanted)