def test_transform_unseen(self, X, y, handle_unseen, expected):
     """Check ``transform`` output after a new value is written into the data.

     The encoder is built with the given ``handle_unseen`` policy and fitted
     on the ``cat`` column.  The first cell of that same frame is then
     overwritten with ``'foo'`` and the transformed result is compared with
     ``expected`` to three decimal places.

     NOTE(review): ``X``, ``y``, ``handle_unseen`` and ``expected`` are
     presumably supplied by a parameterisation decorator that is not visible
     here -- confirm.
     """
     encoder = TargetEncoder(cols=['cat'], handle_unseen=handle_unseen)
     frame = pd.DataFrame(X, columns=['cat'])
     target = pd.Series(y)
     encoder.fit(frame, target)
     # Mutate the fitted frame in place so row 0 carries the value 'foo'.
     frame.iloc[0, 0] = 'foo'
     transformed = encoder.transform(frame)
     wanted = pd.DataFrame(expected)
     assert_array_almost_equal(transformed, wanted, decimal=3)
示例#2
0
 def target_encoding(cls, X, Y=None, encoder=None):
     """Target-encode the ``some_id`` and ``other_id`` columns of ``X``.

     When no ``encoder`` is passed, a new ``TargetEncoder`` (``min_samples=5``,
     ``smoothing=5``) is created for those two columns and fitted on
     ``X`` / ``Y``; otherwise the supplied encoder is used as-is without
     refitting.

     Returns a ``(frame, encoder)`` tuple.  ``frame`` is the column-wise
     concatenation of the two original id columns with the encoder's
     transform output, in which ``some_id`` / ``other_id`` have been renamed
     to ``tgt_enc_some_id`` / ``tgt_enc_other_id``.
     """
     cols = ['some_id', 'other_id']
     if encoder is None:
         encoder = TargetEncoder(cols=cols, min_samples=5, smoothing=5)
         encoder.fit(X, Y)
     transformed = encoder.transform(X)
     # Prefix the encoded columns so they do not clash with the raw ids.
     renamed = {}
     for c in cols:
         renamed[c] = 'tgt_enc_{}'.format(c)
     encoded = transformed.rename(columns=renamed)
     combined = pd.concat([X[cols], encoded], axis=1)
     return combined, encoder
 def test_encode_nans(self, X, y, expected, columns):
     """Check ``fit_transform`` output and the fitted mapping for ``cat``.

     The transformed result must match ``expected`` to three decimals.  The
     stored mapping for ``cat`` must be a DataFrame whose first index entry
     is ``-99999``, whose remaining index entries equal ``columns`` and whose
     columns are ``mean``, ``count`` and ``value``.

     NOTE(review): ``-99999`` is presumably the encoder's placeholder for
     missing values -- confirm against ``TargetEncoder``.
     """
     enc = TargetEncoder(cols=['cat'])
     frame = pd.DataFrame(X, columns=['cat'])
     result = enc.fit_transform(frame, pd.Series(y))
     assert_array_almost_equal(result, pd.DataFrame(expected), decimal=3)
     ok_('cat' in enc._mapping)
     # Only look the mapping up once its presence has been asserted.
     cat_mapping = enc._mapping['cat']
     ok_(isinstance(cat_mapping, pd.DataFrame))
     eq_(cat_mapping.index[0], -99999)
     assert_array_equal(cat_mapping.index[1:], columns)
     assert_array_equal(cat_mapping.columns, ['mean', 'count', 'value'])
 def test_encode_col(self, X, y, min_samples, smoothing, expected, imputed, columns):
     """Check single-column encoding for given ``min_samples`` / ``smoothing``.

     After ``fit_transform`` on the ``cat`` column the result must match
     ``expected`` to three decimals, ``_imputed`` must equal ``imputed``, and
     the mapping stored for ``cat`` must be a DataFrame indexed by
     ``columns`` with the columns ``mean``, ``count`` and ``value``.
     """
     settings = {'min_samples': min_samples, 'smoothing': smoothing}
     enc = TargetEncoder(cols=['cat'], **settings)
     frame = pd.DataFrame(X, columns=['cat'])
     result = enc.fit_transform(frame, pd.Series(y))
     assert_array_almost_equal(result, pd.DataFrame(expected), decimal=3)
     eq_(enc._imputed, imputed)
     ok_('cat' in enc._mapping)
     # Only look the mapping up once its presence has been asserted.
     cat_mapping = enc._mapping['cat']
     ok_(isinstance(cat_mapping, pd.DataFrame))
     assert_array_equal(cat_mapping.index, columns)
     assert_array_equal(cat_mapping.columns, ['mean', 'count', 'value'])
 def test_encode_multiple_cols(self, X, y, expected):
     """Check encoding of two columns, ``cat1`` and ``cat2``, in one pass.

     The ``fit_transform`` result must match ``expected`` to two decimals.
     Each column must have a DataFrame mapping with the columns ``mean``,
     ``count`` and ``value``; the mapping index must be ``['a', 'b']`` for
     ``cat1`` and ``['bar', 'foo']`` for ``cat2``.
     """
     names = ('cat1', 'cat2')
     index_by_col = {'cat1': ['a', 'b'], 'cat2': ['bar', 'foo']}

     enc = TargetEncoder(cols=list(names))
     frame = pd.DataFrame(X, columns=list(names))
     result = enc.fit_transform(frame, pd.Series(y))
     assert_array_almost_equal(result, pd.DataFrame(expected), decimal=2)

     # One loop per kind of check keeps the assertions in their original order.
     for name in names:
         ok_(name in enc._mapping)
     for name in names:
         ok_(isinstance(enc._mapping[name], pd.DataFrame))
     for name in names:
         assert_array_equal(enc._mapping[name].index, index_by_col[name])
     for name in names:
         assert_array_equal(enc._mapping[name].columns, ['mean', 'count', 'value'])
 def test_init(self, kwargs, cols, handle_unseen, min_samples, smoothing):
     """Check the attributes of a freshly constructed ``TargetEncoder``.

     The encoder is built from ``kwargs``; its ``cols``, ``handle_unseen``,
     ``min_samples`` and ``smoothing`` attributes must equal the given
     values, ``_imputed`` must be ``None`` and ``_mapping`` an empty dict.
     """
     enc = TargetEncoder(**kwargs)
     checks = (
         ('cols', cols),
         ('handle_unseen', handle_unseen),
         ('min_samples', min_samples),
         ('smoothing', smoothing),
         ('_imputed', None),
         ('_mapping', {}),
     )
     for attribute, wanted in checks:
         eq_(getattr(enc, attribute), wanted)
 def test_transform_before_fit(self):
     """Check that ``transform`` on a never-fitted encoder raises ``ValueError``."""
     unfitted = TargetEncoder()
     transform = unfitted.transform
     # The argument 1 is a dummy input; the call is expected to raise.
     assert_raises(ValueError, transform, 1)
 def test_transform_error(self, X, y, expected):
     """Check that ``handle_unseen='error'`` makes ``transform`` raise.

     The encoder is fitted on the ``cat`` column, the first cell of the same
     frame is then overwritten with ``'foo'``, and transforming that frame
     must raise ``ValueError``.  ``expected`` is accepted but not used.
     """
     encoder = TargetEncoder(cols=['cat'], handle_unseen='error')
     frame = pd.DataFrame(X, columns=['cat'])
     target = pd.Series(y)
     encoder.fit(frame, target)
     # Mutate the fitted frame in place so row 0 carries the value 'foo'.
     frame.iloc[0, 0] = 'foo'
     assert_raises(ValueError, encoder.transform, frame)