def test_transform_unseen(self, X, y, handle_unseen, expected):
    """Compare transform output against ``expected`` after altering one cell.

    The encoder is fitted on the single ``'cat'`` column built from ``X``
    with target ``y``. After fitting, the first cell is overwritten with
    ``'foo'`` (presumably a category absent from the fitted data -- this
    depends on the supplied ``X``) and the transformed frame must match
    ``expected`` to three decimal places.

    Args:
        X: Raw values used to build the one-column ``'cat'`` frame.
        y: Target values, wrapped in a ``pd.Series`` for fitting.
        handle_unseen: Forwarded to the encoder's ``handle_unseen`` option.
        expected: Data convertible to a ``pd.DataFrame`` holding the
            expected encoded values.
    """
    encoder = WeightOfEvidenceEncoder(
        cols=['cat'],
        handle_unseen=handle_unseen,
    )
    frame = pd.DataFrame(X, columns=['cat'])
    target = pd.Series(y)
    encoder.fit(frame, target)

    # Change the data only after fitting, so transform sees the new value.
    frame.iloc[0, 0] = 'foo'

    encoded = encoder.transform(frame)
    wanted = pd.DataFrame(expected)
    assert_array_almost_equal(encoded, wanted, decimal=3)
def woe_encoding(cls, X, Y=None, encoder=None):
    """Return the id columns alongside their encoded counterparts.

    Args:
        X: DataFrame containing at least the ``'some_id'`` and
            ``'other_id'`` columns.
        Y: Target passed to ``fit``; only used when ``encoder`` is None.
        encoder: Optional encoder exposing ``transform``. When None, a
            ``WeightOfEvidenceEncoder`` is created for the two id columns
            with ``min_samples=5`` and fitted on ``(X, Y)``. A supplied
            encoder is used as given and is not fitted here.

    Returns:
        A ``(frame, encoder)`` tuple. ``frame`` is ``X``'s two id columns
        concatenated column-wise with the encoder's transform output, in
        which the id columns have been renamed with a ``'woe_enc_'``
        prefix. ``encoder`` is the encoder that was used.
    """
    cols = ['some_id', 'other_id']

    if encoder is None:
        encoder = WeightOfEvidenceEncoder(cols=cols, min_samples=5)
        encoder.fit(X, Y)

    # Map each id column to its prefixed name for the encoded copy.
    renamed = {}
    for col in cols:
        renamed[col] = 'woe_enc_{}'.format(col)

    transformed = encoder.transform(X)
    encoded = transformed.rename(columns=renamed)

    originals = X[cols]
    combined = pd.concat([originals, encoded], axis=1)
    return combined, encoder
def test_transform_error(self, X, y, expected):
    """Check that transform raises ``ValueError`` with ``handle_unseen='error'``.

    The encoder is fitted on the single ``'cat'`` column built from ``X``
    with target ``y``. After fitting, the first cell is overwritten with
    ``'foo'`` (presumably a category absent from the fitted data -- this
    depends on the supplied ``X``) and ``transform`` is expected to raise.

    Args:
        X: Raw values used to build the one-column ``'cat'`` frame.
        y: Target values, wrapped in a ``pd.Series`` for fitting.
        expected: Accepted as part of the signature but not used here.
    """
    encoder = WeightOfEvidenceEncoder(cols=['cat'], handle_unseen='error')
    frame = pd.DataFrame(X, columns=['cat'])
    target = pd.Series(y)
    encoder.fit(frame, target)

    # Change the data only after fitting, so transform sees the new value.
    frame.iloc[0, 0] = 'foo'

    assert_raises(ValueError, encoder.transform, frame)