def test_woe_transformer_conv_weights_onnx_noonehot(self):
    """Weighted WOETransformer with ``onehot=False`` through both converters.

    The transformer output is first compared with hand-written values,
    then the ONNX graphs produced by ``to_onnx`` and by
    ``woe_transformer_to_onnx`` are run with onnxruntime and compared
    with the transformer output.
    """
    data = numpy.array(
        [[0.2, 0.7, 0.9],
         [0.51, 0.71, 0.91],
         [0.7, 1.5, 0.92]],
        dtype=numpy.float32)
    first_column = [(0.4, 0.6, False, True)]
    second_column = [(0.9, numpy.inf), (-numpy.inf, 0.9)]
    woe = WOETransformer(
        intervals=[first_column, second_column],
        weights=[[2.7], [3.5, 6.7]],
        onehot=False)
    woe.fit(data)
    expected = woe.transform(data)

    # Hand-written reference: the third column has no interval and keeps
    # its input values.
    manual = numpy.array(
        [[0., 6.7, 0.9],
         [2.7, 6.7, 0.91],
         [0., 3.5, 0.92]],
        dtype=numpy.float32)
    assert_almost_equal(manual, expected)

    with self.subTest(way='skl2onnx'):
        model = to_onnx(woe, data, target_opset=TARGET_OPSET, verbose=0)
        session = InferenceSession(model.SerializeToString())
        outputs = session.run(None, {'X': data})
        assert_almost_equal(expected, outputs[0])

    with self.subTest(way='onnx'):
        model = woe_transformer_to_onnx(woe, TARGET_OPSET)
        try:
            session = InferenceSession(model.SerializeToString())
        except InvalidArgument as exc:
            # Dump the model so a loading failure can be diagnosed.
            raise AssertionError(
                "Cannot load model:\n%s" % str(model)) from exc
        outputs = session.run(None, {'X': data})
        assert_almost_equal(expected, outputs[0])
def test_woe_transformer_bigger(self):
    """Single column, four adjacent weighted intervals, both converters.

    The input also contains values that fall outside every interval
    (5, 6 and -1). The ONNX outputs of ``to_onnx`` and
    ``woe_transformer_to_onnx`` must match ``WOETransformer.transform``.
    """
    column = numpy.array(
        [[0, 1, 2, 3, 4, 5, 6, -1]], dtype=numpy.float32).T
    bounds = [(0.0, 1.0), (1.0, 2.0), (2.0, 3.0), (3.0, 4.0)]
    intervals = [[(low, high, False, True) for low, high in bounds]]
    weights = [[
        -1.4057124469769924,
        -1.7241661780955269,
        2.545531271604435,
        0.9614111671546247,
    ]]
    woe = WOETransformer(intervals=intervals, weights=weights, onehot=False)
    woe.fit(column)
    expected = woe.transform(column)

    # Same load/run/compare sequence for each converter, in the same
    # order as before: skl2onnx first, then the dedicated ONNX builder.
    converters = (
        lambda: to_onnx(woe, column, target_opset=TARGET_OPSET),
        lambda: woe_transformer_to_onnx(woe, TARGET_OPSET),
    )
    for build_model in converters:
        model = build_model()
        try:
            session = InferenceSession(model.SerializeToString())
        except InvalidArgument as exc:
            raise AssertionError(
                "Cannot load model:\n%s" % str(model)) from exc
        outputs = session.run(None, {'X': column})
        assert_almost_equal(expected, outputs[0])
def fit(self, X, y=None, sample_weight=None):
    """Fit an OrdinalEncoder on ``X``, then a weighted WOETransformer on top.

    Every encoded value ``i`` of feature ``j`` gets its own interval
    ``(i - 1, i, False, True)`` and the weight ``10 * j + i``.

    :param X: training data handed to ``OrdinalEncoder.fit``
    :param y: ignored; optional so that ``fit(X)`` and
        ``fit_transform(X)`` can be called without a target
    :param sample_weight: ignored
    :return: self
    """
    self.encoder_ = OrdinalEncoder().fit(X)
    tr = self.encoder_.transform(X)
    # The intervals list holds one entry per feature, so the maximum is
    # taken over the rows (axis=0). axis=1 would give one value per
    # sample and size the intervals from unrelated rows.
    n_codes = (tr.max(axis=0) + 1).astype(np.int64)
    intervals = [
        [(i - 1, i, False, True) for i in range(0, m)]
        for m in n_codes
    ]
    weights = [
        [10 * j + i for i in range(len(inter))]
        for j, inter in enumerate(intervals)
    ]
    self.woe_ = WOETransformer(intervals, onehot=False, weights=weights)
    self.woe_.fit(tr)
    return self
def test_woe_transformer_conv_weights(self):
    """Weighted WOETransformer: ``to_onnx`` output matches ``transform``."""
    data = numpy.array(
        [[0.2, 0.7, 0.9],
         [0.51, 0.71, 0.91],
         [0.7, 1.5, 0.92]],
        dtype=numpy.float32)
    first_column = [(0.4, 0.6, False, True)]
    second_column = [(0.9, numpy.inf), (-numpy.inf, 0.9)]
    woe = WOETransformer(
        intervals=[first_column, second_column],
        weights=[[2.7], [3.5, 6.7]])
    woe.fit(data)
    expected = woe.transform(data)

    model = to_onnx(woe, data, target_opset=TARGET_OPSET)
    session = InferenceSession(model.SerializeToString())
    outputs = session.run(None, {'X': data})
    assert_almost_equal(expected, outputs[0])
def test_woe_transformer_conv_ext(self):
    """One interval per column, one combination of the two flags each.

    The four columns use the four (False/True, False/True) combinations
    of the trailing interval flags, and the data contains values equal to
    the interval bounds. The ONNX output must match ``transform``.
    """
    data = numpy.array(
        [[0.4, 1.4, 2.4, 3.4],
         [0.5, 1.5, 2.5, 3.5],
         [0.6, 1.6, 2.6, 3.6],
         [0.7, 1.7, 2.7, 3.7]],
        dtype=numpy.float32)
    bounds = [(0.4, 0.6), (1.4, 1.6), (2.4, 2.6), (3.4, 3.6)]
    flags = [(False, False), (False, True), (True, False), (True, True)]
    intervals = [
        [(low, high, first_flag, second_flag)]
        for (low, high), (first_flag, second_flag) in zip(bounds, flags)
    ]
    woe = WOETransformer(intervals=intervals)
    woe.fit(data)
    expected = woe.transform(data)

    model = to_onnx(woe, data, target_opset=TARGET_OPSET)
    session = InferenceSession(model.SerializeToString())
    outputs = session.run(None, {'X': data})
    assert_almost_equal(expected, outputs[0])
def test_woe_transformer_conv_ext2(self):
    """Two intervals sharing the bound 0.5, for every flag combination.

    ``inca`` is the last flag of the first interval and ``incb`` the
    third flag of the second one. The data contains the shared bound
    itself (0.5), and the ONNX output must match ``transform`` in
    every case.
    """
    flag_pairs = [(False, False), (True, True),
                  (False, True), (True, False)]
    for first_upper_flag, second_lower_flag in flag_pairs:
        with self.subTest(inca=first_upper_flag, incb=second_lower_flag):
            data = numpy.array([[0.45], [0.5], [0.55]],
                               dtype=numpy.float32)
            column_intervals = [
                (0.4, 0.5, False, first_upper_flag),
                (0.5, 0.6, second_lower_flag, False),
            ]
            woe = WOETransformer(intervals=[column_intervals])
            woe.fit(data)
            expected = woe.transform(data)

            model = to_onnx(
                woe, data, target_opset=TARGET_OPSET, verbose=0)
            session = InferenceSession(model.SerializeToString())
            outputs = session.run(None, {'X': data})
            assert_almost_equal(expected, outputs[0])
def test_woe_transformer_conv_weights_onnx(self):
    """Weighted WOETransformer: ``woe_transformer_to_onnx`` matches ``transform``."""
    data = numpy.array(
        [[0.2, 0.7, 0.9],
         [0.51, 0.71, 0.91],
         [0.7, 1.5, 0.92]],
        dtype=numpy.float32)
    first_column = [(0.4, 0.6, False, True)]
    second_column = [(0.9, numpy.inf), (-numpy.inf, 0.9)]
    woe = WOETransformer(
        intervals=[first_column, second_column],
        weights=[[2.7], [3.5, 6.7]])
    woe.fit(data)
    expected = woe.transform(data)

    model = woe_transformer_to_onnx(woe, TARGET_OPSET)
    try:
        session = InferenceSession(model.SerializeToString())
    except InvalidArgument as exc:
        # Dump the model so a loading failure can be diagnosed.
        raise AssertionError(
            "Cannot load model:\n%s" % str(model)) from exc
    outputs = session.run(None, {'X': data})
    assert_almost_equal(expected, outputs[0])
class OrdinalWOETransformer(BaseEstimator, TransformerMixin):
    """Chain an OrdinalEncoder with a weighted, non one-hot WOETransformer.

    After :meth:`fit`, ``encoder_`` holds the fitted OrdinalEncoder and
    ``woe_`` the WOETransformer fitted on the encoded data.
    """

    def __init__(self):
        TransformerMixin.__init__(self)
        BaseEstimator.__init__(self)

    def fit(self, X, y=None, sample_weight=None):
        """Fit the encoder on ``X``, then the WOETransformer on its output.

        Every encoded value ``i`` of feature ``j`` gets its own interval
        ``(i - 1, i, False, True)`` and the weight ``10 * j + i``.

        :param X: training data handed to ``OrdinalEncoder.fit``
        :param y: ignored; optional so that ``fit(X)`` and
            ``fit_transform(X)`` can be called without a target
        :param sample_weight: ignored
        :return: self
        """
        self.encoder_ = OrdinalEncoder().fit(X)
        tr = self.encoder_.transform(X)
        # The intervals list holds one entry per feature, so the maximum
        # is taken over the rows (axis=0). axis=1 would give one value
        # per sample and size the intervals from unrelated rows.
        n_codes = (tr.max(axis=0) + 1).astype(np.int64)
        intervals = [
            [(i - 1, i, False, True) for i in range(0, m)]
            for m in n_codes
        ]
        weights = [
            [10 * j + i for i in range(len(inter))]
            for j, inter in enumerate(intervals)
        ]
        self.woe_ = WOETransformer(intervals, onehot=False, weights=weights)
        self.woe_.fit(tr)
        return self

    def transform(self, X):
        """Encode ``X`` with ``encoder_`` and map the result with ``woe_``."""
        tr = self.encoder_.transform(X)
        return self.woe_.transform(tr)
def test_woe_transformer(self):
    """Check fitted attributes, feature names and output of WOETransformer.

    The first column gets the same bounds with the four flag
    combinations, the second column two half-lines given as 2-tuples,
    and the third column no interval at all.
    """
    def first_column_intervals():
        # A fresh list on every call so that the transformer input and
        # the expected value never share an object.
        flag_pairs = [(False, False), (True, False),
                      (False, True), (True, True)]
        return [(0.5, 0.7, first_flag, second_flag)
                for first_flag, second_flag in flag_pairs]

    data = numpy.array(
        [[0.5, 0.7, 0.9],
         [0.51, 0.71, 0.91],
         [0.7, 0.5, 0.92]],
        dtype=numpy.float32)
    woe = WOETransformer(
        intervals=[first_column_intervals(),
                   [(0.9, numpy.inf), (-numpy.inf, 0.9)]])
    woe.fit(data)

    # Fitted state.
    self.assertEqual(woe.indices_, [(0, 4), (4, 6), (6, 7)])
    self.assertEqual(woe.n_dims_, 7)
    expected_intervals = [
        first_column_intervals(),
        [(0.9, numpy.inf, False, True), (-numpy.inf, 0.9, False, True)],
        None,
    ]
    self.assertEqual(woe.intervals_, expected_intervals)
    self.assertEqual(woe.weights_, [[1, 1, 1, 1], [1, 1], None])

    # Output column names.
    expected_names = [
        ']0.5,0.7[', '[0.5,0.7[', ']0.5,0.7]', '[0.5,0.7]',
        ']0.9,inf]', ']-inf,0.9]',
        'X2',
    ]
    self.assertEqual(woe.get_feature_names(), expected_names)

    # Transformed values.
    transformed = woe.transform(data)
    expected = numpy.array(
        [[0, 1, 0, 1, 0, 1, 0.9],
         [1, 1, 1, 1, 0, 1, 0.91],
         [0, 0, 1, 1, 0, 1, 0.92]],
        dtype=numpy.float32)
    assert_almost_equal(expected, transformed)
def woeenc_to_sklearn(op_mapping):
    """Convert a WOEEncoder mapping into a fitted WOETransformer.

    ``op_mapping`` must expose ``items()`` yielding
    ``(column index, mapping)`` pairs, where each mapping provides
    ``shape``, ``index`` and ``iloc`` (presumably a pandas Series --
    confirm against the caller). Every non-negative index value ``v``
    becomes the interval ``(v - 1, v, False, True)`` weighted by the
    matching mapping value; entries with a negative index are skipped.
    Columns absent from ``op_mapping`` keep the ``'passthrough'``
    placeholder and a ``None`` weight.

    :param op_mapping: mapping taken from the encoder
    :return: a ``WOETransformer(onehot=False)`` on which ``fit(None)``
        was called
    """
    column_intervals = []
    column_weights = []
    for col, mapping in op_mapping.items():
        # Pad both lists so that index `col` exists.
        while len(column_intervals) <= col:
            column_intervals.append('passthrough')
            column_weights.append(None)

        intervals = []
        weights = []
        for pos in range(mapping.shape[0]):
            upper = mapping.index[pos]
            if not (upper < 0):
                intervals.append(
                    (float(upper - 1), float(upper), False, True))
                weights.append(mapping.iloc[pos])
        column_intervals[col] = intervals
        column_weights[col] = weights

    transformer = WOETransformer(
        intervals=column_intervals, weights=column_weights, onehot=False)
    transformer.fit(None)
    return transformer
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer
from onnxruntime import InferenceSession
import matplotlib.pyplot as plt
from skl2onnx import to_onnx
from skl2onnx.sklapi import WOETransformer
# Imported only for its side effect: it registers the converter
# for WOETransformer.
import skl2onnx.sklapi.register  # noqa

# A single float32 column holding the values 0..9.
X = numpy.arange(10).astype(numpy.float32).reshape((-1, 1))

# Two intervals for that column, with one weight per interval.
intervals = [
    [(1., 3., False, False),
     (5., 7., True, True)]]
weights = [[55, 107]]

woe1 = WOETransformer(intervals, onehot=False, weights=weights)
woe1.fit(X)
prd = woe1.transform(X)

# Input values side by side with the transformed values.
df = pd.DataFrame({'X': X.ravel(), 'woe': prd.ravel()})
# Bare expression: presumably displayed by the example renderer -- confirm.
df

######################################
# One Hot
# +++++++
#
# With ``onehot=False`` the transformer outputs one column with the weights.
# With ``onehot=True`` it returns one column per interval instead.

woe2 = WOETransformer(intervals, onehot=True, weights=weights)
woe2.fit(X)
prd = woe2.transform(X)