Example #1
    def common_test_cast_regressor(self, dtype, input_type):
        model = CastRegressor(DecisionTreeRegressor(max_depth=2), dtype=dtype)
        data = numpy.array(
            [[0.1, 0.2, 3.1], [1, 1, 0], [0, 2, 1], [1, 0, 2],
             [0.1, 2.1, 1.1], [1.1, 0.1, 2.2],
             [-0.1, -2.1, -1.1], [-1.1, -0.1, -2.2],
             [0.2, 2.2, 1.2], [1.2, 0.2, 2.2]],
            dtype=numpy.float32)
        y = (numpy.sum(data, axis=1, keepdims=0) +
             numpy.random.randn(data.shape[0]))
        model.fit(data, y)
        # the predictions must already have the requested dtype
        pred = model.predict(data)
        assert pred.dtype == dtype
        model_onnx = convert_sklearn(
            model, "cast",
            [("input", FloatTensorType([None, 3]))],
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            data, model, model_onnx,
            basename="SklearnCastRegressor{}".format(
                input_type.__class__.__name__))
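
A minimal sketch, assuming the helper above lives in a unittest.TestCase and
that DoubleTensorType is imported next to FloatTensorType, of how it could be
invoked for both target dtypes (the method names below are hypothetical):

    def test_cast_regressor_float32(self):
        # hypothetical wrapper: check predictions cast to float32
        self.common_test_cast_regressor(numpy.float32, FloatTensorType)

    def test_cast_regressor_float64(self):
        # hypothetical wrapper: check predictions cast to float64
        self.common_test_cast_regressor(numpy.float64, DoubleTensorType)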
Example #2
    def test_pipeline(self):
        def maxdiff(a1, a2):
            d = numpy.abs(a1.ravel() - a2.ravel())
            return d.max()

        X, y = make_regression(10000, 10, random_state=3)
        X_train, X_test, y_train, _ = train_test_split(X, y, random_state=3)
        Xi_train, yi_train = X_train.copy(), y_train.copy()
        Xi_test = X_test.copy()
        for i in range(X.shape[1]):
            Xi_train[:, i] = (
                Xi_train[:, i] * math.pi * 2 ** i).astype(numpy.int64)
            Xi_test[:, i] = (
                Xi_test[:, i] * math.pi * 2 ** i).astype(numpy.int64)
        max_depth = 10
        Xi_test = Xi_test.astype(numpy.float32)

        # model 1
        model1 = Pipeline([('scaler', StandardScaler()),
                           ('dt', DecisionTreeRegressor(max_depth=max_depth))])
        model1.fit(Xi_train, yi_train)
        exp1 = model1.predict(Xi_test)
        onx1 = to_onnx(model1,
                       X_train[:1].astype(numpy.float32),
                       target_opset=TARGET_OPSET)
        sess1 = InferenceSession(onx1.SerializeToString())
        got1 = sess1.run(None, {'X': Xi_test})[0]
        md1 = maxdiff(exp1, got1)

        # model 2
        model2 = Pipeline([
            ('cast64', CastTransformer(dtype=numpy.float64)),
            ('scaler', StandardScaler()),
            ('cast', CastTransformer()),
            ('dt', CastRegressor(DecisionTreeRegressor(max_depth=max_depth),
                                 dtype=numpy.float32)),
        ])
        model2.fit(Xi_train, yi_train)
        exp2 = model2.predict(Xi_test)
        onx2 = to_onnx(model2,
                       X_train[:1].astype(numpy.float32),
                       options={StandardScaler: {'div': 'div_cast'}},
                       target_opset=TARGET_OPSET)
        sess2 = InferenceSession(onx2.SerializeToString())
        got2 = sess2.run(None, {'X': Xi_test})[0]
        md2 = maxdiff(exp2, got2)
        assert md2 <= md1
        assert md2 <= 0.0
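
A small optional check, not part of the test above; it assumes the onx2 model
converted from model2 is still in scope. Listing the operator types of the
converted graph should surface the Cast nodes inserted by CastTransformer and
CastRegressor:

# operator types used by the converted model2 graph (sketch)
print(sorted({node.op_type for node in onx2.graph.node}))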
############################################
# Perfect, no discrepancies at all.

print(diff(skl5, ort5))

##############################################
# CastRegressor
# +++++++++++++
#
# The previous example showed that the type difference in the
# predicted values explains the small discrepancies between
# :epkg:`scikit-learn` and :epkg:`onnxruntime`, but these
# discrepancies cannot be removed with the current ONNX
# specification. Another option is to cast the predictions into
# floats directly in the :epkg:`scikit-learn` pipeline.

ctree = CastRegressor(DecisionTreeRegressor(max_depth=max_depth))
ctree.fit(Xi_train, yi_train)

onx6 = to_onnx(ctree, Xi_train[:1].astype(numpy.float32))

sess6 = InferenceSession(onx6.SerializeToString(),
                         providers=['CPUExecutionProvider'])

skl6 = ctree.predict(X32)
ort6 = sess6.run(None, {'X': X32})[0]

print(diff(skl6, ort6))

##############################
# Success!
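
#############################################
# Final check (an added sketch, not part of the original script): both
# outputs now share the same float32 type, which is why the discrepancy
# disappears.

print(skl6.dtype, ort6.dtype)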