def onnxrt_python_RandomForestRegressor_dtype(
        self, dtype, n=37, full=False, use_hist=False, ntrees=10,
        runtime='python'):
    """
    Trains a tree-based regressor on iris, converts it with ``to_onnx``
    and compares ``OnnxInference`` predictions with scikit-learn ones.

    :param dtype: numpy dtype used for the conversion and the test rows
    :param n: reassigned inside the body (34 if *full*, 37 otherwise),
        the value given by the caller is never read
    :param full: if True, the estimator is built with its default
        hyper-parameters and the split uses ``random_state=13``
    :param use_hist: if True, a ``HistGradientBoostingRegressor`` is
        trained instead of a ``RandomForestRegressor``
    :param ntrees: number of trees (or iterations) when *full* is False
    :param runtime: accepted but not read by this body
    """
    iris = load_iris()
    features, target = iris.data, iris.target
    seed = 13 if full else 11
    X_train, X_test, y_train, _ = train_test_split(
        features, target, random_state=seed)
    X_test = X_test.astype(dtype)

    # Pick the estimator: hist gradient boosting or random forest,
    # default hyper-parameters (full) or a small, shallow model.
    if use_hist and full:
        clr = HistGradientBoostingRegressor()
    elif use_hist:
        clr = HistGradientBoostingRegressor(max_iter=ntrees, max_depth=4)
    elif full:
        clr = RandomForestRegressor(n_jobs=1)
    else:
        clr = RandomForestRegressor(
            n_estimators=ntrees, n_jobs=1, max_depth=4)
    clr.fit(X_train, y_train)

    model_def = to_onnx(clr, X_train.astype(dtype), rewrite_ops=True)
    oinf = OnnxInference(model_def)
    text = "\n".join(str(node.ops_) for node in oinf.sequence_)
    self.assertIn("TreeEnsembleRegressor", text)

    # Keep five rows, then append two slightly rescaled copies of the
    # first kept row.
    n = 34 if full else 37
    X_test = X_test[n:n + 5]
    first_row = X_test[:1]
    X_test = numpy.vstack(
        [X_test, first_row.copy() * 1.01, first_row.copy() * 0.99])

    y = oinf.run({'X': X_test})
    self.assertEqual(sorted(y), ['variable'])
    lexp = clr.predict(X_test)
    if dtype == numpy.float32:
        self.assertEqualArray(lexp, y['variable'], decimal=5)
    else:
        try:
            self.assertEqualArray(lexp, y['variable'])
        except AssertionError as e:
            # Attach the ONNX model to the failure to ease debugging.
            raise AssertionError(
                "---------\n{}\n-----".format(model_def)) from e

    runtime_op = oinf.sequence_[0].ops_.rt_
    self.assertEqual(runtime_op.same_mode_, True)
    self.assertNotEmpty(runtime_op.nodes_modes_)
def test_lightgbm_booster_classifier(self):
    """
    Trains a lightgbm booster (``rf`` boosting, binary objective) on a
    four-row dataset and checks ``to_onnx`` returns a non-empty model.
    """
    from lightgbm import Dataset, train as lgb_train

    features = numpy.array([[0, 1], [1, 1], [2, 0], [1, 2]],
                           dtype=numpy.float32)
    labels = [0, 1, 0, 1]
    params = {
        'boosting_type': 'rf',
        'objective': 'binary',
        'n_estimators': 3,
        'min_child_samples': 1,
        'subsample_freq': 1,
        'bagging_fraction': 0.5,
        'feature_fraction': 0.5,
    }
    dataset = Dataset(features, label=labels)
    booster = lgb_train(params, dataset)
    converted = to_onnx(booster, features, verbose=0, rewrite_ops=True,
                        target_opset=TARGET_OPSET)
    self.assertNotEmpty(converted)
def test_onnxt_knnimputer(self):
    """
    Converts a fitted ``KNNImputer`` and checks the python runtime
    returns the same imputed matrix as ``transform``.
    """
    nan = numpy.nan
    x_train = numpy.array(
        [[1, 2, nan, 12],
         [3, nan, 3, 13],
         [1, 4, nan, 1],
         [nan, 4, 3, 12]], dtype=numpy.float32)
    x_test = numpy.array(
        [[1.3, 2.4, nan, 1],
         [-1.3, nan, 3.1, nan]], dtype=numpy.float32)

    imputer = KNNImputer(n_neighbors=3, metric='nan_euclidean')
    imputer.fit(x_train)

    onx = to_onnx(imputer, x_train)
    session = OnnxInference(onx, runtime='python')
    outputs = session.run({'X': x_test})

    self.assertEqual(sorted(outputs), ['variable'])
    self.assertEqualArray(
        imputer.transform(x_test), outputs['variable'], decimal=6)
def test_speedup_kmeans64_onnx(self):
    """
    Wraps ``KMeans`` into ``OnnxSpeedupCluster`` (float32 not enforced),
    converts the wrapper and compares labels and scores.
    """
    iris = load_iris()
    X = iris.data
    y = iris.target

    speedup = OnnxSpeedupCluster(
        KMeans(n_clusters=3), target_opset=self.opset(),
        enforce_float32=False)
    speedup.fit(X, y)
    expected_label = speedup.predict(X)
    expected_score = speedup.transform(X)

    # Only the first row is given to the converter.
    onx = to_onnx(speedup, X[:1])
    outputs = OnnxInference(onx).run({'X': X})

    self.assertEqualArray(expected_score, outputs['scores'])
    self.assertEqualArray(expected_label, outputs['label'])
def test_onnxt_iris_adaboost_regressor_lr(self):
    """
    Converts an ``AdaBoostRegressor`` boosting ``LinearRegression`` and
    compares the ONNX predictions with scikit-learn (5 decimals).
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, __ = train_test_split(X, y, random_state=11)

    # The weak learner is passed positionally: the keyword was renamed
    # from ``base_estimator`` to ``estimator`` in scikit-learn, the first
    # positional slot works with both names.
    clr = AdaBoostRegressor(LinearRegression(), n_estimators=3)
    clr.fit(X_train, y_train)

    model_def = to_onnx(clr, X_train.astype(numpy.float32))
    X_test = X_test.astype(numpy.float32)
    oinf = OnnxInference(model_def)
    res0 = clr.predict(X_test).astype(numpy.float32)
    res1 = oinf.run({'X': X_test})
    self.assertEqualArray(res0, res1['variable'].ravel(), decimal=5)
def test_onnxt_lrreg_iris_run(self):
    """
    Converts a ``LinearRegression`` with the opset returned by
    ``get_opset_number_from_onnx`` and compares predictions on iris.
    """
    iris = load_iris()
    features, target = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(
        features, target, random_state=11)

    clr = LinearRegression()
    clr.fit(X_train, y_train)

    opset = get_opset_number_from_onnx()
    model_def = to_onnx(clr, X_train.astype(numpy.float32),
                        target_opset=opset)
    oinf = OnnxInference(model_def)
    outputs = oinf.run({'X': X_test})
    expected = clr.predict(X_test)

    self.assertEqual(sorted(outputs), ['variable'])
    self.assertEqualArray(expected, outputs['variable'].ravel(), decimal=6)
def test_speedup_regressor64_onnx_numba(self):
    """
    Wraps ``LinearRegression`` into ``OnnxSpeedupRegressor`` with the
    ``numba`` runtime, converts the wrapper and compares predictions.
    """
    iris = load_iris()
    X = iris.data
    y = iris.target

    speedup = OnnxSpeedupRegressor(
        LinearRegression(), target_opset=self.opset(),
        enforce_float32=False, runtime='numba')
    speedup.fit(X, y)
    # print(speedup.numpy_code_)
    expected = speedup.predict(X)

    onx = to_onnx(speedup, X[:1])
    session = OnnxInference(onx)
    got = session.run({'X': X})['variable']
    self.assertEqualArray(expected, got)
def test_onnxt_gpr_iris(self):
    """
    Checks ``onnx_optimisations`` keeps the ``GPmean`` output of a
    converted ``GaussianProcessRegressor`` unchanged and that the
    optimised graph text has no ``CDist`` node.
    """
    iris = load_iris()
    features, target = iris.data, iris.target
    X_train, _, y_train, __ = train_test_split(
        features, target, random_state=11)

    gpr = GaussianProcessRegressor(ExpSineSquared(), alpha=20.)
    gpr.fit(X_train, y_train)

    model_def = to_onnx(gpr, X_train)
    before = OnnxInference(model_def).run({'X': X_train})

    new_model = onnx_optimisations(model_def)
    after = OnnxInference(new_model).run({'X': X_train})

    self.assertEqualArray(before['GPmean'], after['GPmean'])
    self.assertNotIn('op_type: "CDist"', str(new_model))
def test_lgbm_regressor10(self):
    """
    Converts a small ``LGBMRegressor`` twice, with and without the
    ``split`` option, and compares both outputs with lightgbm.
    """
    from lightgbm import LGBMRegressor

    iris = load_iris()
    X = iris.data.astype(numpy.float32)
    y = iris.target
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=0)

    reg = LGBMRegressor(max_depth=2, n_estimators=4, seed=0)
    reg.fit(X_train, y_train)
    expected = reg.predict(X_test)

    # float
    onx_plain = to_onnx(reg, X_train, rewrite_ops=True)
    got1 = OnnxInference(onx_plain).run({'X': X_test})['variable']

    # float split
    onx_split = to_onnx(reg, X_train, options={'split': 2},
                        rewrite_ops=True, target_opset=TARGET_OPSET)
    got2 = OnnxInference(onx_split).run({'X': X_test})['variable']

    # final check
    for got in (got1, got2):
        self.assertEqualArray(expected, got, decimal=5)
def test_speedup_classifier64_onnx_numba_python(self):
    """
    Wraps ``LogisticRegression`` into ``OnnxSpeedupClassifier`` with
    ``runtime='numba'`` and ``nopython=False``, converts the wrapper
    and compares labels and probabilities.
    """
    iris = load_iris()
    X = iris.data
    y = iris.target

    speedup = OnnxSpeedupClassifier(
        LogisticRegression(), target_opset=self.opset(),
        enforce_float32=False, runtime='numba', nopython=False)
    speedup.fit(X, y)
    # print(speedup.numpy_code_)
    expected_label = speedup.predict(X)
    expected_proba = speedup.predict_proba(X)

    onx = to_onnx(speedup, X[:1])
    outputs = OnnxInference(onx).run({'X': X})

    self.assertEqualArray(expected_proba, outputs['probabilities'])
    self.assertEqualArray(expected_label, outputs['label'])
def test_function_regressor3_float64(self):
    """
    Converts ``CustomLinearRegressor3`` fitted on random float64 data
    and compares the ONNX output with ``predict`` and with
    ``custom_linear_regressor_converter3``.
    """
    X = numpy.random.randn(20, 2).astype(numpy.float64)
    noise = numpy.random.randn(X.shape[0]).astype(numpy.float64)
    y = X.sum(axis=1) + noise

    model = CustomLinearRegressor3()
    model.fit(X, y)
    expected = model.predict(X)
    self.assertIsInstance(expected, numpy.ndarray)

    onx = to_onnx(model, X.astype(numpy.float64))
    outputs = OnnxInference(onx).run({'X': X})
    self.assertEqualArray(expected, outputs['variable'])

    direct = custom_linear_regressor_converter3(X, op_=model)
    self.assertEqualArray(direct, outputs['variable'])
def test_function_regressor_onnx(self):
    """
    Converts ``CustomLinearRegressorOnnx`` fitted on random float64
    data; predictions taken before and after the conversion are both
    compared with the ONNX output.
    """
    X = numpy.random.randn(20, 2).astype(numpy.float64)
    noise = numpy.random.randn(X.shape[0]).astype(numpy.float64)
    y = X.sum(axis=1) + noise

    model = CustomLinearRegressorOnnx()
    model.fit(X, y)
    before = model.predict(X)  # pylint: disable=E1101
    self.assertIsInstance(before, numpy.ndarray)

    onx = to_onnx(model, X.astype(numpy.float64))
    session = OnnxInference(onx)
    after = model.predict(X)  # pylint: disable=E1101
    self.assertIsInstance(after, numpy.ndarray)

    outputs = session.run({'X': X})
    self.assertEqualArray(before, outputs['variable'])
    self.assertEqualArray(after, outputs['variable'])
def test_onnxrt_python_KMeans(self):
    """
    Converts a ``KMeans`` fitted on iris and compares the ``label``
    and ``scores`` outputs with ``predict`` and ``transform``.
    """
    iris = load_iris()
    features, target = iris.data, iris.target
    X_train, X_test, __, _ = train_test_split(
        features, target, random_state=11)

    km = KMeans()
    km.fit(X_train)

    model_def = to_onnx(km, X_train.astype(numpy.float32))
    session = OnnxInference(model_def)
    outputs = session.run({'X': X_test.astype(numpy.float32)})
    self.assertEqual(sorted(outputs), ['label', 'scores'])

    expected_labels = km.predict(X_test)
    self.assertEqualArray(expected_labels, outputs['label'])
    expected_scores = km.transform(X_test)
    self.assertEqualArray(expected_scores, outputs['scores'], decimal=4)
def test_onnxrt_python_one_class_svm(self):
    """
    Converts ``OneClassSVM`` for four kernels, in float64 and float32,
    and compares the ``scores`` output with ``decision_function``.

    Every (dtype, kernel) pair runs in its own ``subTest`` so that a
    failure names the kernel and does not hide the remaining ones.
    The float32 model is additionally run with runtime
    ``onnxruntime1``.
    """
    X = numpy.array([[0, 1, 2], [44, 36, 18], [-4, -7, -5]],
                    dtype=numpy.float32)
    kernels = ['linear', 'sigmoid', 'rbf', 'poly']

    X64 = X.astype(numpy.float64)
    for kernel in kernels:
        with self.subTest(dtype='float64', kernel=kernel):
            model = OneClassSVM(kernel=kernel).fit(X)
            model_onnx = to_onnx(model, X64)
            self.assertIn("SVMRegressorDouble", str(model_onnx))
            oinf = OnnxInference(model_onnx, runtime='python')
            scores = oinf.run({'X': X64})['scores']
            dec = model.decision_function(X64)
            self.assertEqualArray(scores, dec, decimal=5)

    X32 = X.astype(numpy.float32)
    for kernel in kernels:
        with self.subTest(dtype='float32', kernel=kernel):
            model = OneClassSVM(kernel=kernel).fit(X)
            model_onnx = to_onnx(model, X32)
            oinf = OnnxInference(model_onnx, runtime='python')
            scores = oinf.run({'X': X32})['scores']
            dec = model.decision_function(X32)
            self.assertEqualArray(scores, dec, decimal=4)

            # Same model with onnxruntime: the IR version is set from
            # the target opset before the model is loaded.
            model_onnx.ir_version = get_ir_version(TARGET_OPSET)
            oinf = OnnxInference(model_onnx, runtime='onnxruntime1')
            scores = oinf.run({'X': X32})['scores']
            dec = model.decision_function(X32)
            self.assertEqualArray(scores.ravel(), dec.ravel(), decimal=4)
def common_test_function_classifier_embedded(self, dtype, est):
    """
    Wraps *est* into ``AnyCustomClassifierOnnx``, fits it on random
    data of type *dtype*, converts it without zipmap and compares
    labels and probabilities with the ONNX outputs.

    :param dtype: numpy dtype of the features, also the dtype expected
        for the probabilities returned by ``predict_proba``
    :param est: estimator handed to ``AnyCustomClassifierOnnx``
    """
    X = numpy.random.randn(20, 2).astype(dtype)
    noise = numpy.random.randn(X.shape[0]).astype(numpy.float32)
    y = ((X.sum(axis=1) + noise) >= 0).astype(numpy.int64)

    model = AnyCustomClassifierOnnx(est)
    model.fit(X, y)

    no_zipmap = {id(model): {'zipmap': False}}
    onx = to_onnx(model, X.astype(dtype), options=no_zipmap)
    session = OnnxInference(onx)

    labels = model.predict(X)  # pylint: disable=E1101
    proba = model.predict_proba(X)  # pylint: disable=E1101
    outputs = session.run({'X': X})

    self.assertEqual(dtype, proba.dtype)
    self.assertEqualArray(labels, outputs['label'].ravel())
    self.assertEqualArray(proba, outputs['probabilities'])
def test_onnxrt_python_SVR_double(self):
    """
    Converts an ``SVR`` with float64 inputs and compares shape and
    values of the ONNX output with ``predict``.
    """
    iris = load_iris()
    features, target = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(
        features, target, random_state=11)

    svr = SVR()
    svr.fit(X_train, y_train)

    model_def = to_onnx(svr, X_train.astype(numpy.float64))
    oinf = OnnxInference(model_def)
    text = "\n".join(str(node.ops_) for node in oinf.sequence_)
    self.assertIn("SVMRegressor", text)

    outputs = oinf.run({'X': X_test.astype(numpy.float64)})
    self.assertEqual(sorted(outputs), ['variable'])

    expected = svr.predict(X_test)
    self.assertEqual(expected.shape, outputs['variable'].shape)
    self.assertEqualArray(expected, outputs['variable'], decimal=5)
def test_get_train_initializer(self):
    """
    Checks ``get_train_initializer`` returns the initializers
    ``intercept`` and ``coef`` for a converted ``LinearRegression``
    (operator ``LinearRegressor`` black-listed).
    """
    from onnxcustom.utils.orttraining_helper import get_train_initializer

    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, _, y_train, __ = train_test_split(X, y)

    reg = LinearRegression()
    reg.fit(X_train, y_train)
    # coefficients are stored as a single row before the conversion
    reg.coef_ = reg.coef_.reshape((1, -1))

    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    found = get_train_initializer(onx)
    self.assertEqual({'intercept', 'coef'}, set(found))
def test_onnxrt_python_SimpleImputer(self):
    """
    Converts a ``SimpleImputer`` fitted on iris with injected NaN
    values, compares the output with ``transform`` and checks a
    single row given as a 1D array raises ``RuntimeError``.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    # every tenth row, shifted by the column index, becomes missing
    for col in range(X.shape[1]):
        X[col::10, col] = numpy.nan
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)

    imputer = SimpleImputer()
    imputer.fit(X_train, y_train)

    model_def = to_onnx(imputer, X_train.astype(numpy.float32))
    oinf = OnnxInference(model_def)
    outputs = oinf.run({'X': X_test})
    self.assertEqual(sorted(outputs), ['variable'])

    expected = imputer.transform(X_test)
    self.assertEqualArray(expected, outputs['variable'], decimal=6)

    def run_one_row():
        return oinf.run({'X': X_test[0]})

    self.assertRaise(run_one_row, RuntimeError)
def test_ort_gradient_optimizers_use_numpy_pickle_w(self):
    """
    Trains an ``OrtGradientOptimizer`` with sample weights and
    ``use_numpy=True``; the optimizer goes through a pickle round trip
    before the first training and again before the second one.
    """
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer

    def pickle_roundtrip(obj):
        # serializes then restores the object in memory
        return pickle.loads(pickle.dumps(obj))

    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    w = (numpy.random.rand(y.shape[0]) + 1).astype(X.dtype)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)

    reg = LinearRegression()
    reg.fit(X_train, y_train, w_train)
    reg.coef_ = reg.coef_.reshape((1, -1))

    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    set_model_props(onx, {'info': 'unit test'})
    onx_loss = add_loss_output(onx, weight_name='weight')
    inits = ['intercept', 'coef']

    # untrained optimizer -> pickle -> first training
    train_session1 = pickle_roundtrip(OrtGradientOptimizer(onx_loss, inits))
    train_session1.fit(X_train, y_train, w_train, use_numpy=True)

    # trained optimizer -> pickle -> state check -> second training
    train_session = pickle_roundtrip(train_session1)
    self.assertEqual(len(train_session.get_state()), 2)
    train_session.fit(X_train, y_train, w_train, use_numpy=True)
    self.assertEqual(len(train_session.get_state()), 2)

    text = repr(train_session)
    self.assertIn("OrtGradientOptimizer(model_onnx=", text)
    self.assertIn("learning_rate='invscaling'", text)

    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    self.assertFalse(any(numpy.isnan(value) for value in losses))
def test_onnxrt_python_DecisionTreeRegressor2(self):
    """
    Converts a ``DecisionTreeRegressor`` trained on a two-column
    target (the iris target stacked twice) and compares shape and
    values of the ONNX output with ``predict``.
    """
    iris = load_iris()
    X = iris.data
    y = numpy.vstack([iris.target, iris.target]).T
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)

    tree = DecisionTreeRegressor()
    tree.fit(X_train, y_train)

    model_def = to_onnx(tree, X_train.astype(numpy.float32))
    oinf = OnnxInference(model_def)
    text = "\n".join(str(node.ops_) for node in oinf.sequence_)
    self.assertIn("TreeEnsembleRegressor", text)

    outputs = oinf.run({'X': X_test.astype(numpy.float32)})
    self.assertEqual(sorted(outputs), ['variable'])

    expected = tree.predict(X_test)
    self.assertEqual(expected.shape, outputs['variable'].shape)
    self.assertEqualArray(expected, outputs['variable'])
def test_onnxt_iris_adaboost_regressor_dt(self):
    """
    Converts an ``AdaBoostRegressor`` boosting depth-3 decision trees
    and compares the python runtime with scikit-learn on six test
    rows (the first three and the last three).
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, __ = train_test_split(X, y, random_state=11)
    y_train = y_train.astype(numpy.float32)

    # The weak learner is passed positionally: the keyword was renamed
    # from ``base_estimator`` to ``estimator`` in scikit-learn, the first
    # positional slot works with both names.
    clr = AdaBoostRegressor(
        DecisionTreeRegressor(max_depth=3), n_estimators=3)
    clr.fit(X_train, y_train)

    X_test = X_test.astype(numpy.float32)
    X_test = numpy.vstack([X_test[:3], X_test[-3:]])
    res0 = clr.predict(X_test).astype(numpy.float32)

    model_def = to_onnx(clr, X_train.astype(numpy.float32))
    oinf = OnnxInference(model_def, runtime='python')
    res1 = oinf.run({'X': X_test})
    self.assertEqualArray(res0, res1['variable'].ravel())
def test_decisiontreeregressor_decision_path(self):
    """
    Converts a ``DecisionTreeRegressor`` with option ``decision_path``
    and compares both the predictions and the decision paths.
    """
    X, y = make_classification(10, n_features=4, random_state=42)
    X = X[:, :2].astype(numpy.float32)
    tree = DecisionTreeRegressor(max_depth=2)
    tree.fit(X, y)

    with_path = {id(tree): {'decision_path': True}}
    model_onnx = to_onnx(tree, X, options=with_path)
    outputs = OnnxInference(model_onnx).run({'X': X})

    predicted = tree.predict(X)
    self.assertEqualArray(predicted, outputs['variable'].ravel())

    paths = tree.decision_path(X)
    expected_paths = binary_array_to_string(paths.todense())
    self.assertEqual(
        expected_paths, outputs['decision_path'].ravel().tolist())
def test_xgboost_regressor(self):
    """
    Converts an ``XGBRegressor`` with ``to_onnx`` and, when available,
    with ``onnxmltools.convert.convert_xgboost``; every converted
    model is run with runtimes ``python`` and ``onnxruntime1`` and the
    maximum absolute difference with xgboost must be below 1e-5.

    The test is reported as skipped (instead of silently passing) when
    onnxmltools is missing or its version is not above 1.11.
    """
    try:
        from onnxmltools import __version__
    except ImportError:
        self.skipTest("onnxmltools is not installed")
    if compare_module_version(__version__, '1.11') <= 0:
        self.skipTest("onnxmltools > 1.11 is required")
    from xgboost import XGBRegressor
    try:
        from onnxmltools.convert import convert_xgboost
    except ImportError:
        convert_xgboost = None

    X, y = self.data_X, self.data_y
    model = XGBRegressor(max_depth=8, n_estimators=100,
                         learning_rate=0.000001)
    model.fit(X, y)
    expected = model.predict(X)

    model_onnx = to_onnx(model, X)
    model_onnx2 = None
    if convert_xgboost is not None:
        try:
            model_onnx2 = convert_xgboost(
                model, initial_types=[
                    ('X', FloatTensorType([None, X.shape[1]]))])
        except RuntimeError as e:
            # An opset newer than the installed one is tolerated: the
            # onnxmltools model is simply not compared.
            if "is higher than the number of the installed" not in str(e):
                raise

    for i, mo in enumerate([model_onnx, model_onnx2]):
        if mo is None:
            continue
        for rt in ['python', 'onnxruntime1']:
            with self.subTest(i=i, rt=rt):
                oinf = OnnxInference(mo, runtime=rt)
                got = oinf.run({'X': X})['variable']
                diff = numpy.abs(got.ravel() - expected.ravel()).max()
                if __name__ == "__main__":
                    print("xgb32", "mlprod" if i == 0 else "mltool",
                          rt, diff)
                self.assertLess(diff, 1e-5)
def wtest_ort_gradient_optimizers_fw_sgd_reg(self, use_weight):
    """
    Trains an ``OrtGradientForwardBackwardOptimizer`` with a learning
    rate of 1e10 and checks ``fit`` raises ``ConvergenceError`` with
    fewer than two losses recorded.

    :param use_weight: if True, sample weights are used both to fit
        the ``SGDRegressor`` and to train the optimizer
    """
    from onnxcustom.training.optimizers_partial import (
        OrtGradientForwardBackwardOptimizer)
    from onnxcustom.training.sgd_learning_rate import (LearningRateSGD)
    from onnxcustom.training.sgd_learning_loss import SquareLearningLoss

    X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
    y = numpy.arange(X.shape[0]).astype(numpy.float32).reshape((-1, 1))
    y[0, 0] += 1
    y[-1, 0] += 1
    w = (numpy.random.rand(y.shape[0]) + 1).astype(numpy.float32)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)

    reg = SGDRegressor()
    fit_kwargs = {}
    if use_weight:
        fit_kwargs['sample_weight'] = w_train.astype(numpy.float64)
    reg.fit(X_train, y_train.ravel(), **fit_kwargs)

    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    inits = ['coef', 'intercept']
    weight_name = 'weight' if use_weight else None
    train_session = OrtGradientForwardBackwardOptimizer(
        onx, inits, weight_name=weight_name,
        learning_rate=LearningRateSGD(1e10),
        learning_loss=SquareLearningLoss(),
        warm_start=False, max_iter=100, batch_size=10,
        enable_logging=False)
    self.assertIsInstance(train_session.learning_loss, SquareLearningLoss)

    y_train = y_train.reshape((-1, 1))

    def fit_with_weights():
        return train_session.fit(
            X_train, y_train, w_train.reshape((-1, 1)))

    def fit_without_weights():
        return train_session.fit(X_train, y_train)

    training = fit_with_weights if use_weight else fit_without_weights
    self.assertRaise(training, ConvergenceError)

    losses = train_session.train_losses_
    self.assertLess(len(losses), 2)
def test_rt_MLPRegressor_simple_test(self):
    """
    Converts an ``MLPRegressor`` fitted on iris and checks the python
    runtime returns an output ``variable`` of shape ``(38, 1)``.

    The ``skl2onnx`` logger is disabled for the duration of the test
    only; its previous state is restored even if the test fails.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    logger = getLogger('skl2onnx')
    was_disabled = logger.disabled
    logger.disabled = True
    try:
        iris = load_iris()
        X, y = iris.data, iris.target
        X_train, X_test, y_train, _ = train_test_split(X, y)
        clr = MLPRegressor()
        clr.fit(X_train, y_train)

        x2 = X_test.astype(numpy.float32)
        onx = to_onnx(clr, x2)
        pyrun = OnnxInference(onx, runtime="python")
        res = pyrun.run({'X': x2})
        self.assertIn('variable', res)
        self.assertEqual(res['variable'].shape, (38, 1))
    finally:
        # do not leak the disabled logger into the following tests
        logger.disabled = was_disabled
def test_function_cluster3_float64(self):
    """
    Converts ``CustomCluster3`` fitted on random float64 data and
    compares labels and scores with the ONNX outputs and with
    ``custom_cluster_converter3``.
    """
    X = numpy.random.randn(20, 2).astype(numpy.float64)
    noise = numpy.random.randn(X.shape[0]).astype(numpy.float32)
    y = ((X.sum(axis=1) + noise) >= 0).astype(numpy.int64)

    model = CustomCluster3()
    model.fit(X, y)

    onx = to_onnx(model, X.astype(numpy.float64))
    session = OnnxInference(onx)
    labels = model.predict(X)
    distances = model.transform(X)
    outputs = session.run({'X': X})
    self.assertEqualArray(labels, outputs['label'])
    self.assertEqualArray(distances, outputs['scores'])

    direct_labels, direct_scores = custom_cluster_converter3(  # pylint: disable=E0633
        X, op_=model)
    self.assertEqualArray(direct_labels, outputs['label'])
    self.assertEqualArray(direct_scores, outputs['scores'])
def test_graph_distance_profile(self):
    """
    Profiles ``onnx_graph_distance`` applied to a converted ``KMeans``
    compared with itself and checks the report contains ``cumtime``.
    The ONNX model is also written to ``temp_kmeans.onnx``.
    """
    X = load_iris().data.astype(numpy.float32)
    km = KMeans(n_clusters=3)
    km.fit(X)
    model_onnx = to_onnx(km, X, target_opset=13)
    with open("temp_kmeans.onnx", "wb") as f:
        f.write(model_onnx.SerializeToString())

    # folder three levels above this file, given to the profiler
    this_folder = os.path.abspath(os.path.dirname(__file__))
    rootrem = os.path.normpath(
        os.path.join(this_folder, "..", "..", ".."))

    def compute_distance():
        return onnx_graph_distance(model_onnx, model_onnx, verbose=1)

    res = self.profile(compute_distance, rootrem=rootrem)
    report = res[1]
    if __name__ == "__main__":
        print(report)
    self.assertIn("cumtime", report)
def test_function_classifier(self):
    """
    Converts ``CustomLinearClassifier`` fitted on random float32 data
    (zipmap disabled) and compares labels and probabilities.
    """
    X = numpy.random.randn(20, 2).astype(numpy.float32)
    noise = numpy.random.randn(X.shape[0]).astype(numpy.float32)
    y = ((X.sum(axis=1) + noise) >= 0).astype(numpy.int64)

    model = CustomLinearClassifier()
    model.fit(X, y)

    no_zipmap = {id(model): {'zipmap': False}}
    onx = to_onnx(model, X.astype(numpy.float32), options=no_zipmap)
    session = OnnxInference(onx)

    labels = model.predict(X)
    proba = model.predict_proba(X)
    outputs = session.run({'X': X})
    self.assertEqualArray(labels, outputs['label'].ravel())
    self.assertEqualArray(proba, outputs['probabilities'])
def test_onnxrt_python_LinearRegression(self):
    """
    Converts a ``LinearRegression`` fitted on iris, compares the
    predictions and checks the text of the runtime operators mentions
    ``LinearRegressor`` with post transform ``NONE``.
    """
    iris = load_iris()
    features, target = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(
        features, target, random_state=11)

    clr = LinearRegression()
    clr.fit(X_train, y_train)

    model_def = to_onnx(clr, X_train.astype(numpy.float32))
    oinf = OnnxInference(model_def)
    outputs = oinf.run({'X': X_test})
    expected = clr.predict(X_test)
    self.assertEqual(sorted(outputs), ['variable'])
    self.assertEqualArray(expected, outputs['variable'].ravel(), decimal=6)

    text = "\n".join(str(node.ops_) for node in oinf.sequence_)
    for fragment in ('op_type=LinearRegressor', "post_transform=b'NONE'"):
        self.assertIn(fragment, text)
def test_onnxrt_python_LogisticRegression_multi(self):
    """
    Converts a ``LogisticRegression`` (liblinear solver) fitted on
    iris and compares labels and probabilities; the probabilities
    output is turned into a matrix through a ``pandas.DataFrame``.
    """
    iris = load_iris()
    features, target = iris.data, iris.target
    X_train, X_test, y_train, _ = train_test_split(
        features, target, random_state=11)

    clr = LogisticRegression(solver="liblinear")
    clr.fit(X_train, y_train)

    model_def = to_onnx(clr, X_train.astype(numpy.float32))
    oinf = OnnxInference(model_def)
    outputs = oinf.run({'X': X_test})
    self.assertEqual(sorted(outputs),
                     ['output_label', 'output_probability'])

    expected_labels = clr.predict(X_test)
    self.assertEqualArray(expected_labels, outputs['output_label'])

    expected_proba = clr.predict_proba(X_test)
    rows = list(outputs['output_probability'])
    got_proba = pandas.DataFrame(rows).values
    self.assertEqualArray(expected_proba, got_proba, decimal=5)