def test_create_asv_benchmark_pyspy(self):
    # Creates the ASV benchmark with ``add_pyspy=True`` for a decision tree
    # classifier, then walks the generated ``pyspy`` folder and checks the
    # expected script exists and mentions ``setup_profile``.
    self.assertNotEmpty(mlprodict)
    folder = get_temp_folder(__file__, "temp_create_asv_benchmark_pyspy")
    created = create_asv_benchmark(
        location=folder, verbose=0,
        runtime=('scikit-learn', 'python', 'onnxruntime1'),
        exc=False, execute=True,
        models={'DecisionTreeClassifier'}, add_pyspy=True)
    self.assertNotEmpty(created)

    # The target opset is part of the expected script name.
    expected_suffix = (
        "bench_DecisionTreeClas_default_b_cl_1_4_%d_float_nozipmap.py"
        % TARGET_OPSET)
    found = False
    visited = []
    for root, _, filenames in os.walk(os.path.join(folder, 'pyspy')):
        for filename in filenames:
            if '__init__' in filename:
                continue
            visited.append(filename)
            script = os.path.join(root, filename)
            with open(script, 'r', encoding='utf-8') as f:
                text = f.read()
            if not filename.endswith(expected_suffix):
                continue
            if compare_module_version(sklearn.__version__, "0.21") < 0:
                continue
            if "setup_profile" not in text:
                raise AssertionError(text)
            found = True
    if not found:
        # Lists every visited file to help understand why nothing matched.
        raise AssertionError(
            "Visited files\n{}".format("\n".join(visited)))
def test_create_asv_benchmark_pyspy_knn(self):
    # Same check as the other py-spy benchmark tests, for a
    # KNeighborsClassifier: the expected script must exist in the ``pyspy``
    # folder and mention ``setup_profile``.
    self.assertNotEmpty(mlprodict)
    folder = get_temp_folder(
        __file__, "temp_create_asv_benchmark_pyspy_knn")
    created = create_asv_benchmark(
        location=folder, verbose=0,
        runtime=('scikit-learn', 'python', 'onnxruntime1'),
        exc=False, execute=True,
        models={'KNeighborsClassifier'}, add_pyspy=True)
    self.assertNotEmpty(created)

    # The target opset is part of the expected script name.
    expected_suffix = (
        "bench_KNNClas_default_k3_b_cl_64_algorithmbrute_n_neighbors3"
        "_10000_20_%d_double_optcdist-zm0.py" % TARGET_OPSET)
    found = False
    visited = []
    for root, _, filenames in os.walk(os.path.join(folder, 'pyspy')):
        for filename in filenames:
            if '__init__' in filename:
                continue
            visited.append(filename)
            script = os.path.join(root, filename)
            with open(script, 'r', encoding='utf-8') as f:
                text = f.read()
            if not filename.endswith(expected_suffix):
                continue
            if compare_module_version(sklearn.__version__, "0.21") < 0:
                continue
            if "setup_profile" not in text:
                raise AssertionError(text)
            found = True
    if not found:
        # Lists every visited file to help understand why nothing matched.
        raise AssertionError(
            "Visited files\n{}".format("\n".join(visited)))
def test_create_asv_benchmark_pyspy_compiled(self):
    # Creates the py-spy benchmark for an AdaBoostRegressor with runtimes
    # 'python' and 'python_compiled', then checks the expected script exists
    # in the ``pyspy`` folder and mentions ``setup_profile``.
    self.assertNotEmpty(mlprodict)
    folder = get_temp_folder(
        __file__, "temp_create_asv_benchmark_pyspy_compiled")
    created = create_asv_benchmark(
        location=folder, verbose=0,
        runtime=('python', 'python_compiled'),
        exc=False, execute=True,
        models={'AdaBoostRegressor'}, add_pyspy=True)
    self.assertNotEmpty(created)

    # The opset returned by get_opset_number_from_onnx is part of the
    # expected script name.
    expected_suffix = (
        "bench_AdaBoostReg_default_b_reg_nest10_1_4_%d_float_.py"
        % get_opset_number_from_onnx())
    found = False
    visited = []
    for root, _, filenames in os.walk(os.path.join(folder, 'pyspy')):
        for filename in filenames:
            if '__init__' in filename:
                continue
            visited.append(filename)
            script = os.path.join(root, filename)
            with open(script, 'r', encoding='utf-8') as f:
                text = f.read()
            if not filename.endswith(expected_suffix):
                continue
            if compare_module_version(sklearn.__version__, "0.21") < 0:
                continue
            if "setup_profile" not in text:
                raise AssertionError(text)
            found = True
    if not found:
        # Lists every visited file to help understand why nothing matched.
        raise AssertionError(
            "Visited files\n{}".format("\n".join(visited)))
def _register_converters_skl2onnx(exc=True): """ This functions registers additional converters for :epkg:`skl2onnx`. @param exc if True, raises an exception if a converter cannot registered (missing package for example) @return list of models supported by the new converters """ registered = [] try: import skl2onnx.sklapi.register # pylint: disable=W0611 from skl2onnx.sklapi import WOETransformer model = [WOETransformer] except ImportError as e: # pragma: no cover try: import skl2onnx from pyquickhelper.texthelper.version_helper import ( compare_module_version) if compare_module_version(skl2onnx.__version__, '1.9.3') < 0: # Too old version of skl2onnx. return [] except ImportError: pass if exc: raise e else: warnings.warn("Cannot register models from 'skl2onnx' due to %r." % e) model = None if model is not None: registered.extend(model) return registered
class TestRtValidateGaussianProcessOrt2(ExtTestCase):
    """
    Validates *GaussianProcessRegressor* with runtime ``'onnxruntime2'``.
    """

    @ignore_warnings(category=(UserWarning, ConvergenceWarning,
                               RuntimeWarning))
    @skipif_circleci("to investigate, shape of predictions are different")
    @unittest.skipIf(
        compare_module_version(ort_version, threshold) <= 0,
        reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_rt_GaussianProcessRegressor_debug_std(self):
        # Runs the validation in debug mode, restricted to the experiments
        # for which the second argument of filter_exp contains
        # 'b-reg-std-NSV', and checks that rows and log messages came out.
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        getLogger('skl2onnx').disabled = True

        messages = []

        def record(*args, **kwargs):
            # Stores the logged message instead of printing it.
            messages.append(" ".join(str(a) for a in args))

        debug = True
        results = enumerate_validated_operator_opsets(
            4, models={"GaussianProcessRegressor"}, fLOG=record,
            runtime='onnxruntime2', debug=debug,
            filter_exp=lambda m, s: "b-reg-std-NSV" in s)
        rows = list(results)
        self.assertGreater(len(rows), 1)
        expected_messages = 1 if debug else 0
        self.assertGreater(len(messages), expected_messages)
class TestRtValidateIsolationForest(ExtTestCase):
    """
    Validates *IsolationForest* with runtime ``'python'``.
    """

    @ignore_warnings(category=(UserWarning, ConvergenceWarning,
                               RuntimeWarning))
    @unittest.skipIf(compare_module_version(skl2onnx_version, '1.11') < 0,
                     reason="converter issue")
    def test_rt_IsolationForest_python(self):
        # Runs the validation in debug mode, skipping the experiments for
        # which the second argument of filter_exp contains '-64', and
        # checks that rows and log messages came out.
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        getLogger('skl2onnx').disabled = True

        # Only verbose when the file is run as a script.
        verbosity = 1 if __name__ == "__main__" else 0
        debug = True
        messages = []

        def record(*args, **kwargs):
            # Stores the logged message instead of printing it.
            messages.append(" ".join(str(a) for a in args))

        results = enumerate_validated_operator_opsets(
            verbosity, models={"IsolationForest"}, fLOG=record,
            runtime='python', debug=debug,
            filter_exp=lambda m, p: '-64' not in p)
        rows = list(results)
        self.assertGreater(len(rows), 1)
        expected_messages = 1 if debug else 0
        self.assertGreater(len(messages), expected_messages)
def test_lightgbm_regressor(self):
    # Compares the predictions of LGBMRegressor with the predictions
    # computed from its ONNX conversions (to_onnx and, when available,
    # onnxmltools' convert_lightgbm) for runtimes 'python' and
    # 'onnxruntime1'.
    try:
        from onnxmltools import __version__ as onnxmltools_version
    except ImportError:
        return
    if compare_module_version(onnxmltools_version, '1.11') <= 0:
        return
    from lightgbm import LGBMRegressor
    try:
        from onnxmltools.convert import convert_lightgbm
    except ImportError:
        convert_lightgbm = None

    X, y = self.data_X, self.data_y
    # The deepest trees are only tested when the file is run as a script.
    depths = [1, 10] if __name__ == "__main__" else [1]
    for ne in [1, 2, 10, 50, 100, 200]:
        for mx in depths:
            model = LGBMRegressor(max_depth=mx, n_estimators=ne,
                                  min_child_samples=1,
                                  learning_rate=0.0000001)
            model.fit(X, y)
            expected = model.predict(X)
            model_onnx = to_onnx(model, X)

            model_onnx2 = None
            if convert_lightgbm is not None:
                try:
                    model_onnx2 = convert_lightgbm(
                        model, initial_types=[
                            ('X', FloatTensorType([None, X.shape[1]]))])
                except RuntimeError as err:
                    # This specific error message is tolerated, the second
                    # model is then ignored.
                    if ("is higher than the number of the installed"
                            not in str(err)):
                        raise err

            for i, mo in enumerate([model_onnx, model_onnx2]):
                if mo is None:
                    continue
                for rt in ['python', 'onnxruntime1']:
                    with self.subTest(i=i, rt=rt, max_depth=mx, n_est=ne):
                        oinf = OnnxInference(mo, runtime=rt)
                        got = oinf.run({'X': X})['variable']
                        diff = numpy.abs(
                            got.ravel() - expected.ravel()).max()
                        if __name__ == "__main__":
                            converter = "mlprod" if i == 0 else "mltool"
                            print("lgb1 mx=%d ne=%d" % (mx, ne),
                                  converter, rt[:6], diff)
                        self.assertLess(diff, 1e-3)
def test_create_asv_benchmark_rf(self):
    # Creates the ASV benchmark for a RandomForestRegressor. Every
    # generated script gets lines appended which instantiate the benchmark
    # class and call ``setup_cache``, is then executed, and must not
    # produce any unexpected output on the error stream.
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    self.assertNotEmpty(mlprodict)
    folder = get_temp_folder(__file__, "temp_create_asv_benchmark_rf")
    created = create_asv_benchmark(
        location=folder, verbose=1, fLOG=fLOG,
        runtime=('scikit-learn', 'python', 'onnxruntime1'),
        exc=False, execute=True, models={'RandomForestRegressor'})
    self.assertNotEmpty(created)

    class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
    # Lines of the error stream containing one of these are ignored.
    ignored = ("Warning", "No module named 'mlprodict'", "Traceback ")
    found = False
    visited = []
    for root, _, filenames in os.walk(os.path.join(folder, 'benches')):
        for filename in filenames:
            if '__init__' in filename:
                continue
            fLOG("process '{}'".format(filename))
            script = os.path.join(root, filename)
            with open(script, 'r', encoding='utf-8') as f:
                text = f.read()
            # The first class found in the script is the benchmark class.
            class_name = class_pattern.findall(text)[0]
            text += "\n\ncl = %s()\ncl.setup_cache()\n" % class_name
            visited.append(script)
            with open(script, 'w', encoding='utf-8') as f:
                f.write(text)

            __, stderr = run_script(script, wait=True)
            # Empty lines and lines starting with a space are dropped too.
            kept = [
                line for line in stderr.split('\n')
                if line and line[0] != ' '
                and not any(marker in line for marker in ignored)]
            stderr = "\n".join(kept).strip(' \n\r')
            if stderr:
                raise RuntimeError(
                    "Issue with '{}'\n{}".format(script, stderr))

            if not filename.endswith(
                    "bench_RandomForestReg_default_b_reg_nest100.py"):
                continue
            if compare_module_version(sklearn.__version__, "0.21") < 0:
                continue
            if "random_state=42" not in text:
                raise AssertionError(text)
            found = True
    if not found:
        # Lists every visited file to help understand why nothing matched.
        raise AssertionError(
            "Visited files\n{}".format("\n".join(visited)))
def test_xgboost_regressor(self):
    # Compares the predictions of XGBRegressor with the predictions
    # computed from its ONNX conversions (to_onnx and, when available,
    # onnxmltools' convert_xgboost) for runtimes 'python' and
    # 'onnxruntime1'.
    try:
        from onnxmltools import __version__ as onnxmltools_version
    except ImportError:
        return
    if compare_module_version(onnxmltools_version, '1.11') <= 0:
        return
    from xgboost import XGBRegressor
    try:
        from onnxmltools.convert import convert_xgboost
    except ImportError:
        convert_xgboost = None

    X, y = self.data_X, self.data_y
    model = XGBRegressor(max_depth=8, n_estimators=100,
                         learning_rate=0.000001)
    model.fit(X, y)
    expected = model.predict(X)
    model_onnx = to_onnx(model, X)

    model_onnx2 = None
    if convert_xgboost is not None:
        try:
            model_onnx2 = convert_xgboost(
                model, initial_types=[
                    ('X', FloatTensorType([None, X.shape[1]]))])
        except RuntimeError as err:
            # This specific error message is tolerated, the second model
            # is then ignored.
            if ("is higher than the number of the installed"
                    not in str(err)):
                raise err

    for i, mo in enumerate([model_onnx, model_onnx2]):
        if mo is None:
            continue
        for rt in ['python', 'onnxruntime1']:
            with self.subTest(i=i, rt=rt):
                oinf = OnnxInference(mo, runtime=rt)
                got = oinf.run({'X': X})['variable']
                diff = numpy.abs(got.ravel() - expected.ravel()).max()
                if __name__ == "__main__":
                    converter = "mlprod" if i == 0 else "mltool"
                    print("xgb32", converter, rt, diff)
                self.assertLess(diff, 1e-5)
def test_create_asv_benchmark_tiny_same(self):
    # Creates the ASV benchmark with ``env='same'`` for a few models. Every
    # generated script gets lines appended which instantiate the benchmark
    # class and call ``setup_cache``, is then executed, and must not
    # produce any unexpected output on the error stream.
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    self.assertNotEmpty(mlprodict)
    folder = get_temp_folder(
        __file__, "temp_create_asv_benchmark_all_tiny_same")
    created = create_asv_benchmark(
        location=folder, verbose=1, fLOG=fLOG,
        skip_models={
            'DictVectorizer',
            'FeatureHasher',
            # 'CountVectorizer'
        },
        runtime=('scikit-learn', 'python', 'onnxruntime1'),
        exc=False, execute=True,
        models={'SelectFromModel', 'NMF', 'LatentDirichletAllocation'},
        env='same')
    self.assertNotEmpty(created)

    class_pattern = re.compile("class ([a-zA-Z0-9_]+)[(]")
    # Lines of the error stream containing one of these are ignored.
    ignored = ("Warning", "No module named 'mlprodict'", "Traceback ")
    # One of these import statements must appear in the NMF benchmark.
    nmf_imports = ("from sklearn.decomposition.nmf import NMF",
                   "from sklearn.decomposition import NMF")
    for root, _, filenames in os.walk(os.path.join(folder, 'benches')):
        for filename in filenames:
            if '__init__' in filename:
                continue
            fLOG("process '{}'".format(filename))
            script = os.path.join(root, filename)
            with open(script, 'r', encoding='utf-8') as f:
                text = f.read()
            # The first class found in the script is the benchmark class.
            class_name = class_pattern.findall(text)[0]
            text += "\n\ncl = %s()\ncl.setup_cache()\n" % class_name
            with open(script, 'w', encoding='utf-8') as f:
                f.write(text)

            __, stderr = run_script(script, wait=True)
            # Empty lines and lines starting with a space are dropped too.
            kept = [
                line for line in stderr.split('\n')
                if line and line[0] != ' '
                and not any(marker in line for marker in ignored)]
            stderr = "\n".join(kept).strip(' \n\r')
            if stderr:
                raise RuntimeError(
                    "Issue with '{}'\n{}".format(script, stderr))

            if not filename.endswith("bench_NMF_default_num_tr_pos.py"):
                continue
            if compare_module_version(sklearn.__version__, "0.22") < 0:
                continue
            if not any(statement in text for statement in nmf_imports):
                raise AssertionError(
                    "Unable to find 'import NMF' in\n{}".format(text))
class TestNotebookOnnxSbs(ExtTestCase):
    """
    Runs the notebook ``onnx_sbs`` stored in ``_doc/notebooks``.
    """

    def setUp(self):
        # NOTE(review): presumably makes a development version of
        # jyquickhelper importable before the notebook runs - confirm
        # against pyquickhelper.
        add_missing_development_version(["jyquickhelper"], __file__,
                                        hide=True)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning,
                               RuntimeWarning))
    @unittest.skipIf(
        compare_module_version(ort_version, "0.4.0") <= 0,
        reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_notebook_onnx_sbs(self):
        # Executes the notebook through test_notebook_execution_coverage.
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        # The module itself is checked, not the boolean
        # ``mlprodict is not None``: a boolean is never empty, which made
        # the assertion vacuous.
        self.assertNotEmpty(mlprodict)
        folder = os.path.join(os.path.dirname(__file__),
                              "..", "..", "_doc", "notebooks")
        test_notebook_execution_coverage(
            __file__, "onnx_sbs", folder,
            this_module_name="mlprodict", fLOG=fLOG)
def test_create_asv_benchmark_hist_gbc(self):
    # Creates the ASV benchmark for HistGradientBoostingClassifier, then
    # walks the generated ``benches`` folder and checks the content of the
    # expected script. No generated path may contain
    # '_hist_gradient_boosting'.
    self.assertNotEmpty(mlprodict)
    temp = get_temp_folder(__file__, "temp_create_asv_benchmark_hist_gbc")
    created = create_asv_benchmark(
        location=temp, verbose=0,
        runtime=('scikit-learn', 'python', 'onnxruntime1'),
        exc=False, execute=True,
        models={'HistGradientBoostingClassifier'})
    self.assertNotEmpty(created)

    verif = False
    allnames = []
    for path, _, files in os.walk(os.path.join(temp, 'benches')):
        for zoo in files:
            if '__init__' in zoo:
                continue
            fullname = os.path.join(path, zoo)
            # Every visited file is recorded so that the final error
            # message can list them (the list used to stay empty).
            allnames.append(fullname)
            if "_hist_gradient_boosting" in fullname:
                raise AssertionError(fullname)
            with open(fullname, 'r', encoding='utf-8') as f:
                content = f.read()
            if (zoo.endswith("bench_HGBClas_default_b_cl_mxit100.py") and
                    compare_module_version(sklearn.__version__,
                                           "0.21") >= 0):
                if "random_state=42" not in content:
                    raise AssertionError(content)
                if ("from sklearn.ensemble._hist_gradient_boosting."
                        "gradient_boosting import" not in content):
                    raise AssertionError(content)
                if "par_full_test_name = 'bench" not in content:
                    raise AssertionError(content)
                verif = True
    if not verif:
        # The expected script was not found, the visited files are listed
        # to help the investigation.
        raise AssertionError("Visited files\n{}".format(
            "\n".join(allnames)))
def test_search_predictions_lr(self):
    # Builds a SearchEnginePredictions on top of a logistic regression
    # trained on two features of the iris dataset, checks its
    # representation, then checks the neighbours of [0.5, 0.5] for three
    # ways of calling ``fit``.
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    iris = datasets.load_iris()
    X = iris.data[:, :2]
    y = iris.target
    clf = LogisticRegression()
    clf.fit(X, y)

    # 20 rows, f1 goes up from 0 by steps of 0.05 while f2 goes down
    # from 1.
    records = [
        dict(ind=i * 5, meta1="m%d" % i, meta2="m%d" % (i + 1),
             f1=i * 0.05, f2=1 - i * 0.05)
        for i in range(20)]
    df = pandas.DataFrame(records)

    se = SearchEnginePredictions(clf, n_neighbors=5)
    observed = repr(se).replace("\n", "").replace(" ", "")
    # The expected representation includes l1_ratio from
    # scikit-learn 0.21.0 on.
    if compare_module_version(sklearn.__version__, '0.21.0') < 0:
        expected = (
            "SearchEnginePredictions(fct=LogisticRegression(C=1.0,class_weight=None,dual=False,"
            "fit_intercept=True,intercept_scaling=1,max_iter=100,multi_class='warn',n_jobs=None,"
            "penalty='l2',random_state=None,solver='warn',tol=0.0001,verbose=0,warm_start=False),"
            "fct_params=None,n_neighbors=5)")
    else:
        expected = (
            "SearchEnginePredictions(fct=LogisticRegression(C=1.0,class_weight=None,dual=False,"
            "fit_intercept=True,intercept_scaling=1,l1_ratio=None,max_iter=100,multi_class='warn',n_jobs=None,"
            "penalty='l2',random_state=None,solver='warn',tol=0.0001,verbose=0,warm_start=False),"
            "fct_params=None,n_neighbors=5)")
    self.assertEqual(observed, expected)

    def check_neighbors(score, ind):
        # Five neighbours are expected, the closest one being row 10 at
        # distance 0.
        self.assertIsInstance(ind, (list, numpy.ndarray))
        self.assertEqual(len(ind), 5)
        self.assertEqual(ind[0], 10)
        self.assertIsInstance(score, numpy.ndarray)
        self.assertEqual(score.shape, (5, ))
        self.assertEqual(score[0], 0)

    def check_metadata(meta):
        # Three metadata columns for every neighbour, the first row
        # holds ind=50.
        self.assertIsInstance(meta, (numpy.ndarray, pandas.DataFrame))
        self.assertEqual(meta.shape, (5, 3))
        self.assertEqual(meta.iloc[0, 0], 50)

    # features and metadata given as data
    se.fit(data=None, features=df[["f1", "f2"]].values,
           metadata=df[["ind", "meta1", "meta2"]])
    score, ind, meta = se.kneighbors([0.5, 0.5])
    check_neighbors(score, ind)
    check_metadata(meta)

    # features and metadata given as column names
    se.fit(data=df, features=["f1", "f2"],
           metadata=["ind", "meta1", "meta2"])
    score, ind, meta = se.kneighbors([0.5, 0.5])
    check_neighbors(score, ind)
    check_metadata(meta)

    # no metadata
    se.fit(data=df, features=["f1", "f2"])
    score, ind, meta = se.kneighbors([0.5, 0.5])
    check_neighbors(score, ind)
    self.assertTrue(meta is None)
class TestRtValidateGaussianProcessOrt(ExtTestCase):
    """
    Validates the conversion of Gaussian process kernels and of
    *GaussianProcessRegressor* with runtime ``'onnxruntime1'``.
    """

    _SCAN_REASON = "Node:Scan1 Field 'shape' of type is required but missing."
    _CIRCLECI_REASON = "to investigate, shape of predictions are different"

    def _check_kernel(self, ker, **compare_options):
        # Converts kernel *ker* into ONNX, runs it on a small 3x2 matrix
        # and compares the output with the kernel applied to the same
        # matrix.
        from skl2onnx.operator_converters.gaussian_process import (
            convert_kernel)
        onx = convert_kernel(ker, 'X', output_names=['Y'],
                             dtype=numpy.float32, op_version=10)
        model_onnx = onx.to_onnx(
            inputs=[('X', FloatTensorType([None, None]))])
        model_onnx.ir_version = get_ir_version_from_onnx()
        sess = OnnxInference(model_onnx, runtime='onnxruntime1')
        sample = numpy.arange(6).reshape((3, 2))
        got = sess.run({'X': sample.astype(numpy.float32)})['Y']
        self.assertEqualArray(got, ker(sample), **compare_options)

    def _validate(self, verbose, debug, **options):
        # Runs enumerate_validated_operator_opsets for
        # GaussianProcessRegressor with runtime 'onnxruntime1' and returns
        # the produced rows and the logged messages.
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        getLogger('skl2onnx').disabled = True
        messages = []

        def record(*args, **kwargs):
            # Stores the logged message instead of printing it.
            messages.append(" ".join(str(a) for a in args))

        rows = list(enumerate_validated_operator_opsets(
            verbose, models={"GaussianProcessRegressor"}, fLOG=record,
            runtime='onnxruntime1', debug=debug, **options))
        return rows, messages

    @staticmethod
    def _skip_cdist(a, b, c, d, e):
        # Rejects the scenarios whose options for GaussianProcessRegressor
        # set 'optim' to 'cdist', accepts all the others.
        if not (isinstance(e, dict) and GaussianProcessRegressor in e):
            return True
        return e[GaussianProcessRegressor].get('optim', '') != 'cdist'

    @ignore_warnings(category=(UserWarning, ConvergenceWarning,
                               RuntimeWarning))
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason=_SCAN_REASON)
    def test_kernel_rbf1(self):
        self._check_kernel(
            RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3)))

    @ignore_warnings(category=(UserWarning, ConvergenceWarning,
                               RuntimeWarning))
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason=_SCAN_REASON)
    def test_kernel_exp_sine_squared(self):
        self._check_kernel(ExpSineSquared(), decimal=5)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning,
                               RuntimeWarning))
    def test_rt_GaussianProcessRegressor_onnxruntime_nofit(self):
        rows, messages = self._validate(
            1, False, filter_exp=lambda m, s: "NF-std" in s)
        self.assertGreater(len(rows), 1)
        self.assertGreater(len(messages), 0)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning,
                               RuntimeWarning))
    def test_rt_GaussianProcessRegressor_python_nofit(self):
        # NOTE(review): despite its name, this test uses runtime
        # 'onnxruntime1'.
        rows, messages = self._validate(
            1, False, filter_exp=lambda m, s: "NF" in s)
        self.assertGreater(len(rows), 6)
        self.assertGreater(len(messages), 0)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning,
                               RuntimeWarning))
    def test_rt_GaussianProcessRegressor_python_fit(self):
        # NOTE(review): despite its name, this test uses runtime
        # 'onnxruntime1'.
        rows, messages = self._validate(
            4, False,
            filter_exp=lambda m, s: "nofit" not in s and "multi" not in s)
        self.assertGreater(len(rows), 6)
        self.assertGreater(len(messages), 0)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning,
                               RuntimeWarning))
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason=_SCAN_REASON)
    def test_rt_GaussianProcessRegressor_debug(self):
        rows, messages = self._validate(
            2, True, filter_exp=lambda m, s: "reg-NSV" in s,
            filter_scenario=self._skip_cdist)
        self.assertGreater(len(rows), 1)
        self.assertGreater(len(messages), 1)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning,
                               RuntimeWarning))
    @skipif_circleci(_CIRCLECI_REASON)
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason=_SCAN_REASON)
    def test_rt_GaussianProcessRegressor_debug_std(self):
        rows, messages = self._validate(
            4, True, filter_exp=lambda m, s: "b-reg-std-NSV" in s,
            filter_scenario=self._skip_cdist)
        self.assertGreater(len(rows), 1)
        self.assertGreater(len(messages), 1)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning,
                               RuntimeWarning))
    @skipif_circleci(_CIRCLECI_REASON)
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason=_SCAN_REASON)
    def test_rt_GaussianProcessRegressor_debug_multi(self):
        # Only the number of rows is checked here, not the logged messages.
        rows, _ = self._validate(
            2, True, filter_exp=lambda m, s: 'm-reg-std-NSV' in s)
        self.assertGreater(len(rows), 0)

    @ignore_warnings(category=(UserWarning, ConvergenceWarning,
                               RuntimeWarning))
    @skipif_circleci(_CIRCLECI_REASON)
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason=_SCAN_REASON)
    def test_rt_GaussianProcessRegressor_debug_all(self):
        # No experiment filter: every scenario is validated.
        rows, messages = self._validate(2, False)
        self.assertGreater(len(rows), 1)
        self.assertGreater(len(messages), 0)
@brief test log(time=4s) """ import unittest import numpy as np from scipy import sparse as sp from sklearn import __version__ as sklearn_vers from sklearn.utils._testing import (assert_array_equal, assert_array_almost_equal, assert_almost_equal, assert_raise_message) from sklearn.metrics.cluster import v_measure_score from sklearn.datasets import make_blobs from pyquickhelper.pycode import ExtTestCase, ignore_warnings from pyquickhelper.texthelper.version_helper import compare_module_version from mlinsights.mlmodel import KMeansL1L2 sklearn_023 = compare_module_version(sklearn_vers, "0.23.2") >= 0 class TestKMeansL1L2Sklearn(ExtTestCase): # non centered, sparse centers to check the centers = np.array([ [0.0, 5.0, 0.0, 0.0, 0.0], [1.0, 1.0, 4.0, 0.0, 0.0], [1.0, 0.0, 0.0, 5.0, 1.0], ]) n_samples = 100 n_clusters, n_features = centers.shape # pylint: disable=E0633 X, true_labels = make_blobs(n_samples=n_samples, centers=centers, cluster_std=1.,
class TestOnnxrtPythonRuntimeMlText(ExtTestCase): def setUp(self): logger = getLogger('skl2onnx') logger.disabled = True def test_onnxrt_label_encoder_strings(self): corpus = numpy.array(['AA', 'BB', 'AA', 'CC']) op = OnnxLabelEncoder('text', op_version=TARGET_OPSET, keys_strings=['AA', 'BB', 'CC'], values_strings=['LEAA', 'LEBB', 'LECC'], output_names=['out']) onx = op.to_onnx(inputs=[('text', StringTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'text': corpus}) self.assertEqual(list(res['out']), ['LEAA', 'LEBB', 'LEAA', 'LECC']) def test_onnxrt_label_encoder_floats(self): corpus = numpy.array([0.1, 0.2, 0.3, 0.2], dtype=numpy.float32) op = OnnxLabelEncoder('text', op_version=TARGET_OPSET, keys_floats=[0.1, 0.2, 0.3], values_floats=[0.3, 0.4, 0.5], output_names=['out']) onx = op.to_onnx(inputs=[('text', FloatTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'text': corpus}) self.assertEqualArray( res['out'], numpy.array([0.3, 0.4, 0.5, 0.4], dtype=numpy.float32)) def test_onnxrt_label_encoder_string_floats(self): op = OnnxLabelEncoder('text', op_version=TARGET_OPSET, keys_strings=['AA', 'BB', 'CC'], values_floats=[0.1, 0.2, 0.3], output_names=['out']) onx = op.to_onnx(inputs=[('text', StringTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'text': numpy.array(['AA', 'DD']).reshape((-1, 1))}) self.assertEqualArray(res['out'], numpy.array([0.1, 0])) def test_onnxrt_label_encoder_raise(self): self.assertRaise( lambda: OnnxLabelEncoder('text', op_version=TARGET_OPSET, keys_strings=['AA', 'BB', 'CC'], classes_strings=['LEAA', 'LEBB', 'LECC'], output_names=['out']), TypeError) op = OnnxLabelEncoder('text', op_version=TARGET_OPSET, keys_strings=['AA', 'BB', 'CC'], values_strings=[], output_names=['out']) onx = op.to_onnx(inputs=[('text', StringTensorType())]) self.assertRaise(lambda: OnnxInference(onx), RuntimeError) def test_onnxrt_string_normalizer(self): corpus = numpy.array([ 'This is the first document.', 'This document is the second 
document.', 'And this is the third one.', 'Is this the first document?' ]) op = OnnxStringNormalizer('text', op_version=TARGET_OPSET, output_names=['out']) onx = op.to_onnx(inputs=[('text', StringTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'text': corpus}) self.assertEqual(list(res['out']), list(corpus)) res = oinf.run({'text': corpus.reshape((2, 2))}) self.assertEqual(res['out'].tolist(), corpus.reshape((2, 2)).tolist()) op = OnnxStringNormalizer('text', op_version=TARGET_OPSET, output_names=['out'], case_change_action='LOWER') onx = op.to_onnx(inputs=[('text', StringTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'text': corpus}) self.assertEqual(list(res['out']), list(_.lower() for _ in corpus)) op = OnnxStringNormalizer('text', op_version=TARGET_OPSET, output_names=['out'], case_change_action='UPPER') onx = op.to_onnx(inputs=[('text', StringTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'text': corpus}) self.assertEqual(list(res['out']), list(_.upper() for _ in corpus)) op = OnnxStringNormalizer('text', op_version=TARGET_OPSET, output_names=['out'], case_change_action='UPPER2') onx = op.to_onnx(inputs=[('text', StringTensorType())]) oinf = OnnxInference(onx) self.assertRaise(lambda: oinf.run({'text': corpus}), RuntimeError) def test_onnxrt_string_normalizer_stopwords(self): corpus = numpy.array([ 'This is the first document.', 'This document is the second document.', 'And this is the third one.', 'Is this the first document?' 
]) op = OnnxStringNormalizer('text', op_version=TARGET_OPSET, output_names=['out'], stopwords=['this']) onx = op.to_onnx(inputs=[('text', StringTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'text': corpus}) self.assertEqual(list(res['out']), list(_.replace("this ", "") for _ in corpus)) op = OnnxStringNormalizer('text', op_version=TARGET_OPSET, output_names=['out'], stopwords=['this'], case_change_action='LOWER', is_case_sensitive=0) onx = op.to_onnx(inputs=[('text', StringTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'text': corpus}) self.assertEqual(list(res['out']), list(_.lower().replace("this ", "") for _ in corpus)) def test_onnxrt_string_normalizer_stopwords_french(self): corpus = numpy.array([ 'A is the first document.', 'This document is the second document.', 'And a is the third one.', 'Is à the first document?' ]) exp = numpy.array([ 'a is the first document.', 'this document is the second document.', 'and a is the third one.', 'is a the first document?' ]) op = OnnxStringNormalizer('text', op_version=TARGET_OPSET, output_names=['out'], case_change_action='LOWER', locale='fr_FR') onx = op.to_onnx(inputs=[('text', StringTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'text': corpus}) self.assertEqual(list(res['out']), list(exp)) def test_onnxrt_string_normalizer_empty(self): corpus = numpy.array([ 'This is the first document.', 'This document is the second document.', 'And this is the third one.', 'Is this the first document?' 
]) op = OnnxStringNormalizer('text', op_version=TARGET_OPSET, output_names=['out']) onx = op.to_onnx(inputs=[('text', StringTensorType())]) oinf = OnnxInference(onx) corpus[-1] = "" res = oinf.run({'text': corpus}) self.assertEqual(list(res['out']), list(corpus)) def test_onnxrt_tokenizer_char(self): corpus = numpy.array(['abc', 'abc d', 'abc e']) exp = numpy.array([['a', 'b', 'c', '#', '#', '#'], ['a', 'b', 'c', ' ', 'd', '#'], ['a', 'b', 'c', ' ', ' ', 'e']]) op = OnnxTokenizer('text', op_version=TARGET_OPSET, output_names=['out'], tokenexp='.') onx = op.to_onnx(inputs=[('text', StringTensorType())], outputs=[('out', StringTensorType())]) self.assertIn('domain: "mlprodict"', str(onx)) self.assertIn('version: 1', str(onx)) oinf = OnnxInference(onx) res = oinf.run({'text': corpus}) self.assertEqual(res['out'].tolist(), exp.tolist()) res = oinf.run({'text': corpus.reshape((-1, 1))}) self.assertEqual(res['out'].tolist(), exp.reshape((3, 1, -1)).tolist()) def test_onnxrt_tokenizer_char_mark(self): corpus = numpy.array(['abc', 'abc d', 'abc e']) exp = numpy.array([['#', 'a', 'b', 'c', '#', '#', '#', '#'], ['#', 'a', 'b', 'c', ' ', 'd', '#', '#'], ['#', 'a', 'b', 'c', ' ', ' ', 'e', '#']]) op = OnnxTokenizer('text', op_version=TARGET_OPSET, output_names=['out'], tokenexp='.', mark=1) onx = op.to_onnx(inputs=[('text', StringTensorType())], outputs=[('out', StringTensorType())]) self.assertIn('domain: "mlprodict"', str(onx)) self.assertIn('version: 1', str(onx)) oinf = OnnxInference(onx) res = oinf.run({'text': corpus}) self.assertEqual(res['out'].tolist(), exp.tolist()) def test_onnxrt_tokenizer_word_mark(self): corpus = numpy.array(['abc ef zoo', 'abc,d', 'ab/e']) exp = numpy.array([['#', 'abc', 'ef', 'zoo', '#'], ['#', 'abc', 'd', '#', '#'], ['#', 'ab', 'e', '#', '#']]) op = OnnxTokenizer('text', op_version=TARGET_OPSET, output_names=['out'], separators=[' ', ',', '/'], mark=1) onx = op.to_onnx(inputs=[('text', StringTensorType())], outputs=[('out', 
StringTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'text': corpus}) self.assertEqual(res['out'].tolist(), exp.tolist()) def test_onnxrt_tokenizer_word_stop(self): corpus = numpy.array(['abc ef zoo', 'abc,d', 'ab/e']) exp = numpy.array([['abc', 'ef', 'zoo'], ['abc', '#', '#'], ['ab', 'e', '#']]) op = OnnxTokenizer('text', op_version=TARGET_OPSET, output_names=['out'], separators=[' ', ',', '/'], mark=0, stopwords=['d']) onx = op.to_onnx(inputs=[('text', StringTensorType())], outputs=[('out', StringTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'text': corpus}) self.assertEqual(res['out'].tolist(), exp.tolist()) def test_onnxrt_tokenizer_word_regex_mark_split(self): corpus = numpy.array(['abc ef zoo', 'abc,d', 'ab/e']) exp = numpy.array([['#', ' ef zoo', '#'], ['#', ',d', '#'], ['#', '/e', '#']]) op = OnnxTokenizer('text', op_version=TARGET_OPSET, output_names=['out'], mark=1, tokenexp='[a-c]+', tokenexpsplit=1) onx = op.to_onnx(inputs=[('text', StringTensorType())], outputs=[('out', StringTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'text': corpus}) self.assertEqual(res['out'].tolist(), exp.tolist()) def test_onnxrt_tokenizer_word_regex_mark_findall(self): corpus = numpy.array(['abc ef zoo', 'abc,d', 'ab/e']) exp = numpy.array([['#', 'abc', '#'], ['#', 'abc', '#'], ['#', 'ab', '#']]) op = OnnxTokenizer('text', op_version=TARGET_OPSET, output_names=['out'], mark=1, tokenexp='[a-c]+', tokenexpsplit=0) onx = op.to_onnx(inputs=[('text', StringTensorType())], outputs=[('out', StringTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'text': corpus}) self.assertEqual(res['out'].tolist(), exp.tolist()) def test_onnxrt_tfidf_vectorizer(self): inputi = numpy.array([[1, 1, 3, 3, 3, 7], [8, 6, 7, 5, 6, 8]]).astype(numpy.int64) output = numpy.array([[0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 1., 0., 1.]]).astype(numpy.float32) ngram_counts = numpy.array([0, 4]).astype(numpy.int64) ngram_indexes = numpy.array([0, 1, 2, 3, 4, 5, 
6]).astype(numpy.int64) pool_int64s = numpy.array([ 2, 3, 5, 4, # unigrams 5, 6, 7, 8, 6, 7 ]).astype(numpy.int64) # bigrams op = OnnxTfIdfVectorizer('tokens', op_version=TARGET_OPSET, mode='TF', min_gram_length=2, max_gram_length=2, max_skip_count=0, ngram_counts=ngram_counts, ngram_indexes=ngram_indexes, pool_int64s=pool_int64s, output_names=['out']) onx = op.to_onnx(inputs=[('tokens', Int64TensorType())], outputs=[('out', FloatTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'tokens': inputi}) self.assertEqual(output.tolist(), res['out'].tolist()) def test_onnxrt_tfidf_vectorizer_skip5(self): inputi = numpy.array([[1, 1, 3, 3, 3, 7], [8, 6, 7, 5, 6, 8]]).astype(numpy.int64) output = numpy.array([[0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 1., 1., 1.]]).astype(numpy.float32) ngram_counts = numpy.array([0, 4]).astype(numpy.int64) ngram_indexes = numpy.array([0, 1, 2, 3, 4, 5, 6]).astype(numpy.int64) pool_int64s = numpy.array([ 2, 3, 5, 4, # unigrams 5, 6, 7, 8, 6, 7 ]).astype(numpy.int64) # bigrams op = OnnxTfIdfVectorizer('tokens', op_version=TARGET_OPSET, mode='TF', min_gram_length=2, max_gram_length=2, max_skip_count=5, ngram_counts=ngram_counts, ngram_indexes=ngram_indexes, pool_int64s=pool_int64s, output_names=['out']) onx = op.to_onnx(inputs=[('tokens', Int64TensorType())], outputs=[('out', FloatTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'tokens': inputi}) self.assertEqual(output.tolist(), res['out'].tolist()) def test_onnxrt_tfidf_vectorizer_unibi_skip5(self): inputi = numpy.array([[1, 1, 3, 3, 3, 7], [8, 6, 7, 5, 6, 8]]).astype(numpy.int64) output = numpy.array([[0., 3., 0., 0., 0., 0., 0.], [0., 0., 1., 0., 1., 1., 1.]]).astype(numpy.float32) ngram_counts = numpy.array([0, 4]).astype(numpy.int64) ngram_indexes = numpy.array([0, 1, 2, 3, 4, 5, 6]).astype(numpy.int64) pool_int64s = numpy.array([ 2, 3, 5, 4, # unigrams 5, 6, 7, 8, 6, 7 ]).astype(numpy.int64) # bigrams op = OnnxTfIdfVectorizer('tokens', op_version=TARGET_OPSET, 
mode='TF', min_gram_length=1, max_gram_length=2, max_skip_count=5, ngram_counts=ngram_counts, ngram_indexes=ngram_indexes, pool_int64s=pool_int64s, output_names=['out']) onx = op.to_onnx(inputs=[('tokens', Int64TensorType())], outputs=[('out', FloatTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'tokens': inputi}) self.assertEqual(output.tolist(), res['out'].tolist()) def test_onnxrt_tfidf_vectorizer_bi_skip0(self): inputi = numpy.array([[1, 1, 3, 3, 3, 7, 8, 6, 7, 5, 6, 8]]).astype(numpy.int64) output = numpy.array([[0., 0., 0., 0., 1., 1., 1.]]).astype(numpy.float32) ngram_counts = numpy.array([0, 4]).astype(numpy.int64) ngram_indexes = numpy.array([0, 1, 2, 3, 4, 5, 6]).astype(numpy.int64) pool_int64s = numpy.array([ 2, 3, 5, 4, # unigrams 5, 6, 7, 8, 6, 7 ]).astype(numpy.int64) # bigrams op = OnnxTfIdfVectorizer('tokens', op_version=TARGET_OPSET, mode='TF', min_gram_length=2, max_gram_length=2, max_skip_count=0, ngram_counts=ngram_counts, ngram_indexes=ngram_indexes, pool_int64s=pool_int64s, output_names=['out']) onx = op.to_onnx(inputs=[('tokens', Int64TensorType())], outputs=[('out', FloatTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'tokens': inputi}) self.assertEqual(output.tolist(), res['out'].tolist()) def test_onnxrt_tfidf_vectorizer_empty(self): inputi = numpy.array([[1, 1, 3, 3, 3, 7, 8, 6, 7, 5, 6, 8]]).astype(numpy.int64) output = numpy.array([[1., 1., 1.]]).astype(numpy.float32) ngram_counts = numpy.array([0, 0]).astype(numpy.int64) ngram_indexes = numpy.array([0, 1, 2]).astype(numpy.int64) pool_int64s = numpy.array([ # unigrams 5, 6, 7, 8, 6, 7 ]).astype(numpy.int64) # bigrams op = OnnxTfIdfVectorizer('tokens', op_version=TARGET_OPSET, mode='TF', min_gram_length=2, max_gram_length=2, max_skip_count=0, ngram_counts=ngram_counts, ngram_indexes=ngram_indexes, pool_int64s=pool_int64s, output_names=['out']) onx = op.to_onnx(inputs=[('tokens', Int64TensorType())], outputs=[('out', FloatTensorType())]) oinf = OnnxInference(onx) res = 
oinf.run({'tokens': inputi}) self.assertEqual(output.tolist(), res['out'].tolist()) @ignore_warnings(UserWarning) def test_onnxrt_python_count_vectorizer(self): corpus = numpy.array([ 'This is the first document.', 'This document is the second document.', 'And this is the third one.', 'Is this the first document?' ]) vect = CountVectorizer() vect.fit(corpus) exp = vect.transform(corpus) onx = to_onnx(vect, corpus, target_opset=TARGET_OPSET) oinf = OnnxInference(onx) got = oinf.run({'X': corpus}) self.assertEqualArray(exp.todense(), got['variable']) @unittest.skipIf(compare_module_version(sk2ver, '1.9.3') < 0, reason="fails on that example") @ignore_warnings(UserWarning) def test_multi_output_classifier(self): dfx = pandas.DataFrame({ 'CAT1': ['985332', '985333', '985334', '985335', '985336'], 'CAT2': ['1985332', '1985333', '1985334', '1985335', '1985336'], 'TEXT': ["abc abc", "abc def", "def ghj", "abcdef", "abc ii"] }) dfy = pandas.DataFrame({ 'REAL': [5, 6, 7, 6, 5], 'CATY': [0, 1, 0, 1, 0] }) cat_features = ['CAT1', 'CAT2'] categorical_transformer = OneHotEncoder(handle_unknown='ignore') textual_feature = 'TEXT' count_vect_transformer = Pipeline(steps=[( 'count_vect', CountVectorizer(max_df=0.8, min_df=0.05, max_features=1000))]) preprocessor = ColumnTransformer(transformers=[( 'cat_transform', categorical_transformer, cat_features ), ('count_vector', count_vect_transformer, textual_feature)]) model_RF = RandomForestClassifier(random_state=42, max_depth=50) rf_clf = Pipeline( steps=[('preprocessor', preprocessor ), ('classifier', MultiOutputClassifier(estimator=model_RF))]) rf_clf.fit(dfx, dfy) expected_label = rf_clf.predict(dfx) expected_proba = rf_clf.predict_proba(dfx) inputs = { 'CAT1': dfx['CAT1'].values.reshape((-1, 1)), 'CAT2': dfx['CAT2'].values.reshape((-1, 1)), 'TEXT': dfx['TEXT'].values.reshape((-1, 1)) } onx = to_onnx(rf_clf, dfx, target_opset=TARGET_OPSET) sess = OnnxInference(onx) got = sess.run(inputs) self.assertEqualArray(expected_label, 
got['label']) self.assertEqual(len(expected_proba), len(got['probabilities'])) for e, g in zip(expected_proba, got['probabilities']): self.assertEqualArray(e, g, decimal=5) def test_onnxrt_category_mapper_intstr(self): op = OnnxCategoryMapper('cat', op_version=TARGET_OPSET, cats_int64s=[1, 2], cats_strings=["cat1", "cat2"], output_names=['out']) onx = op.to_onnx(inputs=[('cat', Int64TensorType())], outputs=[('out', StringTensorType())]) oinf = OnnxInference(onx) res = oinf.run({'cat': numpy.array([1, 2, 1, 5], dtype=numpy.int64)}) self.assertEqual(res['out'].tolist(), ["cat1", "cat2", "cat1", ""]) def test_onnxrt_category_mapper_strint(self): op = OnnxCategoryMapper('cat', op_version=TARGET_OPSET, cats_int64s=[1, 2], cats_strings=["cat1", "cat2"], output_names=['out']) onx = op.to_onnx(inputs=[('cat', StringTensorType())], outputs=[('out', Int64TensorType())]) oinf = OnnxInference(onx) res = oinf.run({ 'cat': numpy.array(["cat1", "cat2", "cat1", "R"], dtype=numpy.str_) }) self.assertEqualArray(res['out'], numpy.array([1, 2, 1, -1], dtype=numpy.int64))
def test_create_asv_benchmark_logreg(self):
    """Create the ASV benchmarks for LogisticRegression and execute them.

    Every generated file under ``benches`` (``__init__`` files excepted)
    gets a short driver appended which instantiates the first class found
    in the file and calls ``setup_cache``; the file is then executed with
    ``run_script``. Stderr lines that are empty, start with a space, or
    mention a warning, a traceback header or a missing ``mlprodict``
    module are ignored; any other stderr output fails the test. When
    scikit-learn is at least 0.21, two specific benchmark files are also
    checked for the options they must and must not contain.

    Raises:
        AssertionError: fewer than 6 files were created, a generated file
            declares no class, an inspected file has unexpected content,
            or none of the inspected files was visited.
        RuntimeError: a benchmark script wrote unexpected text to stderr.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
    self.assertNotEmpty(mlprodict)
    temp = get_temp_folder(__file__, "temp_create_asv_benchmark_logreg")
    created = create_asv_benchmark(
        location=temp, verbose=3, fLOG=fLOG,
        runtime=('scikit-learn', 'python', 'onnxruntime1'),
        exc=False, execute=True, models={'LogisticRegression'})
    if len(created) < 6:
        raise AssertionError(
            "Number of created files is too small.\n{}".format(
                "\n".join(sorted(created))))

    reg = re.compile("class ([a-zA-Z0-9_]+)[(]")
    # Stderr lines containing one of these markers are not treated as errors.
    tolerated = ("Warning", "No module named 'mlprodict'", "Traceback ")
    # (file suffix, text that must be absent, option that must be present,
    #  method name that must be present)
    expectations = (
        ("bench_LogReg_liblinear_m_cl_solverliblinear.py",
         "{LogisticRegression: {'zipmap': False}}",
         "'nozipmap'", 'predict_proba'),
        ("bench_LogReg_liblinear_dec_b_cl_dec_solverliblinear.py",
         "{LogisticRegression: {'raw_scores': True}}",
         "'raw_scores'", 'decision_function'),
    )

    verif = 0
    allnames = []
    for path, _, files in os.walk(os.path.join(temp, 'benches')):
        for zoo in files:
            if '__init__' in zoo:
                continue
            fLOG("process '{}'".format(zoo))
            fullname = os.path.join(path, zoo)
            with open(fullname, 'r', encoding='utf-8') as f:
                content = f.read()

            names = reg.findall(content)
            if not names:
                # Fail with the offending file instead of a bare IndexError.
                raise AssertionError(
                    "No benchmark class found in '{}'\n{}".format(
                        fullname, content))
            # Make the file self-running: build the benchmark class and
            # trigger its cache setup when the script is executed.
            content += "\n\ncl = %s()\ncl.setup_cache()\n" % names[0]
            allnames.append(fullname)
            with open(fullname, 'w', encoding='utf-8') as f:
                f.write(content)

            _out, err = run_script(fullname, wait=True)
            kept = [
                line for line in err.split('\n')
                if line and line[0] != ' '
                and not any(marker in line for marker in tolerated)]
            err = "\n".join(kept).strip(' \n\r')
            if err:
                raise RuntimeError("Issue with '{}'\n{}".format(
                    fullname, err))

            for suffix, forbidden, option, method in expectations:
                if not (zoo.endswith(suffix) and compare_module_version(
                        sklearn.__version__, "0.21") >= 0):
                    continue
                if (forbidden in content or option not in content or
                        method not in content):
                    raise AssertionError(content)
                verif += 1

    if not verif:
        raise AssertionError("Visited files\n{}".format(
            "\n".join(allnames)))
class TestOnnxPipeline(ExtTestCase):
    """Tests converting ``OnnxPipeline`` and ``TransferTransformer`` models
    to ONNX and comparing ``OnnxInference`` outputs with scikit-learn."""

    @staticmethod
    def _iris():
        """Return the iris features and targets as ``(X, y)``."""
        dataset = load_iris()
        return dataset.data, dataset.target

    @staticmethod
    def _export(pipeline, sample):
        """Convert *pipeline* to ONNX using its own ``op_version`` and the
        ``zipmap`` option disabled for the pipeline itself."""
        return to_onnx(pipeline, sample, target_opset=pipeline.op_version,
                       options={id(pipeline): {'zipmap': False}})

    def test_pipeline_iris(self):
        """PCA + scaler + logistic regression with ``enforce_float32=True``."""
        features, target = self._iris()
        pipeline = OnnxPipeline(
            [('pca', PCA(n_components=2)),
             ('no', StandardScaler()),
             ('lr', LogisticRegression())],
            enforce_float32=True, op_version=TARGET_OPSET)
        # The pipeline is deliberately fitted twice.
        pipeline.fit(features, target)
        pipeline.fit(features, target)
        self.assertTrue(hasattr(pipeline, 'raw_steps_'))
        self.assertEqual(len(pipeline.steps), 3)
        self.assertEqual(len(pipeline.raw_steps_), 3)
        for position in (0, 1):
            self.assertIsInstance(pipeline.steps[position][1],
                                  OnnxTransformer)

        features = features.astype(numpy.float32)
        oinf = OnnxInference(self._export(pipeline, features[:1]))
        got = oinf.run({'X': features})
        self.assertEqualArray(got["label"], pipeline.predict(features))
        self.assertEqualArray(got["probabilities"],
                              pipeline.predict_proba(features))

    def test_pipeline_none_params(self):
        """An ``OnnxPipeline`` can be built without optional parameters."""
        pipeline = OnnxPipeline(
            [('scaler', StandardScaler()),
             ('dt', DecisionTreeRegressor(max_depth=2))])
        self.assertNotEmpty(pipeline)

    def test_pipeline_iris_enforce_false(self):
        """Same as ``test_pipeline_iris`` with ``enforce_float32=False`` and
        float64 inputs; also checks the errors raised on invalid inputs."""
        features, target = self._iris()
        pipeline = OnnxPipeline(
            [('pca', PCA(n_components=2)),
             ('no', StandardScaler()),
             ('lr', LogisticRegression())],
            enforce_float32=False, op_version=TARGET_OPSET)
        pipeline.fit(features, target)
        pipeline.fit(features, target)
        self.assertTrue(hasattr(pipeline, 'raw_steps_'))
        self.assertEqual(len(pipeline.steps), 3)
        self.assertEqual(len(pipeline.raw_steps_), 3)
        for position in (0, 1):
            self.assertIsInstance(pipeline.steps[position][1],
                                  OnnxTransformer)

        features = features.astype(numpy.float64)
        oinf = OnnxInference(self._export(pipeline, features[:1]))
        got = oinf.run({'X': features})
        self.assertEqualArray(got["label"], pipeline.predict(features))
        self.assertEqualArray(got["probabilities"],
                              pipeline.predict_proba(features))

        # Wrong dtype, wrong shape and an unexpected extra input.
        self.assertRaise(
            lambda: oinf.run({'X': features.astype(numpy.float32)}),
            RuntimeError)
        self.assertRaise(
            lambda: oinf.run({'X': features.reshape((2, -1, 4))}),
            (ValueError, IndexError))
        self.assertRaise(
            lambda: oinf.run({
                'X': features.astype(numpy.float64),
                'Y': features.astype(numpy.float64)
            }),
            KeyError)

    @unittest.skipIf(compare_module_version(s2_ver, '1.9.3') < 0,
                     reason="skl2onnx too old")
    def test_transfer_transformer(self):
        """A trainable ``TransferTransformer`` wrapping a scaler converts and
        reproduces ``transform`` to 5 decimals."""
        _register_converters_mlinsights(True)
        features, target = self._iris()
        transfer = TransferTransformer(StandardScaler(), trainable=True)
        transfer.fit(features, target)

        onx = to_onnx(transfer, features[:1].astype(numpy.float32))
        oinf = OnnxInference(onx)
        got = oinf.run({'X': features.astype(numpy.float32)})
        expected = transfer.transform(features.astype(numpy.float32))
        self.assertEqualArray(expected, got['variable'], decimal=5)

    @unittest.skipIf(compare_module_version(s2_ver, '1.9.3') < 0,
                     reason="skl2onnx too old")
    def test_transfer_logistic_regression(self):
        """A trainable ``TransferTransformer`` wrapping a classifier exposes
        its output under ``probabilities``."""
        _register_converters_mlinsights(True)
        features, target = self._iris()
        transfer = TransferTransformer(
            LogisticRegression(solver='liblinear'), trainable=True)
        transfer.fit(features, target)

        onx = to_onnx(transfer, features[:1])
        oinf = OnnxInference(onx)
        got = oinf.run({'X': features})
        expected = transfer.transform(features)
        self.assertEqualArray(expected, got['probabilities'], decimal=5)

    @unittest.skipIf(compare_module_version(s2_ver, '1.9.3') < 0,
                     reason="skl2onnx too old")
    def test_pipeline_pickable(self):
        """``OnnxPipeline`` whose first step is a ``TransferTransformer``."""
        _register_converters_mlinsights(True)
        features, target = self._iris()
        pipeline = OnnxPipeline(
            [('gm', TransferTransformer(StandardScaler(), trainable=True)),
             ('lr', LogisticRegression())],
            enforce_float32=True, op_version=TARGET_OPSET)
        pipeline.fit(features, target)
        pipeline.fit(features, target)
        self.assertTrue(hasattr(pipeline, 'raw_steps_'))
        self.assertEqual(len(pipeline.steps), 2)
        self.assertEqual(len(pipeline.raw_steps_), 2)
        self.assertIsInstance(pipeline.steps[0][1], OnnxTransformer)

        features = features.astype(numpy.float32)
        oinf = OnnxInference(self._export(pipeline, features[:1]))
        got = oinf.run({'X': features})
        self.assertEqual(list(sorted(got)), ['label', 'probabilities'])
        self.assertEqualArray(got["label"], pipeline.predict(features))
        self.assertEqualArray(got["probabilities"],
                              pipeline.predict_proba(features))

    @unittest.skipIf(compare_module_version(s2_ver, '1.9.3') < 0,
                     reason="skl2onnx too old")
    @ignore_warnings(warns=FutureWarning)
    def test_pipeline_pickable_options(self):
        """``OnnxPipeline`` with per-step conversion options, checked with
        the ``python_compiled`` runtime, onnxruntime and the default
        runtime."""
        _register_converters_mlinsights(True)
        features, target = self._iris()
        mixture = TransferTransformer(
            GaussianMixture(n_components=5, random_state=2),
            trainable=True, method='predict_proba')
        pipeline = OnnxPipeline(
            [('gm', mixture),
             ('lr', LogisticRegression(random_state=2))],
            enforce_float32=True, op_version=TARGET_OPSET,
            options={
                'gm__score_samples': True,
                'lr__zipmap': False
            })
        pipeline.fit(features, target)
        pipeline.fit(features, target)
        self.assertTrue(hasattr(pipeline, 'raw_steps_'))
        self.assertEqual(len(pipeline.steps), 2)
        self.assertEqual(len(pipeline.raw_steps_), 2)
        self.assertIsInstance(pipeline.steps[0][1], OnnxTransformer)

        features = features.astype(numpy.float32)
        onx = self._export(pipeline, features[:1])

        compiled = OnnxInference(onx, runtime="python_compiled")
        self.assertIn("'probabilities': probabilities,", str(compiled))

        ort_session = InferenceSession(onx.SerializeToString())
        ort_outputs = ort_session.run(None, {'X': features})
        self.assertEqual(len(ort_outputs), 2)

        oinf = OnnxInference(onx)
        got = oinf.run({'X': features})
        self.assertEqual(list(sorted(got)), ['label', 'probabilities'])
        self.assertEqualArray(got["probabilities"],
                              pipeline.predict_proba(features))
        self.assertEqualArray(got["label"], pipeline.predict(features))

    def test_pipeline_iris_column_transformer(self):
        """``OnnxPipeline`` whose first step is a ``ColumnTransformer``; the
        final classifier is expected to stay a ``LogisticRegression``."""
        features, target = self._iris()
        columns = ColumnTransformer(
            [('pca', PCA(n_components=2), [0, 1]),
             ('no', StandardScaler(), [2]),
             ('pass', 'passthrough', [3])])
        pipeline = OnnxPipeline(
            [('col', columns), ('lr', LogisticRegression())],
            enforce_float32=True, op_version=TARGET_OPSET)
        pipeline.fit(features, target)
        pipeline.fit(features, target)
        self.assertTrue(hasattr(pipeline, 'raw_steps_'))
        self.assertEqual(len(pipeline.steps), 2)
        self.assertEqual(len(pipeline.raw_steps_), 2)
        self.assertIsInstance(pipeline.steps[0][1], OnnxTransformer)
        self.assertIsInstance(pipeline.steps[1][1], LogisticRegression)

        features = features.astype(numpy.float32)
        oinf = OnnxInference(self._export(pipeline, features[:1]))
        got = oinf.run({'X': features})
        self.assertEqualArray(got["label"], pipeline.predict(features))
        self.assertEqualArray(got["probabilities"],
                              pipeline.predict_proba(features), decimal=5)

    def test_pipeline_iris_column_transformer_nocache(self):
        """Same as ``test_pipeline_iris_column_transformer`` with a
        ``memory`` object whose ``cache`` returns its argument unchanged."""

        class MyMemory:
            def __init__(self):
                pass

            def cache(self, obj):
                return obj

        features, target = self._iris()
        columns = ColumnTransformer(
            [('pca', PCA(n_components=2), [0, 1]),
             ('no', StandardScaler(), [2]),
             ('pass', 'passthrough', [3])])
        pipeline = OnnxPipeline(
            [('col', columns), ('lr', LogisticRegression())],
            enforce_float32=True, op_version=TARGET_OPSET,
            memory=MyMemory())
        pipeline.fit(features, target)
        pipeline.fit(features, target)
        self.assertTrue(hasattr(pipeline, 'raw_steps_'))
        self.assertEqual(len(pipeline.steps), 2)
        self.assertEqual(len(pipeline.raw_steps_), 2)
        self.assertIsInstance(pipeline.steps[0][1], OnnxTransformer)
        self.assertIsInstance(pipeline.steps[1][1], LogisticRegression)

        features = features.astype(numpy.float32)
        oinf = OnnxInference(self._export(pipeline, features[:1]))
        got = oinf.run({'X': features})
        self.assertEqualArray(got["label"], pipeline.predict(features))
        self.assertEqualArray(got["probabilities"],
                              pipeline.predict_proba(features), decimal=5)
class TestOnnxrtSideBySide(ExtTestCase):
    """Tests converting Gaussian-process kernels to ONNX, running them with
    several runtimes and comparing the runtimes side by side."""

    def setUp(self):
        """Disable the ``skl2onnx`` logger before each test."""
        getLogger('skl2onnx').disabled = True

    @staticmethod
    def _two_rbf_kernel():
        """Return the sum of two constant * RBF kernels (length scales 10
        and 1)."""
        return Sum(
            CK(0.1, (1e-3, 1e3)) * RBF(length_scale=10,
                                       length_scale_bounds=(1e-3, 1e3)),
            CK(0.1, (1e-3, 1e3)) * RBF(length_scale=1,
                                       length_scale_bounds=(1e-3, 1e3)))

    @staticmethod
    def _kernel_to_model(kernel):
        """Convert *kernel* into an ONNX model with input ``X`` and output
        ``Y``, using the opset returned by ``get_opset_number_from_onnx``."""
        graph = convert_kernel(kernel, 'X', output_names=['Y'],
                               dtype=numpy.float32,
                               op_version=get_opset_number_from_onnx())
        return graph.to_onnx(
            inputs=[('X', FloatTensorType([None, None]))],
            outputs=[('Y', FloatTensorType([None, None]))],
            target_opset=get_opset_number_from_onnx())

    @unittest.skipIf(convert_kernel is None,
                     reason="not enough recent version")
    def test_kernel_ker12_def(self):
        """Constant + constant * RBF kernel, default runtime."""
        kernel = Sum(
            CK(0.1, (1e-3, 1e3)),
            CK(0.1, (1e-3, 1e3)) * RBF(length_scale=1,
                                       length_scale_bounds=(1e-3, 1e3)))
        model = self._kernel_to_model(kernel)
        oinf = OnnxInference(model.SerializeToString())
        got = oinf.run({'X': Xtest_.astype(numpy.float32)})
        computed = got['Y']
        expected = kernel(Xtest_)
        self.assertEqualArray(computed, expected)

    @unittest.skipIf(convert_kernel is None,
                     reason="not enough recent version")
    def test_kernel_ker2_def(self):
        """Sum of two constant * RBF kernels, default runtime, including a
        run returning intermediate results."""
        kernel = self._two_rbf_kernel()
        model = self._kernel_to_model(kernel)
        oinf = OnnxInference(model.SerializeToString())
        got = oinf.run({'X': Xtest_.astype(numpy.float32)})
        computed = got['Y']
        expected = kernel(Xtest_)
        self.assertEqualArray(computed, expected)

        got = oinf.run({'X': Xtest_.astype(numpy.float32)},
                       intermediate=True)
        self.assertGreater(len(got), 30)
        self.assertIsInstance(got, dict)

    @unittest.skipIf(convert_kernel is None,
                     reason="not enough recent version")
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_kernel_ker2_def_ort(self):
        """Sum of two constant * RBF kernels with runtime ``onnxruntime2``."""
        kernel = self._two_rbf_kernel()
        model = self._kernel_to_model(kernel)
        model.ir_version = get_ir_version_from_onnx()
        oinf = _capture_output(
            lambda: OnnxInference(model.SerializeToString(),
                                  runtime="onnxruntime2"),
            'c')[0]
        try:
            got = oinf.run({'X': Xtest_.astype(numpy.float32)})
        except RuntimeError as e:
            if "Got invalid dimensions for input" in str(e):
                # probable bug somewhere
                return
            raise e
        computed = got['Y']
        expected = kernel(Xtest_)
        self.assertEqualArray(computed, expected, decimal=5)

    @unittest.skipIf(convert_kernel is None,
                     reason="not enough recent version")
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_kernel_ker2_def_ort1(self):
        """Runtime ``onnxruntime1`` with verbose intermediate results, then
        ``side_by_side_by_values`` across two and three runtimes."""
        kernel = self._two_rbf_kernel()
        model = self._kernel_to_model(kernel)
        model.ir_version = get_ir_version_from_onnx()
        ort1 = OnnxInference(model.SerializeToString(),
                             runtime="onnxruntime1")

        logged = []

        def myprint(*args, **kwargs):
            logged.append(" ".join(map(str, args)))

        got = _capture_output(
            lambda: ort1.run({'X': Xtest_.astype(numpy.float32)},
                             intermediate=True, verbose=1, fLOG=myprint),
            'c')[0]
        self.assertGreater(len(logged), 2)
        computed = got['Y']
        self.assertNotEmpty(computed)
        self.assertGreater(len(got), 2)
        # The comparison of the output with kernel(Xtest_) is disabled here.

        cpu = OnnxInference(model.SerializeToString())
        sbs = side_by_side_by_values(
            [cpu, ort1], inputs={'X': Xtest_.astype(numpy.float32)})
        self.assertGreater(len(sbs), 2)
        self.assertIsInstance(sbs, list)
        self.assertIsInstance(sbs[0], dict)
        for key in ('step', 'metric', 'cmp'):
            for row in (sbs[0], sbs[1]):
                self.assertIn(key, row)

        ort2 = _capture_output(
            lambda: OnnxInference(model.SerializeToString(),
                                  runtime="onnxruntime2"),
            'c')[0]
        try:
            sbs = side_by_side_by_values(
                [cpu, ort1, ort2],
                inputs={'X': Xtest_.astype(numpy.float32)})
        except RuntimeError as e:
            if "Got invalid dimensions for input" in str(e):
                # probable bug somewhere
                return
            raise e
        self.assertNotEmpty(sbs)

        inputs = {'X': Xtest_.astype(numpy.float32)}
        sbs = side_by_side_by_values(
            [(cpu, inputs), (ort1, inputs), (ort2, inputs)])
        self.assertNotEmpty(sbs)

    def test_merge_results(self):
        """``merge_results`` aligns two result lists by name and puts
        ``None`` where a name is missing from one of them."""
        cases = [
            # identical names on both sides
            ([('AA', [0, 0]), ('BB', [1, 1])],
             [('AA', [2, 2]), ('BB', [3, 3])],
             [('AA', [[0, 0], [2, 2]]), ('BB', [[1, 1], [3, 3]])]),
            # extra trailing name on the left
            ([('AA', [0, 0]), ('BB', [1, 1]), ('CC', [10, 10])],
             [('AA', [2, 2]), ('BB', [3, 3])],
             [('AA', [[0, 0], [2, 2]]), ('BB', [[1, 1], [3, 3]]),
              ('CC', [[10, 10], None])]),
            # extra trailing name on the right
            ([('AA', [0, 0]), ('BB', [1, 1])],
             [('AA', [2, 2]), ('BB', [3, 3]), ('CC', [10, 10])],
             [('AA', [[0, 0], [2, 2]]), ('BB', [[1, 1], [3, 3]]),
              ('CC', [None, [10, 10]])]),
            # extra name in the middle of the left list
            ([('AA', [0, 0]), ('CC', [10, 10]), ('BB', [1, 1])],
             [('AA', [2, 2]), ('BB', [3, 3])],
             [('AA', [[0, 0], [2, 2]]), ('CC', [[10, 10], None]),
              ('BB', [[1, 1], [3, 3]])]),
            # extra name in the middle of the right list
            ([('AA', [0, 0]), ('BB', [1, 1])],
             [('AA', [2, 2]), ('CC', [10, 10]), ('BB', [3, 3])],
             [('AA', [[0, 0], [2, 2]]), ('CC', [None, [10, 10]]),
              ('BB', [[1, 1], [3, 3]])]),
        ]
        for left, right, expected in cases:
            merged = merge_results([left, right])
            self.assertEqual(expected, merged)
class TestOnnxrtSideBySide(ExtTestCase):
    """Tests converting Gaussian-process kernels to ONNX with the default
    opset, running them with several runtimes and comparing the runtimes
    side by side."""

    def setUp(self):
        """Disable the ``skl2onnx`` logger before each test."""
        getLogger('skl2onnx').disabled = True

    @staticmethod
    def _two_rbf_kernel():
        """Return the sum of two constant * RBF kernels (length scales 10
        and 1)."""
        return Sum(
            CK(0.1, (1e-3, 1e3)) * RBF(length_scale=10,
                                       length_scale_bounds=(1e-3, 1e3)),
            CK(0.1, (1e-3, 1e3)) * RBF(length_scale=1,
                                       length_scale_bounds=(1e-3, 1e3)))

    @staticmethod
    def _kernel_to_model(kernel):
        """Convert *kernel* into an ONNX model with input ``X`` and output
        ``Y``; no opset is passed explicitly."""
        graph = convert_kernel(kernel, 'X', output_names=['Y'],
                               dtype=numpy.float32)
        return graph.to_onnx(
            inputs=[('X', FloatTensorType([None, None]))],
            outputs=[('Y', FloatTensorType([None, None]))])

    @unittest.skipIf(convert_kernel is None,
                     reason="not enough recent version")
    def test_kernel_ker12_def(self):
        """Constant + constant * RBF kernel, default runtime."""
        kernel = Sum(
            CK(0.1, (1e-3, 1e3)),
            CK(0.1, (1e-3, 1e3)) * RBF(length_scale=1,
                                       length_scale_bounds=(1e-3, 1e3)))
        model = self._kernel_to_model(kernel)
        oinf = OnnxInference(model.SerializeToString())
        got = oinf.run({'X': Xtest_.astype(numpy.float32)})
        computed = got['Y']
        expected = kernel(Xtest_)
        self.assertEqualArray(computed, expected)

    @unittest.skipIf(convert_kernel is None,
                     reason="not enough recent version")
    def test_kernel_ker2_def(self):
        """Sum of two constant * RBF kernels, default runtime, including a
        run returning intermediate results as an ``OrderedDict``."""
        kernel = self._two_rbf_kernel()
        model = self._kernel_to_model(kernel)
        oinf = OnnxInference(model.SerializeToString())
        got = oinf.run({'X': Xtest_.astype(numpy.float32)})
        computed = got['Y']
        expected = kernel(Xtest_)
        self.assertEqualArray(computed, expected)

        got = oinf.run({'X': Xtest_.astype(numpy.float32)},
                       intermediate=True)
        self.assertGreater(len(got), 30)
        self.assertIsInstance(got, OrderedDict)

    @unittest.skipIf(convert_kernel is None,
                     reason="not enough recent version")
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_kernel_ker2_def_ort(self):
        """Sum of two constant * RBF kernels with runtime ``onnxruntime2``."""
        kernel = self._two_rbf_kernel()
        model = self._kernel_to_model(kernel)
        oinf = OnnxInference(model.SerializeToString(),
                             runtime="onnxruntime2")
        got = oinf.run({'X': Xtest_.astype(numpy.float32)})
        computed = got['Y']
        expected = kernel(Xtest_)
        self.assertEqualArray(computed, expected, decimal=5)

    @unittest.skipIf(convert_kernel is None,
                     reason="not enough recent version")
    @unittest.skipIf(compare_module_version(ort_version, threshold) <= 0,
                     reason="Node:Scan1 Field 'shape' of type is required but missing.")
    def test_kernel_ker2_def_ort1(self):
        """Runtime ``onnxruntime1`` with verbose intermediate results, then
        ``side_by_side_by_values`` across two and three runtimes."""
        kernel = self._two_rbf_kernel()
        model = self._kernel_to_model(kernel)
        ort1 = OnnxInference(model.SerializeToString(),
                             runtime="onnxruntime1")

        logged = []

        def myprint(*args, **kwargs):
            logged.append(" ".join(map(str, args)))

        got = ort1.run({'X': Xtest_.astype(numpy.float32)},
                       intermediate=True, verbose=1, fLOG=myprint)
        self.assertGreater(len(logged), 2)
        computed = got['Y']
        self.assertNotEmpty(computed)
        self.assertGreater(len(got), 2)
        # The comparison of the output with kernel(Xtest_) is disabled here.

        cpu = OnnxInference(model.SerializeToString())
        sbs = side_by_side_by_values(
            [cpu, ort1], inputs={'X': Xtest_.astype(numpy.float32)})
        self.assertGreater(len(sbs), 2)
        self.assertIsInstance(sbs, list)
        self.assertIsInstance(sbs[0], dict)
        for key in ('step', 'metric', 'cmp'):
            for row in (sbs[0], sbs[1]):
                self.assertIn(key, row)

        ort2 = OnnxInference(model.SerializeToString(),
                             runtime="onnxruntime2")
        sbs = side_by_side_by_values(
            [cpu, ort1, ort2], inputs={'X': Xtest_.astype(numpy.float32)})
        self.assertNotEmpty(sbs)

        inputs = {'X': Xtest_.astype(numpy.float32)}
        sbs = side_by_side_by_values(
            [(cpu, inputs), (ort1, inputs), (ort2, inputs)])
        self.assertNotEmpty(sbs)