def convert_model_to_onnx(model: Any):
    """ Helper function to convert an ML model to ONNX format """
    if isinstance(model, model_classes_keras):
        return onnxmltools.convert_keras(model)
    if isinstance(model, model_classes_sklearn):
        return onnxmltools.convert_sklearn(model)
    if 'xgboost' in model.__repr__():
        return onnxmltools.convert_sklearn(model)
    if isinstance(model, model_classes_scipy):
        raise Exception("Pytorch models not yet supported to onnx")
    else:
        raise Exception(
            f"Attempt to convert unsupported model to onnx: {model}")
def sk_model(sk_file_path):
    # Load the pickled scikit-learn model from the given path
    with open(sk_file_path, 'rb') as f:
        sk_load_model = pickle.load(f)
    sk2onnx = onnxmltools.convert_sklearn(sk_load_model)
    # Save as protobuf (save_model returns None, so keep the converted model)
    onnxmltools.utils.save_model(sk2onnx, '.path/to/save/onnx/model.onnx')
    return sk2onnx
def _test_binarizer_converter(self, threshold):
    warnings.filterwarnings("ignore")
    X = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=np.float32)

    # Create SKL model for testing
    model = Binarizer(threshold=threshold)
    model.fit(X)

    # Create ONNX-ML model
    onnx_ml_model = convert_sklearn(
        model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))]
    )

    # Create ONNX model by calling converter
    onnx_model = convert(onnx_ml_model, "onnx", X)

    # Get the predictions for the ONNX-ML model
    session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [
        session.get_outputs()[i].name for i in range(len(session.get_outputs()))
    ]
    inputs = {session.get_inputs()[0].name: X}
    onnx_ml_pred = session.run(output_names, inputs)[0]

    # Get the predictions for the ONNX model
    onnx_pred = onnx_model.transform(X)

    return onnx_ml_pred, onnx_pred
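# Hedged sketch of how the binarizer helper above might be exercised in a test;
# the threshold value and tolerances are illustrative assumptions, not taken
# from the original suite.
def test_binarizer_converter(self):
    onnx_ml_pred, onnx_pred = self._test_binarizer_converter(threshold=2.0)
    # Outputs of the ONNX-ML and converted ONNX models should agree elementwise
    np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=1e-06, atol=1e-06)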
def test_label_encoder_converter(self):
    model = LabelEncoder()
    model.fit(['str3', 'str2', 'str0', 'str1', 'str3'])
    model_onnx = convert_sklearn(model, 'scikit-learn label encoder',
                                 [('input', StringTensorType([1, 1]))])
    self.assertTrue(model_onnx.graph.node is not None)
def test_model_label_encoder_str_onnxml(self):
    model = LabelEncoder()
    data = [
        "paris",
        "milan",
        "amsterdam",
        "tokyo",
    ]
    model.fit(data)
    onnx_ml_model = convert_sklearn(
        model, initial_types=[("input", StringTensorType_onnx([4]))]
    )
    onnx_model = convert(onnx_ml_model, "onnx", data)

    # Get the predictions for the ONNX-ML model
    session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [
        session.get_outputs()[i].name for i in range(len(session.get_outputs()))
    ]
    inputs = {session.get_inputs()[0].name: data}
    onnx_ml_pred = session.run(output_names, inputs)

    # Get the predictions for the ONNX model
    onnx_pred = onnx_model.transform(data)

    # Check that predicted values match
    np.testing.assert_allclose(onnx_ml_pred[0], onnx_pred, rtol=1e-06, atol=1e-06)
def _test_single_output_core(self, model):
    X = [[0, 1], [1, 1], [2, 0]]
    y = [100, -10, 50]
    model.fit(X, y)
    model_onnx = convert_sklearn(model, 'tree-based regressor',
                                 [('input', Int64TensorType([1, 2]))])
    self.assertTrue(model_onnx is not None)
def save_sklearn(model, path: str, initial_types=None, prototype=None, shape=None, dtype=None):
    """
    Convert a scikit-learn model to ONNX first and then save it to disk using
    `save_onnx`. We use onnxmltools to do the conversion from scikit-learn to
    ONNX, and currently not all scikit-learn models are supported by
    onnxmltools. A list of supported models can be found in the documentation.

    :param model: Scikit-learn model
    :param path: Path to which the object will be serialized
    :param initial_types: A python list. Each element is a tuple of a variable
        name and a type defined in onnxconverter_common.data_types. If
        initial_types is empty, we'll guess the required information from
        prototype or infer it by using shape and dtype.
    :param prototype: A numpy array that gives shape and type information.
        This is ignored if initial_types is not None
    :param shape: Shape of the input to the model. Ignored if initial_types
        or prototype is not None
    :param dtype: redisai.DType object which represents the type of the input
        to the model. Ignored if initial_types or prototype is not None
    """
    if not utils.is_installed(['onnxmltools', 'skl2onnx', 'pandas']):
        raise RuntimeError('Please install onnxmltools, skl2onnx & pandas to use this feature.')
    from onnxmltools import convert_sklearn
    if initial_types is None:
        initial_types = [utils.guess_onnx_tensortype(prototype, shape, dtype)]
    if not isinstance(initial_types, list):
        raise TypeError((
            "`initial_types` has to be a list. "
            "If you have only one initial_type, put that into a list"))
    serialized = convert_sklearn(model, initial_types=initial_types)
    save_onnx(serialized, path)
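# Hypothetical usage sketch for save_sklearn above; the model, feature count,
# and output path are illustrative assumptions.
import numpy as np
from sklearn.linear_model import LinearRegression
from skl2onnx.common.data_types import FloatTensorType

X_demo = np.random.rand(10, 4).astype(np.float32)
y_demo = np.random.rand(10).astype(np.float32)
reg = LinearRegression().fit(X_demo, y_demo)

# Passing explicit initial_types, so no prototype/shape/dtype inference is needed
save_sklearn(reg, 'linreg.onnx',
             initial_types=[('float_input', FloatTensorType([None, 4]))])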
def _test_scaler_converter(self, model):
    warnings.filterwarnings("ignore")
    X = np.array([[0.0, 0.0, 3.0], [1.0, -1.0, 0.0], [0.0, 2.0, 1.0], [1.0, 0.0, -2.0]],
                 dtype=np.float32)
    model.fit(X)

    # Create ONNX-ML model
    onnx_ml_model = convert_sklearn(
        model, initial_types=[("float_input", FloatTensorType([None, X.shape[1]]))]
    )

    # Create ONNX model by calling converter
    onnx_model = convert(onnx_ml_model, "onnx", X)

    # Get the predictions for the ONNX-ML model
    session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [
        session.get_outputs()[i].name for i in range(len(session.get_outputs()))
    ]
    inputs = {session.get_inputs()[0].name: X}
    onnx_ml_pred = session.run(output_names, inputs)[0]

    # Get the predictions for the ONNX model
    onnx_pred = onnx_model.transform(X)

    return onnx_ml_pred, onnx_pred
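# Hedged sketch of a caller for the scaler helper above; StandardScaler is an
# illustrative choice and the tolerances are assumptions.
def test_standard_scaler_onnxml(self):
    onnx_ml_pred, onnx_pred = self._test_scaler_converter(StandardScaler())
    np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=1e-06, atol=1e-06)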
def _test_binary_classification_core(self, model):
    X = [[0, 1], [1, 1], [2, 0]]
    y = ['A', 'B', 'A']
    model.fit(X, y)
    model_onnx = convert_sklearn(model, 'tree-based binary classifier',
                                 [('input', Int64TensorType([1, 2]))])
    self.assertTrue(model_onnx is not None)
def test_one_hot_encoder_onnx_int(self, rtol=1e-06, atol=1e-06):
    model = OneHotEncoder()
    X = np.array([[1, 2, 3]], dtype=np.int32)
    model.fit(X)

    # Create ONNX-ML model
    onnx_ml_model = convert_sklearn(
        model, initial_types=[("int_input", IntTensorType_onnx(X.shape))]
    )

    # Create ONNX model by calling converter
    onnx_model = convert(onnx_ml_model, "onnx", X)

    # Get the predictions for the ONNX-ML model
    session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [
        session.get_outputs()[i].name for i in range(len(session.get_outputs()))
    ]
    inputs = {session.get_inputs()[0].name: X}
    onnx_ml_pred = session.run(output_names, inputs)

    # Get the predictions for the ONNX model
    onnx_pred = onnx_model.transform(X)

    # Check that predicted values match
    np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)
def test_model_one_hot_encoder(self):
    # categorical_features will be removed in 0.22 (this test will fail by then).
    model = OneHotEncoder()
    model.fit([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]])
    model_onnx = convert_sklearn(model, 'scikit-learn one-hot encoder',
                                 [('input', Int64TensorType([1, 3]))])
    self.assertTrue(model_onnx is not None)
def onnx_sklearn_model(sklearn_model):
    import onnxmltools
    from skl2onnx.common.data_types import FloatTensorType

    initial_type = [('float_input', FloatTensorType([None, 4]))]
    onx = onnxmltools.convert_sklearn(sklearn_model, initial_types=initial_type)
    return onx
def _test_one_class_classification_core(self, model):
    X = [[0., 1.], [1., 1.], [2., 0.]]
    y = [1, 1, 1]
    model.fit(X, y)
    model_onnx = convert_sklearn(model, 'tree-based classifier',
                                 [('input', FloatTensorType([1, 2]))])
    self.assertTrue(model_onnx is not None)
def from_sklearn(
    model,
    inputs: Iterable[IOShape],
    opset: int = DEFAULT_OPSET,
):
    initial_type = ONNXConverter.convert_initial_type(inputs)
    return onnxmltools.convert_sklearn(model, initial_types=initial_type, target_opset=opset)
def test_model_label_encoder_int_onnxml(self):
    model = LabelEncoder()
    X = np.array([1, 4, 5, 2, 0, 2], dtype=np.int64)
    model.fit(X)

    # Create ONNX-ML model
    onnx_ml_model = convert_sklearn(
        model, initial_types=[("input", LongTensorType_onnx(X.shape))]
    )

    # Create ONNX model by calling converter
    onnx_model = convert(onnx_ml_model, "onnx", X)

    # Get the predictions for the ONNX-ML model
    session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [
        session.get_outputs()[i].name for i in range(len(session.get_outputs()))
    ]
    inputs = {session.get_inputs()[0].name: X}
    onnx_ml_pred = np.array(session.run(output_names, inputs)).ravel()

    # Get the predictions for the ONNX model
    onnx_pred = onnx_model.transform(X).ravel()

    # Check that predicted values match
    np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=1e-06, atol=1e-06)
def onnx_model():
    from skl2onnx.common.data_types import FloatTensorType

    initial_type = [('float_input', FloatTensorType([None, X.shape[1]]))]
    clf = onnxmltools.convert_sklearn(skl_model, 'iris model', initial_types=initial_type)
    onnx.save(clf, 'iris_bdt.onnx')
    return clf, 'iris_bdt.onnx'
def test_robust_scaler_floats_no_scaling(self):
    model = RobustScaler(with_scaling=False)
    data = [[0., 0., 3.], [1., 1., 0.], [0., 2., 1.], [1., 0., 2.]]
    model.fit(data)
    model_onnx = convert_sklearn(model, 'scaler', [('input', FloatTensorType([1, 3]))])
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(numpy.array(data, dtype=numpy.float32), model, model_onnx,
                        basename="SklearnRobustScalerNoScalingFloat32")
def test_max_abs_scaler(self):
    model = MaxAbsScaler()
    data = [[0., 0., 3.], [1., 1., 0.], [0., 2., 1.], [1., 0., 2.]]
    model.fit(data)
    model_onnx = convert_sklearn(model, 'scaler', [('input', FloatTensorType([1, 3]))])
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(numpy.array(data, dtype=numpy.float32), model, model_onnx,
                        basename="SklearnMaxAbsScaler")
def test_standard_scaler(self):
    model = StandardScaler()
    data = [[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]]
    model.fit(data)
    model_onnx = convert_sklearn(model, 'scaler', [('input', Int64TensorType([1, 3]))])
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(numpy.array(data, dtype=numpy.int64), model, model_onnx,
                        basename="SklearnStandardScalerInt64")
def save(self):
    # ONNX save (can't save a list of models for now)
    onx = convert_sklearn(self.pipe, 'Pipe', [('input', StringTensorType([1, 1]))])
    save_model(onx, "Model.onnx")
    print("Model saved")
def test_one_hot_encoder_mixed_float_int(self):
    # categorical_features will be removed in 0.22 (this test will fail by then).
    model = OneHotEncoder()
    model.fit([[0.4, 0.2, 3], [1.4, 1.2, 0], [0.2, 2.2, 1]])
    model_onnx = convert_sklearn(model, 'one-hot encoder mixed-type inputs',
                                 [('input1', FloatTensorType([1, 2])),
                                  ('input2', Int64TensorType([1, 1]))])
    self.assertTrue(model_onnx is not None)
def test_model_binarizer(self):
    model = Binarizer(threshold=0.5)
    model_onnx = convert_sklearn(model, 'scikit-learn binarizer',
                                 [('input', FloatTensorType([1, 1]))])
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(numpy.array([[1, 1]], dtype=numpy.float32), model, model_onnx,
                        basename="SklearnBinarizer-SkipDim1")
def _test_linear(self, classes):
    """
    This helper function tests conversion of `ai.onnx.ml.LinearClassifier`
    which is created from a scikit-learn LogisticRegression.

    This tests `convert_onnx_linear_model` in
    `hummingbird.ml.operator_converters.onnxml_linear`
    """
    n_features = 20
    n_total = 100
    np.random.seed(0)
    warnings.filterwarnings("ignore")
    X = np.random.rand(n_total, n_features)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(classes, size=n_total)

    # Create SKL model for testing
    model = LogisticRegression(solver="liblinear", multi_class="ovr", fit_intercept=True)
    model.fit(X, y)

    # Create ONNX-ML model
    onnx_ml_model = convert_sklearn(
        model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))]
    )

    # Create ONNX model by calling converter
    onnx_model = convert(onnx_ml_model, "onnx", X)

    # Get the predictions for the ONNX-ML model
    session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [
        session.get_outputs()[i].name for i in range(len(session.get_outputs()))
    ]
    onnx_ml_pred = [[] for i in range(len(output_names))]
    inputs = {session.get_inputs()[0].name: X}
    pred = session.run(output_names, inputs)
    for i in range(len(output_names)):
        if output_names[i] == "output_label":
            onnx_ml_pred[1] = pred[i]
        else:
            onnx_ml_pred[0] = pred[i]

    # Get the predictions for the ONNX model
    session = ort.InferenceSession(onnx_model.SerializeToString())
    onnx_pred = [[] for i in range(len(output_names))]
    pred = session.run(output_names, inputs)
    for i in range(len(output_names)):
        if output_names[i] == "output_label":
            onnx_pred[1] = pred[i]
        else:
            onnx_pred[0] = pred[i]

    return onnx_ml_pred, onnx_pred
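# Hedged sketch of callers for _test_linear above, one per class count; the
# test names and tolerances are assumptions, not taken from the original suite.
def test_logistic_regression_onnxml_binary(self):
    onnx_ml_pred, onnx_pred = self._test_linear(2)
    # Compare labels (index 1) and scores/probabilities (index 0)
    np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=1e-06, atol=1e-06)
    np.testing.assert_allclose(onnx_ml_pred[0], onnx_pred[0], rtol=1e-06, atol=1e-06)

def test_logistic_regression_onnxml_multi(self):
    onnx_ml_pred, onnx_pred = self._test_linear(3)
    np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=1e-06, atol=1e-06)
    np.testing.assert_allclose(onnx_ml_pred[0], onnx_pred[0], rtol=1e-06, atol=1e-06)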
def test_model_linear_svc_multi_class(self):
    model, X = self._fit_model_multiclass_classification(LinearSVC())
    model_onnx = convert_sklearn(model, 'multi-class linear SVC',
                                 [('input', FloatTensorType([1, 3]))])
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(X.astype(numpy.float32), model, model_onnx,
                        basename="SklearnLinearSVCMulti")
def test_truncated_svd(self):
    N, C, K = 2, 3, 2
    x = create_tensor(N, C)
    svd = TruncatedSVD(n_components=K)
    svd.fit(x)
    model_onnx = onnxmltools.convert_sklearn(
        svd, initial_types=[('input', FloatTensorType(shape=[1, C]))])
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(x, svd, model_onnx, basename="SklearnTruncatedSVD")
def get_onnx_model(model_format, model, initial_types: list = None, final_types: list = None):
    if model_format == ModelFormat.KERAS:
        return onnxmltools.convert_keras(model)
    if model_format == ModelFormat.SK_LEARN:
        return onnxmltools.convert_sklearn(model, initial_types=initial_types)
    if model_format == ModelFormat.TENSORFLOW:
        return onnxmltools.convert_tensorflow(model)
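# Hypothetical usage sketch for get_onnx_model above; ModelFormat is assumed to
# be an enum defined elsewhere in this module, and fitted_sklearn_model and the
# feature count are illustrative placeholders.
from skl2onnx.common.data_types import FloatTensorType

onnx_model = get_onnx_model(
    ModelFormat.SK_LEARN,
    fitted_sklearn_model,
    initial_types=[('float_input', FloatTensorType([None, 4]))],
)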
def test_model_logistic_regression_multi_class(self):
    model, X = self._fit_model_multiclass_classification(
        linear_model.LogisticRegression())
    model_onnx = convert_sklearn(model, 'maximum entropy classifier',
                                 [('input', FloatTensorType([1, 3]))])
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(X.astype(numpy.float32), model, model_onnx,
                        basename="SklearnLogitisticRegressionMulti")
def test_model_logistic_regression_binary_class(self):
    model, X = self._fit_model_binary_classification(
        linear_model.LogisticRegression())
    model_onnx = convert_sklearn(model, 'logistic regression',
                                 [('input', FloatTensorType([1, 3]))])
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(X.astype(numpy.float32), model, model_onnx,
                        basename="SklearnLogitisticRegressionBinary")
def test_convert_svmr_linear_multi(self):
    model = self._fit_multi_classification(SVR(kernel='linear'))
    node = convert_sklearn(model, 'SVR',
                           [('input', FloatTensorType([1, 1]))]).graph.node[0]
    self.assertIsNotNone(node)
    self._check_attributes(node,
                           {'coefficients': None,
                            'kernel_params': None,
                            'kernel_type': 'LINEAR',
                            'post_transform': None,
                            'rho': None,
                            'support_vectors': None})
def test_convert_nusvmr_binary(self):
    model = self._fit_binary_classification(NuSVR())
    node = convert_sklearn(model, 'SVR',
                           [('input', FloatTensorType([1, 1]))]).graph.node[0]
    self.assertIsNotNone(node)
    self._check_attributes(node,
                           {'coefficients': None,
                            'kernel_params': None,
                            'kernel_type': 'RBF',
                            'post_transform': None,
                            'rho': None,
                            'support_vectors': None})