def test_model_dict_vectorizer_sort_false(self):
    """Convert a dense, unsorted DictVectorizer fitted on integer keys."""
    samples = [{1: 1.0, 2: 200.0}, {1: 3.0, 3: 1.0}]
    vectorizer = DictVectorizer(sparse=False, sort=False)
    vectorizer.fit_transform(samples)

    # The ONNX input is a DictionaryType built from an Int64 key type and
    # a Float value type.
    input_type = DictionaryType(Int64TensorType([1]), FloatTensorType([1]))
    model_onnx = convert_sklearn(
        vectorizer, "dictionary vectorizer", [("input", input_type)])
    self.assertTrue(model_onnx is not None)

    # Expression handed to dump_data_and_model as the allowed-failure rule.
    failure_rule = (
        "StrictVersion(onnxruntime.__version__)"
        " <= StrictVersion('0.1.3') or "
        "StrictVersion(onnx.__version__)"
        " < StrictVersion('1.3.0')"
    )
    dump_data_and_model(
        samples,
        vectorizer,
        model_onnx,
        basename="SklearnDictVectorizerSortFalse-OneOff-SkipDim1",
        allow_failure=failure_rule,
    )
def test_onnx_helper_load_save_init(self):
    """Save and reload a converted pipeline, then select one of its outputs.

    Both the reloaded model and the model returned by
    ``select_model_inputs_outputs`` are run on the training data and must
    produce arrays of shape ``(4, 2)``.
    """
    features = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
    pipeline = make_pipeline(
        Binarizer(),
        OneHotEncoder(sparse=False, handle_unknown="ignore"),
        StandardScaler(),
    )
    pipeline.fit(features)

    input_types = [("input", FloatTensorType([None, 2]))]
    model_onnx = convert_sklearn(pipeline, "pipe3", input_types)
    model_onnx.ir_version = get_ir_version(TARGET_OPSET)

    path = "temp_onnx_helper_load_save.onnx"
    save_onnx_model(model_onnx, path)
    reloaded = load_onnx_model(path)

    # Walk through every enumerated node output; the values are not used.
    for _ in enumerate_model_node_outputs(reloaded):
        pass

    selected = select_model_inputs_outputs(reloaded, "variable")
    self.assertTrue(selected.graph is not None)  # pylint: disable=E1101

    run_reloaded = self.get_model(reloaded)
    run_selected = self.get_model(selected)
    features32 = features.astype(numpy.float32)
    out_reloaded = run_reloaded(features32)
    out_selected = run_selected(features32)
    self.assertEqual(out_reloaded.shape, (4, 2))
    self.assertEqual(out_selected.shape, (4, 2))
def test_model_tfidf_vectorizer11_empty_string_case1(self):
    """Convert a unigram TfidfVectorizer and score a whitespace-only row."""
    documents = [
        'This is the first document.',
        'This document is the second document.',
        'And this is the third one.',
        ' ',
    ]
    corpus = numpy.array(documents).reshape((4, 1))

    vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
    # The vectorizer is fitted on the first three rows only.
    vect.fit(corpus[:3].ravel())

    initial_types = [('input', StringTensorType([1]))]
    model_onnx = convert_sklearn(
        vect, 'TfidfVectorizer', initial_types,
        options=self.get_options(), target_opset=TARGET_OPSET)
    self.assertTrue(model_onnx is not None)

    # Original note: TfidfVectorizer in onnxruntime fails with empty
    # strings, which was fixed in version 0.3.0 afterward; failures on old
    # runtimes are therefore tolerated.
    failure_rule = ("StrictVersion(onnxruntime.__version__)"
                    " <= StrictVersion('0.4.0')")
    dump_data_and_model(
        corpus[2:],
        vect,
        model_onnx,
        basename="SklearnTfidfVectorizer11EmptyStringSepCase1-"
                 "OneOff-SklCol",
        allow_failure=failure_rule)
def test_combine_inputs_floats_ints(self):
    """Convert a two-scaler pipeline fed by one int64 and one float input.

    The same fitted ``StandardScaler`` is used for both pipeline steps.
    The converted model must exist and its last node must expose exactly
    one output; the model is then dumped with one array per declared input.
    """
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline

    rows = [[0, 0.], [0, 0.], [1, 1.], [1, 1.]]
    scaler = StandardScaler()
    scaler.fit(rows)
    model = Pipeline([('scaler1', scaler), ('scaler2', scaler)])

    model_onnx = convert_sklearn(
        model, 'pipeline',
        [('input1', Int64TensorType([1, 1])),
         ('input2', FloatTensorType([1, 1]))])

    # Check for None before touching the graph: the original asserted it
    # only after dereferencing model_onnx, where it could never fail.
    self.assertIsNotNone(model_onnx)
    # assertEqual reports the actual length on failure.
    self.assertEqual(len(model_onnx.graph.node[-1].output), 1)

    # One column per declared ONNX input, cast to the declared dtype.
    matrix = numpy.array(rows)
    inputs = {
        'input1': matrix[:, 0].astype(numpy.int64),
        'input2': matrix[:, 1].astype(numpy.float32),
    }
    dump_data_and_model(inputs, PipeConcatenateInput(model), model_onnx,
                        basename="SklearnPipelineScalerMixed-OneOff")
def test_model_multi_class_nocl(self):
    """Convert an SGDClassifier with the 'nocl' option enabled.

    The textual form of the converted model must contain neither
    'classlabels_strings' nor 'cl0'.
    """
    estimator = SGDClassifier(loss='log', random_state=42)
    model, X = fit_classification_model(estimator, 2, label_string=True)

    n_features = X.shape[1]
    model_onnx = convert_sklearn(
        model,
        "multi-class nocl",
        [("input", FloatTensorType([None, n_features]))],
        options={id(model): {'nocl': True}},
        target_opset=TARGET_OPSET)
    self.assertIsNotNone(model_onnx)

    model_text = str(model_onnx)
    for forbidden in ('classlabels_strings', 'cl0'):
        assert forbidden not in model_text

    dump_data_and_model(
        X[6:8], model, model_onnx,
        classes=model.classes_,
        basename="SklearnSGDMultiNoCl",
        verbose=False)
def test_simple_imputer_float_inputs(self):
    """Convert a mean SimpleImputer declared with a float input."""
    data = [[1, 2], [np.nan, 3], [7, 6]]
    imputer = SimpleImputer(strategy="mean", fill_value="nan")
    imputer.fit(data)

    input_types = [("input", FloatTensorType([None, 2]))]
    model_onnx = convert_sklearn(
        imputer, "scikit-learn simple imputer", input_types,
        target_opset=TARGET_OPSET)

    graph = model_onnx.graph
    self.assertTrue(graph.node is not None)
    # The converted graph is expected to hold exactly one node.
    self.assertEqual(len(graph.node), 1)

    # A single output whose last dimension is 2 is expected.
    outputs = graph.output
    self.assertEqual(len(outputs), 1)
    last_dim = outputs[0].type.tensor_type.shape.dim[-1]
    self.assertEqual(last_dim.dim_value, 2)

    dump_data_and_model(
        np.array(data, dtype=np.float32),
        imputer,
        model_onnx,
        basename="SklearnSimpleImputerMeanFloat32")
def test_convert_svr_linear(self):
    """Convert a linear-kernel SVR and check the first node's attributes."""
    model, X = self._fit_binary_classification(SVR(kernel="linear"))

    input_types = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model, "SVR", input_types, target_opset=TARGET_OPSET)

    nodes = model_onnx.graph.node
    self.assertIsNotNone(nodes)

    # Every attribute maps to None except the kernel type.
    attribute_names = (
        "coefficients",
        "kernel_params",
        "kernel_type",
        "post_transform",
        "rho",
        "support_vectors",
    )
    expected = {name: None for name in attribute_names}
    expected["kernel_type"] = "LINEAR"
    self._check_attributes(nodes[0], expected)

    dump_data_and_model(X, model, model_onnx,
                        basename="SklearnRegSVRLinear-Dec3")
def test_model_multinomial_nb_multiclass_params(self):
    """Convert a MultinomialNB built with alpha=0.5 and fit_prior=False.

    Only the rows whose two largest predicted probabilities differ by at
    least 1e-4 are passed to ``dump_data_and_model``.
    """
    estimator = MultinomialNB(alpha=0.5, fit_prior=False)
    model, X = fit_classification_model(estimator, 5, pos_features=True)

    model_onnx = convert_sklearn(
        model,
        "multinomial naive bayes",
        [("input", FloatTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET,
    )
    self.assertIsNotNone(model_onnx)

    # Sort each row of probabilities and measure the gap between the two
    # last (largest) columns.
    proba = model.predict_proba(X)
    n_columns = proba.shape[1]
    ordered = np.sort(proba, axis=1)
    margin = ordered[:, n_columns - 1] - ordered[:, n_columns - 2]
    keep = margin >= 1e-4

    failure_rule = ("StrictVersion(onnxruntime.__version__)"
                    "<= StrictVersion('0.2.1')")
    dump_data_and_model(
        X[keep],
        model,
        model_onnx,
        basename="SklearnMclMultinomialNBParams-Dec4",
        allow_failure=failure_rule,
    )
def test_imputer_float_inputs(self):
    """Convert a mean Imputer (missing_values="NaN", axis=0) on floats."""
    data = [[1, 2], [np.nan, 3], [7, 6]]
    imputer = Imputer(missing_values="NaN", strategy="mean", axis=0)
    imputer.fit(data)

    input_types = [("input", FloatTensorType([None, 2]))]
    model_onnx = convert_sklearn(
        imputer, "scikit-learn imputer", input_types)

    graph = model_onnx.graph
    self.assertTrue(graph.node is not None)
    # The converted graph is expected to hold exactly one node.
    self.assertEqual(len(graph.node), 1)

    # A single output whose last dimension is 2 is expected.
    outputs = graph.output
    self.assertEqual(len(outputs), 1)
    last_dim = outputs[0].type.tensor_type.shape.dim[-1]
    self.assertEqual(last_dim.dim_value, 2)

    dump_data_and_model(
        np.array(data, dtype=np.float32),
        imputer,
        model_onnx,
        basename="SklearnImputerMeanFloat32",
    )
def test_model_mlp_regressor_tanh(self):
    """Convert an MLPRegressor using the tanh activation (diabetes data)."""
    dataset = load_diabetes()
    X, y = dataset.data, dataset.target
    X_train, X_test, y_train, _ = train_test_split(
        X, y, test_size=0.2, random_state=42)

    regressor = MLPRegressor(random_state=42, activation="tanh")
    model = regressor.fit(X_train, y_train)

    # The declared input shape is the shape of the held-out matrix.
    input_types = [("input", FloatTensorType(X_test.shape))]
    model_onnx = convert_sklearn(
        model, "scikit-learn MLPRegressor", input_types)
    self.assertTrue(model_onnx is not None)

    failure_rule = ("StrictVersion("
                    "onnxruntime.__version__)<= StrictVersion('0.2.1')")
    dump_data_and_model(
        X_test.astype(np.float32),
        model,
        model_onnx,
        basename="SklearnMLPRegressorTanhActivation-Dec4",
        allow_failure=failure_rule,
    )
def test_model_mlp_classifier_multiclass_identity(self):
    """Convert an identity-activation MLPClassifier with an int64 input."""
    dataset = load_digits()
    X, y = dataset.data, dataset.target
    X_train, X_test, y_train, _ = train_test_split(
        X, y, test_size=0.2, random_state=42)

    classifier = MLPClassifier(random_state=42, activation="identity")
    model = classifier.fit(X_train, y_train)

    # The declared input shape is the shape of the held-out matrix.
    input_types = [("input", Int64TensorType(X_test.shape))]
    model_onnx = convert_sklearn(
        model, "scikit-learn MLPClassifier", input_types)
    self.assertTrue(model_onnx is not None)

    failure_rule = ("StrictVersion("
                    "onnxruntime.__version__)<= StrictVersion('0.2.1')")
    dump_data_and_model(
        X_test.astype(np.int64),
        model,
        model_onnx,
        basename="SklearnMLPClassifierMultiClassIdentityActivation",
        allow_failure=failure_rule,
    )
def test_model_mlp_classifier_binary(self):
    """Convert a default MLPClassifier trained on a two-class iris target."""
    dataset = load_iris()
    X, y = dataset.data, dataset.target
    # Collapse every label above 1 into class 1 (in place) so that only
    # labels 0 and 1 remain.
    y[y > 1] = 1
    X_train, X_test, y_train, _ = train_test_split(
        X, y, test_size=0.2, random_state=42)

    model = MLPClassifier(random_state=42).fit(X_train, y_train)

    # The declared input shape is the shape of the held-out matrix.
    input_types = [("input", FloatTensorType(X_test.shape))]
    model_onnx = convert_sklearn(
        model, "scikit-learn MLPClassifier", input_types)
    self.assertTrue(model_onnx is not None)

    failure_rule = ("StrictVersion("
                    "onnxruntime.__version__)<= StrictVersion('0.2.1')")
    dump_data_and_model(
        X_test.astype(np.float32),
        model,
        model_onnx,
        basename="SklearnMLPClassifierBinary",
        allow_failure=failure_rule,
    )
def test_model_count_vectorizer_wrong_ngram(self):
    """Convert a (1, 2)-gram TfidfVectorizer using a custom token pattern."""
    documents = [
        'A AABBB0',
        'AAABB B1',
        'AA ABBB2',
        'AAAB BB3',
        'AAA BBB4',
    ]
    corpus = numpy.array(documents).reshape((5, 1))

    vect = TfidfVectorizer(ngram_range=(1, 2),
                           token_pattern=r"(?u)\b\w\w+\b")
    vect.fit(corpus.ravel())

    initial_types = [('input', StringTensorType([1]))]
    model_onnx = convert_sklearn(vect, 'TfidfVectorizer', initial_types)
    self.assertTrue(model_onnx is not None)

    failure_rule = ("StrictVersion(onnxruntime.__version__) <= "
                    "StrictVersion('0.3.0')")
    dump_data_and_model(
        corpus,
        vect,
        model_onnx,
        basename="SklearnTfidfVectorizer12Wngram-OneOff-SklCol",
        allow_failure=failure_rule)
def test_model_sgd_multi_class_log_l1_no_intercept(self):
    """Convert a log-loss, L1-penalised SGDClassifier without intercept."""
    estimator = SGDClassifier(
        loss='log', penalty='l1', fit_intercept=False, random_state=42)
    model, X = fit_classification_model(estimator, 5)

    # The evaluation batch is the fifth row repeated twice.
    row = X[4]
    X = np.array([row, row])

    model_onnx = convert_sklearn(
        model,
        "scikit-learn SGD multi-class classifier",
        [("input", FloatTensorType([None, X.shape[1]]))],
    )
    self.assertIsNotNone(model_onnx)

    failure_rule = (
        "StrictVersion(onnx.__version__)"
        " < StrictVersion('1.2') or "
        "StrictVersion(onnxruntime.__version__)"
        " <= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        X.astype(np.float32),
        model,
        model_onnx,
        basename="SklearnSGDClassifierMultiLogL1NoIntercept-Dec4",
        allow_failure=failure_rule,
    )
def test_model_knn_regressor_double(self):
    """Convert a 2-neighbour KNeighborsRegressor with float64 tensors.

    The test returns early when onnxruntime reports that it has no
    implementation for the ``To_TopK:TopK(11)`` node.
    """
    model, X = self._fit_model(KNeighborsRegressor(n_neighbors=2))
    model_onnx = convert_sklearn(
        model,
        "KNN regressor",
        [("input", DoubleTensorType([None, 4]))],
        target_opset=TARGET_OPSET,
        options={id(model): {'optim': 'cdist'}},
        dtype=numpy.float64)
    self.assertIsNotNone(model_onnx)

    missing_kernel = ("Could not find an implementation for the node "
                      "To_TopK:TopK(11)")
    try:
        InferenceSession(model_onnx.SerializeToString())
    except OrtImpl as e:
        if missing_kernel not in str(e):
            raise e
        # onnxruntime does not declare TopK(11) for double
        return

    dump_data_and_model(
        X.astype(numpy.float64)[:7],
        model,
        model_onnx,
        basename="SklearnKNeighborsRegressor64")
def test_model_tfidf_vectorizer11_empty_string_case2(self):
    """Convert a unigram TfidfVectorizer fitted on data with an empty row."""
    documents = [
        "This is the first document.",
        "This document is the second document.",
        "And this is the third one.",
        "",
    ]
    corpus = numpy.array(documents).reshape((4, 1))

    vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
    vect.fit(corpus.ravel())

    initial_types = [("input", StringTensorType([1]))]
    model_onnx = convert_sklearn(
        vect, "TfidfVectorizer", initial_types,
        options=self.get_options(), target_opset=TARGET_OPSET)
    self.assertTrue(model_onnx is not None)

    # Original note: onnxruntime fails with empty strings, so failures on
    # old runtimes are tolerated.
    failure_rule = ("StrictVersion(onnxruntime.__version__)"
                    " <= StrictVersion('0.4.0')")
    dump_data_and_model(
        corpus,
        vect,
        model_onnx,
        basename="SklearnTfidfVectorizer11EmptyString-OneOff-SklCol",
        allow_failure=failure_rule,
    )
def test_model_dict_vectorizer(self):
    """Convert a default DictVectorizer fitted on string keys."""
    samples = [{"amy": 1.0, "chin": 200.0}, {"nice": 3.0, "amy": 1.0}]
    vectorizer = DictVectorizer()
    vectorizer.fit_transform(samples)

    # The ONNX input is a DictionaryType built from a String key type and
    # a Float value type.
    input_type = DictionaryType(StringTensorType([1]), FloatTensorType([1]))
    model_onnx = convert_sklearn(
        vectorizer, "dictionary vectorizer", [("input", input_type)])
    self.assertTrue(model_onnx is not None)

    failure_rule = (
        "StrictVersion(onnxruntime.__version__)"
        " <= StrictVersion('0.1.3') or "
        "StrictVersion(onnx.__version__)"
        " < StrictVersion('1.3.0')"
    )
    dump_data_and_model(
        samples,
        vectorizer,
        model_onnx,
        basename="SklearnDictVectorizer-OneOff-SkipDim1",
        allow_failure=failure_rule,
    )
def test_model_mlp_regressor_default(self):
    """Convert a default MLPRegressor for several opsets and compare.

    For every candidate opset not above ``TARGET_OPSET`` the model is
    converted, loaded in an ``InferenceSession`` and its single output is
    compared with the scikit-learn prediction to 4 decimals. Some
    conversion and loading errors are skipped (see inline comments).
    """
    model, X_test = fit_regression_model(MLPRegressor(random_state=42))
    expected = model.predict(X_test)

    candidate_opsets = (1, 2, 7, 8, 9, 10, 11, 12, 13, onnx_opset_version())
    for opv in candidate_opsets:
        if opv is not None and opv > TARGET_OPSET:
            continue
        with self.subTest(opv=opv):
            try:
                onx = convert_sklearn(
                    model,
                    "scikit-learn MLPRegressor",
                    [("input", FloatTensorType([None, X_test.shape[1]]))],
                    target_opset=opv)
            except RuntimeError as e:
                # Skip opsets the installed onnx package does not reach.
                too_recent = ("is higher than the number of the "
                              "installed onnx package")
                if too_recent not in str(e):
                    raise e
                continue

            serialized = onx.SerializeToString()
            try:
                ort = InferenceSession(serialized)
            except (RuntimeError, InvalidGraph, Fail) as e:
                # Load failures are skipped for opsets None/1/2, for
                # opsets at or above the installed onnx opset, and when
                # the runtime lacks a Cast(9) kernel (too old onnxruntime).
                skipped = (
                    opv in (None, 1, 2)
                    or opv >= onnx_opset_version()
                    or ("No suitable kernel definition found for "
                        "op Cast(9)") in str(e)
                )
                if skipped:
                    continue
                raise AssertionError(
                    "Unable to load opv={}\n---\n{}\n---".format(
                        opv, onx)) from e

            outputs = ort.run(None, {'input': X_test})
            assert len(outputs) == 1
            assert_almost_equal(
                expected.ravel(), outputs[0].ravel(), decimal=4)
def test_onnxruntime_shapes_clr(self):
    """Check input/output shapes reported for a RandomForestClassifier.

    The model is converted with ``zipmap`` disabled. Shapes reported by
    the onnxruntime session are checked first, then the shapes produced
    by ONNX shape inference when inference succeeds.
    """
    iris = load_iris()
    X_train, X_test, y_train, _ = train_test_split(iris.data, iris.target)
    clr = RandomForestClassifier(max_depth=1)
    clr.fit(X_train, y_train)

    initial_type = [('float_input', FloatTensorType([None, 4]))]
    onx = convert_sklearn(
        clr,
        initial_types=initial_type,
        options={id(clr): {'zipmap': False}},
        target_opset=TARGET_OPSET)

    sess = rt.InferenceSession(onx.SerializeToString())
    input_name = sess.get_inputs()[0].name
    pred_onx = sess.run(None, {input_name: X_test.astype(numpy.float32)})

    input_shape = sess.get_inputs()[0].shape
    label_shape = sess.get_outputs()[0].shape
    assert input_shape == [None, 4]
    assert label_shape in ([None, 1], [1], [None])
    first_prediction = pred_onx[0]
    if len(first_prediction.shape) > 1:
        assert first_prediction.shape[1] == label_shape[1]

    try:
        inferred = onnx.shape_inference.infer_shapes(onx)
    except RuntimeError:
        # Shape inference does not work?
        inferred = None
    if inferred is None:
        # Nothing more to verify without inferred shapes.
        return

    def dim_values(graph_output):
        # List the dim_value of every dimension of a graph output.
        dims = graph_output.type.tensor_type.shape.dim
        return [d.dim_value for d in dims]

    self.assertIn(dim_values(inferred.graph.output[0]), (None, [0]))
    self.assertIn(dim_values(inferred.graph.output[1]), (None, [0, 3]))
def test_grid_search(self):
    """Grid-search a pipeline whose first step is an OnnxTransformer.

    A PCA is converted to ONNX and wrapped in an ``OnnxTransformer``
    placed in front of a LogisticRegression; the penalty is searched
    over 'l2' and 'l1'.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    pca = PCA(n_components=2)
    pca.fit(X_train)
    input_type = FloatTensorType((1, X.shape[1]))
    onx = convert_sklearn(pca, initial_types=[('input', input_type)])
    onx_bytes = onx.SerializeToString()

    pipe = make_pipeline(
        OnnxTransformer(onx_bytes),
        LogisticRegression(solver='liblinear'))
    param_grid = [{'logisticregression__penalty': ['l2', 'l1']}]
    clf = GridSearchCV(pipe, param_grid, cv=3)
    clf.fit(X_train, y_train)

    # Either penalty is an acceptable winner.
    acceptable = tuple(
        {'logisticregression__penalty': penalty}
        for penalty in ('l1', 'l2'))
    self.assertIn(clf.best_params_, acceptable)

    # A fresh transformer built from the same bytes must agree with the
    # first step of the best pipeline.
    reference = OnnxTransformer(onx_bytes)
    reference.fit()
    best_transformer = clf.best_estimator_.steps[0][1]
    self.assertEqualArray(
        reference.transform(X_test),
        best_transformer.transform(X_test))

    predictions = clf.predict(X_test)
    report = classification_report(y_test, predictions)
    self.assertIn('precision', report)
    score = clf.score(X_test, y_test)
    self.assertGreater(score, 0.70)
def fcts_model(X, y, fit_intercept):
    """Fit a LogisticRegression and build sklearn / onnxruntime predictors.

    The fitted model is converted with ``convert_sklearn`` and loaded in
    an ``InferenceSession``.

    :param X: training features; ``X.shape[1]`` gives the number of
        columns declared for the ONNX input named ``'X'``
    :param y: training targets
    :param fit_intercept: forwarded to ``LogisticRegression``
    :return: dictionary with keys ``'predict'`` and ``'predict_proba'``,
        each mapped to a pair ``(scikit-learn function,
        onnxruntime function)``
    """
    rf = LogisticRegression(fit_intercept=fit_intercept)
    rf.fit(X, y)

    initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
    onx = convert_sklearn(rf, initial_types=initial_types)
    # The serialized bytes go straight to the session; the original
    # round-trip through BytesIO produced the very same bytes.
    sess = InferenceSession(onx.SerializeToString())
    outputs = [o.name for o in sess.get_outputs()]

    # The default arguments bind the fitted model and the session; the
    # functions use these parameters instead of silently ignoring them.
    def predict_skl_predict(X, model=rf):
        return model.predict(X)

    def predict_skl_predict_proba(X, model=rf):
        return model.predict_proba(X)

    def predict_onnxrt_predict(X, sess=sess):
        # Only the first session output is requested.
        return numpy.array(
            sess.run(outputs[:1], {'X': X.astype(numpy.float32)}))

    def predict_onnxrt_predict_proba(X, sess=sess):
        # Outputs after the first one are requested; the first of them is
        # iterated as a sequence of mappings (one per row).
        res = sess.run(outputs[1:], {'X': X.astype(numpy.float32)})[0]
        # do not use DataFrame to convert the output into array,
        # it takes too much time
        out = numpy.empty((len(res), len(res[0])), dtype=numpy.float32)
        for i, row in enumerate(res):
            for k, v in row.items():
                out[i, k] = v
        return out

    return {'predict': (predict_skl_predict,
                        predict_onnxrt_predict),
            'predict_proba': (predict_skl_predict_proba,
                              predict_onnxrt_predict_proba)}
def test_pipeline_pca_pipeline_multinomial(self):
    """Convert a PCA followed by a nested MinMaxScaler/MultinomialNB pipeline."""
    pca = PCA(copy=True, iterated_power='auto', n_components=2,
              random_state=None, svd_solver='auto', tol=0.0, whiten=False)
    scaler = MinMaxScaler(copy=True, feature_range=(0, 3.7209871159509307))
    naive_bayes = MultinomialNB(
        alpha=0.7368421052631579, class_prior=None, fit_prior=True)

    # The scaler and the classifier live in their own inner pipeline.
    inner = Pipeline(
        memory=None,
        steps=[('MinMax scaler', scaler), ('MultinomialNB', naive_bayes)])
    model = Pipeline(memory=None, steps=[('PCA', pca), ('Pipeline', inner)])

    data = np.array(
        [[0, 0, 0], [0, 0, 0.1], [1, 1, 1.1], [1, 1.1, 1]],
        dtype=np.float32)
    y = [0, 0, 1, 1]
    model.fit(data, y)

    input_types = [('input', FloatTensorType(data.shape))]
    model_onnx = convert_sklearn(
        model, 'pipelinewithinpipeline', input_types)
    self.assertTrue(model_onnx is not None)

    failure_rule = (
        "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.2.1')")
    dump_data_and_model(
        data,
        model,
        model_onnx,
        basename="SklearnPipelinePcaPipelineMinMaxNB2",
        allow_failure=failure_rule)
def test_model_multi_class_nocl(self):
    """Convert a RandomForestClassifier with the 'nocl' option enabled.

    The textual form of the converted model must contain neither
    'classlabels_strings' nor 'cl0'.
    """
    estimator = RandomForestClassifier(random_state=42)
    model, X = fit_classification_model(estimator, 2, label_string=True)

    n_features = X.shape[1]
    model_onnx = convert_sklearn(
        model,
        "multi-class nocl",
        [("input", FloatTensorType([None, n_features]))],
        options={id(model): {'nocl': True}})
    self.assertIsNotNone(model_onnx)

    model_text = str(model_onnx)
    for forbidden in ('classlabels_strings', 'cl0'):
        assert forbidden not in model_text

    failure_rule = (
        "StrictVersion(onnx.__version__)"
        " < StrictVersion('1.2') or "
        "StrictVersion(onnxruntime.__version__)"
        " <= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        X, model, model_onnx,
        classes=model.classes_,
        basename="SklearnRFMultiNoCl",
        verbose=False,
        allow_failure=failure_rule)
def test_model_logistic_regression_multi_class_saga_elasticnet(self):
    """Convert a multi-class saga LogisticRegression.

    The elasticnet penalty (``l1_ratio=0.1``) is only requested when the
    scikit-learn version is at least 0.21.0; older versions get a plain
    saga model.
    """
    if _sklearn_version() < StrictVersion('0.21.0'):
        estimator = linear_model.LogisticRegression(solver='saga')
    else:
        estimator = linear_model.LogisticRegression(
            solver='saga', penalty='elasticnet', l1_ratio=0.1)
    model, X = self._fit_model_multiclass_classification(estimator)

    model_onnx = convert_sklearn(
        model,
        "multi-class logistic regression",
        [("input", FloatTensorType([1, 3]))],
    )
    self.assertIsNotNone(model_onnx)

    failure_rule = (
        "StrictVersion(onnx.__version__)"
        " < StrictVersion('1.2') or "
        "StrictVersion(onnxruntime.__version__)"
        " <= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        X.astype(numpy.float32),
        model,
        model_onnx,
        basename="SklearnLogitisticRegressionMultiSagaElasticnet",
        allow_failure=failure_rule,
    )
def test_model_tfidf_vectorizer11_word4(self):
    """Convert a unigram TfidfVectorizer with pattern ``[a-zA-Z]{1,4}``."""
    documents = [
        'This is the first document.',
        'This document is the second document.',
        'And this is the third one.',
        'Is this the first document?',
    ]
    corpus = numpy.array(documents).reshape((4, 1))

    vect = TfidfVectorizer(
        ngram_range=(1, 1), norm=None, token_pattern="[a-zA-Z]{1,4}")
    vect.fit(corpus.ravel())

    initial_types = [('input', StringTensorType([1]))]
    model_onnx = convert_sklearn(
        vect, 'TfidfVectorizer', initial_types,
        options=self.get_options(), target_opset=TARGET_OPSET)
    self.assertTrue(model_onnx is not None)

    failure_rule = ("StrictVersion(onnxruntime.__version__) <= "
                    "StrictVersion('0.4.0')")
    dump_data_and_model(
        corpus,
        vect,
        model_onnx,
        basename="SklearnTfidfVectorizer11Regex4-OneOff-SklCol",
        allow_failure=failure_rule)
def test_model_one_hot_encoder(self):
    """Convert a default OneHotEncoder fitted on int64 data."""
    # Original note: categorical_features will be removed in 0.22 (this
    # test will fail by then). scikit-learn emits a FutureWarning saying
    # that the handling of integer data will change in version 0.22:
    # categories are currently determined from the range [0, max(values)]
    # and will later be determined from the unique values. Specifying
    # "categories='auto'" selects the future behaviour and silences the
    # warning.
    data = numpy.array(
        [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
        dtype=numpy.int64)
    encoder = OneHotEncoder()
    encoder.fit(data)

    input_types = [("input", Int64TensorType([1, 3]))]
    model_onnx = convert_sklearn(
        encoder, "scikit-learn one-hot encoder", input_types)
    self.assertTrue(model_onnx is not None)

    dump_data_and_model(
        data,
        encoder,
        model_onnx,
        basename="SklearnOneHotEncoderInt64-SkipDim1",
    )
def test_ada_boost_classifier_samme_r_decision_function(self):
    """Convert a SAMME.R AdaBoostClassifier with raw scores enabled.

    Both ``predict`` and ``decision_function`` are listed in the methods
    handed to ``dump_data_and_model``.
    """
    weak_learner = DecisionTreeClassifier(max_depth=2, random_state=42)
    booster = AdaBoostClassifier(
        n_estimators=10,
        algorithm="SAMME.R",
        random_state=42,
        base_estimator=weak_learner)
    model, X_test = fit_classification_model(booster, 4)

    options = {id(model): {'raw_scores': True}}
    input_types = [("input", FloatTensorType((None, X_test.shape[1])))]
    model_onnx = convert_sklearn(
        model,
        "AdaBoost classification",
        input_types,
        target_opset=10,
        options=options,
    )
    self.assertIsNotNone(model_onnx)

    failure_rule = ("StrictVersion("
                    "onnxruntime.__version__)"
                    "<= StrictVersion('0.2.1')")
    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnAdaBoostClassifierSAMMERDecisionFunction",
        allow_failure=failure_rule,
        methods=['predict', 'decision_function'],
    )
def test_model_tfidf_vectorizer11_out_vocabulary(self):
    """Score a unigram TfidfVectorizer on text with extra leading tokens.

    The vectorizer is fitted on four sentences and evaluated on the same
    sentences prefixed with tokens absent from the training corpus.
    """
    sentences = [
        'This is the first document.',
        'This document is the second document.',
        'And this is the third one.',
        'Is this the first document?',
    ]
    training_corpus = numpy.array(sentences).reshape((4, 1))

    vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
    vect.fit(training_corpus.ravel())

    initial_types = [('input', StringTensorType([1]))]
    model_onnx = convert_sklearn(
        vect, 'TfidfVectorizer', initial_types,
        options=self.get_options())
    self.assertTrue(model_onnx is not None)

    scoring_corpus = numpy.array([
        'AZZ ZZ This is the first document.',
        'BZZ ZZ This document is the second document.',
        'ZZZ ZZ And this is the third one.',
        'WZZ ZZ Is this the first document?',
    ]).reshape((4, 1))

    failure_rule = ("StrictVersion(onnxruntime.__version__) <= "
                    "StrictVersion('0.4.0')")
    dump_data_and_model(
        scoring_corpus,
        vect,
        model_onnx,
        basename="SklearnTfidfVectorizer11OutVocabRegex-OneOff-SklCol",
        allow_failure=failure_rule)
def test_model_logistic_linear_discriminant_analysis_decfunc3(self):
    """Convert a three-class LinearDiscriminantAnalysis with raw scores.

    Both ``predict`` and ``decision_function`` are listed in the methods
    handed to ``dump_data_and_model``.
    """
    X_train = np.array(
        [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    y_train = np.array([1, 1, 1, 2, 2, 3])
    X_test = np.array([[-0.8, -1], [0, 1]], dtype=np.float32)

    model = LinearDiscriminantAnalysis().fit(X_train, y_train)

    input_types = [("input", FloatTensorType([None, X_test.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "linear model",
        input_types,
        options={id(model): {'raw_scores': True}})
    self.assertIsNotNone(model_onnx)

    # Original note: operator cast-1 is not implemented in onnxruntime.
    failure_rule = (
        "StrictVersion(onnx.__version__)"
        " < StrictVersion('1.3') or "
        "StrictVersion(onnxruntime.__version__)"
        " <= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnLinearDiscriminantAnalysisBinRawScore3-Out0",
        allow_failure=failure_rule,
        methods=['predict', 'decision_function'])
def common_test_model_hgb_classifier(self, add_nan=False, n_classes=2):
    """Convert a HistGradientBoostingClassifier and dump five test rows.

    :param add_nan: when true, NaN values are written at randomly drawn
        (row, column) positions of the generated data before fitting
    :param n_classes: number of classes passed to ``make_classification``
    """
    model = HistGradientBoostingClassifier(max_iter=5, max_depth=2)
    X, y = make_classification(n_features=10, n_samples=1000,
                               n_informative=4, n_classes=n_classes,
                               random_state=42)
    if add_nan:
        # A locally seeded generator keeps the NaN positions identical
        # from one run to the next and leaves the global numpy random
        # state untouched (the original drew from the unseeded global
        # generator, which made this variant non-reproducible).
        rng = numpy.random.RandomState(42)
        n_missing = X.shape[0] // 3
        # NOTE(review): randint excludes its upper bound, so the last
        # row and the last column never receive a NaN - confirm whether
        # that is intended.
        rows = rng.randint(0, X.shape[0] - 1, n_missing)
        cols = rng.randint(0, X.shape[1] - 1, n_missing)
        X[rows, cols] = numpy.nan

    X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5,
                                                   random_state=42)
    model.fit(X_train, y_train)

    model_onnx = convert_sklearn(
        model, "unused", [("input", FloatTensorType([None, X.shape[1]]))])
    self.assertIsNotNone(model_onnx)

    # Only the first five held-out rows, cast to float32, are dumped.
    X_test = X_test.astype(numpy.float32)[:5]
    dump_data_and_model(X_test, model, model_onnx, folder=self.folder)