def test_model_dict_vectorizer_sort_false(self):
     """Convert a ``sort=False`` DictVectorizer with integer keys to ONNX.

     The training dictionaries, the fitted model and the converted graph
     are then handed to ``dump_data_and_model``.
     """
     samples = [{1: 1.0, 2: 200.0}, {1: 3.0, 3: 1.0}]
     model = DictVectorizer(sparse=False, sort=False)
     model.fit_transform(samples)

     # A single dictionary input mapping int64 keys to float values.
     input_type = DictionaryType(Int64TensorType([1]), FloatTensorType([1]))
     model_onnx = convert_sklearn(
         model, "dictionary vectorizer", [("input", input_type)])
     self.assertTrue(model_onnx is not None)

     # Condition string forwarded through ``allow_failure``.
     failure_condition = (
         "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.1.3') "
         "or StrictVersion(onnx.__version__) < StrictVersion('1.3.0')")
     dump_data_and_model(
         samples,
         model,
         model_onnx,
         basename="SklearnDictVectorizerSortFalse-OneOff-SkipDim1",
         allow_failure=failure_condition,
     )
示例#2
0
 def test_onnx_helper_load_save_init(self):
     """Save, reload and sub-select a converted three-step pipeline.

     Both the reloaded model and the model restricted to the output named
     ``'variable'`` must produce arrays of shape ``(4, 2)``.
     """
     X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
     pipeline = make_pipeline(
         Binarizer(),
         OneHotEncoder(sparse=False, handle_unknown='ignore'),
         StandardScaler())
     pipeline.fit(X)

     initial_types = [('input', FloatTensorType([None, 2]))]
     model_onnx = convert_sklearn(pipeline, 'pipe3', initial_types)
     model_onnx.ir_version = get_ir_version(TARGET_OPSET)

     # Round-trip the converted model through a file.
     filename = "temp_onnx_helper_load_save.onnx"
     save_onnx_model(model_onnx, filename)
     loaded = load_onnx_model(filename)

     # The enumeration result is discarded; it only has to run.
     list(enumerate_model_node_outputs(loaded))
     selected = select_model_inputs_outputs(loaded, 'variable')
     self.assertTrue(selected.graph is not None)  # pylint: disable=E1101

     run_loaded = self.get_model(loaded)
     run_selected = self.get_model(selected)
     X32 = X.astype(numpy.float32)
     results = (run_loaded(X32), run_selected(X32))
     for result in results:
         self.assertEqual(result.shape, (4, 2))
    def test_model_tfidf_vectorizer11_empty_string_case1(self):
        """Convert a unigram TfidfVectorizer and feed it a blank document.

        The vectorizer is fitted on the first three sentences only; the
        last two rows (one sentence, one single-space string) are passed
        to ``dump_data_and_model``.
        """
        documents = [
            'This is the first document.',
            'This document is the second document.',
            'And this is the third one.',
            ' ',
        ]
        corpus = numpy.array(documents).reshape((4, 1))
        vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
        vect.fit(corpus[:3].ravel())

        model_onnx = convert_sklearn(
            vect, 'TfidfVectorizer',
            [('input', StringTensorType([1]))],
            options=self.get_options(),
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)

        # TfidfVectorizer in onnxruntime used to fail on empty strings;
        # this was fixed after version 0.3.0, hence the allowed failure.
        failure_condition = (
            "StrictVersion(onnxruntime.__version__) "
            "<= StrictVersion('0.4.0')")
        dump_data_and_model(
            corpus[2:], vect, model_onnx,
            basename="SklearnTfidfVectorizer11EmptyStringSepCase1"
                     "-OneOff-SklCol",
            allow_failure=failure_condition)
示例#4
0
    def test_combine_inputs_floats_ints(self):
        """Convert a two-scaler pipeline fed by an int64 and a float input.

        The same fitted ``StandardScaler`` is used for both pipeline steps.
        The converted graph must end with a node producing a single output;
        the data is then split column-wise into the two named inputs and
        handed to ``dump_data_and_model``.
        """
        from sklearn.preprocessing import StandardScaler
        from sklearn.pipeline import Pipeline

        data = [[0, 0.], [0, 0.], [1, 1.], [1, 1.]]
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([('scaler1', scaler), ('scaler2', scaler)])

        model_onnx = convert_sklearn(model, 'pipeline',
                                     [('input1', Int64TensorType([1, 1])),
                                      ('input2', FloatTensorType([1, 1]))])
        # Check for None before touching ``.graph`` so that a failed
        # conversion is reported as an assertion failure rather than as an
        # AttributeError raised by the next check.
        self.assertIsNotNone(model_onnx)
        self.assertEqual(len(model_onnx.graph.node[-1].output), 1)

        # One column per declared input, cast to the declared element type.
        columns = numpy.array(data)
        inputs = {
            'input1': columns[:, 0].astype(numpy.int64),
            'input2': columns[:, 1].astype(numpy.float32),
        }
        dump_data_and_model(inputs,
                            PipeConcatenateInput(model),
                            model_onnx,
                            basename="SklearnPipelineScalerMixed-OneOff")
示例#5
0
 def test_model_multi_class_nocl(self):
     """Convert an SGDClassifier with the ``nocl`` option enabled.

     The textual form of the converted model must contain neither
     ``classlabels_strings`` nor ``cl0``.
     """
     estimator = SGDClassifier(loss='log', random_state=42)
     model, X = fit_classification_model(estimator, 2, label_string=True)

     conversion_options = {id(model): {'nocl': True}}
     model_onnx = convert_sklearn(
         model,
         "multi-class nocl",
         [("input", FloatTensorType([None, X.shape[1]]))],
         options=conversion_options,
         target_opset=TARGET_OPSET)
     self.assertIsNotNone(model_onnx)

     model_text = str(model_onnx)
     assert 'classlabels_strings' not in model_text
     assert 'cl0' not in model_text

     dump_data_and_model(
         X[6:8], model, model_onnx,
         classes=model.classes_,
         basename="SklearnSGDMultiNoCl",
         verbose=False)
    def test_simple_imputer_float_inputs(self):
        """Convert a mean-strategy SimpleImputer and inspect the graph.

        The converted graph must hold exactly one node and one output
        whose last dimension is 2.
        """
        data = [[1, 2], [np.nan, 3], [7, 6]]
        model = SimpleImputer(strategy="mean", fill_value="nan")
        model.fit(data)

        model_onnx = convert_sklearn(
            model,
            "scikit-learn simple imputer",
            [("input", FloatTensorType([None, 2]))],
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx.graph.node is not None)

        # the graph should contain a single node
        node_count = len(model_onnx.graph.node)
        self.assertEqual(node_count, 1)

        # a single output whose last dimension has size 2
        outputs = model_onnx.graph.output
        self.assertEqual(len(outputs), 1)
        last_dim = outputs[0].type.tensor_type.shape.dim[-1]
        self.assertEqual(last_dim.dim_value, 2)

        float_data = np.array(data, dtype=np.float32)
        dump_data_and_model(float_data, model, model_onnx,
                            basename="SklearnSimpleImputerMeanFloat32")
示例#7
0
 def test_convert_svr_linear(self):
     """Convert a linear-kernel SVR and check the first node's attributes."""
     model, X = self._fit_binary_classification(SVR(kernel="linear"))
     initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
     model_onnx = convert_sklearn(
         model, "SVR", initial_types, target_opset=TARGET_OPSET)

     nodes = model_onnx.graph.node
     self.assertIsNotNone(nodes)

     # Only ``kernel_type`` is given a concrete expected value.
     expected_attributes = {
         "coefficients": None,
         "kernel_params": None,
         "kernel_type": "LINEAR",
         "post_transform": None,
         "rho": None,
         "support_vectors": None,
     }
     self._check_attributes(nodes[0], expected_attributes)

     dump_data_and_model(
         X, model, model_onnx, basename="SklearnRegSVRLinear-Dec3")
 def test_model_multinomial_nb_multiclass_params(self):
     """Convert a MultinomialNB with custom ``alpha`` and ``fit_prior``.

     Only the samples whose two highest predicted probabilities differ by
     at least ``1e-4`` are passed to ``dump_data_and_model``.
     """
     estimator = MultinomialNB(alpha=0.5, fit_prior=False)
     model, X = fit_classification_model(estimator, 5, pos_features=True)
     initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
     model_onnx = convert_sklearn(
         model, "multinomial naive bayes", initial_types,
         target_opset=TARGET_OPSET)
     self.assertIsNotNone(model_onnx)

     # Sort each row so the two largest probabilities sit in the last
     # two columns, then keep rows where they are far enough apart.
     probabilities = model.predict_proba(X)
     n_columns = probabilities.shape[1]
     ordered = np.sort(probabilities, axis=1)
     margin = ordered[:, n_columns - 1] - ordered[:, n_columns - 2]
     keep = margin >= 1e-4

     failure_condition = (
         "StrictVersion(onnxruntime.__version__)<= StrictVersion('0.2.1')")
     dump_data_and_model(
         X[keep],
         model,
         model_onnx,
         basename="SklearnMclMultinomialNBParams-Dec4",
         allow_failure=failure_condition,
     )
    def test_imputer_float_inputs(self):
        """Convert a mean-strategy ``Imputer`` and inspect the ONNX graph.

        The converted graph must hold exactly one node and one output
        whose last dimension is 2.
        """
        data = [[1, 2], [np.nan, 3], [7, 6]]
        model = Imputer(missing_values="NaN", strategy="mean", axis=0)
        model.fit(data)

        initial_types = [("input", FloatTensorType([None, 2]))]
        model_onnx = convert_sklearn(
            model, "scikit-learn imputer", initial_types)
        graph_nodes = model_onnx.graph.node
        self.assertTrue(graph_nodes is not None)

        # the graph should contain a single node
        self.assertEqual(len(model_onnx.graph.node), 1)

        # a single output whose last dimension has size 2
        outputs = model_onnx.graph.output
        self.assertEqual(len(outputs), 1)
        last_dim = outputs[0].type.tensor_type.shape.dim[-1]
        self.assertEqual(last_dim.dim_value, 2)

        float_data = np.array(data, dtype=np.float32)
        dump_data_and_model(
            float_data, model, model_onnx,
            basename="SklearnImputerMeanFloat32")
示例#10
0
 def test_model_mlp_regressor_tanh(self):
     """Convert a tanh-activated MLPRegressor fitted on the diabetes data."""
     dataset = load_diabetes()
     X_train, X_test, y_train, _ = train_test_split(
         dataset.data, dataset.target, test_size=0.2, random_state=42)

     regressor = MLPRegressor(random_state=42, activation="tanh")
     model = regressor.fit(X_train, y_train)

     # The input is declared with the full shape of the held-out split.
     initial_types = [("input", FloatTensorType(X_test.shape))]
     model_onnx = convert_sklearn(
         model, "scikit-learn MLPRegressor", initial_types)
     self.assertTrue(model_onnx is not None)

     failure_condition = (
         "StrictVersion(onnxruntime.__version__)<= StrictVersion('0.2.1')")
     dump_data_and_model(
         X_test.astype(np.float32),
         model,
         model_onnx,
         basename="SklearnMLPRegressorTanhActivation-Dec4",
         allow_failure=failure_condition,
     )
示例#11
0
 def test_model_mlp_classifier_multiclass_identity(self):
     """Convert an identity-activated MLPClassifier with an int64 input.

     The classifier is fitted on the digits dataset and the held-out
     split is cast to ``int64`` before being dumped.
     """
     digits = load_digits()
     X_train, X_test, y_train, _ = train_test_split(
         digits.data, digits.target, test_size=0.2, random_state=42)

     classifier = MLPClassifier(random_state=42, activation="identity")
     model = classifier.fit(X_train, y_train)

     initial_types = [("input", Int64TensorType(X_test.shape))]
     model_onnx = convert_sklearn(
         model, "scikit-learn MLPClassifier", initial_types)
     self.assertTrue(model_onnx is not None)

     failure_condition = (
         "StrictVersion(onnxruntime.__version__)<= StrictVersion('0.2.1')")
     dump_data_and_model(
         X_test.astype(np.int64),
         model,
         model_onnx,
         basename="SklearnMLPClassifierMultiClassIdentityActivation",
         allow_failure=failure_condition,
     )
示例#12
0
 def test_model_mlp_classifier_binary(self):
     """Convert an MLPClassifier fitted on a two-class version of iris."""
     iris = load_iris()
     features, labels = iris.data, iris.target
     # Merge every label above 1 into class 1 to get a binary problem.
     labels[labels > 1] = 1
     X_train, X_test, y_train, _ = train_test_split(
         features, labels, test_size=0.2, random_state=42)

     model = MLPClassifier(random_state=42).fit(X_train, y_train)

     initial_types = [("input", FloatTensorType(X_test.shape))]
     model_onnx = convert_sklearn(
         model, "scikit-learn MLPClassifier", initial_types)
     self.assertTrue(model_onnx is not None)

     failure_condition = (
         "StrictVersion(onnxruntime.__version__)<= StrictVersion('0.2.1')")
     dump_data_and_model(
         X_test.astype(np.float32),
         model,
         model_onnx,
         basename="SklearnMLPClassifierBinary",
         allow_failure=failure_condition,
     )
    def test_model_count_vectorizer_wrong_ngram(self):
        """Convert a (1, 2)-gram TfidfVectorizer with an explicit pattern."""
        documents = [
            'A AABBB0',
            'AAABB B1',
            'AA ABBB2',
            'AAAB BB3',
            'AAA BBB4',
        ]
        corpus = numpy.array(documents).reshape((5, 1))

        vect = TfidfVectorizer(
            ngram_range=(1, 2), token_pattern=r"(?u)\b\w\w+\b")
        vect.fit(corpus.ravel())

        initial_types = [('input', StringTensorType([1]))]
        model_onnx = convert_sklearn(vect, 'TfidfVectorizer', initial_types)
        self.assertTrue(model_onnx is not None)

        failure_condition = (
            "StrictVersion(onnxruntime.__version__) "
            "<= StrictVersion('0.3.0')")
        dump_data_and_model(
            corpus, vect, model_onnx,
            basename="SklearnTfidfVectorizer12Wngram-OneOff-SklCol",
            allow_failure=failure_condition)
 def test_model_sgd_multi_class_log_l1_no_intercept(self):
     """Convert a log-loss, L1-penalised SGDClassifier without intercept.

     The evaluation batch is the fifth sample repeated twice.
     """
     estimator = SGDClassifier(
         loss='log', penalty='l1', fit_intercept=False, random_state=42)
     model, X = fit_classification_model(estimator, 5)
     sample = X[4]
     X = np.array([sample, sample])

     initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
     model_onnx = convert_sklearn(
         model, "scikit-learn SGD multi-class classifier", initial_types)
     self.assertIsNotNone(model_onnx)

     failure_condition = (
         "StrictVersion(onnx.__version__) < StrictVersion('1.2') or "
         "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.2.1')")
     dump_data_and_model(
         X.astype(np.float32),
         model,
         model_onnx,
         basename="SklearnSGDClassifierMultiLogL1NoIntercept-Dec4",
         allow_failure=failure_condition,
     )
 def test_model_knn_regressor_double(self):
     """Convert a 2-neighbour KNN regressor with a float64 input.

     The test returns early when onnxruntime reports that it has no
     ``TopK(11)`` implementation for the converted graph.
     """
     model, X = self._fit_model(KNeighborsRegressor(n_neighbors=2))
     conversion_options = {id(model): {'optim': 'cdist'}}
     model_onnx = convert_sklearn(
         model,
         "KNN regressor",
         [("input", DoubleTensorType([None, 4]))],
         target_opset=TARGET_OPSET,
         options=conversion_options,
         dtype=numpy.float64)
     self.assertIsNotNone(model_onnx)

     missing_kernel = (
         "Could not find an implementation for the node To_TopK:TopK(11)")
     try:
         InferenceSession(model_onnx.SerializeToString())
     except OrtImpl as e:
         if missing_kernel not in str(e):
             raise e
         # onnxruntime does not declare TopK(11) for double
         return

     dump_data_and_model(
         X.astype(numpy.float64)[:7], model, model_onnx,
         basename="SklearnKNeighborsRegressor64")
 def test_model_tfidf_vectorizer11_empty_string_case2(self):
     """Convert a unigram TfidfVectorizer fitted on an empty document.

     The empty string is part of both the fitting and evaluation corpus.
     """
     documents = [
         "This is the first document.",
         "This document is the second document.",
         "And this is the third one.",
         "",
     ]
     corpus = numpy.array(documents).reshape((4, 1))
     vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
     vect.fit(corpus.ravel())

     model_onnx = convert_sklearn(
         vect, "TfidfVectorizer",
         [("input", StringTensorType([1]))],
         options=self.get_options(),
         target_opset=TARGET_OPSET)
     self.assertTrue(model_onnx is not None)

     # onnxruntime fails with empty strings
     failure_condition = (
         "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.4.0')")
     dump_data_and_model(
         corpus, vect, model_onnx,
         basename="SklearnTfidfVectorizer11EmptyString-OneOff-SklCol",
         allow_failure=failure_condition,
     )
 def test_model_dict_vectorizer(self):
     """Convert a default DictVectorizer with string keys to ONNX."""
     samples = [{"amy": 1.0, "chin": 200.0}, {"nice": 3.0, "amy": 1.0}]
     model = DictVectorizer()
     model.fit_transform(samples)

     # A single dictionary input mapping string keys to float values.
     input_type = DictionaryType(StringTensorType([1]), FloatTensorType([1]))
     model_onnx = convert_sklearn(
         model, "dictionary vectorizer", [("input", input_type)])
     self.assertTrue(model_onnx is not None)

     failure_condition = (
         "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.1.3') "
         "or StrictVersion(onnx.__version__) < StrictVersion('1.3.0')")
     dump_data_and_model(
         samples,
         model,
         model_onnx,
         basename="SklearnDictVectorizer-OneOff-SkipDim1",
         allow_failure=failure_condition,
     )
示例#18
0
 def test_model_mlp_regressor_default(self):
     """Convert a default MLPRegressor for several opsets and compare.

     For every candidate opset that does not exceed ``TARGET_OPSET`` the
     model is converted, loaded into an ``InferenceSession`` and run; the
     single output is compared with ``model.predict`` to 4 decimals.
     Each opset runs in its own ``subTest``.
     """
     model, X_test = fit_regression_model(MLPRegressor(random_state=42))
     # Reference predictions from scikit-learn.
     exp = model.predict(X_test)
     for opv in (1, 2, 7, 8, 9, 10, 11, 12, 13, onnx_opset_version()):
         # Skip opsets above the one targeted by this test module.
         if opv is not None and opv > TARGET_OPSET:
             continue
         with self.subTest(opv=opv):
             try:
                 onx = convert_sklearn(
                     model,
                     "scikit-learn MLPRegressor",
                     [("input", FloatTensorType([None, X_test.shape[1]]))],
                     target_opset=opv)
             except RuntimeError as e:
                 # The requested opset is newer than what the installed
                 # onnx package supports: skip this opset.
                 if ("is higher than the number of the "
                         "installed onnx package") in str(e):
                     continue
                 raise e
             as_string = onx.SerializeToString()
             try:
                 ort = InferenceSession(as_string)
             except (RuntimeError, InvalidGraph, Fail) as e:
                 # Load failures are tolerated for opsets 1 and 2, for
                 # opsets at or above the installed onnx opset, and for
                 # the missing Cast(9) kernel; anything else is an error.
                 if opv in (None, 1, 2):
                     continue
                 if opv >= onnx_opset_version():
                     continue
                 if ("No suitable kernel definition found for "
                         "op Cast(9)") in str(e):
                     # too old onnxruntime
                     continue
                 raise AssertionError(
                     "Unable to load opv={}\n---\n{}\n---".format(
                         opv, onx)) from e
             res_out = ort.run(None, {'input': X_test})
             # The converted regressor must expose exactly one output.
             assert len(res_out) == 1
             res = res_out[0]
             assert_almost_equal(exp.ravel(), res.ravel(), decimal=4)
示例#19
0
    def test_onnxruntime_shapes_clr(self):
        """Check declared and inferred shapes of a converted random forest.

        The classifier is converted with ``zipmap`` disabled. The shapes
        reported by the inference session are checked first, then, when
        ONNX shape inference succeeds, the inferred output shapes.
        """
        iris = load_iris()
        X, y = iris.data, iris.target
        X_train, X_test, y_train, y_test = train_test_split(X, y)
        clr = RandomForestClassifier(max_depth=1)
        clr.fit(X_train, y_train)

        conversion_options = {id(clr): {'zipmap': False}}
        onx = convert_sklearn(
            clr,
            initial_types=[('float_input', FloatTensorType([None, 4]))],
            options=conversion_options,
            target_opset=TARGET_OPSET)

        sess = rt.InferenceSession(onx.SerializeToString())
        input_name = sess.get_inputs()[0].name
        feeds = {input_name: X_test.astype(numpy.float32)}
        pred_onx = sess.run(None, feeds)
        input_shape = sess.get_inputs()[0].shape
        label_shape = sess.get_outputs()[0].shape
        assert input_shape == [None, 4]
        assert label_shape in ([None, 1], [1], [None])
        labels = pred_onx[0]
        if len(labels.shape) > 1:
            assert labels.shape[1] == label_shape[1]

        try:
            inferred = onnx.shape_inference.infer_shapes(onx)
        except RuntimeError:
            # Shape inference does not work?
            return
        if inferred is None:
            return

        # Unknown dimensions are reported with ``dim_value`` 0.
        for index, expected in enumerate(([0], [0, 3])):
            dims = inferred.graph.output[index].type.tensor_type.shape.dim
            oshape = [d.dim_value for d in dims]
            self.assertIn(oshape, (None, expected))
    def test_grid_search(self):
        """Grid-search a pipeline whose first step is an OnnxTransformer.

        A PCA is converted to ONNX, wrapped in ``OnnxTransformer`` and
        chained with a logistic regression whose penalty is searched.
        """
        iris = load_iris()
        X, y = iris.data, iris.target
        X_train, X_test, y_train, y_test = train_test_split(X, y)

        pca = PCA(n_components=2)
        pca.fit(X_train)
        initial_types = [('input', FloatTensorType((1, X.shape[1])))]
        onx = convert_sklearn(pca, initial_types=initial_types)
        onx_bytes = onx.SerializeToString()

        pipe = make_pipeline(
            OnnxTransformer(onx_bytes),
            LogisticRegression(solver='liblinear'))
        param_grid = [{'logisticregression__penalty': ['l2', 'l1']}]
        clf = GridSearchCV(pipe, param_grid, cv=3)
        clf.fit(X_train, y_train)

        # Either penalty is an acceptable winner.
        acceptable = tuple(
            {'logisticregression__penalty': penalty}
            for penalty in ('l1', 'l2'))
        self.assertIn(clf.best_params_, acceptable)

        # A fresh transformer built from the same bytes must agree with
        # the first step of the best pipeline.
        tr2 = OnnxTransformer(onx_bytes)
        tr2.fit()
        best_transformer = clf.best_estimator_.steps[0][1]
        self.assertEqualArray(
            tr2.transform(X_test), best_transformer.transform(X_test))

        y_pred = clf.predict(X_test)
        report = classification_report(y_test, y_pred)
        self.assertIn('precision', report)
        self.assertGreater(clf.score(X_test, y_test), 0.70)
def fcts_model(X, y, fit_intercept):
    """Fit a LogisticRegression and return paired prediction functions.

    The fitted model is converted with ``convert_sklearn`` and loaded into
    an ``InferenceSession`` so that scikit-learn and onnxruntime can be
    called through functions sharing the same signature.

    :param X: training features; ``X.shape[1]`` sets the ONNX input width
    :param y: training targets passed to ``fit``
    :param fit_intercept: forwarded to ``LogisticRegression``
    :return: dictionary with keys ``'predict'`` and ``'predict_proba'``,
        each mapped to a ``(scikit-learn function, onnxruntime function)``
        pair
    """
    rf = LogisticRegression(fit_intercept=fit_intercept)
    rf.fit(X, y)

    initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
    onx = convert_sklearn(rf, initial_types=initial_types)
    # The serialized bytes are given to the session directly; copying
    # them through an intermediate buffer yields the same content.
    sess = InferenceSession(onx.SerializeToString())

    outputs = [o.name for o in sess.get_outputs()]

    # The sklearn helpers use their bound ``model`` argument rather than
    # the enclosing variable, so an explicitly supplied model is honoured.
    def predict_skl_predict(X, model=rf):
        return model.predict(X)

    def predict_skl_predict_proba(X, model=rf):
        return model.predict_proba(X)

    def predict_onnxrt_predict(X, sess=sess):
        # Only the first graph output is requested.
        return numpy.array(sess.run(outputs[:1], {'X': X.astype(np.float32)}))

    def predict_onnxrt_predict_proba(X, sess=sess):
        # The remaining outputs are requested and the first one is kept;
        # it is iterated as a sequence of mappings below.
        res = sess.run(outputs[1:], {'X': X.astype(np.float32)})[0]
        # do not use DataFrame to convert the output into array,
        # it takes too much time
        out = numpy.empty((len(res), len(res[0])), dtype=numpy.float32)
        for i, row in enumerate(res):
            # Each key is used directly as the column index.
            for k, v in row.items():
                out[i, k] = v
        return out

    return {'predict': (predict_skl_predict,
                        predict_onnxrt_predict),
            'predict_proba': (predict_skl_predict_proba,
                              predict_onnxrt_predict_proba)}
示例#22
0
    def test_pipeline_pca_pipeline_multinomial(self):
        """Convert a pipeline nesting another pipeline after a PCA step.

        The inner pipeline chains a ``MinMaxScaler`` with a
        ``MultinomialNB``.
        """
        pca = PCA(copy=True,
                  iterated_power='auto',
                  n_components=2,
                  random_state=None,
                  svd_solver='auto',
                  tol=0.0,
                  whiten=False)
        scaler = MinMaxScaler(copy=True,
                              feature_range=(0, 3.7209871159509307))
        naive_bayes = MultinomialNB(alpha=0.7368421052631579,
                                    class_prior=None,
                                    fit_prior=True)
        inner_pipeline = Pipeline(
            memory=None,
            steps=[('MinMax scaler', scaler),
                   ('MultinomialNB', naive_bayes)])
        model = Pipeline(
            memory=None,
            steps=[('PCA', pca), ('Pipeline', inner_pipeline)])

        data = np.array([[0, 0, 0], [0, 0, 0.1], [1, 1, 1.1], [1, 1.1, 1]],
                        dtype=np.float32)
        y = [0, 0, 1, 1]
        model.fit(data, y)

        initial_types = [('input', FloatTensorType(data.shape))]
        model_onnx = convert_sklearn(
            model, 'pipelinewithinpipeline', initial_types)
        self.assertTrue(model_onnx is not None)

        failure_condition = (
            "StrictVersion(onnxruntime.__version__) "
            "<= StrictVersion('0.2.1')")
        dump_data_and_model(
            data,
            model,
            model_onnx,
            basename="SklearnPipelinePcaPipelineMinMaxNB2",
            allow_failure=failure_condition)
示例#23
0
 def test_model_multi_class_nocl(self):
     """Convert a RandomForestClassifier with the ``nocl`` option enabled.

     The textual form of the converted model must contain neither
     ``classlabels_strings`` nor ``cl0``.
     """
     estimator = RandomForestClassifier(random_state=42)
     model, X = fit_classification_model(estimator, 2, label_string=True)

     conversion_options = {id(model): {'nocl': True}}
     model_onnx = convert_sklearn(
         model,
         "multi-class nocl",
         [("input", FloatTensorType([None, X.shape[1]]))],
         options=conversion_options)
     self.assertIsNotNone(model_onnx)

     model_text = str(model_onnx)
     assert 'classlabels_strings' not in model_text
     assert 'cl0' not in model_text

     failure_condition = (
         "StrictVersion(onnx.__version__) < StrictVersion('1.2') or "
         "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.2.1')")
     dump_data_and_model(
         X, model, model_onnx,
         classes=model.classes_,
         basename="SklearnRFMultiNoCl",
         verbose=False,
         allow_failure=failure_condition)
示例#24
0
 def test_model_logistic_regression_multi_class_saga_elasticnet(self):
     """Convert a saga-solved multi-class logistic regression.

     The elastic-net penalty is only requested when the installed
     scikit-learn is at least version 0.21.0.
     """
     if _sklearn_version() < StrictVersion('0.21.0'):
         estimator = linear_model.LogisticRegression(solver='saga')
     else:
         estimator = linear_model.LogisticRegression(
             solver='saga', penalty='elasticnet', l1_ratio=0.1)
     model, X = self._fit_model_multiclass_classification(estimator)

     initial_types = [("input", FloatTensorType([1, 3]))]
     model_onnx = convert_sklearn(
         model, "multi-class logistic regression", initial_types)
     self.assertIsNotNone(model_onnx)

     failure_condition = (
         "StrictVersion(onnx.__version__) < StrictVersion('1.2') or "
         "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.2.1')")
     dump_data_and_model(
         X.astype(numpy.float32),
         model,
         model_onnx,
         basename="SklearnLogitisticRegressionMultiSagaElasticnet",
         allow_failure=failure_condition,
     )
示例#25
0
 def test_model_tfidf_vectorizer11_word4(self):
     """Convert a unigram TfidfVectorizer using a 1-to-4 letter pattern."""
     documents = [
         'This is the first document.',
         'This document is the second document.',
         'And this is the third one.',
         'Is this the first document?',
     ]
     corpus = numpy.array(documents).reshape((4, 1))
     vect = TfidfVectorizer(
         ngram_range=(1, 1), norm=None, token_pattern="[a-zA-Z]{1,4}")
     vect.fit(corpus.ravel())

     model_onnx = convert_sklearn(
         vect, 'TfidfVectorizer',
         [('input', StringTensorType([1]))],
         options=self.get_options(),
         target_opset=TARGET_OPSET)
     self.assertTrue(model_onnx is not None)

     failure_condition = (
         "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.4.0')")
     dump_data_and_model(
         corpus, vect, model_onnx,
         basename="SklearnTfidfVectorizer11Regex4-OneOff-SklCol",
         allow_failure=failure_condition)
示例#26
0
 def test_model_one_hot_encoder(self):
     """Convert a default OneHotEncoder fitted on int64 data."""
     # ``categorical_features`` is scheduled for removal in scikit-learn
     # 0.22, at which point this test is expected to fail.
     # FutureWarning: the handling of integer data changes in 0.22.
     # Categories are currently derived from the range [0, max(values)];
     # they will later be derived from the unique values. Passing
     # "categories='auto'" selects the future behaviour and silences
     # the warning.
     data = numpy.array(
         [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int64)
     model = OneHotEncoder()
     model.fit(data)

     initial_types = [("input", Int64TensorType([1, 3]))]
     model_onnx = convert_sklearn(
         model, "scikit-learn one-hot encoder", initial_types)
     self.assertTrue(model_onnx is not None)

     dump_data_and_model(
         data, model, model_onnx,
         basename="SklearnOneHotEncoderInt64-SkipDim1")
 def test_ada_boost_classifier_samme_r_decision_function(self):
     """Convert a SAMME.R AdaBoost classifier with raw scores enabled.

     Both ``predict`` and ``decision_function`` are listed as the
     methods given to ``dump_data_and_model``.
     """
     weak_learner = DecisionTreeClassifier(max_depth=2, random_state=42)
     booster = AdaBoostClassifier(
         n_estimators=10, algorithm="SAMME.R", random_state=42,
         base_estimator=weak_learner)
     model, X_test = fit_classification_model(booster, 4)

     options = {id(model): {'raw_scores': True}}
     initial_types = [("input", FloatTensorType((None, X_test.shape[1])))]
     model_onnx = convert_sklearn(
         model, "AdaBoost classification", initial_types,
         target_opset=10, options=options)
     self.assertIsNotNone(model_onnx)

     failure_condition = (
         "StrictVersion(onnxruntime.__version__)<= StrictVersion('0.2.1')")
     dump_data_and_model(
         X_test,
         model,
         model_onnx,
         basename="SklearnAdaBoostClassifierSAMMERDecisionFunction",
         allow_failure=failure_condition,
         methods=['predict', 'decision_function'],
     )
 def test_model_tfidf_vectorizer11_out_vocabulary(self):
     """Run a converted TfidfVectorizer on text with extra leading tokens.

     The evaluation sentences are the training sentences prefixed with
     two tokens that do not appear in the training corpus.
     """
     training = numpy.array([
         'This is the first document.',
         'This document is the second document.',
         'And this is the third one.',
         'Is this the first document?',
     ]).reshape((4, 1))
     vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
     vect.fit(training.ravel())

     model_onnx = convert_sklearn(
         vect, 'TfidfVectorizer',
         [('input', StringTensorType([1]))],
         options=self.get_options())
     self.assertTrue(model_onnx is not None)

     evaluation = numpy.array([
         'AZZ ZZ This is the first document.',
         'BZZ ZZ This document is the second document.',
         'ZZZ ZZ And this is the third one.',
         'WZZ ZZ Is this the first document?',
     ]).reshape((4, 1))
     failure_condition = (
         "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.4.0')")
     dump_data_and_model(
         evaluation, vect, model_onnx,
         basename="SklearnTfidfVectorizer11OutVocabRegex-OneOff-SklCol",
         allow_failure=failure_condition)
示例#29
0
 def test_model_logistic_linear_discriminant_analysis_decfunc3(self):
     """Convert a three-class LinearDiscriminantAnalysis with raw scores.

     Both ``predict`` and ``decision_function`` are listed as the
     methods given to ``dump_data_and_model``.
     """
     X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
     y = np.array([1, 1, 1, 2, 2, 3])
     X_test = np.array([[-0.8, -1], [0, 1]], dtype=np.float32)
     model = LinearDiscriminantAnalysis().fit(X, y)

     conversion_options = {id(model): {'raw_scores': True}}
     initial_types = [("input", FloatTensorType([None, X_test.shape[1]]))]
     model_onnx = convert_sklearn(
         model, "linear model", initial_types, options=conversion_options)
     self.assertIsNotNone(model_onnx)

     # Operator cast-1 is not implemented in onnxruntime
     failure_condition = (
         "StrictVersion(onnx.__version__) < StrictVersion('1.3') or "
         "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.2.1')")
     dump_data_and_model(
         X_test,
         model,
         model_onnx,
         basename="SklearnLinearDiscriminantAnalysisBinRawScore3-Out0",
         allow_failure=failure_condition,
         methods=['predict', 'decision_function'])
示例#30
0
    def common_test_model_hgb_classifier(self, add_nan=False, n_classes=2):
        """Convert a small HistGradientBoostingClassifier and dump it.

        :param add_nan: when true, NaN values are written at randomly
            drawn positions of the generated data before fitting
        :param n_classes: number of classes of the generated problem
        """
        model = HistGradientBoostingClassifier(max_iter=5, max_depth=2)
        X, y = make_classification(
            n_features=10, n_samples=1000, n_informative=4,
            n_classes=n_classes, random_state=42)
        if add_nan:
            # Draw one (row, column) pair per third of the samples; rows
            # are drawn before columns.
            n_missing = X.shape[0] // 3
            rows = numpy.random.randint(0, X.shape[0] - 1, n_missing)
            cols = numpy.random.randint(0, X.shape[1] - 1, n_missing)
            X[rows, cols] = numpy.nan

        X_train, X_test, y_train, _ = train_test_split(
            X, y, test_size=0.5, random_state=42)
        model.fit(X_train, y_train)

        initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
        model_onnx = convert_sklearn(model, "unused", initial_types)
        self.assertIsNotNone(model_onnx)

        # Only the first five held-out rows are dumped, as float32.
        X_test = X_test.astype(numpy.float32)[:5]
        dump_data_and_model(X_test, model, model_onnx, folder=self.folder)