    def test_onnxt_runtime_solve(self):
        for transposed in [False, True]:
            with self.subTest(transposed=transposed):
                A = numpy.array([[2, 1], [0, 1]], dtype=float)
                Y = numpy.array([2, 1], dtype=float)
                X = solve(A, Y, transposed=transposed)

                onx = OnnxSolve('A',
                                'Y',
                                output_names=['X'],
                                transposed=transposed,
                                op_version=get_opset_number_from_onnx())
                model_def = onx.to_onnx(
                    {
                        'A': A.astype(numpy.float32),
                        'Y': Y.astype(numpy.float32)
                    },
                    outputs={'X': X.astype(numpy.float32)},
                    target_opset=get_opset_number_from_onnx())
                oinf = OnnxInference(model_def)
                got = oinf.run({'A': A, 'Y': Y})
                self.assertEqual(list(sorted(got)), ['X'])
                self.assertEqualArray(X, got['X'], decimal=6)

                oinfpy = OnnxInference(model_def,
                                       runtime="python",
                                       inplace=True)
                validate_python_inference(oinfpy, {
                    'A': A.astype(numpy.float32),
                    'Y': Y.astype(numpy.float32)
                })
                python_tested.append(OnnxSolve)

    def test_onnxt_runtime_cdist(self):
        for metric in ['sqeuclidean', 'euclidean']:
            with self.subTest(metric=metric):
                X = numpy.array([[2, 1], [0, 1]], dtype=float)
                Y = numpy.array([[2, 1, 5], [0, 1, 3]], dtype=float).T
                Z = cdist(X, Y, metric=metric)

                onx = OnnxCDist('X',
                                'Y',
                                output_names=['Z'],
                                metric=metric,
                                op_version=get_opset_number_from_onnx())
                model_def = onx.to_onnx(
                    {
                        'X': X.astype(numpy.float32),
                        'Y': Y.astype(numpy.float32)
                    },
                    outputs={'Z': Z.astype(numpy.float32)},
                    target_opset=get_opset_number_from_onnx())
                self.assertIn('s: "%s"' % metric, str(model_def))
                oinf = OnnxInference(model_def)
                got = oinf.run({'X': X, 'Y': Y})
                self.assertEqual(list(sorted(got)), ['Z'])
                self.assertEqualArray(Z, got['Z'], decimal=6)

                python_tested.append(OnnxCDist)
                oinfpy = OnnxInference(model_def,
                                       runtime="python",
                                       inplace=True)
                validate_python_inference(oinfpy, {
                    'X': X.astype(numpy.float32),
                    'Y': Y.astype(numpy.float32)
                },
                                          tolerance=1e-6)
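
The two tests above follow the pattern repeated throughout this page: build an operator graph, convert it with to_onnx, execute it with OnnxInference, and compare against the numpy/scipy reference. A minimal, hedged sketch of that round trip (import paths assumed from mlprodict's and skl2onnx's usual layout):

    import numpy
    from skl2onnx.algebra.onnx_ops import OnnxAdd
    from mlprodict.onnxrt import OnnxInference
    from mlprodict.tools import get_opset_number_from_onnx

    idi = numpy.identity(2, dtype=numpy.float32)
    node = OnnxAdd('X', idi, output_names=['Y'],
                   op_version=get_opset_number_from_onnx())
    model_def = node.to_onnx({'X': idi})
    got = OnnxInference(model_def).run({'X': idi})['Y']  # equals idi + idi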
Example #3
 def test_code_add_transpose(self):
     idi = numpy.identity(2, dtype=numpy.float32)
     onx = OnnxTranspose(OnnxAdd('X',
                                 idi,
                                 op_version=get_opset_number_from_onnx()),
                         output_names=['Y'],
                         op_version=get_opset_number_from_onnx())
     model_def = onx.to_onnx({'X': idi.astype(numpy.float32)})
     oinf = OnnxInference(model_def, runtime='python')
     res = oinf.to_python(inline=False)
     self.assertNotEmpty(res)
     self.assertIsInstance(res, dict)
     self.assertEqual(len(res), 2)
     self.assertIn('onnx_pyrt_Ad_Addcst.pkl', res)
     self.assertIn('onnx_pyrt_main.py', res)
     cd = res['onnx_pyrt_main.py']
     self.assertIn('def pyrt_Add(X, Ad_Addcst):', cd)
     self.assertIn('def run(self, X):', cd)
     # inline
     temp = get_temp_folder(__file__, "temp_code_add_transpose")
     res = oinf.to_python(inline=True, dest=temp)
     self.assertNotEmpty(res)
     name = os.path.join(temp, 'onnx_pyrt_main.py')
     self.assertExists(name)
     # test code
     test_code = """
         X = numpy.array([[1, 2], [3, 4]], dtype=numpy.float32)
         oinf = OnnxPythonInference()
         Y = oinf.run(X)
         print(Y)
         """
     X = numpy.array([[1, 2], [3, 4]], dtype=numpy.float32)
     exp = oinf.run({'X': X})['Y']
     sexp = str(exp)
     self.auto_test_script(name, test_code, sexp)
Example #4
 def test_validate_sklearn_operators_all(self):
     fLOG(__file__,
          self._testMethodName,
          OutputPrint=__name__ == "__main__")
     logger = getLogger('skl2onnx')
     logger.disabled = True
     verbose = 1 if __name__ == "__main__" else 0
     temp = get_temp_folder(__file__, "temp_validate_sklearn_operators_all")
     if False:  # pylint: disable=W0125
         rows = list(
             enumerate_validated_operator_opsets(
                 verbose,
                 models={"DecisionTreeClassifier"},
                 filter_exp=lambda m, p: '64' not in p,
                 debug=True,
                 fLOG=fLOG))
     else:
         rows = list(
             enumerate_validated_operator_opsets(
                 verbose,
                 debug=None,
                 fLOG=fLOG,
                 dump_folder=temp,
                 time_kwargs={
                     get_opset_number_from_onnx(): dict(number=2, repeat=2)
                 },
                 n_features=[None]))
     self.assertGreater(len(rows), 1)
     df = DataFrame(rows)
     self.assertGreater(df.shape[1], 1)
     fLOG("output results")
     df.to_csv(os.path.join(temp, "sklearn_opsets_report.csv"), index=False)
     df.to_excel(os.path.join(temp, "sklearn_opsets_report.xlsx"),
                 index=False)
    def test_create_asv_benchmark_pyspy_compiled(self):
        self.assertNotEmpty(mlprodict)
        temp = get_temp_folder(__file__,
                               "temp_create_asv_benchmark_pyspy_compiled")
        created = create_asv_benchmark(location=temp,
                                       verbose=0,
                                       runtime=('python', 'python_compiled'),
                                       exc=False,
                                       execute=True,
                                       models={'AdaBoostRegressor'},
                                       add_pyspy=True)
        self.assertNotEmpty(created)

        ops = get_opset_number_from_onnx()
        verif = False
        allnames = []
        for path, _, files in os.walk(os.path.join(temp, 'pyspy')):
            for zoo in files:
                if '__init__' in zoo:
                    continue
                allnames.append(zoo)
                fullname = os.path.join(path, zoo)
                with open(fullname, 'r', encoding='utf-8') as f:
                    content = f.read()
                if (zoo.endswith(
                        "bench_AdaBoostReg_default_b_reg_nest10_1_4_%d_float_.py"
                        % ops) and compare_module_version(
                            sklearn.__version__, "0.21") >= 0):
                    if "setup_profile" not in content:
                        raise AssertionError(content)
                    verif = True
        if not verif:
            raise AssertionError("Visited files\n{}".format(
                "\n".join(allnames)))
Example #6
 def test_template_benchmark_classifier_raw_scores(self):
     if not os.path.exists('_cache'):
         os.mkdir('_cache')
     cl = TemplateBenchmarkClassifierRawScore()
     res = {}
     cl.setup_cache()
     N = 60
     nf = cl.params[2][1]
     opset = get_opset_number_from_onnx()
     dtype = 'float'
     optim = None
     for runtime in ['skl', 'pyrt', 'ort']:
         cl.setup(runtime, N, nf, opset, dtype, optim)
         self.assertEqual(cl.X.shape, (N, nf))
         for method in dir(cl):
             if method.split('_')[0] in ('time', 'peakmem', 'track'):
                 meth = getattr(cl.__class__, method)
                 res[method, runtime] = meth(cl, runtime, N, nf, opset,
                                             dtype, optim)
                 if method == 'track_score' and res[method,
                                                    runtime] in (0, 1):
                     raise AssertionError(
                         "Predictions are too perfect: {},{}: {}".format(
                             method, runtime, res[method, runtime]))
     self.assertEqual(len(res), 24)
     exp = [('time_predict', 'skl'), ('peakmem_predict', 'skl'),
            ('track_score', 'skl'), ('track_onnxsize', 'skl'),
            ('time_predict', 'pyrt'), ('peakmem_predict', 'pyrt'),
            ('track_score', 'pyrt'), ('track_onnxsize', 'pyrt'),
            ('time_predict', 'ort'), ('peakmem_predict', 'ort'),
            ('track_score', 'ort'), ('track_onnxsize', 'ort'),
            ('track_nbnodes', 'skl'), ('track_nbnodes', 'ort'),
            ('track_nbnodes', 'pyrt')]
     self.assertEqual(set(_ for _ in exp if not _[0].startswith('track_v')),
                      set(_ for _ in res if not _[0].startswith('track_v')))
 def test_onnxt_runtime_add_raise(self):
     idi = numpy.identity(2)
     onx = OnnxAdd('X', idi, output_names=['Y'],
                   op_version=get_opset_number_from_onnx())
     model_def = onx.to_onnx({'X': idi.astype(numpy.float32)})
     self.assertRaise(lambda: OnnxInference(model_def, runtime='onnxruntime-1'),
                      ValueError)
Example #8
    def test_onnx_test_knn_transform(self):
        iris = load_iris()
        X, _ = iris.data, iris.target

        X_train, X_test = train_test_split(X, random_state=11)
        clr = NearestNeighbors(n_neighbors=3)
        clr.fit(X_train)

        for to in (10, 11, 12):
            if to > get_opset_number_from_onnx():
                break
            try:
                model_def = to_onnx(
                    clr,
                    X_train.astype(numpy.float32),
                    rewrite_ops=True,
                    options={NearestNeighbors: {
                        'largest0': False
                    }},
                    target_opset=to)
            except NameError as e:
                if "Option 'largest0' not in" in str(e):
                    continue
                raise
            oinf = OnnxInference(model_def, runtime='python')

            X_test = X_test[:3]
            y = oinf.run({'X': X_test.astype(numpy.float32)})
            dist, ind = clr.kneighbors(X_test)

            self.assertEqual(list(sorted(y)), ['distance', 'index'])
            self.assertEqualArray(ind, y['index'])
            self.assertEqualArray(dist,
                                  DataFrame(y['distance']).values,
                                  decimal=5)
Example #9
    def __init__(self,
                 estimator,
                 dim=None,
                 N_fit=100000,
                 runtimes=('python_compiled', 'onnxruntime1'),
                 onnx_options=None,
                 dtype=numpy.float32,
                 **opts):
        """
        @param      estimator       estimator class
        @param      dim             number of features
        @param      N_fit           number of observations to fit an estimator
        @param      runtimes        runtimes to test for class :epkg:`OnnxInference`
        @param      onnx_options    ONNX conversion options
        @param      dtype           dtype (float32 or float64)
        @param      opts            training settings
        """
        # These libraries are optional.
        from skl2onnx import to_onnx  # pylint: disable=E0401,C0415
        from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType  # pylint: disable=E0401,C0415

        if dim is None:
            raise RuntimeError(  # pragma: no cover
                "dim must be defined.")
        BenchPerfTest.__init__(self, **opts)

        allowed = {"max_depth"}
        opts = {k: v for k, v in opts.items() if k in allowed}
        self.dtype = dtype
        self.skl = estimator(**opts)
        X, y = self._get_random_dataset(N_fit, dim)
        try:
            self.skl.fit(X, y)
        except Exception as e:  # pragma: no cover
            raise RuntimeError(
                "X.shape={}\nopts={}\nTraining failed for {}".format(
                    X.shape, opts, self.skl)) from e

        if dtype == numpy.float64:
            initial_types = [('X', DoubleTensorType([None, X.shape[1]]))]
        elif dtype == numpy.float32:
            initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
        else:
            raise ValueError(  # pragma: no cover
                "Unable to convert the model into ONNX, unsupported dtype {}.".
                format(dtype))
        self.logconvert = StringIO()
        with contextlib.redirect_stdout(self.logconvert):
            with contextlib.redirect_stderr(self.logconvert):
                onx = to_onnx(self.skl,
                              initial_types=initial_types,
                              options=onnx_options,
                              target_opset=get_opset_number_from_onnx())
                onx.ir_version = get_ir_version_from_onnx()

        self._init(onx, runtimes)
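
The conversion step performed inside this constructor, distilled into a standalone sketch (hedged: the estimator and dataset below are arbitrary stand-ins, not part of the original class):

    import contextlib
    from io import StringIO
    from sklearn.datasets import make_regression
    from sklearn.tree import DecisionTreeRegressor
    from skl2onnx import to_onnx
    from skl2onnx.common.data_types import FloatTensorType

    X, y = make_regression(100, n_features=10, random_state=0)
    skl_model = DecisionTreeRegressor(max_depth=5).fit(X, y)
    initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
    log = StringIO()
    with contextlib.redirect_stdout(log), contextlib.redirect_stderr(log):
        onx = to_onnx(skl_model, initial_types=initial_types)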
Example #10
 def extract_model_info_onnx(self, **kwargs):
     """
     Populates member ``self.onnx_info`` with additional
     information on the :epkg:`ONNX` graph.
     """
     self.onnx_info = {
         'onnx_nodes': len(self.ort_onnx.graph.node),  # pylint: disable=E1101
         'onnx_opset': get_opset_number_from_onnx(),
     }
     self.onnx_info.update(kwargs)
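
For illustration, with kwargs such as runtime='onnxruntime1' (a hypothetical key), the member ends up as a plain dictionary like {'onnx_nodes': 42, 'onnx_opset': 15, 'runtime': 'onnxruntime1'}, where the values here are made up.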
Example #11
    def test_onnx_example_cdist_in_euclidean(self):
        x = numpy.array([1, 2, 4, 5, 5, 4]).astype(numpy.float32).reshape(
            (3, 2))
        x2 = numpy.array([1.1, 2.1, 4.01, 5.01, 5.001, 4.001, 0,
                          0]).astype(numpy.float32).reshape((4, 2))
        cop = OnnxAdd('input',
                      'input',
                      op_version=get_opset_number_from_onnx())
        cop2 = OnnxIdentity(onnx_cdist(
            cop,
            x2,
            dtype=numpy.float32,
            metric='euclidean',
            op_version=get_opset_number_from_onnx()),
                            output_names=['cdist'],
                            op_version=get_opset_number_from_onnx())

        model_def = cop2.to_onnx(inputs=[('input',
                                          FloatTensorType([None, None]))],
                                 outputs=[('cdist', FloatTensorType())],
                                 target_opset=get_opset_number_from_onnx())

        sess = OnnxInference(model_def)
        res = sess.run({'input': x})['cdist']
        exp = scipy_cdist(x * 2, x2, metric="euclidean")
        self.assertEqualArray(exp, res, decimal=5)

        x = numpy.array(
            [[6.1, 2.8, 4.7, 1.2], [5.7, 3.8, 1.7, 0.3], [7.7, 2.6, 6.9, 2.3],
             [6.0, 2.9, 4.5, 1.5], [6.8, 2.8, 4.8, 1.4], [5.4, 3.4, 1.5, 0.4],
             [5.6, 2.9, 3.6, 1.3], [6.9, 3.1, 5.1, 2.3]],
            dtype=numpy.float32)
        cop = OnnxAdd('input',
                      'input',
                      op_version=get_opset_number_from_onnx())
        cop2 = OnnxIdentity(onnx_cdist(
            cop,
            x,
            dtype=numpy.float32,
            op_version=get_opset_number_from_onnx()),
                            output_names=['cdist'],
                            op_version=get_opset_number_from_onnx())

        model_def = cop2.to_onnx(inputs=[('input',
                                          FloatTensorType([None, None]))],
                                 outputs=[('cdist', FloatTensorType())],
                                 target_opset=get_opset_number_from_onnx())

        sess = OnnxInference(model_def)
        res = sess.run({'input': x})['cdist']
        exp = scipy_cdist(x * 2, x, metric="sqeuclidean")
        self.assertEqualArray(exp, res, decimal=4)
 def test_onnxt_runtime_add1(self):
     idi = numpy.identity(2, dtype=numpy.float32)
     onx = OnnxAdd('X', idi, output_names=['Y'],
                   op_version=get_opset_number_from_onnx())
     model_def = onx.to_onnx({'X': idi.astype(numpy.float32)})
     X = numpy.array([[1, 2], [3, 4]], dtype=numpy.float32)
     model_def.ir_version = get_ir_version_from_onnx()
     oinf = OnnxInference(model_def, runtime='onnxruntime1')
     got = oinf.run({'X': X})
     self.assertEqual(list(sorted(got)), ['Y'])
     self.assertEqualArray(idi + X, got['Y'], decimal=6)
Example #13
    def test_onnx_example_cdist_in_minkowski(self):
        x = numpy.array([1, 2, 1, 3, 2, 2, 2,
                         3]).astype(numpy.float32).reshape((4, 2))
        x2 = numpy.array([[1, 2], [2, 2], [2.1, 2.1],
                          [2, 2]]).astype(numpy.float32).reshape((4, 2))
        for pp in [1, 2]:
            with self.subTest(pp=pp):
                cop = OnnxIdentity('input',
                                   op_version=get_opset_number_from_onnx())
                cop2 = OnnxIdentity(onnx_cdist(
                    cop,
                    x2,
                    dtype=numpy.float32,
                    metric="minkowski",
                    p=pp,
                    op_version=get_opset_number_from_onnx()),
                                    output_names=['cdist'],
                                    op_version=get_opset_number_from_onnx())

                model_def = cop2.to_onnx(inputs=[('input',
                                                  FloatTensorType([None,
                                                                   None]))],
                                         outputs=[('cdist', FloatTensorType())
                                                  ])

                try:
                    sess = OnnxInference(model_def)
                except RuntimeError as e:
                    raise AssertionError("Issue\n{}".format(model_def)) from e
                res = sess.run({'input': x})['cdist']
                exp = scipy_cdist(x, x2, metric="minkowski", p=pp)
                self.assertEqualArray(exp, res, decimal=5)

        with self.subTest(pp=3):
            x = numpy.array([[6.1, 2.8, 4.7, 1.2], [5.7, 3.8, 1.7, 0.3],
                             [7.7, 2.6, 6.9, 2.3], [6.0, 2.9, 4.5, 1.5],
                             [6.8, 2.8, 4.8, 1.4], [5.4, 3.4, 1.5, 0.4],
                             [5.6, 2.9, 3.6, 1.3], [6.9, 3.1, 5.1, 2.3]],
                            dtype=numpy.float32)
            cop = OnnxAdd('input',
                          'input',
                          op_version=get_opset_number_from_onnx())
            cop2 = OnnxIdentity(onnx_cdist(
                cop,
                x,
                dtype=numpy.float32,
                metric="minkowski",
                p=3,
                op_version=get_opset_number_from_onnx()),
                                output_names=['cdist'],
                                op_version=get_opset_number_from_onnx())

            model_def = cop2.to_onnx(inputs=[('input',
                                              FloatTensorType([None, None]))],
                                     outputs=[('cdist', FloatTensorType())])

            sess = OnnxInference(model_def)
            res = sess.run({'input': x})['cdist']
            exp = scipy_cdist(x * 2, x, metric="minkowski", p=3)
            self.assertEqualArray(exp, res, decimal=4)
Example #14
 def test_code_add_except(self):
     idi = numpy.identity(2, dtype=numpy.float32)
     onx = OnnxAdd('X',
                   idi,
                   output_names=['Y'],
                   op_version=get_opset_number_from_onnx())
     model_def = onx.to_onnx({'X': idi.astype(numpy.float32)})
     model_def.ir_version = get_ir_version_from_onnx()
     oinf = OnnxInference(model_def, runtime='onnxruntime1')
     try:
         oinf.to_python()
     except ValueError:
         pass
Example #15
    def test_cpu_conv_init(self):
        x = numpy.random.rand(1, 96, 56, 56).astype(numpy.float32)
        W = numpy.random.rand(24, 96, 1, 1).astype(numpy.float32)

        onx = OnnxConv('X',
                       'W',
                       output_names=['Y'],
                       auto_pad='NOTSET',
                       group=1,
                       dilations=[1, 1],
                       kernel_shape=[1, 1],
                       pads=[0, 0, 0, 0],
                       strides=[1, 1],
                       op_version=get_opset_number_from_onnx())
        model_def = onx.to_onnx(
            {
                'X': x.astype(numpy.float32),
                'W': W.astype(numpy.float32)
            },
            target_opset=get_opset_number_from_onnx())
        oinf = OnnxInference(model_def)
        oinfrt = OnnxInference(model_def, runtime='onnxruntime1')
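        # Compare the python runtime against onnxruntime on several random inputs.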
        for _ in range(0, 3):
            x = numpy.random.rand(1, 96, 56, 56).astype(numpy.float32)
            W = numpy.random.rand(24, 96, 1, 1).astype(numpy.float32)
            got = oinf.run({'X': x, 'W': W})
            gotrt = oinfrt.run({'X': x, 'W': W})
            diff = list(numpy.abs((gotrt['Y'] - got['Y']).ravel()))
            sdiff = list(sorted(diff))
            if sdiff[-1] > 1e-5:
                raise AssertionError("runtimes disagree {}".format(sdiff[-5:]))
            for ii in range(len(diff)):  # pylint: disable=C0200
                if numpy.isnan(diff[ii]):
                    raise AssertionError(
                        "runtimes disagree about nan {}: {} # {} ? {}".format(
                            ii, diff[ii], gotrt['Y'].ravel()[ii],
                            got['Y'].ravel()[ii]))
            self.assertEqualArray(gotrt['Y'], got['Y'], decimal=5)
    def test_onnxruntime_bug(self):
        rnd = numpy.random.randn(2, 20, 20).astype(numpy.float32)
        bni = (numpy.random.random((20, 20)).astype(  # pylint: disable=E1101
            numpy.float32) >= 0.7).astype(numpy.float32)
        mul = rnd * bni
        isn = any(numpy.isnan(mul.ravel()))
        self.assertFalse(isn)

        node = OnnxMul('X', bni, output_names=['Y'],
                       op_version=get_opset_number_from_onnx())
        onx = node.to_onnx({'X': rnd})
        for rt in ['python', 'onnxruntime1']:
            with self.subTest(runtime=rt):
                oinf = OnnxInference(onx, runtime=rt)
                y = oinf.run({'X': rnd})['Y']
                self.assertEqualArray(mul, y)
Example #17
    def test_model_bernoulli_nb_bc_onnxruntime1(self):
        model, X = self.fit_classification_model(BernoulliNB(), 2)
        model_onnx = convert_sklearn(
            model, "?", [("input", FloatTensorType([None, X.shape[1]]))],
            target_opset=get_opset_number_from_onnx())
        exp1 = model.predict(X)
        exp = model.predict_proba(X)

        model_onnx.ir_version = get_ir_version_from_onnx()
        oinf = _capture_output(
            lambda: OnnxInference(model_onnx, runtime='onnxruntime1'),
            'c')[0]
        got = oinf.run({'input': X})
        self.assertEqualArray(exp1, got['output_label'])
        got2 = DataFrame(got['output_probability']).values
        self.assertEqualArray(exp, got2, decimal=4)
Example #18
 def test_model_extra_trees_classifier_multilabel(self):
     model, X_test = fit_multilabel_classification_model(
         ExtraTreesClassifier(random_state=42, n_estimators=10))
     options = {id(model): {'zipmap': False}}
     model_onnx = convert_sklearn(
         model,
         "scikit-learn ExtraTreesClassifier",
         [("input", FloatTensorType([None, X_test.shape[1]]))],
         options=options,
         target_opset=get_opset_number_from_onnx())
     self.assertTrue(model_onnx is not None)
     self.assertNotIn('zipmap', str(model_onnx).lower())
     dump_data_and_model(
         X_test,
         model,
         model_onnx,
         basename="SklearnExtraTreesClassifierMultiLabel-Out0",
         folder=self.folder)
    def test_validate_sklearn_operators_dump_all(self):
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        logger = getLogger('skl2onnx')
        logger.disabled = True
        verbose = 1 if __name__ == "__main__" else 0
        temp = get_temp_folder(__file__,
                               "temp_validate_sklearn_operators_dump_all")
        self.assertRaise(
            lambda: list(
                enumerate_validated_operator_opsets(
                    verbose,
                    models={"DecisionTreeClassifier"},
                    filter_exp=lambda m, p: '64' not in p,
                    fLOG=fLOG,
                    dump_all=True)), ValueError)
        rows = list(
            enumerate_validated_operator_opsets(
                verbose,
                models={"DecisionTreeClassifier"},
                filter_exp=lambda m, p: '64' not in p,
                fLOG=fLOG,
                dump_all=True,
                dump_folder=temp))
        self.assertGreater(len(rows), 1)
        df = DataFrame(rows)
        self.assertGreater(df.shape[1], 1)
        fLOG("output results")
        df.to_csv(os.path.join(temp, "sklearn_opsets_report.csv"), index=False)
        df.to_excel(os.path.join(temp, "sklearn_opsets_report.xlsx"),
                    index=False)

        stored = os.path.join(
            temp,
            ("dump-i-python-DecisionTreeClassifier-default-b-cl-tree._classes."
             "DecisionTreeClassifierzipmapFalse-op%d-nf4.pkl" %
             get_opset_number_from_onnx()))
        with open(stored, "rb") as f:
            obj = pickle.load(f)
        self.assertIn('onnx_bytes', obj)
        self.assertIn('skl_model', obj)
        self.assertIn('X_test', obj)
        self.assertIn('Xort_test', obj)
    def test_validate_sklearn_operators_onnxruntime_KMeans(self):
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        logger = getLogger('skl2onnx')
        logger.disabled = True
        verbose = 1 if __name__ == "__main__" else 0

        buffer = []

        def myprint(*args, **kwargs):
            buffer.append(" ".join(map(str, args)))

        op = get_opset_number_from_onnx()
        rows = list(enumerate_validated_operator_opsets(
            verbose, models={"KMeans"},
            fLOG=myprint,
            runtime='onnxruntime2', debug=True,
            filter_exp=lambda m, p: '-64' not in p,
            opset_min=op, opset_max=op))
        self.assertGreater(len(rows), 1)
Example #21
 def test_template_benchmark_transformPositive(self):
     if not os.path.exists('_cache'):
         os.mkdir('_cache')
     cl = TemplateBenchmarkTransformPositive()
     res = {}
     cl.setup_cache()
     N = 60
     nf = cl.params[2][1]
     opset = get_opset_number_from_onnx()
     dtype = 'float'
     expect = 12
     optim = None
     for runtime in ['skl', 'pyrt']:
         try:
             cl.setup(runtime, N, nf, opset, dtype, optim)
         except MissingShapeCalculator:
             # Converter not yet implemented.
             expect = 0
             continue
         self.assertEqual(cl.X.shape, (N, nf))
         for method in dir(cl):
             if method.split('_')[0] in ('time', 'peakmem', 'track'):
                 meth = getattr(cl.__class__, method)
                 res[method, runtime] = meth(cl, runtime, N, nf, opset,
                                             dtype, optim)
                 if method == 'track_score' and res[method,
                                                    runtime] in (0, 1):
                     raise AssertionError(
                         "Predictions are too perfect: {},{}: {}".format(
                             method, runtime, res[method, runtime]))
     if expect == 0:
         return
     self.assertEqual(len(res), expect)
     exp = [('time_predict', 'skl'), ('peakmem_predict', 'skl'),
            ('track_score', 'skl'), ('track_onnxsize', 'skl'),
            ('time_predict', 'pyrt'), ('peakmem_predict', 'pyrt'),
            ('track_score', 'pyrt'), ('track_onnxsize', 'pyrt'),
            ('track_nbnodes', 'skl'), ('track_opset', 'skl'),
            ('track_opset', 'pyrt'), ('track_nbnodes', 'pyrt')]
     self.assertEqual(set(_ for _ in exp if not _[0].startswith('track_v')),
                      set(_ for _ in res if not _[0].startswith('track_v')))
    def test_onnxruntime_knn_radius(self):
        def _get_reg_data(self, n, n_features, n_targets, n_informative=10):
            X, y = make_regression(  # pylint: disable=W0632
                n, n_features=n_features, random_state=0,
                n_targets=n_targets, n_informative=n_informative)
            return X, y

        def _fit_model(model, n_targets=1, label_int=False,
                       n_informative=10):
            X, y = _get_reg_data(20, 4, n_targets, n_informative)
            if label_int:
                y = y.astype(numpy.int64)
            model.fit(X, y)
            return model, X

        model, X = _fit_model(RadiusNeighborsRegressor())
        model_onnx = to_onnx(
            model, X[:1].astype(numpy.float32),
            target_opset=get_opset_number_from_onnx(),
            options={id(model): {'optim': 'cdist'}})
        oinf = OnnxInference(model_onnx, runtime='onnxruntime1')
        X = X[:7]
        got = oinf.run({'X': X.astype(numpy.float32)})['variable']
        exp = model.predict(X.astype(numpy.float32))
        if any(numpy.isnan(got.ravel())):
            # The model is unexpectedly producing nan values
            # sometimes.
            res = oinf.run({'X': X.astype(numpy.float32)}, intermediate=True)
            rows = ['--EXP--', str(exp), '--GOT--', str(got),
                    '--EVERY-OUTPUT--']
            for k, v in res.items():
                rows.append('-%s-' % k)
                rows.append(str(v))
            if any(map(numpy.isnan, res["variable"].ravel())):
                raise AssertionError('\n'.join(rows))
            # onnxruntime and mlprodict do not return the same
            # output
            warnings.warn('\n'.join(rows))
            return
        self.assertEqualArray(exp, got, decimal=4)
Example #23
class TemplateBenchmarkClassifier(_CommonAsvSklBenchmarkClassifier):
    """
    :epkg:`asv` test for a classifier. The full template can be found in
    `common_asv_skl.py <https://github.com/sdpython/mlprodict/
    blob/master/mlprodict/asv_benchmark/common_asv_skl.py>`_.
    """
    params = [
        ['skl', 'pyrtc', 'ort'],  # values for runtime
        [1, 10, 100, 1000, 10000, 100000],  # values for N
        [4, 20],  # values for nf
        [get_opset_number_from_onnx()],  # values for opset
        ['float', 'double'],  # values for dtype
        [None],  # values for optim
    ]

    # additional parameters

    def setup_cache(self):  # pylint: disable=W0235
        super().setup_cache()

    def _create_model(self):
        return LogisticRegression(multi_class='ovr', solver='liblinear')
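
Outside :epkg:`asv`, this template can be driven by hand exactly as the unit tests earlier on this page do (a hedged sketch reusing the class defined above; those tests also create a '_cache' folder first):

    cl = TemplateBenchmarkClassifier()
    cl.setup_cache()
    opset = get_opset_number_from_onnx()
    cl.setup('skl', 100, 4, opset, 'float', None)  # runtime, N, nf, opset, dtype, optim
    score = cl.track_score('skl', 100, 4, opset, 'float', None)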
Example #24
class TemplateBenchmarkTransform(_CommonAsvSklBenchmarkTransform):
    """
    :epkg:`asv` example for a transform. The full template can be found in
    `common_asv_skl.py <https://github.com/sdpython/mlprodict/blob/
    master/mlprodict/asv_benchmark/common_asv_skl.py>`_.
    """
    params = [
        ['skl', 'pyrtc', 'ort'],  # values for runtime
        [1, 10, 100, 1000, 10000, 100000],  # values for N
        [4, 20],  # values for nf
        [get_opset_number_from_onnx()],  # values for opset
        ['float', 'double'],  # values for dtype
        [None],  # values for optim
    ]

    # additional parameters

    def setup_cache(self):  # pylint: disable=W0235
        super().setup_cache()

    def _create_model(self):
        return Normalizer()
Example #25
 def test_validate_sklearn_operators_benchmark_errors(self):
     fLOG(__file__,
          self._testMethodName,
          OutputPrint=__name__ == "__main__")
     logger = getLogger('skl2onnx')
     logger.disabled = True
     verbose = 1 if __name__ == "__main__" else 0
     temp = get_temp_folder(
         __file__, "temp_validate_sklearn_operators_benchmark_summary")
     rows = list(
         enumerate_validated_operator_opsets(
             verbose,
             models={"RFE", "DecisionTreeRegressor"},
             opset_min=10,
             benchmark=True,
             fLOG=fLOG))
     self.assertGreater(len(rows), 1)
     df = DataFrame(rows)
     for col in ['skl', 'batch']:
         self.assertIn('lambda-' + col, df.columns)
     for col in ['1', '10']:
         self.assertIn('time-ratio-N=' + col, df.columns)
     self.assertGreater(df.shape[1], 1)
     self.assertGreater(df.loc[0, "tostring_time"], 0)
     piv = summary_report(df)
     self.assertGreater(piv.shape[1], 1)
     self.assertIn('RT/SKL-N=1', piv.columns)
     self.assertNotIn('RT/SKL-N=10', piv.columns)
     self.assertIn('N=10', piv.columns)
     fLOG("output results")
     ops = 'opset%d' % get_opset_number_from_onnx()
     li = len(piv[ops].notnull())
     self.assertEqual(li, piv.shape[0])
     df.to_excel(os.path.join(temp, "sklearn_opsets_report.xlsx"),
                 index=False)
     piv.to_excel(os.path.join(temp, "sklearn_opsets_summary.xlsx"),
                  index=False)
Example #26
def _enumerate_asv_benchmark_all_models(  # pylint: disable=R0914
        location,
        opset_min=10,
        opset_max=None,
        runtime=('scikit-learn', 'python'),
        models=None,
        skip_models=None,
        extended_list=True,
        n_features=None,
        dtype=None,
        verbose=0,
        filter_exp=None,
        dims=None,
        filter_scenario=None,
        exc=True,
        flat=False,
        execute=False,
        dest_pyspy=None,
        fLOG=print):
    """
    Loops over all possible models and fills a folder
    with benchmarks following :epkg:`asv` concepts.

    :param dims: number of observations to try
    :param verbose: integer from 0 (None) to 2 (full verbose)
    :param opset_min: tries every conversion from this minimum opset
    :param opset_max: tries every conversion up to maximum opset
    :param runtime: runtime to check, *scikit-learn*, *python*,
        *onnxruntime1* to check :epkg:`onnxruntime`,
        *onnxruntime2* to check every ONNX node independently
        with onnxruntime; several runtimes can be checked at the same time
        if the value is a comma-separated list
    :param models: list of models to test or empty
        string to test them all
    :param skip_models: models to skip
    :param extended_list: extends the list of :epkg:`scikit-learn` converters
        with converters implemented in this module
    :param n_features: change the default number of features for
        a specific problem, it can also be a comma separated list
    :param dtype: '32' or '64' or None for both,
        limits the test to one specific number type
    :param fLOG: logging function
    :param filter_exp: function which tells if the experiment must be run,
        None to run all, takes *model, problem* as an input
    :param filter_scenario: second function which tells if the experiment must be run,
        None to run all, takes *model, problem, scenario, extra*
        as an input
    :param exc: if False, raises warnings instead of exceptions
        whenever possible
    :param flat: one folder for all files or subfolders
    :param execute: execute each script to make sure
        imports are correct
    :param dest_pyspy: add a file to profile the prediction
        function with :epkg:`pyspy`
    """

    ops = [_ for _ in sklearn_operators(extended=extended_list)]
    patterns = _read_patterns()

    if models is not None:
        if not all(map(lambda m: isinstance(m, str), models)):
            raise ValueError(
                "models must be a set of strings.")  # pragma: no cover
        ops_ = [_ for _ in ops if _['name'] in models]
        if len(ops_) == 0:
            raise ValueError(
                "Parameter models is wrong: {}\n{}".format(  # pragma: no cover
                    models, ops[0]))
        ops = ops_
    if skip_models is not None:
        ops = [m for m in ops if m['name'] not in skip_models]

    if verbose > 0:

        def iterate():
            for i, row in enumerate(ops):  # pragma: no cover
                fLOG("{}/{} - {}".format(i + 1, len(ops), row))
                yield row

        if verbose >= 11:
            verbose -= 10  # pragma: no cover
            loop = iterate()  # pragma: no cover
        else:
            try:
                from tqdm import trange

                def iterate_tqdm():
                    with trange(len(ops)) as t:
                        for i in t:
                            row = ops[i]
                            disp = row['name'] + " " * (28 - len(row['name']))
                            t.set_description("%s" % disp)
                            yield row

                loop = iterate_tqdm()

            except ImportError:  # pragma: no cover
                loop = iterate()
    else:
        loop = ops

    if opset_max is None:
        opset_max = get_opset_number_from_onnx()
    opsets = list(range(opset_min, opset_max + 1))
    all_created = set()

    # loop on all models
    for row in loop:

        model = row['cl']

        problems, extras = _retrieve_problems_extra(model, verbose, fLOG,
                                                    extended_list)
        if extras is None or problems is None:
            # Not tested yet.
            continue

        # flat or not flat
        created, location_model, prefix_import, dest_pyspy_model = _handle_init_files(
            model, flat, location, verbose, dest_pyspy, fLOG)
        for init in created:
            yield init

        # loops on problems
        for prob in problems:
            if filter_exp is not None and not filter_exp(model, prob):
                continue

            (X_train, X_test, y_train, y_test, Xort_test, init_types,
             conv_options, method_name, output_index, dofit,
             predict_kwargs) = _get_problem_data(prob, None)

            for scenario_extra in extras:
                subset_problems = None
                optimisations = None
                new_conv_options = None

                if len(scenario_extra) > 2:
                    options = scenario_extra[2]
                    if isinstance(options, dict):
                        subset_problems = options.get('subset_problems', None)
                        optimisations = options.get('optim', None)
                        new_conv_options = options.get('conv_options', None)
                    else:
                        subset_problems = options

                if subset_problems and isinstance(subset_problems,
                                                  (list, set)):
                    if prob not in subset_problems:
                        # Skips unrelated problem for a specific configuration.
                        continue
                elif subset_problems is not None:
                    raise RuntimeError(  # pragma: no cover
                        "subset_problems must be a set or a list not {}.".
                        format(subset_problems))

                scenario, extra = scenario_extra[:2]
                if optimisations is None:
                    optimisations = [None]
                if new_conv_options is None:
                    new_conv_options = [{}]

                if (filter_scenario is not None and not filter_scenario(
                        model, prob, scenario, extra, new_conv_options)):
                    continue

                if verbose >= 3 and fLOG is not None:
                    fLOG(
                        "[create_asv_benchmark] model={} scenario={} optim={} extra={} dofit={} (problem={} method_name='{}')"
                        .format(model.__name__, scenario, optimisations, extra,
                                dofit, prob, method_name))
                created = _create_asv_benchmark_file(
                    location_model,
                    opsets=opsets,
                    model=model,
                    scenario=scenario,
                    optimisations=optimisations,
                    extra=extra,
                    dofit=dofit,
                    problem=prob,
                    runtime=runtime,
                    new_conv_options=new_conv_options,
                    X_train=X_train,
                    X_test=X_test,
                    y_train=y_train,
                    y_test=y_test,
                    Xort_test=Xort_test,
                    init_types=init_types,
                    conv_options=conv_options,
                    method_name=method_name,
                    dims=dims,
                    n_features=n_features,
                    output_index=output_index,
                    predict_kwargs=predict_kwargs,
                    exc=exc,
                    prefix_import=prefix_import,
                    execute=execute,
                    location_pyspy=dest_pyspy_model,
                    patterns=patterns)
                for cr in created:
                    if cr in all_created:
                        raise RuntimeError(
                            "File '{}' was already created.".format(cr))
                    all_created.add(cr)
                    if verbose > 1 and fLOG is not None:
                        fLOG("[create_asv_benchmark] add '{}'.".format(cr))
                    yield cr
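
This generator is normally reached through the public helper create_asv_benchmark, as in the py-spy unit test earlier on this page (a hedged sketch; the import path is assumed from mlprodict's layout):

    from mlprodict.asv_benchmark import create_asv_benchmark
    created = create_asv_benchmark(
        location='asv_bench', models={'AdaBoostRegressor'},
        runtime=('python', 'python_compiled'), verbose=1)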
Example #27
class _CommonAsvSklBenchmark:
    """
    Common code shared by all benchmarks testing converted
    :epkg:`scikit-learn` models. See `benchmark attributes
    <https://asv.readthedocs.io/en/stable/benchmarks.html#general>`_.
    """

    # Part which changes.
    # params and param_names may be changed too.

    params = [
        ['skl', 'pyrtc', 'ort'],  # values for runtime
        [1, 10, 100, 10000, 100000],  # values for N
        [4, 20],  # values for nf
        [get_opset_number_from_onnx()],  # values for opset
        ["float", "double"],  # values for dtype
        [None],  # values for optim
    ]
    param_names = ['rt', 'N', 'nf', 'opset', 'dtype', 'optim']
    chk_method_name = None
    version = datetime.now().isoformat()
    pretty_source = "disabled"

    par_ydtype = numpy.int64
    par_dofit = True
    par_convopts = None

    def _create_model(self):  # pragma: no cover
        raise NotImplementedError("This method must be overwritten.")

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype,
                                 optim):  # pragma: no cover
        raise NotImplementedError("This method must be overwritten.")

    def _score_metric(self, X, y_exp, y_pred):  # pragma: no cover
        raise NotImplementedError("This method must be overwritten.")

    def _optimize_onnx(self, onx):
        return onx

    def _get_xdtype(self, dtype):
        if dtype in ('float', numpy.float32):
            return numpy.float32
        elif dtype in ('double', numpy.float64):
            return numpy.float64
        raise ValueError(  # pragma: no cover
            "Unknown dtype '{}'.".format(dtype))

    def _get_dataset(self, nf, dtype):
        xdtype = self._get_xdtype(dtype)
        data = load_iris()
        X, y = data.data, data.target
        state = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        rnd = state.randn(*X.shape) / 3
        X += rnd
        X = _modify_dimension(X, nf)
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            random_state=42)
        Xt = X_test.astype(xdtype)
        yt = y_test.astype(self.par_ydtype)
        return (X_train, y_train), (Xt, yt)

    def _to_onnx(self, model, X, opset, dtype, optim):
        if optim is None or len(optim) == 0:
            options = self.par_convopts
        elif self.par_convopts and len(self.par_convopts) > 0:
            raise NotImplementedError(  # pragma: no cover
                "Conflict between par_convopts={} and optim={}".format(
                    self.par_convopts, optim))
        else:
            # Expand common onnx options, see _nick_name_options.
            options = expand_onnx_options(model, optim)

        # The call is identical for float and double inputs: X already carries
        # the requested dtype, so a single conversion path is enough.
        return to_onnx(model, X, options=options, target_opset=opset)

    def _create_onnx_inference(self, onx, runtime):
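        # onnxruntime may not accept the most recent ONNX IR version, so the
        # model temporarily receives the IR version returned by
        # get_ir_version_from_onnx and the original value is restored below.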
        if 'onnxruntime' in runtime:
            old = onx.ir_version
            onx.ir_version = get_ir_version_from_onnx()
        else:
            old = None

        try:
            res = OnnxInference(onx, runtime=runtime)
        except RuntimeError as e:  # pragma: no cover
            if "[ONNXRuntimeError]" in str(e):
                return RuntimeError("onnxruntime fails due to {}".format(
                    str(e)))
            raise e
        if old is not None:
            onx.ir_version = old
        return res

    # Part which does not change.

    def runtime_name(self, runtime):
        """
        Returns the runtime shortname.
        """
        if runtime == 'skl':
            name = runtime
        elif runtime == 'ort':
            name = 'onnxruntime1'
        elif runtime == 'ort2':
            name = 'onnxruntime2'
        elif runtime == 'pyrt':
            name = 'python'
        elif runtime == 'pyrtc':
            name = 'python_compiled'
        else:
            raise ValueError(  # pragma: no cover
                "Unknown runtime '{}'.".format(runtime))
        return name

    def _name(self, nf, opset, dtype):
        last = 'cache-{}-nf{}-op{}-dt{}.pickle'.format(self.__class__.__name__,
                                                       nf, opset, dtype)
        return last

    def setup_cache(self):
        "asv API"
        for dtype in self.params[4]:
            for opv in self.params[3]:
                for nf in self.params[2]:
                    (X_train, y_train), (X, y) = self._get_dataset(nf, dtype)
                    model = self._create_model()
                    if self.par_dofit:
                        set_random_state(model)
                        model.fit(X_train, y_train)
                    stored = {'model': model, 'X': X, 'y': y}
                    filename = self._name(nf, opv, dtype)
                    with open(filename, "wb") as f:
                        pickle.dump(stored, f)
                    if not os.path.exists(filename):
                        raise RuntimeError(  # pragma: no cover
                            "Unable to dump model %r into %r." %
                            (model, filename))

    def setup(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        logger = getLogger('skl2onnx')
        logger.disabled = True
        register_converters()
        register_rewritten_operators()
        with open(self._name(nf, opset, dtype), "rb") as f:
            stored = pickle.load(f)
        self.stored = stored
        self.model = stored['model']
        self.X, self.y = make_n_rows(stored['X'], N, stored['y'])
        onx, rt_, rt_fct_, rt_fct_track_ = self._create_onnx_and_runtime(
            runtime, self.model, self.X, opset, dtype, optim)
        self.onx = onx
        setattr(self, "rt_" + runtime, rt_)
        setattr(self, "rt_fct_" + runtime, rt_fct_)
        setattr(self, "rt_fct_track_" + runtime, rt_fct_track_)
        set_config(assume_finite=True)

    def time_predict(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        return getattr(self, "rt_fct_" + runtime)(self.X)

    def peakmem_predict(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        return getattr(self, "rt_fct_" + runtime)(self.X)

    def track_score(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        yp = getattr(self, "rt_fct_track_" + runtime)(self.X)
        return self._score_metric(self.X, self.y, yp)

    def track_onnxsize(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        return len(self.onx.SerializeToString())

    def track_nbnodes(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        stats = onnx_statistics(self.onx)
        return stats.get('nnodes', 0)

    def track_vmlprodict(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        from mlprodict import __version__
        return version2number(__version__)

    def track_vsklearn(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        from sklearn import __version__
        return version2number(__version__)

    def track_vort(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        try:
            from onnxruntime import __version__
            return version2number(__version__)
        except ImportError:  # pragma: no cover
            return 0

    def check_method_name(self, method_name):
        "Does some verifications. Fails if inconsistencies."
        if getattr(self, 'chk_method_name', None) not in (None, method_name):
            raise RuntimeError(  # pragma: no cover
                "Method name must be '{}'.".format(method_name))
        if getattr(self, 'chk_method_name', None) is None:
            raise RuntimeError(  # pragma: no cover
                "Unable to check that the method name is correct "
                "(expected is '{}')".format(method_name))
Example #28
    def test_cpu_conv_group(self):
        x = numpy.random.rand(1, 3, 3, 4).astype(numpy.float32)
        W = numpy.random.rand(9, 1, 3, 3).astype(numpy.float32)

        onx = OnnxConv('X',
                       'W',
                       output_names=['Y'],
                       auto_pad='NOTSET',
                       group=3,
                       dilations=[1, 1],
                       kernel_shape=[3, 3],
                       strides=[1, 1],
                       op_version=get_opset_number_from_onnx())
        model_def = onx.to_onnx(
            {
                'X': x.astype(numpy.float32),
                'W': W.astype(numpy.float32)
            },
            target_opset=get_opset_number_from_onnx())
        oinf = OnnxInference(model_def)
        oinfrt = OnnxInference(model_def, runtime='onnxruntime1')
        d = oinf.sequence_[-1].ops_.atts_value
        self.assertIsInstance(d, dict)
        self.assertEqual(d['kernel_shape'].tolist(), [3, 3])

        xs = [
            numpy.random.rand(1, 3, 3, 4).astype(numpy.float32),
            numpy.array([
                1.0, 4.0, 7.0, 10.0, 13.0, 16.0, 19.0, 22.0, 25.0, 28.0, 31.0,
                34.0, 2.0, 5.0, 8.0, 11.0, 14.0, 17.0, 20.0, 23.0, 26.0, 29.0,
                32.0, 35.0, 3.0, 6.0, 9.0, 12.0, 15.0, 18.0, 21.0, 24.0, 27.0,
                30.0, 33.0, 36.0
            ],
                        dtype=numpy.float32).reshape((1, 3, 3, 4))
        ]
        Ws = [
            numpy.random.rand(9, 1, 3, 3).astype(numpy.float32),
            numpy.array([
                1.0, 10.0, 19.0, 28.0, 37.0, 46.0, 55.0, 64.0, 73.0, 2.0, 11.0,
                20.0, 29.0, 38.0, 47.0, 56.0, 65.0, 74.0, 3.0, 12.0, 21.0,
                30.0, 39.0, 48.0, 57.0, 66.0, 75.0, 4.0, 13.0, 22.0, 31.0,
                40.0, 49.0, 58.0, 67.0, 76.0, 5.0, 14.0, 23.0, 32.0, 41.0,
                50.0, 59.0, 68.0, 77.0, 6.0, 15.0, 24.0, 33.0, 42.0, 51.0,
                60.0, 69.0, 78.0, 7.0, 16.0, 25.0, 34.0, 43.0, 52.0, 61.0,
                70.0, 79.0, 8.0, 17.0, 26.0, 35.0, 44.0, 53.0, 62.0, 71.0,
                80.0, 9.0, 18.0, 27.0, 36.0, 45.0, 54.0, 63.0, 72.0, 81.0
            ],
                        dtype=numpy.float32).reshape((9, 1, 3, 3))
        ]

        for x, W in zip(xs, Ws):
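            # Re-run with Fortran-ordered (column-major) copies to check that
            # both runtimes handle non C-contiguous inputs identically.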
            x = numpy.asfortranarray(x)
            W = numpy.asfortranarray(W)
            got = oinf.run({'X': x, 'W': W})
            gotrt = oinfrt.run({'X': x, 'W': W})
            diff = list(numpy.abs((gotrt['Y'] - got['Y']).ravel()))
            sdiff = list(sorted(diff))
            if sdiff[-1] > 1e-5:
                raise AssertionError("runtimes disagree {}".format(sdiff[-5:]))
            for ii in range(len(diff)):  # pylint: disable=C0200
                if numpy.isnan(diff[ii]):
                    raise AssertionError(
                        "runtimes disagree about nan {}: {} # {} ? {}".format(
                            ii, diff[ii], gotrt['Y'].ravel()[ii],
                            got['Y'].ravel()[ii]))
            self.assertEqualArray(gotrt['Y'], got['Y'], decimal=5)
Example #29
    def onnx_test_knn_single_classreg(self,
                                      dtype,
                                      n_targets=1,
                                      debug=False,
                                      add_noise=False,
                                      runtime='python',
                                      target_opset=None,
                                      optim=None,
                                      kind='reg',
                                      level=1,
                                      largest0=True,
                                      metric_params=None,
                                      **kwargs):
        iris = load_iris()
        X, y = iris.data, iris.target
        if add_noise:
            X += numpy.random.randn(X.shape[0], X.shape[1]) * 10
        if kind == 'reg':
            y = y.astype(dtype)
        elif kind == 'bin':
            y = (y % 2).astype(numpy.int64)
        elif kind == 'mcl':
            y = y.astype(numpy.int64)
        else:
            raise AssertionError("unknown '{}'".format(kind))

        if n_targets != 1:
            yn = numpy.empty((y.shape[0], n_targets), dtype=dtype)
            for i in range(n_targets):
                yn[:, i] = y + i
            y = yn
        X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
        X_test = X_test.astype(dtype)
        if kind in ('bin', 'mcl'):
            clr = KNeighborsClassifier(metric_params=metric_params, **kwargs)
        elif kind == 'reg':
            clr = KNeighborsRegressor(metric_params=metric_params, **kwargs)
        else:
            raise NotImplementedError(kind)
        clr.fit(X_train, y_train)

        if optim is None:
            options = None
        else:
            options = {clr.__class__: {'optim': 'cdist'}}
        if not largest0:
            if options is None:
                options = {}
            if clr.__class__ not in options:
                options[clr.__class__] = {}
            options[clr.__class__].update({'largest0': False})

        if target_opset is None:
            opsets = list(
                sorted(set([9, 10, 11, 12,
                            get_opset_number_from_onnx()])))
        else:
            opsets = [target_opset]
        for ops in opsets:
            if ops is None:
                raise AssertionError("Cannot happen: {}.".format(opsets))
            with self.subTest(target_opset=ops):
                try:
                    model_def = to_onnx(clr,
                                        X_train.astype(dtype),
                                        rewrite_ops=True,
                                        target_opset=ops,
                                        options=options)
                except NameError as e:
                    if "Option 'largest0' not in" in str(e):
                        continue
                    raise
                if 'onnxruntime' in runtime:
                    model_def.ir_version = get_ir_version_from_onnx()
                try:
                    if runtime == 'onnxruntime2':
                        oinf = _capture_output(
                            lambda: OnnxInference(model_def, runtime=runtime),  # pylint: disable=W0640
                            'c')[0]
                    else:
                        oinf = OnnxInference(model_def, runtime=runtime)
                except (RuntimeError, TypeError, OrtInvalidArgument) as e:
                    if "No Op registered for Identity with domain_version of 12" in str(
                            e):
                        continue
                    if debug:
                        raise AssertionError(
                            "Unable to create a model for target_opset={}\n----\n{}\n----"
                            .format(ops,
                                    str(model_def)[:100])) from e
                    if "Unknown model file format version." in str(e):
                        continue
                    raise AssertionError(
                        "Unable to create model for opset={} and runtime='{}'\n{}"
                        "".format(ops, runtime,
                                  str(model_def)[:100])) from e

                if debug:
                    y = oinf.run({'X': X_test}, verbose=level, fLOG=print)
                else:
                    y = oinf.run({'X': X_test})

                lexp = clr.predict(X_test)
                if kind == 'reg':
                    self.assertEqual(list(sorted(y)), ['variable'])
                    if dtype == numpy.float32:
                        self.assertEqualArray(lexp,
                                              y['variable'],
                                              decimal=5,
                                              squeeze=True)
                    else:
                        self.assertEqualArray(lexp,
                                              y['variable'],
                                              squeeze=True)
                else:
                    self.assertEqual(list(sorted(y)),
                                     ['output_label', 'output_probability'])
                    self.assertEqualArray(lexp, y['output_label'])
                    lprob = clr.predict_proba(X_test)
                    self.assertEqualArray(lprob,
                                          DataFrame(
                                              y['output_probability']).values,
                                          decimal=5)
Example n.º 30
def create_asv_benchmark(location,
                         opset_min=-1,
                         opset_max=None,
                         runtime=('scikit-learn', 'python_compiled'),
                         models=None,
                         skip_models=None,
                         extended_list=True,
                         dims=(1, 10, 100, 10000, 100000),
                         n_features=(4, 20),
                         dtype=None,
                         verbose=0,
                         fLOG=print,
                         clean=True,
                         conf_params=None,
                         filter_exp=None,
                         filter_scenario=None,
                         flat=False,
                         exc=False,
                         build=None,
                         execute=False,
                         add_pyspy=False,
                         env=None,
                         matrix=None):
    """
    Creates an :epkg:`asv` benchmark in a folder
    but does not run it.

    :param location: folder where the benchmark files are created
    :param n_features: number of features to try, it changes the default
        number of features for a specific problem, it can also be
        a comma-separated list
    :param dims: number of observations to try
    :param verbose: integer from 0 (None) to 2 (full verbose)
    :param opset_min: tries every conversion from this minimum opset,
        -1 to get the current opset defined by module :epkg:`onnx`
    :param opset_max: tries every conversion up to maximum opset,
        -1 to get the current opset defined by module :epkg:`onnx`
    :param runtime: runtime to check, *scikit-learn*, *python*,
        *python_compiled* compiles the graph structure
        and is more efficient when the number of observations is
        small, *onnxruntime1* to check :epkg:`onnxruntime`,
        *onnxruntime2* to check every ONNX node independently
        with onnxruntime, many runtimes can be checked at the same time
        if the value is a comma-separated list
    :param models: list of models to test or empty
        string to test them all
    :param skip_models: models to skip
    :param extended_list: extends the list of :epkg:`scikit-learn` converters
        with converters implemented in this module
    :param dtype: '32' or '64' or None for both,
        limits the test to one specific number type
    :param fLOG: logging function
    :param clean: clean the folder first, otherwise overwrites the content
    :param conf_params: to overwrite some of the configuration parameters
    :param filter_exp: function which tells if the experiment must be run,
        None to run all, takes *model, problem* as an input
    :param filter_scenario: second function which tells if the experiment must be run,
        None to run all, takes *model, problem, scenario, extra*
        as an input
    :param flat: one folder for all files or subfolders
    :param exc: if False, raises warnings instead of exceptions
        whenever possible
    :param build: where to put the outputs
    :param execute: execute each script to make sure
        imports are correct
    :param add_pyspy: add an extra folder with code to profile
        each configuration
    :param env: None to use the default configuration or ``same`` to use
        the current one
    :param matrix: specifies versions for a module,
        example: ``{'onnxruntime': ['1.1.1', '1.1.2']}``,
        if a package name starts with `'~'`, the package is removed
    :return: created files

    The default configuration is the following:

    .. runpython::
        :showcode:

        import pprint
        from mlprodict.asv_benchmark.create_asv import default_asv_conf

        pprint.pprint(default_asv_conf)

    The benchmark does not seem to work well with option
    ``--environment existing:same``: the publishing step fails.
    """
    if opset_min == -1:
        opset_min = get_opset_number_from_onnx()
    if opset_max == -1:
        opset_max = get_opset_number_from_onnx()  # pragma: no cover
    if verbose > 0 and fLOG is not None:  # pragma: no cover
        fLOG("[create_asv_benchmark] opset in [{}, {}].".format(
            opset_min, opset_max))

    # creates the folder if it does not exist.
    if not os.path.exists(location):
        if verbose > 0 and fLOG is not None:
            fLOG("[create_asv_benchmark] create folder '{}'.".format(location))
        os.makedirs(location)

    location_test = os.path.join(location, 'benches')
    if not os.path.exists(location_test):
        if verbose > 0 and fLOG is not None:
            fLOG("[create_asv_benchmark] create folder '{}'.".format(
                location_test))
        os.mkdir(location_test)

    # Cleans the content of the folder
    created = []
    if clean:
        for name in os.listdir(location_test):
            full_name = os.path.join(location_test, name)
            if os.path.isfile(full_name):
                os.remove(full_name)

    # configuration
    conf = default_asv_conf.copy()
    if conf_params is not None:
        for k, v in conf_params.items():
            conf[k] = v
    if build is not None:
        for fi in ['env_dir', 'results_dir', 'html_dir']:
            conf[fi] = os.path.join(build, conf[fi])
    if env == 'same':
        if matrix is not None:
            raise ValueError("Parameter matrix must be None if env is 'same'.")
        conf['pythons'] = ['same']
        conf['matrix'] = {}
    elif matrix is not None:
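        # Keys prefixed with '~' mark packages to drop from the matrix
        # (see the 'matrix' parameter in the docstring).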
        drop_keys = set(p for p in matrix if p.startswith('~'))
        matrix = {k: v for k, v in matrix.items() if k not in drop_keys}
        conf['matrix'] = {
            k: v
            for k, v in conf['matrix'].items() if k not in drop_keys
        }
        conf['matrix'].update(matrix)
    elif env is not None:
        raise ValueError(  # pragma: no cover
            "Unable to handle env='{}'.".format(env))
    dest = os.path.join(location, "asv.conf.json")
    created.append(dest)
    with open(dest, "w", encoding='utf-8') as f:
        json.dump(conf, f, indent=4)
    if verbose > 0 and fLOG is not None:
        fLOG("[create_asv_benchmark] create 'asv.conf.json'.")

    # __init__.py
    dest = os.path.join(location, "__init__.py")
    with open(dest, "w", encoding='utf-8') as f:
        pass
    created.append(dest)
    if verbose > 0 and fLOG is not None:
        fLOG("[create_asv_benchmark] create '__init__.py'.")
    dest = os.path.join(location_test, '__init__.py')
    with open(dest, "w", encoding='utf-8') as f:
        pass
    created.append(dest)
    if verbose > 0 and fLOG is not None:
        fLOG("[create_asv_benchmark] create 'benches/__init__.py'.")

    # flask_server
    tool_dir = os.path.join(location, 'tools')
    if not os.path.exists(tool_dir):
        os.mkdir(tool_dir)
    fl = os.path.join(tool_dir, 'flask_serve.py')
    with open(fl, "w", encoding='utf-8') as f:
        f.write(flask_helper)
    if verbose > 0 and fLOG is not None:
        fLOG("[create_asv_benchmark] create 'flask_serve.py'.")

    # command line
    if sys.platform.startswith("win"):
        run_bash = os.path.join(tool_dir, 'run_asv.bat')  # pragma: no cover
    else:
        run_bash = os.path.join(tool_dir, 'run_asv.sh')
    with open(run_bash, 'w') as f:
        f.write(
            textwrap.dedent("""
            echo --BENCHRUN--
            python -m asv run --show-stderr --config ./asv.conf.json
            echo --PUBLISH--
            python -m asv publish --config ./asv.conf.json -o ./html
            echo --CSV--
            python -m mlprodict asv2csv -f ./results -o ./data_bench.csv
            """))

    # pyspy
    if add_pyspy:
        dest_pyspy = os.path.join(location, 'pyspy')
        if not os.path.exists(dest_pyspy):
            os.mkdir(dest_pyspy)
    else:
        dest_pyspy = None

    if verbose > 0 and fLOG is not None:
        fLOG("[create_asv_benchmark] create all tests.")

    created.extend(
        list(
            _enumerate_asv_benchmark_all_models(
                location_test,
                opset_min=opset_min,
                opset_max=opset_max,
                runtime=runtime,
                models=models,
                skip_models=skip_models,
                extended_list=extended_list,
                n_features=n_features,
                dtype=dtype,
                verbose=verbose,
                filter_exp=filter_exp,
                filter_scenario=filter_scenario,
                dims=dims,
                exc=exc,
                flat=flat,
                fLOG=fLOG,
                execute=execute,
                dest_pyspy=dest_pyspy)))

    if verbose > 0 and fLOG is not None:
        fLOG("[create_asv_benchmark] done.")
    return created
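A minimal usage sketch follows (it is not taken from the library's documentation;
the output folder, the model selection and the import path are assumptions
inferred from the docstring above):

from mlprodict.asv_benchmark.create_asv import create_asv_benchmark

# Create, but do not run, an asv benchmark restricted to one model family.
# The folder name and model list below are illustrative assumptions.
created = create_asv_benchmark(
    "asv_bench_knn",                    # hypothetical output folder
    models=["KNeighborsRegressor"],     # assumed: model names given as strings
    runtime=('scikit-learn', 'python_compiled'),
    opset_min=-1,                       # resolved to the installed onnx opset
    verbose=1)
print("%d files created" % len(created))

The generated tools/run_asv.sh script then shows the asv commands used to run
and publish the benchmark.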