Example #1
    def __init__(self,
                 model,
                 n_threads=None,
                 batch_size=None,
                 extra_config={}):
        super(ONNXSklearnContainer, self).__init__(model, n_threads,
                                                   batch_size, extra_config)

        assert onnx_runtime_installed(), (
            "ONNX Container requires ONNX Runtime to be installed.")

        sess_options = ort.SessionOptions()
        if self._n_threads is not None:
            sess_options.intra_op_num_threads = self._n_threads
            sess_options.inter_op_num_threads = 1
            sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
        self._session = ort.InferenceSession(self._model.SerializeToString(),
                                             sess_options=sess_options)
        self._output_names = [
            output.name for output in self._session.get_outputs()
        ]
        self._input_names = [
            input.name for input in self._session.get_inputs()
        ]
        self._extra_config = extra_config
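For context, a minimal sketch of how the session prepared above can be driven directly with onnxruntime; the model path, input shape, and dtype here are illustrative assumptions, not taken from the snippet:

# Hedged sketch: querying input/output names and running inference,
# mirroring what the container's __init__ sets up.
import numpy as np
import onnxruntime as ort

sess_options = ort.SessionOptions()
sess_options.intra_op_num_threads = 1  # as when n_threads=1 is passed
session = ort.InferenceSession("model.onnx",  # hypothetical model path
                               sess_options=sess_options)

input_name = session.get_inputs()[0].name
output_names = [output.name for output in session.get_outputs()]
X = np.random.rand(10, 4).astype(np.float32)  # illustrative batch
predictions = session.run(output_names, {input_name: X})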
class TestNoExtra(unittest.TestCase):
    """
    These tests are meant to be run on a clean container after doing
    `pip install hummingbird-ml` without any of the `extra` packages
    """

    # Test that lightgbm_installed() returns False when LightGBM is absent
    @unittest.skipIf(lightgbm_installed(),
                     reason="Test when LightGBM is not installed")
    def test_lightgbm_installed_false(self):
        warnings.filterwarnings("ignore")
        assert not lightgbm_installed()

    # Test that xgboost_installed() returns False when XGBoost is absent
    @unittest.skipIf(xgboost_installed(),
                     reason="Test when XGBoost is not installed")
    def test_xgboost_installed_false(self):
        warnings.filterwarnings("ignore")
        assert not xgboost_installed()

    # Test that onnx_runtime_installed() returns False when ONNX Runtime is absent
    @unittest.skipIf(onnx_runtime_installed(),
                     reason="Test when ONNX Runtime is not installed")
    def test_onnx_installed_false(self):
        warnings.filterwarnings("ignore")
        assert not onnx_runtime_installed()

    # Test that onnx_ml_tools_installed() returns False when ONNXMLTOOLS is absent
    @unittest.skipIf(onnx_ml_tools_installed(),
                     reason="Test when ONNXMLTOOLS is not installed")
    def test_onnx_ml_installed_false(self):
        warnings.filterwarnings("ignore")
        assert not onnx_ml_tools_installed()

    # Test that tvm_installed() returns False when TVM is absent
    @unittest.skipIf(tvm_installed(),
                     reason="Test when TVM is not installed")
    def test_tvm_installed_false(self):
        warnings.filterwarnings("ignore")
        assert not tvm_installed()

    # Test that we can import the converter successfully without installing [extra]
    def test_import_convert_no_extra(self):
        try:
            from hummingbird.ml import convert
        except Exception:  # TODO something more specific?
            self.fail(
                "Unexpected error importing convert without extra packages")
Example #3
class TestBackends(unittest.TestCase):
    # Test backends are browsable
    def test_backends(self):
        warnings.filterwarnings("ignore")
        self.assertTrue(len(hummingbird.ml.backends) > 0)

    # Test backends are not case sensitive
    def test_backends_case_sensitive(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Test pytorch is still a valid backend name
    def test_backends_pytorch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "pytOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Test not supported backends
    def test_unsupported_backend(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # An unsupported backend should raise MissingBackend
        self.assertRaises(MissingBackend, hummingbird.ml.convert, model,
                          "scala")

    # Test torchscript requires test_data
    def test_torchscript_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test torchscript requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model,
                          "torch.jit")

    # Test onnx requires test_data or initial_types
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # Test onnx requires test_data
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model,
                          "onnx")
class TestONNXBinarizer(unittest.TestCase):
    def _test_binarizer_converter(self, threshold):
        warnings.filterwarnings("ignore")
        X = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
                     dtype=np.float32)

        # Create SKL model for testing
        model = Binarizer(threshold=threshold)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("float_input",
                                             FloatTensorType_onnx(X.shape))
                                        ])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            output.name for output in session.get_outputs()
        ]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)[0]

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        return onnx_ml_pred, onnx_pred

    # Check 0.0 threshold
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_binarizer_converter_0thresh(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_binarizer_converter(0.0)

        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    # Check positive threshold
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_binarizer_converter_posthresh(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_binarizer_converter(2.0)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    # Check negative threshold
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_binarizer_converter_negthresh(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_binarizer_converter(-2.0)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    # If the model is corrupt, we should get a RuntimeError
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_binarizer_converter_raises_rt(self):
        warnings.filterwarnings("ignore")
        X = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
                     dtype=np.float32)
        model = Binarizer(threshold=0)
        model.fit(X)

        # generate test input
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("float_input",
                                             FloatTensorType_onnx(X.shape))
                                        ])
        onnx_ml_model.graph.node[0].attribute[0].name = "".encode()

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)
Example #5
class TestBackends(unittest.TestCase):
    # Test backends are browsable
    def test_backends(self):
        warnings.filterwarnings("ignore")
        self.assertTrue(len(hummingbird.ml.backends) > 0)

    # Test backends are not case sensitive
    def test_backends_case_sensitive(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Test pytorch is still a valid backend name
    def test_backends_pytorch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "pytOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Test pytorch save and load
    def test_pytorch_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        hb_model_loaded = hummingbird.ml.TorchContainer.load("pt-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("pt-tmp.zip")
        shutil.rmtree("pt-tmp")

    # Test pytorch save and generic load
    def test_pytorch_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        hb_model_loaded = hummingbird.ml.load("pt-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("pt-tmp.zip")
        shutil.rmtree("pt-tmp")

    # Test torchscript save and load
    def test_torchscript_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch.jit", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("ts-tmp")

        hb_model_loaded = hummingbird.ml.TorchContainer.load("ts-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("ts-tmp.zip")
        shutil.rmtree("ts-tmp")

    # Test torchscript save and generic load
    def test_torchscript_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch.jit", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("ts-tmp")

        hb_model_loaded = hummingbird.ml.load("ts-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("ts-tmp.zip")
        shutil.rmtree("ts-tmp")

    # Test not supported backends
    def test_unsupported_backend(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test that the scala backend raises an exception
        self.assertRaises(MissingBackend, hummingbird.ml.convert, model,
                          "scala")

    # Test torchscript requires test_data
    def test_torchscript_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test torchscript requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model,
                          "torch.jit")

    # Test TVM requires test_data
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test tvm requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "tvm")

    # Test tvm save and load
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp")

        hb_model_loaded = hummingbird.ml.TVMContainer.load("tvm-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("tvm-tmp.zip")
        shutil.rmtree("tvm-tmp")

    # Test tvm save and generic load
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp")

        hb_model_loaded = hummingbird.ml.load("tvm-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("tvm-tmp.zip")
        shutil.rmtree("tvm-tmp")

    # Test tvm save and load zip file
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_load_zip(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp.zip")

        hb_model_loaded = hummingbird.ml.TVMContainer.load("tvm-tmp.zip")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("tvm-tmp.zip")
        shutil.rmtree("tvm-tmp")

    # Test onnx requires test_data or initial_types
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_float(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # With initial_types provided, onnx conversion needs no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx 0 shape input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_zero_shape_input(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("input",
                                             DoubleTensorType([0, X.shape[1]]))
                                        ],
                                        target_opset=11)

        # With initial_types provided, onnx conversion needs no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, double input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_double(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", DoubleTensorType([X.shape[0],
                                                       X.shape[1]]))],
            target_opset=11)

        # With initial_types provided, onnx conversion needs no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, long input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_long(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.int64)

        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", Int64TensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # With initial_types provided, onnx conversion needs no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, int input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_int(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", Int32TensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # With initial_types provided, onnx conversion needs no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, string input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_string(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([["a", "b", "c"]])
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", StringTensorType([X.shape[0],
                                                       X.shape[1]]))],
            target_opset=11)

        # String inputs are not supported and should raise a RuntimeError
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model,
                          "onnx")

    # Test ONNX save and load
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        hb_model_loaded = hummingbird.ml.ONNXContainer.load("onnx-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("onnx-tmp.zip")
        shutil.rmtree("onnx-tmp")

    # Test ONNX save and generic load
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        hb_model_loaded = hummingbird.ml.load("onnx-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("onnx-tmp.zip")
        shutil.rmtree("onnx-tmp")

    # Test for when the user forgets the backend string (e.g. convert(model, output) rather than convert(model, 'torch')) after the API change
    def test_forgotten_backend_string(self):
        from sklearn.preprocessing import LabelEncoder

        model = LabelEncoder()
        data = np.array([1, 4, 5, 2, 0, 2], dtype=np.int32)
        model.fit(data)

        self.assertRaises(ValueError, hummingbird.ml.convert, model,
                          [("input", Int32TensorType([6, 1]))])
class TestONNXScaler(unittest.TestCase):
    def _test_scaler_converter(self, model):
        warnings.filterwarnings("ignore")
        X = np.array([[0.0, 0.0, 3.0], [1.0, -1.0, 0.0], [0.0, 2.0, 1.0],
                      [1.0, 0.0, -2.0]],
                     dtype=np.float32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("float_input", FloatTensorType([None,
                                                            X.shape[1]]))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)
        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            output.name for output in session.get_outputs()
        ]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)[0]

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        return onnx_ml_pred, onnx_pred

    # Test StandardScaler with_mean=True, with_std=True
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_standard_scaler_onnx_tt(self, rtol=1e-06, atol=1e-06):
        model = StandardScaler(with_mean=True, with_std=True)
        onnx_ml_pred, onnx_pred = self._test_scaler_converter(model)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    # Test StandardScaler with_mean=True, with_std=False
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_standard_scaler_onnx_tf(self, rtol=1e-06, atol=1e-06):
        model = StandardScaler(with_mean=True, with_std=False)
        onnx_ml_pred, onnx_pred = self._test_scaler_converter(model)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    # Test StandardScaler with_mean=False, with_std=False
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_standard_scaler_onnx_ff(self, rtol=1e-06, atol=1e-06):
        model = StandardScaler(with_mean=False, with_std=False)
        onnx_ml_pred, onnx_pred = self._test_scaler_converter(model)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    # Test RobustScaler with with_centering=True
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_robust_scaler_onnx_t(self, rtol=1e-06, atol=1e-06):
        model = RobustScaler(with_centering=True)
        onnx_ml_pred, onnx_pred = self._test_scaler_converter(model)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    # Test RobustScaler with with_centering=False
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_robust_scaler_onnx_f(self, rtol=1e-06, atol=1e-06):
        model = RobustScaler(with_centering=False)
        onnx_ml_pred, onnx_pred = self._test_scaler_converter(model)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    # Test MaxAbsScaler
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_max_abs_scaler_onnx(self, rtol=1e-06, atol=1e-06):
        model = MaxAbsScaler()
        onnx_ml_pred, onnx_pred = self._test_scaler_converter(model)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    # Test MinMaxScaler
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_min_max_scaler_onnx(self, rtol=1e-06, atol=1e-06):
        model = MinMaxScaler()
        onnx_ml_pred, onnx_pred = self._test_scaler_converter(model)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    # Test that malformed models throw an exception
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_scaler_converter_raises_rt_onnx(self):
        warnings.filterwarnings("ignore")
        X = np.array([[0.0, 0.0, 3.0], [1.0, -1.0, 0.0], [0.0, 2.0, 1.0],
                      [1.0, 0.0, -2.0]],
                     dtype=np.float32)

        # Create SKL model for testing
        model = StandardScaler()
        model.fit(X)

        # Generate test input
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("float_input",
                                             FloatTensorType(X.shape))
                                        ])
        onnx_ml_model.graph.node[0].attribute[0].name = "".encode()

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)

    # Test with float64
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_scaler_converter_float_64(self):
        warnings.filterwarnings("ignore")
        X = np.array([[0.0, 0.0, 3.0], [1.0, -1.0, 0.0], [0.0, 2.0, 1.0],
                      [1.0, 0.0, -2.0]],
                     dtype=np.float64)

        # Create SKL model for testing
        model = StandardScaler()
        model.fit(X)

        # Generate test input
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[("double_input",
                                                        DoubleTensorType(
                                                            [None,
                                                             X.shape[1]]))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)
        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            output.name for output in session.get_outputs()
        ]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)[0]

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=1e-06,
                                   atol=1e-06)
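The scaler converters above must reproduce scikit-learn's affine transform; for StandardScaler that is (X - mean_) / scale_, which can be checked directly:

# StandardScaler.transform is the affine map (X - mean_) / scale_.
import numpy as np
from sklearn.preprocessing import StandardScaler

X = np.array([[0.0, 0.0, 3.0], [1.0, -1.0, 0.0]], dtype=np.float64)
scaler = StandardScaler().fit(X)
np.testing.assert_allclose(scaler.transform(X),
                           (X - scaler.mean_) / scaler.scale_)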
class TestONNXImputer(unittest.TestCase):
    def _test_imputer_converter(self, model, mode="onnx"):
        warnings.filterwarnings("ignore")
        X = np.array([[1, 2], [np.nan, 3], [7, 6]], dtype=np.float32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))])

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [output.name for output in session.get_outputs()]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)[0]

        # Create test model by calling converter
        model = convert(onnx_ml_model, mode, X)

        # Get the predictions for the test model
        pred = model.transform(X)

        return onnx_ml_pred, pred

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_imputer_const(self, rtol=1e-06, atol=1e-06):
        model = SimpleImputer(strategy="constant")
        onnx_ml_pred, onnx_pred = self._test_imputer_converter(model)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_imputer_const_nan0(self, rtol=1e-06, atol=1e-06):
        model = SimpleImputer(strategy="constant", fill_value=0)
        onnx_ml_pred, onnx_pred = self._test_imputer_converter(model)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_imputer_mean(self, rtol=1e-06, atol=1e-06):
        model = SimpleImputer(strategy="mean", fill_value="nan")
        onnx_ml_pred, onnx_pred = self._test_imputer_converter(model)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_imputer_converter_raises_rt(self):
        warnings.filterwarnings("ignore")
        model = SimpleImputer(strategy="mean", fill_value="nan")
        X = np.array([[1, 2], [np.nan, 3], [7, 6]], dtype=np.float32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))])
        onnx_ml_model.graph.node[0].attribute[0].name = "".encode()

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_imputer_torch(self, rtol=1e-06, atol=1e-06):
        model = SimpleImputer(strategy="constant")
        onnx_ml_pred, onnx_pred = self._test_imputer_converter(model, mode="torch")

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)
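For reference, the imputer behavior under test: SimpleImputer with strategy="constant" replaces NaN entries with fill_value (given explicitly as 0 below):

# SimpleImputer(strategy="constant") fills NaNs with fill_value.
import numpy as np
from sklearn.impute import SimpleImputer

X = np.array([[1, 2], [np.nan, 3], [7, 6]], dtype=np.float32)
print(SimpleImputer(strategy="constant", fill_value=0).fit_transform(X))
# [[1. 2.]
#  [0. 3.]
#  [7. 6.]]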
Example #9
class TestBackends(unittest.TestCase):
    # Test backends are browsable
    def test_backends(self):
        warnings.filterwarnings("ignore")
        self.assertTrue(len(hummingbird.ml.backends) > 0)

    # Test backends are not case sensitive
    def test_backends_case_sensitive(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test pytorch is still a valid backend name
    def test_backends_pytorch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "pytOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test not supported backends
    def test_unsupported_backend(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test that the scala backend raises an exception
        self.assertRaises(MissingBackend, hummingbird.ml.convert, model, "scala")

    # Test torchscript requires test_data
    def test_torchscript_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test torchscript requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "torch.jit")

    # Test TVM requires test_data
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test tvm requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "tvm")

    # Test onnx requires test_data or initial_types
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_no_test_data_float(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # With initial_types provided, onnx conversion needs no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, double input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_no_test_data_double(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", DoubleTensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # With initial_types provided, onnx conversion needs no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, long input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_no_test_data_long(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.int64)

        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", Int64TensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # With initial_types provided, onnx conversion needs no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, int input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_no_test_data_int(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", Int32TensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # With initial_types provided, onnx conversion needs no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, string input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_no_test_data_string(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([["a", "b", "c"]])
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", StringTensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # String inputs are not supported and should raise a RuntimeError
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model, "onnx")
Example #10
class TestBackends(unittest.TestCase):
    # Test backends are browsable
    def test_backends(self):
        warnings.filterwarnings("ignore")
        self.assertTrue(len(hummingbird.ml.backends) > 0)

    # Test backends are not case sensitive
    def test_backends_case_sensitive(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Test pytorch is still a valid backend name
    def test_backends_pytorch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "pytOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Test pytorch save and load
    def test_pytorch_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        hb_model_loaded = hummingbird.ml.TorchContainer.load("pt-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("pt-tmp.zip")

    # Test pytorch save and generic load
    def test_pytorch_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        hb_model_loaded = hummingbird.ml.load("pt-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("pt-tmp.zip")

    def test_pytorch_save_load_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        hummingbird.ml.load("pt-tmp")
        hummingbird.ml.load("pt-tmp")

        os.remove("pt-tmp.zip")

    def test_pytorch_save_load_more_versions(self):
        from hummingbird.ml.operator_converters import constants

        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        shutil.unpack_archive("pt-tmp.zip", "pt-tmp", format="zip")

        # Adding a new library does not create problems.
        with open(
                os.path.join("pt-tmp",
                             constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH),
                "r") as file:
            configuration = file.readlines()
        configuration.append("\nlibx=1.3")
        os.remove(
            os.path.join("pt-tmp",
                         constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH))
        with open(
                os.path.join("pt-tmp",
                             constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH),
                "w") as file:
            file.writelines(configuration)
        shutil.make_archive("pt-tmp", "zip", "pt-tmp")

        hummingbird.ml.load("pt-tmp")
        os.remove("pt-tmp.zip")

    def test_pytorch_save_load_less_versions(self):
        from hummingbird.ml.operator_converters import constants

        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        shutil.unpack_archive("pt-tmp.zip", "pt-tmp", format="zip")

        # Removing a library does not create problems.
        with open(
                os.path.join("pt-tmp",
                             constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH),
                "r") as file:
            configuration = file.readlines()
        configuration = configuration[:-1]
        os.remove(
            os.path.join("pt-tmp",
                         constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH))
        with open(
                os.path.join("pt-tmp",
                             constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH),
                "w") as file:
            file.writelines(configuration)
        shutil.make_archive("pt-tmp", "zip", "pt-tmp")

        hummingbird.ml.load("pt-tmp")
        os.remove("pt-tmp.zip")

    def test_pytorch_save_load_different_versions(self):
        from hummingbird.ml.operator_converters import constants

        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        shutil.unpack_archive("pt-tmp.zip", "pt-tmp", format="zip")

        # Changing the version of a library does not create problems.
        with open(
                os.path.join("pt-tmp",
                             constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH),
                "r") as file:
            configuration = file.readlines()
        configuration[0] = "hummingbird=0.0.0.1\n"
        os.remove(
            os.path.join("pt-tmp",
                         constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH))
        with open(
                os.path.join("pt-tmp",
                             constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH),
                "w") as file:
            file.writelines(configuration)
        shutil.make_archive("pt-tmp", "zip", "pt-tmp")

        hummingbird.ml.load("pt-tmp")
        os.remove("pt-tmp.zip")
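
    # The three tests above repeat the same unpack / edit / re-archive steps.
    # A hypothetical helper like this sketch (not part of Hummingbird's API)
    # could factor them out; edit_fn maps the list of configuration lines to
    # the lines that should be written back.
    @staticmethod
    def _rewrite_saved_configuration(model_dir, edit_fn):
        import os
        import shutil

        from hummingbird.ml.operator_converters import constants

        # Unpack the saved model, rewrite its configuration file, re-zip.
        shutil.unpack_archive(model_dir + ".zip", model_dir, format="zip")
        config_path = os.path.join(
            model_dir, constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH)
        with open(config_path, "r") as file:
            lines = file.readlines()
        with open(config_path, "w") as file:
            file.writelines(edit_fn(lines))
        shutil.make_archive(model_dir, "zip", model_dir)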

    # Test torchscript save and load
    def test_torchscript_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch.jit", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("ts-tmp")

        hb_model_loaded = hummingbird.ml.TorchContainer.load("ts-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("ts-tmp.zip")

    # Test torchscript save and generic load
    def test_torchscript_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch.jit", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("ts-tmp")

        hb_model_loaded = hummingbird.ml.load("ts-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("ts-tmp.zip")

    def test_load_fails_bad_path(self):
        # Asserts for bad path with extension
        self.assertRaises(AssertionError, hummingbird.ml.load, "nonsense.zip")
        self.assertRaises(AssertionError, hummingbird.ml.TorchContainer.load,
                          "nonsense.zip")

        # Asserts for bad path with no extension
        self.assertRaises(AssertionError, hummingbird.ml.load, "nonsense")
        self.assertRaises(AssertionError, hummingbird.ml.TorchContainer.load,
                          "nonsense")

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_load_fails_bad_path_onnx(self):
        self.assertRaises(AssertionError, hummingbird.ml.ONNXContainer.load,
                          "nonsense.zip")
        self.assertRaises(AssertionError, hummingbird.ml.ONNXContainer.load,
                          "nonsense")

    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_load_fails_bad_path_tvm(self):
        self.assertRaises(AssertionError, hummingbird.ml.TVMContainer.load,
                          "nonsense.zip")
        self.assertRaises(AssertionError, hummingbird.ml.TVMContainer.load,
                          "nonsense")

    # Test unsupported backends
    def test_unsupported_backend(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test that the scala backend raises an exception
        self.assertRaises(MissingBackend, hummingbird.ml.convert, model,
                          "scala")

    # Test torchscript requires test_data
    def test_torchscript_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test torchscript requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model,
                          "torch.jit")

    # Test TVM requires test_data
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test tvm requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "tvm")

    # Test tvm save and load
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp")

        hb_model_loaded = hummingbird.ml.TVMContainer.load("tvm-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("tvm-tmp.zip")

    # Test tvm save and generic load
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp")

        hb_model_loaded = hummingbird.ml.load("tvm-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("tvm-tmp.zip")

    # Test tvm save and load zip file
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_load_zip(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp.zip")

        hb_model_loaded = hummingbird.ml.TVMContainer.load("tvm-tmp.zip")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("tvm-tmp.zip")

    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_load_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp.zip")

        hummingbird.ml.TVMContainer.load("tvm-tmp.zip")
        hummingbird.ml.TVMContainer.load("tvm-tmp.zip")

        os.remove("tvm-tmp.zip")

    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_load_no_versions(self):
        from hummingbird.ml.operator_converters import constants

        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp")

        shutil.unpack_archive("tvm-tmp.zip", "tvm-tmp", format="zip")

        # Removing the configuration file with the versions does not create problems.
        os.remove(
            os.path.join("tvm-tmp",
                         constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH))

        hummingbird.ml.load("tvm-tmp")
        os.remove("tvm-tmp.zip")

    # Test onnx requires test_data or initial_types
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_float(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model
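        # No sample data is needed here: an ONNX graph already carries its
        # input shapes and types (the initial_types set above).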

    # Test onnx 0 shape input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_zero_shape_input(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("input",
                                             DoubleTensorType([0, X.shape[1]]))
                                        ],
                                        target_opset=11)

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, double input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_double(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", DoubleTensorType([X.shape[0],
                                                       X.shape[1]]))],
            target_opset=11)

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, long input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_long(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.int64)

        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", Int64TensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, int input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_int(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", Int32TensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, string input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_string(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([["a", "b", "c"]])
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", StringTensorType([X.shape[0],
                                                       X.shape[1]]))],
            target_opset=11)

        # Strings are not supported yet: the conversion raises a RuntimeError
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model,
                          "onnx")

    # Test ONNX save and load
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="ONNX test requires ORT")
    def test_onnx_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        hb_model_loaded = hummingbird.ml.ONNXContainer.load("onnx-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("onnx-tmp.zip")

    # Test ONNX save and generic load
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="ONNX test requires ORT")
    def test_onnx_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        hb_model_loaded = hummingbird.ml.load("onnx-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("onnx-tmp.zip")

    # Test ONNX save and repeated load
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="ONNX test requires ORT")
    def test_onnx_save_load_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        hummingbird.ml.load("onnx-tmp")
        hummingbird.ml.load("onnx-tmp")

        os.remove("onnx-tmp.zip")

    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="ONNX test requires ORT")
    def test_onnx_save_load_no_versions(self):
        from hummingbird.ml.operator_converters import constants

        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        shutil.unpack_archive("onnx-tmp.zip", "onnx-tmp", format="zip")

        # Removing the configuration file with the versions does not create problems.
        os.remove(
            os.path.join("onnx-tmp",
                         constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH))

        hummingbird.ml.load("onnx-tmp")
        os.remove("onnx-tmp.zip")

    # Test for when the user forgets to pass a backend (e.g., convert(model, output) rather than convert(model, 'torch')) due to the API change
    def test_forgotten_backend_string(self):
        from sklearn.preprocessing import LabelEncoder

        model = LabelEncoder()
        data = np.array([1, 4, 5, 2, 0, 2], dtype=np.int32)
        model.fit(data)

        self.assertRaises(ValueError, hummingbird.ml.convert, model,
                          [("input", Int32TensorType([6, 1]))])

    # Test that the onnx backend requires test_data
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="ONNX test requires ORT")
    def test_onnx(self):
        import numpy as np
        import lightgbm as lgb
        from hummingbird.ml import convert

        # Create some random data for binary classification.
        num_classes = 2
        X = np.array(np.random.rand(10000, 28), dtype=np.float32)
        y = np.random.randint(num_classes, size=10000)

        model = lgb.LGBMClassifier()
        model.fit(X, y)

        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "onnx")

    # Test Spark UDF
    @unittest.skipIf(
        os.name == "nt" or not sparkml_installed()
        or LooseVersion(pyspark.__version__) < LooseVersion("3"),
        reason="UDF Test requires spark >= 3",
    )
    def test_udf_torch(self):
        X, y = load_iris(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            random_state=77,
            test_size=0.2,
        )
        spark_df = sql_context.createDataFrame(pd.DataFrame(data=X_train))
        sql_context.registerDataFrameAsTable(spark_df, "IRIS")

        model = GradientBoostingClassifier(n_estimators=10)
        model.fit(X_train, y_train)

        hb_model = hummingbird.ml.convert(model, "torch")

        # Broadcast the model.
        broadcasted_model = spark.sparkContext.broadcast(hb_model)

        # UDF definition.
        @pandas_udf("long")
        def udf_hb_predict(
                iterator: Iterator[pd.Series]) -> Iterator[pd.Series]:
            model = broadcasted_model.value
            for args in iterator:
                data_unmangled = pd.concat([feature for feature in args],
                                           axis=1)
                predictions = model.predict(data_unmangled)
                yield pd.Series(np.array(predictions))

        # Register the UDF.
        sql_context.udf.register("PREDICT", udf_hb_predict)

        # Run the query.
        sql_context.sql(
            "SELECT SUM(prediction) FROM (SELECT PREDICT(*) as prediction FROM IRIS)"
        ).show()
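
        # The same UDF can also be invoked through the DataFrame API; a
        # minimal sketch (assuming the IRIS table's column set):
        #   spark_df.select(udf_hb_predict(*spark_df.columns)).show()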

    @unittest.skipIf(
        os.name == "nt" or not sparkml_installed()
        or LooseVersion(pyspark.__version__) < LooseVersion("3"),
        reason="UDF Test requires spark >= 3",
    )
    def test_udf_torch_jit_broadcast(self):
        import pickle

        X, y = load_iris(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            random_state=77,
            test_size=0.2,
        )
        spark_df = sql_context.createDataFrame(pd.DataFrame(data=X_train))
        sql_context.registerDataFrameAsTable(spark_df, "IRIS")

        model = GradientBoostingClassifier(n_estimators=10)
        model.fit(X_train, y_train)

        hb_model = hummingbird.ml.convert(model, "torch.jit", X_test)

        # Broadcasting the model raises a pickling error.
        self.assertRaises(pickle.PickleError, spark.sparkContext.broadcast,
                          hb_model)
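
        # TorchScript modules are not picklable, so the container cannot be
        # broadcast directly; the next test ships the model file through
        # SparkFiles and broadcasts only the emptied container.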

    @unittest.skipIf(
        os.name == "nt" or not sparkml_installed()
        or LooseVersion(pyspark.__version__) < LooseVersion("3"),
        reason="UDF Test requires spark >= 3",
    )
    def test_udf_torch_jit_spark_file(self):
        import dill
        import torch.jit

        X, y = load_iris(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            random_state=77,
            test_size=0.2,
        )
        spark_df = sql_context.createDataFrame(pd.DataFrame(data=X_train))
        sql_context.registerDataFrameAsTable(spark_df, "IRIS")

        model = GradientBoostingClassifier(n_estimators=10)
        model.fit(X_train, y_train)

        hb_model = hummingbird.ml.convert(model, "torch.jit", X_test)

        # Save the file locally.
        if os.path.exists("deployed_model.zip"):
            os.remove("deployed_model.zip")
        torch.jit.save(hb_model.model, "deployed_model.zip")
        hb_model._model = None

        # Share the model using spark file and broadcast the container.
        spark.sparkContext.addFile("deployed_model.zip")
        broadcasted_container = spark.sparkContext.broadcast(hb_model)

        # UDF definition.
        @pandas_udf("long")
        def udf_hb_predict(
                iterator: Iterator[pd.Series]) -> Iterator[pd.Series]:
            location = SparkFiles.get("deployed_model.zip")
            torch_model = torch.jit.load(location)
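            # Reattach the deserialized TorchScript module to the broadcast
            # container before predicting on the worker.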
            container = broadcasted_container.value
            container._model = torch_model
            model = container
            for args in iterator:
                data_unmangled = pd.concat([feature for feature in args],
                                           axis=1)
                predictions = model.predict(data_unmangled.values)
                yield pd.Series(np.array(predictions))

        # Register the UDF.
        sql_context.udf.register("PREDICT", udf_hb_predict)

        # Run the query.
        sql_context.sql(
            "SELECT SUM(prediction) FROM (SELECT PREDICT(*) as prediction FROM IRIS)"
        ).show()

        os.remove("deployed_model.zip")
Example #11
class TestSklearnPipeline(unittest.TestCase):
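    """Tests that scikit-learn pipelines and ColumnTransformers convert correctly."""
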
    def test_pipeline(self):
        data = np.array([[0, 0], [0, 0], [1, 1], [1, 1]], dtype=np.float32)
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    def test_pipeline2(self):
        data = np.array([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]],
                        dtype=np.float32)
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    def test_combine_inputs_union_in_pipeline(self):
        from sklearn.preprocessing import StandardScaler
        from sklearn.pipeline import Pipeline

        data = np.array([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]],
                        dtype=np.float32)
        model = Pipeline([
            ("scaler1", StandardScaler()),
            ("union",
             FeatureUnion([("scaler2", StandardScaler()),
                           ("scaler3", MinMaxScaler())])),
        ])
        model.fit(data)

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    def test_combine_inputs_floats_ints(self):
        data = [[0, 0.0], [0, 0.0], [1, 1.0], [1, 1.0]]
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_1(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        preprocessor = ColumnTransformer(transformers=[("num",
                                                        numeric_transformer,
                                                        numeric_features)])

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_string(self):
        """
        TODO: Hummingbird does not yet support strings in this context. Should raise error.
        When this feature is complete, change this test.
        """
        # fit
        titanic_url = "https://raw.githubusercontent.com/amueller/scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv"
        data = pandas.read_csv(titanic_url)
        X = data.drop("survived", axis=1)
        y = data["survived"]
        # SimpleImputer is not available for strings in the ONNX-ML
        # specification, so we impute beforehand.
        X["pclass"].fillna("missing", inplace=True)

        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2)

        numeric_features = ["age", "fare"]
        numeric_transformer = Pipeline(
            steps=[("imputer", SimpleImputer(
                strategy="median")), ("scaler", StandardScaler())])

        categorical_features = ["pclass"]
        categorical_transformer = Pipeline(
            steps=[("onehot", OneHotEncoder(handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ])

        clf = Pipeline(
            steps=[("preprocessor", preprocessor
                    ), ("classifier", LogisticRegression(solver="liblinear"))])

        to_drop = {
            "parch", "sibsp", "cabin", "ticket", "name", "body", "home.dest",
            "boat", "sex", "embarked"
        }

        X_train = X_train.copy()
        X_test = X_test.copy()
        X_train["pclass"] = X_train["pclass"].astype(np.int64)
        X_test["pclass"] = X_test["pclass"].astype(np.int64)
        X_train = X_train.drop(to_drop, axis=1)
        X_test = X_test.drop(to_drop, axis=1)

        clf.fit(X_train, y_train)

        torch_model = hummingbird.ml.convert(clf, "torch", X_test)

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            clf.predict(X_test),
            torch_model.predict(X_test),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ])

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_pandas(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ])

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch", X_test)

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_pandas_ts(self):
        iris = datasets.load_iris()
        # Note: without float32 here, this fails with Python 3.5 and torch 1.5.1.
        X = np.array(iris.data[:, :3], np.float32)
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ])

        model = Pipeline(steps=[("preprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch.jit", X_test)

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_weights(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
        )

        model = Pipeline(steps=[("preprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_weights_pandas(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
        )

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch", X_test)

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_drop(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
            remainder="drop",
        )

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_drop_noweights(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            remainder="drop",
        )

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None,
                     reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_passthrough(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
            remainder="passthrough",
        )

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None,
                     reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_passthrough_noweights(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            remainder="passthrough",
        )

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None,
                     reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_passthrough_slice(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = slice(0, 1)  # ["vA"]
        categorical_features = slice(3, 4)  # ["vcat"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
            remainder="passthrough",
        )

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    # Taken from https://github.com/microsoft/hummingbird/issues/388
    def test_pipeline_pca_rf(self):
        X, y = make_regression(n_samples=1000,
                               n_features=8,
                               n_informative=5,
                               n_targets=1,
                               random_state=0,
                               shuffle=True)
        pca = PCA(n_components=8, svd_solver="randomized", whiten=True)
        clf = make_pipeline(
            StandardScaler(), pca,
            RandomForestRegressor(n_estimators=10,
                                  max_depth=30,
                                  random_state=0))
        clf.fit(X, y)

        model = hummingbird.ml.convert(clf, "pytorch")

        prediction_sk = clf.predict([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])

        prediction_hb = model.predict(
            [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])

        np.testing.assert_allclose(prediction_sk,
                                   prediction_hb,
                                   rtol=1e-06,
                                   atol=1e-06)

    @unittest.skipIf(ColumnTransformer is None,
                     reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="Test requires ORT installed")
    def test_pipeline_many_inputs(self):
        n_features = 18
        X = np.random.rand(100, n_features)
        y = np.random.randint(1000, size=100)

        scaler_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        preprocessor = ColumnTransformer(
            transformers=[("scaling", scaler_transformer,
                           list(range(n_features)))])
        model = RandomForestRegressor(n_estimators=10, max_depth=9)
        pipeline = Pipeline(steps=[("preprocessor",
                                    preprocessor), ("model", model)])

        pipeline.fit(X, y)

        X_test = tuple(np.split(X, n_features, axis=1))

        hb_model = hummingbird.ml.convert(pipeline, "onnx", X_test)

        assert len(hb_model.model.graph.input) == n_features
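        # X was split column-wise into n_features arrays, so the converted
        # ONNX graph exposes one input per feature.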

        np.testing.assert_allclose(
            pipeline.predict(X),
            np.array(hb_model.predict(X_test)).flatten(),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None,
                     reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="Test requires ORT installed")
    def test_pipeline_many_inputs_with_schema(self):
        n_features = 5
        X = np.random.rand(100, n_features)
        y = np.random.randint(1000, size=100)
        input_column_names = ["A", "B", "C", "D", "E"]
        output_column_names = ["score"]

        scaler_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        preprocessor = ColumnTransformer(
            transformers=[("scaling", scaler_transformer,
                           list(range(n_features)))])
        model = RandomForestRegressor(n_estimators=10, max_depth=9)
        pipeline = Pipeline(steps=[("preprocessor",
                                    preprocessor), ("model", model)])

        pipeline.fit(X, y)

        X_test = tuple(np.split(X, n_features, axis=1))
        extra_config = {
            constants.INPUT_NAMES: input_column_names,
            constants.OUTPUT_NAMES: output_column_names
        }

        hb_model = hummingbird.ml.convert(pipeline,
                                          "onnx",
                                          X_test,
                                          extra_config=extra_config)

        graph_inputs = [input.name for input in hb_model.model.graph.input]
        graph_outputs = [output.name for output in hb_model.model.graph.output]

        assert len(hb_model.model.graph.input) == n_features
        assert graph_inputs == input_column_names
        assert graph_outputs == output_column_names
Example #12
class TestONNXOneHotEncoder(unittest.TestCase):

    # Test OneHotEncoder with ints
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_one_hot_encoder_onnx_int(self, rtol=1e-06, atol=1e-06):
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("int_input", IntTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)[0]

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Test OneHotEncoder with 2 inputs
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_one_hot_encoder_onnx2(self, rtol=1e-06, atol=1e-06):
        model = OneHotEncoder()
        X = np.array([[1, 2, 3], [2, 1, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("int_input", IntTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)[0]

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Test OneHotEncoder with int64
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_one_hot_encoder_onnx_int64(self, rtol=1e-06, atol=1e-06):
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int64)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("int_input", LongTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)[0]

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Test OneHotEncoder with strings. This test only works with pytorch >= 1.8
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    @unittest.skipIf(
        LooseVersion(torch.__version__) < LooseVersion("1.8.0"),
        reason="PyTorch exporter returns an error until version 1.8.0",
    )
    def test_model_one_hot_encoder_string(self):
        model = OneHotEncoder()
        data = [["a", "r", "x"], ["a", "r", "x"], ["aaaa", "r", "x"], ["a", "r", "xx"]]
        model.fit(data)

        onnx_ml_model = convert_sklearn(model, initial_types=[("input", StringTensorType_onnx([4, 3]))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", data)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: data}
        onnx_ml_pred = session.run(output_names, inputs)

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(data)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[0], onnx_pred, rtol=1e-06, atol=1e-06)

    # Test OneHotEncoder failure case when the input data type is not supported
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_ohe_string_raises_type_error_onnx(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        data = [["a", "r", "x"], ["a", "r", "x"], ["aaaa", "r", "x"], ["a", "r", "xx"]]
        model.fit(data)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("input", StringTensorType_onnx([4, 3]))])

        # Create the ONNX model by calling the converter; this should raise an error for strings
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx")
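
The onnxruntime boilerplate above is repeated in every test; a compact helper version of the same pattern (a sketch assuming only onnxruntime; the helper name is ours):

import onnxruntime as ort

def run_onnx_ml(onnx_ml_model, X):
    # Run an ONNX-ML model directly in onnxruntime and return all outputs.
    session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [output.name for output in session.get_outputs()]
    inputs = {session.get_inputs()[0].name: X}
    return session.run(output_names, inputs)

# Usage against a Hummingbird-converted model, as in the tests above:
# np.testing.assert_allclose(run_onnx_ml(onnx_ml_model, X)[0],
#                            onnx_model.transform(X), rtol=1e-06, atol=1e-06)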
class TestIsolationForestConverter(unittest.TestCase):
    # Check tree implementation
    def test_iforest_implementation(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(10, 1)
        X = np.array(X, dtype=np.float32)
        model = IsolationForest(n_estimators=1, max_samples=2)
        for extra_config_param in ["tree_trav", "perf_tree_trav", "gemm"]:
            model.fit(X)
            torch_model = hummingbird.ml.convert(
                model,
                "torch",
                extra_config={"tree_implementation": extra_config_param})
            self.assertIsNotNone(torch_model)
            self.assertEqual(
                str(type(list(torch_model.model._operator_map.values())[0])),
                iforest_implementation_map[extra_config_param])

    def _run_isolation_forest_converter(self, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_samples in [2**1, 2**3, 2**8, 2**10, 2**12]:
            model = IsolationForest(n_estimators=10, max_samples=max_samples)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            model.fit(X)
            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.decision_function(X),
                                       torch_model.decision_function(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_allclose(model.score_samples(X),
                                       torch_model.score_samples(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_array_equal(model.predict(X),
                                          torch_model.predict(X))

    # Isolation Forest
    def test_isolation_forest_converter(self):
        self._run_isolation_forest_converter()

    # Gemm Isolation Forest
    def test_isolation_forest_gemm_converter(self):
        self._run_isolation_forest_converter(
            extra_config={"tree_implementation": "gemm"})

    # Tree_trav Isolation Forest
    def test_isolation_forest_tree_trav_converter(self):
        self._run_isolation_forest_converter(
            extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav Isolation Forest
    def test_isolation_forest_perf_tree_trav_converter(self):
        self._run_isolation_forest_converter(
            extra_config={"tree_implementation": "perf_tree_trav"})

    # Float 64 data tests
    def test_float64_isolation_forest_converter(self):
        warnings.filterwarnings("ignore")
        for max_samples in [2**1, 2**3, 2**8, 2**10, 2**12]:
            model = IsolationForest(n_estimators=10, max_samples=max_samples)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            model.fit(X)
            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config={})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.decision_function(X),
                                       torch_model.decision_function(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_allclose(model.score_samples(X),
                                       torch_model.score_samples(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_array_equal(model.predict(X),
                                          torch_model.predict(X))

    # Test TorchScript backend.
    def test_isolation_forest_ts_converter(self):
        warnings.filterwarnings("ignore")
        for max_samples in [2**1, 2**3, 2**8, 2**10, 2**12]:
            model = IsolationForest(n_estimators=10, max_samples=max_samples)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            model.fit(X)
            torch_model = hummingbird.ml.convert(model,
                                                 "torch.jit",
                                                 X,
                                                 extra_config={})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.decision_function(X),
                                       torch_model.decision_function(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_allclose(model.score_samples(X),
                                       torch_model.score_samples(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_array_equal(model.predict(X),
                                          torch_model.predict(X))

    # Test ONNX backend.
    @unittest.skipIf(not (onnx_runtime_installed()),
                     reason="ONNX tests require ORT")
    def test_isolation_forest_onnx_converter(self):
        warnings.filterwarnings("ignore")
        for max_samples in [2**1, 2**3, 2**8, 2**10, 2**12]:
            model = IsolationForest(n_estimators=10, max_samples=max_samples)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            model.fit(X)
            onnx_model = hummingbird.ml.convert(model,
                                                "onnx",
                                                X,
                                                extra_config={})
            self.assertIsNotNone(onnx_model)
            np.testing.assert_allclose(model.decision_function(X),
                                       onnx_model.decision_function(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_allclose(model.score_samples(X),
                                       onnx_model.score_samples(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_array_equal(model.predict(X),
                                          onnx_model.predict(X))

    # Test TVM backend.
    @unittest.skipIf(not (tvm_installed()), reason="TVM test requires TVM")
    def test_isolation_forest_tvm_converter(self):
        warnings.filterwarnings("ignore")
        for max_samples in [2**1, 2**3, 2**8, 2**10, 2**12]:
            model = IsolationForest(n_estimators=10, max_samples=max_samples)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            model.fit(X)
            hb_model = hummingbird.ml.convert(
                model,
                "tvm",
                X,
                extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

            self.assertIsNotNone(hb_model)
            np.testing.assert_allclose(model.decision_function(X),
                                       hb_model.decision_function(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_allclose(model.score_samples(X),
                                       hb_model.score_samples(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_array_equal(model.predict(X),
                                          hb_model.predict(X))
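
For reference, a minimal sketch of pinning the tree strategy that test_iforest_implementation iterates over; "gemm" here is illustrative, and which of the three implementations is fastest depends on tree depth and batch size.

import numpy as np
import hummingbird.ml
from sklearn.ensemble import IsolationForest

np.random.seed(0)
X = np.random.rand(100, 10).astype(np.float32)
model = IsolationForest(n_estimators=10).fit(X)

torch_model = hummingbird.ml.convert(
    model, "torch", extra_config={"tree_implementation": "gemm"})

np.testing.assert_allclose(model.decision_function(X),
                           torch_model.decision_function(X),
                           rtol=1e-06, atol=1e-06)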
class TestONNXLogisticRegressionConverter(unittest.TestCase):
    def _test_regressor(self, classes):
        n_features = 20
        n_total = 100
        np.random.seed(0)
        warnings.filterwarnings("ignore")
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(classes, size=n_total)

        # Create SKL model for testing
        model = LogisticRegression(solver="liblinear", multi_class="ovr", fit_intercept=True)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("float_input", FloatTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)
        for i in range(len(output_names)):
            if output_names[i] == "output_label":
                onnx_ml_pred[1] = pred[i]
            else:
                onnx_ml_pred[0] = pred[i]

        # Get the predictions for the ONNX model
        session = ort.InferenceSession(onnx_model.SerializeToString())
        onnx_pred = [[] for i in range(len(output_names))]
        pred = session.run(output_names, inputs)
        for i in range(len(output_names)):
            if output_names[i] == "output_label":
                onnx_pred[1] = pred[i]
            else:
                onnx_pred[0] = pred[i]

        return onnx_ml_pred, onnx_pred

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_logistic_regression_onnxml_binary(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_regressor(2)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=rtol, atol=atol)  # labels
        np.testing.assert_allclose(
            list(map(lambda x: list(x.values()), onnx_ml_pred[0])), onnx_pred[0], rtol=rtol, atol=atol
        )  # probs

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_logistic_regression_onnxml_multi(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_regressor(3)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=rtol, atol=atol)  # labels
        np.testing.assert_allclose(
            list(map(lambda x: list(x.values()), onnx_ml_pred[0])), onnx_pred[0], rtol=rtol, atol=atol
        )  # probs
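
The label/probability demultiplexing loop above recurs in most classifier tests; the same logic as a small helper (a sketch assuming the usual two-output layout of ai.onnx.ml classifiers, where the label output name contains "label"):

def split_outputs(output_names, outputs):
    # Split an ai.onnx.ml classifier run into (probabilities, labels).
    probs, labels = None, None
    for name, value in zip(output_names, outputs):
        if "label" in name:
            labels = value
        else:
            probs = value
    return probs, labels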
Example #15
class TestONNXLightGBMConverter(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super(TestONNXLightGBMConverter, self).__init__(*args, **kwargs)

    # Base test implementation comparing ONNXML and ONNX models.
    def _test_lgbm(self, X, model, extra_config={}):
        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        # Create ONNX model
        onnx_model = convert(onnx_ml_model, "onnx", X, extra_config=extra_config)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)
        for i in range(len(output_names)):
            if output_names[i] == "label":
                onnx_ml_pred[1] = pred[i]
            else:
                onnx_ml_pred[0] = pred[i]

        # Get the predictions for the ONNX model
        session = ort.InferenceSession(onnx_model.SerializeToString())
        onnx_pred = [[] for i in range(len(output_names))]
        pred = session.run(output_names, inputs)
        for i in range(len(output_names)):
            if output_names[i] == "label":
                onnx_pred[1] = pred[i]
            else:
                onnx_pred[0] = pred[i]

        return onnx_ml_pred, onnx_pred, output_names

    # Utility function for testing regression models.
    def _test_regressor(self, X, model, rtol=1e-06, atol=1e-06, extra_config={}):
        onnx_ml_pred, onnx_pred, output_names = self._test_lgbm(X, model, extra_config)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[0], onnx_pred[0], rtol=rtol, atol=atol)

    # Utility function for testing classification models.
    def _test_classifier(self, X, model, rtol=1e-06, atol=1e-06, extra_config={}):
        onnx_ml_pred, onnx_pred, output_names = self._test_lgbm(X, model, extra_config)

        np.testing.assert_allclose(onnx_ml_pred[1], onnx_pred[1], rtol=rtol, atol=atol)  # labels
        np.testing.assert_allclose(
            list(map(lambda x: list(x.values()), onnx_ml_pred[0])), onnx_pred[0], rtol=rtol, atol=atol
        )  # probs

    # Check that ONNXML models can only target the ONNX backend.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_pytorch(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "torch")

    # Check converter with extra configs.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_pytorch_extra_config(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        # Create ONNX model
        model_name = "hummingbird.ml.test.lightgbm"
        extra_config = {}
        extra_config[constants.ONNX_OUTPUT_MODEL_NAME] = model_name
        extra_config[constants.ONNX_INITIAL_TYPES] = [("input", FloatTensorType([X.shape[0], X.shape[1]]))]
        onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)

        assert onnx_model.graph.name == model_name

    # Basic regression test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lgbm_onnxml_model_regressor(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(n_total, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMRegressor()
        model.fit(X, y)
        self._test_regressor(X, model, rtol=1e-02, atol=1e-02)  # Looser tolerance to avoid spurious failures on random data

    # Regression test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_regressor(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 1 estimator (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_regressor1(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(n_estimators=1, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 2 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_regressor2(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(n_estimators=2, max_depth=1, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with gbdt boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_booster_regressor(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 1.1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {"boosting_type": "gbdt", "objective": "regression", "n_estimators": 3, "min_child_samples": 1, "max_depth": 1},
            data,
        )
        self._test_regressor(X, model)

    # Binary classification test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lgbm_onnxml_model_binary(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_classifier(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators zipmap (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and selecting boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_booster_classifier(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train({"boosting_type": "gbdt", "objective": "binary", "n_estimators": 3, "min_child_samples": 1}, data)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and selecting boosting type zipmap (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_booster_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train({"boosting_type": "gbdt", "objective": "binary", "n_estimators": 3, "min_child_samples": 1}, data)
        self._test_classifier(X, model)

    # Multiclass classification test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lgbm_onnxml_model_multi(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_lightgbm_classifier_multi(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 2, 1, 1, 2]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators and selecting boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    @unittest.skipIf(
        True, reason='ONNXMLTOOLS fails with "ValueError: unsupported LightGbm objective: multiclass num_class:3"'
    )
    def test_lightgbm_booster_multi_classifier(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2], [-1, 2], [1, -2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1, 2, 2]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {"boosting_type": "gbdt", "objective": "multiclass", "n_estimators": 3, "min_child_samples": 1, "num_class": 3},
            data,
        )
        self._test_classifier(X, model)
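
A condensed sketch of the two extra_config knobs used in test_lightgbm_pytorch_extra_config above; the model name is illustrative and the onnxmltools import path is an assumption, since the test file imports FloatTensorType under an alias.

from hummingbird.ml import constants
from onnxmltools.convert.common.data_types import FloatTensorType

extra_config = {
    # Name given to the resulting ONNX graph.
    constants.ONNX_OUTPUT_MODEL_NAME: "my.model.name",
    # Input types used when no test input is passed to convert().
    constants.ONNX_INITIAL_TYPES: [("input", FloatTensorType([100, 28]))],
}
# onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)
# assert onnx_model.graph.name == "my.model.name"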
Example #16
class TestONNXSVC(unittest.TestCase):
    def _test_sv(self, classes, mode="torch"):
        """
        This helper function tests conversion of `ai.onnx.ml.SVMClassifier`
        which is created from a scikit-learn SVC.

        The ONNX-ML model is then converted to the backend given by `mode`.
        """
        n_features = 20
        n_total = 100
        np.random.seed(0)
        warnings.filterwarnings("ignore")
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(classes, size=n_total)

        # Create SKL model for testing
        model = SVC()
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("float_input",
                                             FloatTensorType_onnx(X.shape))
                                        ])

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name
            for i in range(len(session.get_outputs()))
        ]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)
        for i in range(len(output_names)):
            if "label" in output_names[i]:
                onnx_ml_pred[1] = pred[i]
            else:
                onnx_ml_pred[0] = pred[i]

        hb_model = convert(onnx_ml_model, mode, X)

        pred = hb_model.predict(X)

        return onnx_ml_pred, pred

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    # Test ai.onnx.ml.SVMClassifier with 2 classes for onnxml -> pytorch
    def test_svc_onnxml_binary_torch(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, pred = self._test_sv(2)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[1], pred, rtol=rtol, atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    # Test ai.onnx.ml.SVMClassifier with 3 classes for onnxml -> pytorch
    def test_svc_onnxml_multi_torch(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, pred = self._test_sv(3)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[1], pred, rtol=rtol, atol=atol)
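
End to end, the two-hop path tested above looks roughly like this (a sketch: the convert_sklearn and FloatTensorType import locations are assumptions, since the test file imports them under aliases):

import numpy as np
from sklearn.svm import SVC
from onnxmltools import convert_sklearn
from onnxmltools.convert.common.data_types import FloatTensorType
from hummingbird.ml import convert

np.random.seed(0)
X = np.random.rand(100, 20).astype(np.float32)
y = np.random.randint(2, size=100)
skl_model = SVC().fit(X, y)

# scikit-learn SVC -> ONNX-ML (ai.onnx.ml.SVMClassifier) -> PyTorch
onnx_ml_model = convert_sklearn(
    skl_model, initial_types=[("float_input", FloatTensorType(X.shape))])
torch_model = convert(onnx_ml_model, "torch", X)
pred = torch_model.predict(X)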
Example #17
class TestONNXLightGBMConverter(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super(TestONNXLightGBMConverter, self).__init__(*args, **kwargs)

    # Base test implementation comparing ONNXML and ONNX models.
    def _test_lgbm(self, X, model, extra_config={}):
        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=9)

        # Create ONNX model
        onnx_model = convert(onnx_ml_model, "onnx", X, extra_config=extra_config)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name
            for i in range(len(session.get_outputs()))
        ]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)
        for i in range(len(output_names)):
            if "label" in output_names[i]:
                onnx_ml_pred[1] = pred[i]
            else:
                onnx_ml_pred[0] = pred[i]

        # Get the predictions for the ONNX model
        onnx_pred = [[] for i in range(len(output_names))]
        if len(output_names) == 1:  # regression
            onnx_pred = onnx_model.predict(X)
        else:  # classification
            onnx_pred[0] = onnx_model.predict_proba(X)
            onnx_pred[1] = onnx_model.predict(X)

        return onnx_ml_pred, onnx_pred, output_names

    # Utility function for testing regression models.
    def _test_regressor(self,
                        X,
                        model,
                        rtol=1e-06,
                        atol=1e-06,
                        extra_config={}):
        onnx_ml_pred, onnx_pred, output_names = self._test_lgbm(
            X, model, extra_config)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[0].ravel(),
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    # Utility function for testing classification models.
    def _test_classifier(self,
                         X,
                         model,
                         rtol=1e-06,
                         atol=1e-06,
                         extra_config={}):
        onnx_ml_pred, onnx_pred, output_names = self._test_lgbm(
            X, model, extra_config)

        np.testing.assert_allclose(onnx_ml_pred[1],
                                   onnx_pred[1],
                                   rtol=rtol,
                                   atol=atol)  # labels
        np.testing.assert_allclose(list(
            map(lambda x: list(x.values()), onnx_ml_pred[0])),
                                   onnx_pred[0],
                                   rtol=rtol,
                                   atol=atol)  # probs

    # Check that ONNXML models can also target other backends.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_onnx_pytorch(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=9)

        pt_model = convert(onnx_ml_model, "torch", X)
        assert pt_model

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name
            for i in range(len(session.get_outputs()))
        ]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        np.testing.assert_allclose(onnx_ml_pred[0].flatten(),
                                   pt_model.predict(X))

    # Basic regression test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_regressor(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(n_total, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMRegressor()
        model.fit(X, y)
        import platform

        # TODO bug on newer macOS versions?
        if platform.system() == "Darwin":
            self._test_regressor(X, model, rtol=1e-05, atol=1e-04)
        else:
            self._test_regressor(X, model)

    # Regression test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_regressor(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 1 estimator (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_regressor1(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(n_estimators=1, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 2 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_regressor2(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(n_estimators=2,
                                  max_depth=1,
                                  min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with gbdt boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_regressor(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 1.1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "regression",
                "n_estimators": 3,
                "min_child_samples": 1,
                "max_depth": 1
            },
            data,
        )
        self._test_regressor(X, model)

    # Binary classification test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_binary(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with float64.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_binary_float64(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float64)
        y = np.random.randint(2, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)

        onnx_model = convert(model, "onnx", X)

        np.testing.assert_allclose(model.predict(X), onnx_model.predict(X))

    # Binary classification test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_classifier(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators zipmap (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and selecting boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_classifier(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "binary",
                "n_estimators": 3,
                "min_child_samples": 1
            }, data)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and selecting boosting type zipmap (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "binary",
                "n_estimators": 3,
                "min_child_samples": 1
            }, data)
        self._test_classifier(X, model)

    # Multiclass classification test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_multi(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_classifier_multi(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 2, 1, 1, 2]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators and selecting boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_multi_classifier(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2], [-1, 2], [1, -2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1, 2, 2]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "multiclass",
                "n_estimators": 3,
                "min_child_samples": 1,
                "num_class": 3
            },
            data,
        )
        self._test_classifier(X, model)
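
Besides the ONNX-ML round trip, test_lgbm_onnxml_model_binary_float64 above also converts the LightGBM model directly; the container returned by convert keeps the familiar sklearn-style surface. A minimal sketch:

import numpy as np
import lightgbm as lgb
from hummingbird.ml import convert

np.random.seed(0)
X = np.random.rand(100, 28).astype(np.float32)
y = np.random.randint(2, size=100)
model = lgb.LGBMClassifier().fit(X, y)

onnx_model = convert(model, "onnx", X)
labels = onnx_model.predict(X)        # class labels
probs = onnx_model.predict_proba(X)   # per-class probabilities
np.testing.assert_allclose(model.predict(X), labels)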
Example #18
class TestONNXDecisionTreeConverter(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super(TestONNXDecisionTreeConverter, self).__init__(*args, **kwargs)

    # Base test implementation comparing ONNXML and ONNX models.
    def _test_decision_tree(self, X, model, extra_config={}):
        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # Create ONNX model
        onnx_model = convert(onnx_ml_model, "onnx", X, extra_config=extra_config)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name
            for i in range(len(session.get_outputs()))
        ]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)
        for i in range(len(output_names)):
            if "label" in output_names[i]:
                onnx_ml_pred[1] = pred[i]
            else:
                onnx_ml_pred[0] = pred[i]

        # Get the predictions for the ONNX model
        onnx_pred = [[] for i in range(len(output_names))]
        if len(output_names) == 1:  # regression
            onnx_pred = onnx_model.predict(X)
        else:  # classification
            for i in range(len(output_names)):
                if "label" in output_names[i]:
                    onnx_pred[1] = onnx_model.predict(X)
                else:
                    onnx_pred[0] = onnx_model.predict_proba(X)

        return onnx_ml_pred, onnx_pred, output_names

    # Utility function for testing regression models.
    def _test_regressor(self,
                        X,
                        model,
                        rtol=1e-06,
                        atol=1e-06,
                        extra_config={}):
        onnx_ml_pred, onnx_pred, output_names = self._test_decision_tree(
            X, model, extra_config)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[0].ravel(),
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    # Utility function for testing classification models.
    def _test_classifier(self,
                         X,
                         model,
                         rtol=1e-06,
                         atol=1e-06,
                         extra_config={}):
        onnx_ml_pred, onnx_pred, output_names = self._test_decision_tree(
            X, model, extra_config)

        np.testing.assert_allclose(onnx_ml_pred[1],
                                   onnx_pred[1],
                                   rtol=rtol,
                                   atol=atol)  # labels
        np.testing.assert_allclose(list(
            map(lambda x: list(x.values()), onnx_ml_pred[0])),
                                   onnx_pred[0],
                                   rtol=rtol,
                                   atol=atol)  # probs

    # Regression.
    # Regression test with Decision Tree.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_decision_tree_regressor(self):
        warnings.filterwarnings("ignore")
        model = DecisionTreeRegressor()
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Basic regression test with decision tree.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_decision_tree_regressor_random(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(n_total, size=n_total)

        # Create DecisionTree model
        model = DecisionTreeRegressor()
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with Random Forest, 1 estimator.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_random_forest_regressor_1(self):
        warnings.filterwarnings("ignore")
        model = RandomForestRegressor(n_estimators=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Basic regression test with Random Forest.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_random_forest_regressor_random(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(n_total, size=n_total)

        # Create RandomForest model
        model = RandomForestRegressor()
        model.fit(X, y)
        self._test_regressor(X, model, rtol=1e-03, atol=1e-03)

    # Binary.
    # Binary classification test with random data.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_decision_tree_binary_random(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=n_total)

        # Create DecisionTree model
        model = DecisionTreeClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test Decision Tree.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_decision_tree_binary(self):
        warnings.filterwarnings("ignore")
        model = DecisionTreeClassifier()
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test Random Forest with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_random_forest_classifier(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        model = RandomForestClassifier(n_estimators=3)
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test Random Forest with 3 estimators random.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_random_forest_classifier_random(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=n_total)

        model = RandomForestClassifier(n_estimators=10)
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test.
    # Multiclass classification test with DecisionTree, random.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_decision_tree_multi_random(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=n_total)

        # Create the DecisionTree model
        model = DecisionTreeClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with DecisionTree (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_decision_tree_multi(self):
        warnings.filterwarnings("ignore")
        model = DecisionTreeClassifier()
        X = [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 2, 1, 1, 2]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with Random Forest.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_random_forest_multi_random(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=n_total)

        # Create the RandomForest model
        model = RandomForestClassifier(n_estimators=10)
        model.fit(X, y)
        self._test_classifier(X, model)

class TestSklearnNormalizer(unittest.TestCase):
    def test_normalizer_converter(self):
        # Generate a random 2D array with values in [0, 1000)
        np.random.seed(0)
        data = np.random.rand(100, 200) * 1000
        data = np.array(data, dtype=np.float32)
        data_tensor = torch.from_numpy(data)

        for norm in ["l1", "l2", "max"]:
            model = Normalizer(norm=norm)
            model.fit(data)

            torch_model = hummingbird.ml.convert(model, "torch")

            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(
                model.transform(data),
                torch_model.transform(data_tensor),
                rtol=1e-06,
                atol=1e-06,
            )

    def test_normalizer_converter_raises_wrong_type(self):
        # Generate a random 2D array with values in [0, 1000)
        np.random.seed(0)
        data = np.random.rand(100, 200) * 1000
        data = np.array(data, dtype=np.float32)

        model = Normalizer(norm="invalid")
        model.fit(data)

        torch_model = hummingbird.ml.convert(model, "torch")

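        # Call the generated SklearnNormalizer operator directly; the invalid
        # norm only surfaces (as a RuntimeError) when the operator executes.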
        self.assertRaises(RuntimeError,
                          torch_model.model._operator_map.SklearnNormalizer,
                          torch.from_numpy(data))

    # Float 64 data tests
    def test_float64_normalizer_converter(self):
        # Generate a random 2D array with values in [0, 1000)
        np.random.seed(0)
        data = np.random.rand(100, 200) * 1000
        data_tensor = torch.from_numpy(data)

        for norm in ["l1", "l2", "max"]:
            model = Normalizer(norm=norm)
            model.fit(data)

            torch_model = hummingbird.ml.convert(model, "torch")

            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(
                model.transform(data),
                torch_model.transform(data_tensor),
                rtol=1e-06,
                atol=1e-06,
            )

    # ONNX backend
    @unittest.skipIf(not (onnx_runtime_installed()),
                     reason="ONNX test requires ONNX and  ORT")
    def test_normalizer_converter_onnx(self):
        # Generate a random 2D array with values in [0, 1000)
        np.random.seed(0)
        data = np.random.rand(100, 200) * 1000
        data = np.array(data, dtype=np.float32)
        data_tensor = torch.from_numpy(data)

        for norm in ["l1", "l2", "max"]:
            model = Normalizer(norm=norm)
            model.fit(data)

            hb_model = hummingbird.ml.convert(model, "onnx", data)

            self.assertIsNotNone(hb_model)
            np.testing.assert_allclose(
                model.transform(data),
                hb_model.transform(data_tensor),
                rtol=1e-06,
                atol=1e-06,
            )

    # TVM backend
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_normalizer_converter_tvm(self):
        # Generate a random 2D array with values in [0, 1000)
        np.random.seed(0)
        data = np.random.rand(100, 200) * 1000
        data = np.array(data, dtype=np.float32)
        data_tensor = torch.from_numpy(data)

        for norm in ["l1", "l2", "max"]:
            model = Normalizer(norm=norm)
            model.fit(data)

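            # TVM_MAX_FUSE_DEPTH caps TVM's operator fusion depth; the cap of
            # 30 here is assumed to keep compilation manageable on wide models.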
            torch_model = hummingbird.ml.convert(
                model,
                "tvm",
                data,
                extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(
                model.transform(data),
                torch_model.transform(data_tensor),
                rtol=1e-06,
                atol=1e-06,
            )
Example #20
class TestSklearnPipeline(unittest.TestCase):
    def test_pipeline(self):
        data = np.array([[0, 0], [0, 0], [1, 1], [1, 1]], dtype=np.float32)
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    def test_pipeline2(self):
        data = np.array([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]],
                        dtype=np.float32)
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    def test_combine_inputs_union_in_pipeline(self):
        from sklearn.preprocessing import StandardScaler
        from sklearn.pipeline import Pipeline

        data = np.array([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]],
                        dtype=np.float32)
        model = Pipeline([
            ("scaler1", StandardScaler()),
            ("union",
             FeatureUnion([("scaler2", StandardScaler()),
                           ("scaler3", MinMaxScaler())])),
        ])
        model.fit(data)

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    def test_combine_inputs_floats_ints(self):
        data = [[0, 0.0], [0, 0.0], [1, 1.0], [1, 1.0]]
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.transform(data),
            torch_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_1(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        preprocessor = ColumnTransformer(transformers=[("num",
                                                        numeric_transformer,
                                                        numeric_features)])

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ])

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_weights(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
        )

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_drop(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
            remainder="drop",
        )

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_drop_noweights(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            remainder="drop",
        )

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None,
                     reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_passthrough(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
            remainder="passthrough",
        )

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None,
                     reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_passthrough_noweights(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = [0, 1]  # ["vA", "vB"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            remainder="passthrough",
        )

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None,
                     reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not pandas_installed(),
                     reason="Test requires pandas installed")
    def test_pipeline_column_transformer_passthrough_slice(self):
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: 1 if x > 0.5 else 2)
        X_train["vcat2"] = X_train["vB"].apply(lambda x: 3 if x > 0.5 else 4)
        y_train = y % 2
        numeric_features = slice(0, 1)  # ["vA"]
        categorical_features = slice(3, 4)  # ["vcat"]

        classifier = LogisticRegression(
            C=0.01,
            class_weight=dict(zip([False, True], [0.2, 0.8])),
            n_jobs=1,
            max_iter=10,
            solver="liblinear",
            tol=1e-3,
        )

        numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

        categorical_transformer = Pipeline(
            steps=[("onehot",
                    OneHotEncoder(sparse=True, handle_unknown="ignore"))])

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_features),
                ("cat", categorical_transformer, categorical_features),
            ],
            transformer_weights={
                "num": 2,
                "cat": 3
            },
            remainder="passthrough",
        )

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        X_test = X_train[:11]

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            model.predict_proba(X_test),
            torch_model.predict_proba(X_test.values),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None,
                     reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="Test requires ORT installed")
    def test_pipeline_many_inputs(self):
        n_features = 18
        X = np.random.rand(100, n_features)
        y = np.random.randint(1000, size=100)

        scaler_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        preprocessor = ColumnTransformer(
            transformers=[("scaling", scaler_transformer,
                           list(range(n_features)))])
        model = RandomForestRegressor(n_estimators=10, max_depth=9)
        pipeline = Pipeline(steps=[("preprocessor",
                                    preprocessor), ("model", model)])

        pipeline.fit(X, y)

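        # np.split along axis=1 turns the (100, n_features) matrix into
        # n_features separate (100, 1) inputs.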
        X_test = tuple(np.split(X, n_features, axis=1))

        hb_model = hummingbird.ml.convert(pipeline, "onnx", X_test)

        assert len(hb_model.model.graph.input) == n_features

        np.testing.assert_allclose(
            pipeline.predict(X),
            np.array(hb_model.predict(X_test)).flatten(),
            rtol=1e-06,
            atol=1e-06,
        )

    @unittest.skipIf(ColumnTransformer is None,
                     reason="ColumnTransformer not available in 0.19")
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="Test requires ORT installed")
    def test_pipeline_many_inputs_with_schema(self):
        n_features = 5
        X = np.random.rand(100, n_features)
        y = np.random.randint(1000, size=100)
        input_column_names = ["A", "B", "C", "D", "E"]
        output_column_names = ["score"]

        scaler_transformer = Pipeline(steps=[("scaler", StandardScaler())])
        preprocessor = ColumnTransformer(
            transformers=[("scaling", scaler_transformer,
                           list(range(n_features)))])
        model = RandomForestRegressor(n_estimators=10, max_depth=9)
        pipeline = Pipeline(steps=[("preprocessor",
                                    preprocessor), ("model", model)])

        pipeline.fit(X, y)

        X_test = tuple(np.split(X, n_features, axis=1))
        extra_config = {
            constants.INPUT_NAMES: input_column_names,
            constants.OUTPUT_NAMES: output_column_names
        }

        hb_model = hummingbird.ml.convert(pipeline,
                                          "onnx",
                                          X_test,
                                          extra_config=extra_config)

        graph_inputs = [input.name for input in hb_model.model.graph.input]
        graph_outputs = [output.name for output in hb_model.model.graph.output]

        assert len(hb_model.model.graph.input) == n_features
        assert graph_inputs == input_column_names
        assert graph_outputs == output_column_names
Example #21
class TestONNXOneHotEncoder(unittest.TestCase):

    # Test OneHotEncoder with ints
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_one_hot_encoder_onnx_int(self, rtol=1e-06, atol=1e-06):
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("int_input", IntTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Test OneHotEncoder with 2 inputs
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_one_hot_encoder_onnx2(self, rtol=1e-06, atol=1e-06):
        model = OneHotEncoder()
        X = np.array([[1, 2, 3], [2, 1, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("int_input", IntTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # Test OneHotEncoder with int64
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_one_hot_encoder_onnx_int64(self, rtol=1e-06, atol=1e-06):
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int64)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("int_input", LongTensorType_onnx(X.shape))])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred, onnx_pred, rtol=rtol, atol=atol)

    # # Test OneHotEncoder with strings
    # @unittest.skipIf(
    #     not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    # )
    # def test_model_one_hot_encoder_string(self):
    #     model = OneHotEncoder()
    #     data = [['a', 'r', 'x'], ['a', 'r', 'x'], ['aaaa', 'r', 'x'], ['a', 'r', 'xx']]
    #     model.fit(data)

    #     # max word length is the smallest number which is divisible by 4 and larger than or equal to the length of any word
    #     max_word_length = 4
    #     num_columns = 3
    #     # Create ONNX-ML model
    #     onnx_ml_model = convert_sklearn(
    #         model,
    #         initial_types=[("input", StringTensorType_onnx([4, 3]))]
    #     )

    #     pytorch_input = np.array(data, dtype='|S'+str(max_word_length)).view(np.int32).reshape(-1, num_columns, max_word_length // 4)

    #     # Create ONNX model by calling converter
    #     onnx_model = convert(onnx_ml_model, "onnx", pytorch_input)

    #     # Get the predictions for the ONNX-ML model
    #     session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    #     output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]
    #     inputs = {session.get_inputs()[0].name: data}
    #     onnx_ml_pred = session.run(output_names, inputs)

    #     # Get the predictions for the ONNX model
    #     session = ort.InferenceSession(onnx_model.SerializeToString())
    #     inputs_pyt = {session.get_inputs()[0].name: pytorch_input}
    #     onnx_pred = session.run(output_names, inputs_pyt)

    #     return onnx_ml_pred, onnx_pred
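
    # For reference, the packing in the disabled test above maps each string to
    # fixed-width bytes and reinterprets them as int32 columns, e.g. with
    # max_word_length = 4:
    #   np.array([["a"]], dtype="|S4").view(np.int32)  ->  array([[97]])
    # since b"a\x00\x00\x00" is the little-endian int32 for 97 (ASCII "a").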

    # Test OneHotEncoder temporary fail case
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_ohe_string_raises_notimpl_onnx(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        data = [["a", "r", "x"], ["a", "r", "x"], ["aaaa", "r", "x"], ["a", "r", "xx"]]
        model.fit(data)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("input", StringTensorType_onnx([4, 3]))])

        # Create ONNX model by calling converter, should raise error for strings
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", data)

    # Test OneHotEncoder fail case when the input data type is not supported
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_ohe_string_raises_type_error_onnx(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        data = [["a", "r", "x"], ["a", "r", "x"], ["aaaa", "r", "x"], ["a", "r", "xx"]]
        model.fit(data)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model, initial_types=[("input", StringTensorType_onnx([4, 3]))])

        # Create ONNX model by calling converter, should raise error for strings
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx")
Example #22
class TestProphet(unittest.TestCase):
    def _get_data(self):
        local_path = "tests/resources"
        local_data = os.path.join(local_path,
                                  "example_wp_log_peyton_manning.csv")
        url = "https://raw.githubusercontent.com/facebook/prophet/master/examples/example_wp_log_peyton_manning.csv"
        if not os.path.isfile(local_data):
            os.makedirs(local_path, exist_ok=True)
            urlretrieve(url, local_data)
        data = pd.read_csv(local_data)
        return data

    @unittest.skipIf(not (pandas_installed() and prophet_installed()),
                     reason="Test requires Prophet and Pandas")
    def test_prophet_trend(self):
        df = self._get_data()

        m = Prophet()
        m.fit(df)

        # Convert with Hummingbird.
        hb_model = hummingbird.ml.convert(m, "torch")

        # Predictions.
        future = m.make_future_dataframe(periods=365)
        prophet_trend = m.predict(future)["trend"].values
        hb_trend = hb_model.predict(future)

        np.testing.assert_allclose(prophet_trend,
                                   hb_trend,
                                   rtol=1e-06,
                                   atol=1e-06)

    @unittest.skipIf(
        not (pandas_installed() and prophet_installed()),
        reason="Test requires Prophet, Pandas and ONNX runtime.",
    )
    @unittest.skipIf(
        LooseVersion(torch.__version__) < LooseVersion("1.8.1"),
        reason="Test requires Torch 1.8.1.",
    )
    @unittest.skipIf(
        not onnx_runtime_installed()
        or LooseVersion(onnxruntime.__version__) < LooseVersion("1.7.0"),
        reason="Prophet test requires onnxruntime => 1.7.0",
    )
    def test_prophet_trend_onnx(self):
        df = self._get_data()

        m = Prophet()
        m.fit(df)

        future = m.make_future_dataframe(periods=365)
        future_np = (future.values -
                     np.datetime64("1970-01-01T00:00:00.000000000")).astype(
                         np.int64) / 1000000000
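        # i.e. nanoseconds since the Unix epoch cast to int64, then scaled to
        # seconds (e.g. 1970-01-01T00:00:01 maps to 1.0).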

        # Convert with Hummingbird.
        hb_model = hummingbird.ml.convert(m, "onnx", future_np)

        # Predictions.
        prophet_trend = m.predict(future)["trend"]
        hb_trend = hb_model.predict(future_np)
        # Save the ONNX model for later inspection.
        import onnx
        onnx.save(hb_model.model, "prophet.onnx")

        np.testing.assert_allclose(prophet_trend,
                                   hb_trend,
                                   rtol=1e-06,
                                   atol=1e-06)
Example #23
class TestSklearnNormalizer(unittest.TestCase):
    def _test_normalizer_converter(self, norm):
        warnings.filterwarnings("ignore")
        X = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
                     dtype=np.float32)

        # Create SKL model for testing
        model = Normalizer(norm=norm)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("float_input",
                                             FloatTensorType_onnx(X.shape))
                                        ])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)
        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name
            for i in range(len(session.get_outputs()))
        ]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        # Get the predictions for the ONNX model
        session = ort.InferenceSession(onnx_model.SerializeToString())
        onnx_pred = session.run(output_names, inputs)

        return onnx_ml_pred, onnx_pred

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_normalizer_l1(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_normalizer_converter("l1")

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_normalizer_l2(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_normalizer_converter("l2")

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_normalizer_max(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_normalizer_converter("max")

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_normalizer_converter_raises_rt(self):
        warnings.filterwarnings("ignore")
        X = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
                     dtype=np.float32)
        model = Normalizer(norm="l1")
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("float_input",
                                             FloatTensorType_onnx(X.shape))
                                        ])
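        # Corrupt the node's "norm" attribute so the converter sees an
        # unsupported value and raises.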
        onnx_ml_model.graph.node[0].attribute[0].s = "".encode()

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)
Example #24
class TestONNXLinear(unittest.TestCase):
    def _test_linear(self, classes):
        """
        This helper function tests conversion of `ai.onnx.ml.LinearClassifier`
        which is created from a scikit-learn LogisticRegression.

        This tests `convert_onnx_linear_model` in `hummingbird.ml.operator_converters.onnxml_linear`
        """
        n_features = 20
        n_total = 100
        np.random.seed(0)
        warnings.filterwarnings("ignore")
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(classes, size=n_total)

        # Create SKL model for testing
        model = LogisticRegression(solver="liblinear",
                                   multi_class="ovr",
                                   fit_intercept=True)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("float_input",
                                             FloatTensorType_onnx(X.shape))
                                        ])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name
            for i in range(len(session.get_outputs()))
        ]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)
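        # Route the outputs by name: the label output goes to slot 1,
        # probabilities to slot 0.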
        for i in range(len(output_names)):
            if "label" in output_names[i]:
                onnx_ml_pred[1] = pred[i]
            else:
                onnx_ml_pred[0] = pred[i]

        # Get the predictions for the ONNX model
        onnx_pred = [[] for i in range(len(output_names))]
        if len(output_names) == 1:  # regression
            onnx_pred = onnx_model.predict(X)
        else:  # classification
            onnx_pred[0] = onnx_model.predict_proba(X)
            onnx_pred[1] = onnx_model.predict(X)

        return onnx_ml_pred, onnx_pred

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    # test ai.onnx.ml.LinearClassifier with 2 classes
    def test_logistic_regression_onnxml_binary(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_linear(2)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[1],
                                   onnx_pred[1],
                                   rtol=rtol,
                                   atol=atol)  # labels
        np.testing.assert_allclose(list(
            map(lambda x: list(x.values()), onnx_ml_pred[0])),
                                   onnx_pred[0],
                                   rtol=rtol,
                                   atol=atol)  # probs

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    # test ai.onnx.ml.LinearClassifier with 3 classes
    def test_logistic_regression_onnxml_multi(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_linear(3)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[1],
                                   onnx_pred[1],
                                   rtol=rtol,
                                   atol=atol)  # labels
        np.testing.assert_allclose(list(
            map(lambda x: list(x.values()), onnx_ml_pred[0])),
                                   onnx_pred[0],
                                   rtol=rtol,
                                   atol=atol)  # probs

    def _test_regressor(self, values):
        """
        This helper function tests conversion of `ai.onnx.ml.LinearRegressor`
        which is created from a scikit-learn LinearRegression.

        This tests `convert_onnx_linear_regression_model` in `hummingbird.ml.operator_converters.onnxml_linear`
        """
        n_features = 20
        n_total = 100
        np.random.seed(0)
        warnings.filterwarnings("ignore")
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(values, size=n_total)

        # Create SKL model for testing
        model = LinearRegression()
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("float_input",
                                             FloatTensorType_onnx(X.shape))
                                        ])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name
            for i in range(len(session.get_outputs()))
        ]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.predict(X)

        return onnx_ml_pred, onnx_pred

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    # test ai.onnx.ml.LinearRegressor with 2 values
    def test_linear_regression_onnxml_small(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_regressor(2)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[0].ravel(),
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    # test ai.onnx.ml.LinearRegressor with 100 values
    def test_linear_regression_onnxml_large(self, rtol=1e-06, atol=1e-06):
        onnx_ml_pred, onnx_pred = self._test_regressor(100)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[0].ravel(),
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    # test for malformed model/problem with parsing
    def test_onnx_linear_converter_raises_rt(self):
        n_features = 20
        n_total = 100
        np.random.seed(0)
        warnings.filterwarnings("ignore")
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=n_total)
        model = LinearRegression()
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("float_input",
                                             FloatTensorType_onnx(X.shape))
                                        ])
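        # Blank out the attribute's name so parsing the ONNX-ML model fails.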
        onnx_ml_model.graph.node[0].attribute[0].name = "".encode()

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)
"""
Tests onnxml Binarizer converter
"""
import unittest
import warnings

import numpy as np
import torch
from sklearn.preprocessing import Binarizer

from hummingbird.ml._utils import onnx_ml_tools_installed, onnx_runtime_installed, lightgbm_installed
from hummingbird.ml import convert

if onnx_runtime_installed():
    import onnxruntime as ort
if onnx_ml_tools_installed():
    from onnxmltools import convert_sklearn
    from onnxmltools.convert.common.data_types import FloatTensorType as FloatTensorType_onnx


class TestONNXBinarizer(unittest.TestCase):
    def _test_binarizer_converter(self, threshold):
        warnings.filterwarnings("ignore")
        X = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
                     dtype=np.float32)

        # Create SKL model for testing
        model = Binarizer(threshold=threshold)
        model.fit(X)

        # Create ONNX-ML model and convert it with Hummingbird
        # (mirrors the Normalizer example above)
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("float_input", FloatTensorType_onnx(X.shape))])
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for both models and return them for comparison
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [o.name for o in session.get_outputs()]
        onnx_ml_pred = session.run(output_names,
                                   {session.get_inputs()[0].name: X})
        onnx_pred = onnx_model.transform(X)

        return onnx_ml_pred, onnx_pred
Example #26
class TestLGBMConverter(unittest.TestCase):
    # Check tree implementation
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_implementation(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(10, 1)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=10)

        for model in [
                lgb.LGBMClassifier(n_estimators=1, max_depth=1),
                lgb.LGBMRegressor(n_estimators=1, max_depth=1)
        ]:
            for extra_config_param in ["tree_trav", "perf_tree_trav", "gemm"]:
                model.fit(X, y)

                torch_model = hummingbird.ml.convert(
                    model,
                    "torch",
                    extra_config={"tree_implementation": extra_config_param})
                self.assertIsNotNone(torch_model)
                self.assertEqual(
                    str(type(list(torch_model.model._operators)[0])),
                    gbdt_implementation_map[extra_config_param])

    def _run_lgbm_classifier_converter(self, num_classes, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Binary classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_classifier_converter(self):
        self._run_lgbm_classifier_converter(2)

    # Gemm classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "gemm"})

    # Tree_trav classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "perf_tree_trav"})

    # Multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(3)

    # Gemm multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            3, extra_config={"tree_implementation": "gemm"})

    # Tree_trav multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            3, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            3, extra_config={"tree_implementation": "perf_tree_trav"})

    def _run_lgbm_ranker_converter(self,
                                   num_classes,
                                   extra_config={},
                                   label_gain=None):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRanker(n_estimators=10,
                                   max_depth=max_depth,
                                   label_gain=label_gain)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X,
                      y,
                      group=[X.shape[0]],
                      eval_set=[(X, y)],
                      eval_group=[X.shape[0]])

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Ranker - small, no label gain
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_ranker_converter_no_label(self):
        self._run_lgbm_ranker_converter(30)

    # Ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_ranker_converter(self):
        self._run_lgbm_ranker_converter(1000, label_gain=list(range(1000)))

    # Gemm ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000,
            extra_config={"tree_implementation": "gemm"},
            label_gain=list(range(1000)))

    # Tree_trav ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000,
            extra_config={"tree_implementation": "tree_trav"},
            label_gain=list(range(1000)))

    # Perf_tree_trav ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000,
            extra_config={"tree_implementation": "perf_tree_trav"},
            label_gain=list(range(1000)))

    def _run_lgbm_regressor_converter(self, num_classes, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_regressor_converter(self):
        self._run_lgbm_regressor_converter(1000)

    # Gemm regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_regressor_converter(self):
        self._run_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "gemm"})

    # Tree_trav regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_regressor_converter(self):
        self._run_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_regressor_converter(self):
        self._run_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "perf_tree_trav"})

    # Float 64 classification test helper
    def _run_float64_lgbm_classifier_converter(self,
                                               num_classes,
                                               extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Gemm classifier (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_gemm_classifier_converter(self):
        self._run_float64_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "gemm"})

    # Tree_trav classifier (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_tree_trav_classifier_converter(self):
        self._run_float64_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav classifier (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_perf_tree_trav_classifier_converter(self):
        self._run_float64_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "perf_tree_trav"})

    # Float 64 regression test helper
    def _run_float64_lgbm_regressor_converter(self,
                                              num_classes,
                                              extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Gemm regressor (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_gemm_regressor_converter(self):
        self._run_float64_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "gemm"})

    # Tree_trav regressor (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_tree_trav_regressor_converter(self):
        self._run_float64_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav regressor (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_perf_tree_trav_regressor_converter(self):
        self._run_float64_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "perf_tree_trav"})

    # Random forest in lgbm: the conversion fails with the latest
    # version of lightgbm. Either the direct PyTorch converter should be
    # updated, or the model could be converted to ONNX first and then
    # to PyTorch (a sketch of that route appears after this class).
    # For more details, see the ONNX converter at https://github.com/onnx/
    # onnxmltools/blob/master/onnxmltools/convert/lightgbm/
    # operator_converters/LightGbm.py#L313.
    @unittest.skipIf(
        True, reason="boosting_type=='rf' produces different probabilities.")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_classifier_random_forest_rf(self):
        warnings.filterwarnings("ignore")

        model = lgb.LGBMClassifier(boosting_type="rf",
                                   n_estimators=128,
                                   max_depth=5,
                                   subsample=0.3,
                                   bagging_freq=1)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Same bagging configuration as above, but with gbdt boosting in lgbm
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_classifier_random_forest_gbdt(self):
        warnings.filterwarnings("ignore")

        model = lgb.LGBMClassifier(boosting_type="gbdt",
                                   n_estimators=128,
                                   max_depth=5,
                                   subsample=0.3,
                                   bagging_freq=1)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Test Tweedie loss in lgbm
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tweedie(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(objective="tweedie",
                                  n_estimators=2,
                                  max_depth=5)

        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(100, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict(X),
                                   torch_model.predict(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Backend tests.
    # Test TorchScript backend regression.
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_regressor_converter_torchscript(self):
        warnings.filterwarnings("ignore")

        for max_depth in [1, 3, 8, 10, 12]:
            model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(1000, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torchscript",
                                                 X,
                                                 extra_config={})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Test TorchScript backend classification.
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_classifier_converter_torchscript(self):
        warnings.filterwarnings("ignore")

        for max_depth in [1, 3, 8, 10, 12]:
            model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(2, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torchscript",
                                                 X,
                                                 extra_config={})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Check that we can export into ONNX.
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_onnx(self):
        warnings.filterwarnings("ignore")

        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX model
        onnx_model = hummingbird.ml.convert(model, "onnx", X)

        np.testing.assert_allclose(
            onnx_model.predict(X).flatten(), model.predict(X))

    # TVM backend tests.
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_lightgbm_tvm_regressor(self):
        warnings.filterwarnings("ignore")

        for tree_implementation in ["gemm", "tree_trav", "perf_tree_trav"]:
            X = [[0, 1], [1, 1], [2, 0]]
            X = np.array(X, dtype=np.float32)
            y = np.array([100, -10, 50], dtype=np.float32)
            model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
            model.fit(X, y)

            # Create TVM model.
            tvm_model = hummingbird.ml.convert(
                model,
                "tvm",
                X,
                extra_config={"tree_implementation": tree_implementation})

            # Check results.
            np.testing.assert_allclose(tvm_model.predict(X), model.predict(X))

    @unittest.skipIf(not tvm_installed(),
                     reason="TVM tests require TVM installed")
    def test_lightgbm_tvm_classifier(self):
        warnings.filterwarnings("ignore")

        for tree_implementation in ["gemm", "tree_trav", "perf_tree_trav"]:
            X = [[0, 1], [1, 1], [2, 0]]
            X = np.array(X, dtype=np.float32)
            y = np.array([0, 1, 0], dtype=np.float32)
            model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
            model.fit(X, y)

            # Create TVM model.
            tvm_model = hummingbird.ml.convert(
                model,
                "tvm",
                X,
                extra_config={"tree_implementation": tree_implementation})

            # Check results.
            np.testing.assert_allclose(tvm_model.predict(X), model.predict(X))
            np.testing.assert_allclose(tvm_model.predict_proba(X),
                                       model.predict_proba(X))

    # Test TVM with large input datasets.
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM tests require TVM installed")
    def test_lightgbm_tvm_classifier_large_dataset(self):
        warnings.filterwarnings("ignore")

        for tree_implementation in ["gemm", "tree_trav", "perf_tree_trav"]:
            size = 200000
            X = np.random.rand(size, 28)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(2, size=size)
            model = lgb.LGBMClassifier(n_estimators=100, max_depth=3)
            model.fit(X, y)

            # Create TVM model.
            tvm_model = hummingbird.ml.convert(
                model,
                "tvm",
                X,
                extra_config={
                    constants.TREE_IMPLEMENTATION: tree_implementation,
                    constants.TREE_OP_PRECISION_DTYPE: "float64"
                })

            # Check results.
            np.testing.assert_allclose(tvm_model.predict_proba(X),
                                       model.predict_proba(X),
                                       rtol=1e-04,
                                       atol=1e-04)
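
# Hedged sketch (not from the original tests): the comment above
# test_lgbm_classifier_random_forest_rf suggests routing rf models through
# ONNX instead of the direct PyTorch converter. Assuming the onnxmltools
# imports used elsewhere in this file, that route could look like this.
def _convert_lgbm_rf_via_onnx(model, X):
    from onnxmltools import convert_lightgbm
    from onnxmltools.convert.common.data_types import FloatTensorType

    # LightGBM -> ONNX-ML first, then hand the ONNX-ML graph to Hummingbird.
    onnx_ml_model = convert_lightgbm(
        model,
        initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))],
        target_opset=9)
    return hummingbird.ml.convert(onnx_ml_model, "onnx", X)
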
Example #27
class TestExtraConf(unittest.TestCase):
    # Test the default number of threads. On mac this only works with PyTorch newer than 1.6; see https://github.com/pytorch/pytorch/issues/43036
    @unittest.skipIf(
        sys.platform == "darwin" and LooseVersion(torch.__version__) <= LooseVersion("1.6.0"),
        reason="PyTorch has a bug on mac related to multi-threading",
    )
    def test_torch_default_n_threads(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")

        self.assertIsNotNone(hb_model)
        self.assertTrue(torch.get_num_threads() == psutil.cpu_count(logical=False))
        self.assertTrue(torch.get_num_interop_threads() == 1)

    # Test one thread in pytorch.
    @unittest.skipIf(
        sys.platform == "darwin" and LooseVersion(torch.__version__) > LooseVersion("1.6.0"),
        reason="Setting threading multi times will break on mac",
    )
    def test_torch_one_thread(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch", extra_config={constants.N_THREADS: 1})

        self.assertIsNotNone(hb_model)
        self.assertTrue(torch.get_num_threads() == 1)
        self.assertTrue(torch.get_num_interop_threads() == 1)

    # Test default number of threads onnx.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_default_n_threads(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx", X)

        self.assertIsNotNone(hb_model)
        self.assertTrue(hb_model._session.get_session_options().intra_op_num_threads == psutil.cpu_count(logical=False))
        self.assertTrue(hb_model._session.get_session_options().inter_op_num_threads == 1)

    # Test one thread onnx.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_one_thread(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "onnx", X, extra_config={constants.N_THREADS: 1})

        self.assertIsNotNone(hb_model)
        self.assertTrue(hb_model._session.get_session_options().intra_op_num_threads == 1)
        self.assertTrue(hb_model._session.get_session_options().inter_op_num_threads == 1)

    # Test pytorch regressor with batching.
    def test_torch_regression_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test pytorch classifier with batching.
    def test_torch_classification_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test pytorch isolation forest with batching.
    def test_torch_iforest_batch(self):
        warnings.filterwarnings("ignore")
        num_classes = 2
        model = IsolationForest(n_estimators=10, max_samples=2)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

    # Test pytorch regressor with batching and uneven rows.
    def test_torch_batch_regression_uneven(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=105)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test pytorch classification with batching and uneven rows.
    def test_torch_batch_classification_uneven(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=105)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test pytorch transform with batching and uneven rows.
    def test_torch_batch_transform(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)

        model.fit(X)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

    # Test torchscript regression with batching.
    def test_torchscript_regression_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test torchscript classification with batching.
    def test_torchscript_classification_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test torchscript iforest with batching.
    def test_torchscript_iforest_batch(self):
        warnings.filterwarnings("ignore")
        num_classes = 2
        model = IsolationForest(n_estimators=10, max_samples=2)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

    # Test torchscript transform with batching and uneven rows.
    def test_torchscript_batch_transform(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(101, 200)
        X = np.array(X, dtype=np.float32)

        model.fit(X)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

    # Test onnx transform with batching and uneven rows.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_batch_transform(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(101, 200)
        X = np.array(X, dtype=np.float32)

        model.fit(X)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

    # Test onnx regression with batching.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_regression_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test onnx classification with batching.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_classification_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test onnx iforest with batching.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_iforest_batch(self):
        warnings.filterwarnings("ignore")
        num_classes = 2
        model = IsolationForest(n_estimators=10, max_samples=2)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

    # Test tvm transform with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
    def test_tvm_batch_transform(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)

        model.fit(X)

        batch_size = 10
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :])

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

    # Test tvm regression with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
    def test_tvm_regression_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test tvm classification with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
    def test_tvm_classification_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        batch_size = 10
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :])

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test tvm iforest with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM test require TVM")
    def test_tvm_iforest_batch(self):
        warnings.filterwarnings("ignore")
        num_classes = 2
        model = IsolationForest(n_estimators=10, max_samples=2)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        batch_size = 10
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :])

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

    # Test tvm transform with batching and uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_tvm_batch_remainder_transform(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)

        model.fit(X)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

    # Test tvm regression with batching and uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_tvm_regression_remainder_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=105)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test tvm classification with batching and uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_tvm_classification_remainder_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=105)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test tvm iforest with batching and uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_tvm_iforest_remainder_batch(self):
        warnings.filterwarnings("ignore")
        num_classes = 2
        model = IsolationForest(n_estimators=10, max_samples=2)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=105)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

    # Test batch with pandas.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pandas_batch(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:149, :3]
        y = iris.target[:149]
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
            ]
        )

        pipeline.fit(X_train, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        torch_model = hummingbird.ml.convert_batch(
            pipeline, "torch", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
        )

        self.assertIsNotNone(torch_model)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train), torch_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
        )

    # Test batch with pandas and TorchScript.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pandas_batch_ts(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:149, :3]
        y = iris.target[:149]
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
            ]
        )

        pipeline.fit(X_train, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        torch_model = hummingbird.ml.convert_batch(
            pipeline, "torch.jit", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
        )

        self.assertIsNotNone(torch_model)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train), torch_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
        )

    # Test batch with pandas and ONNX.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    @unittest.skipIf(not onnx_runtime_installed(), reason="ONNXML test require ONNX and ORT")
    def test_pandas_batch_onnx(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:149, :3]
        y = iris.target[:149]
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
            ]
        )

        pipeline.fit(X_train, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(
            pipeline, "onnx", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
        )

        self.assertIsNotNone(hb_model)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train), hb_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
        )

    # Test batch with pandas starting from an ONNX-ML model.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_pandas_batch_onnxml(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
            ]
        )

        pipeline.fit(X_train, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            pipeline,
            initial_types=[
                ("vA", DoubleTensorType([X.shape[0], 1])),
                ("vB", DoubleTensorType([X.shape[0], 1])),
                ("vC", DoubleTensorType([X.shape[0], 1])),
            ],
            target_opset=9,
        )

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(
            onnx_ml_model, "onnx", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
        )

        self.assertIsNotNone(hb_model)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train), hb_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
        )

    # Test batch with pandas and TVM.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_pandas_batch_tvm(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:149, :3]
        y = iris.target[:149]
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
            ]
        )

        pipeline.fit(X_train, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(
            pipeline, "tvm", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
        )

        self.assertIsNotNone(hb_model)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train), hb_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
        )

    # Check converter with model name set as extra_config.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS"
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_pytorch_extra_config(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        # Create ONNX model
        model_name = "hummingbird.ml.test.lightgbm"
        onnx_model = hummingbird.ml.convert(onnx_ml_model, "onnx", extra_config={constants.ONNX_OUTPUT_MODEL_NAME: model_name})

        assert onnx_model.model.graph.name == model_name

    # Test max fuse depth configuration in TVM.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_max_fuse(self):
        warnings.filterwarnings("ignore")

        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test that TVM without padding returns an error if sizes don't match.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_no_padding(self):
        warnings.filterwarnings("ignore")

        np.random.seed(0)
        X = np.random.rand(100, 20)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)
        model = lgb.LGBMRegressor(n_estimators=10)
        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        self.assertRaises(AssertionError, hb_model.predict, X[:98])

    # Test padding in TVM.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_padding(self):
        warnings.filterwarnings("ignore")

        np.random.seed(0)
        X = np.random.rand(100, 20)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)
        model = lgb.LGBMRegressor(n_estimators=10)
        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_PAD_INPUT: True})
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X[:98]), hb_model.predict(X[:98]), rtol=1e-06, atol=1e-06)

    # Test that padding in TVM does not cause problems when it is not needed.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_padding_2(self):
        warnings.filterwarnings("ignore")

        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_PAD_INPUT: True})
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test max string length.
    def test_max_str_length(self):
        model = LabelEncoder()
        data = [
            "paris",
            "tokyo",
            "amsterdam",
            "tokyo",
        ]
        model.fit(data)

        torch_model = hummingbird.ml.convert(model, "torch", extra_config={constants.MAX_STRING_LENGTH: 20})

        np.testing.assert_allclose(model.transform(data), torch_model.transform(data), rtol=1e-06, atol=1e-06)
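
# Hedged sketch (illustration only, not part of the original tests): every
# batching test above follows the same recipe -- fit a model, hand
# convert_batch a single batch of rows plus the remainder size, then compare
# predictions on the full input. A minimal standalone version, assuming the
# same imports as this file:
def _convert_batch_sketch():
    from sklearn.ensemble import GradientBoostingRegressor

    np.random.seed(0)
    X = np.random.rand(105, 20).astype(np.float32)  # 105 rows: not a multiple of 10
    y = np.random.randint(2, size=105)
    model = GradientBoostingRegressor(n_estimators=5).fit(X, y)

    batch_size = 10
    remainder_size = X.shape[0] % batch_size  # 5 leftover rows
    hb_model = hummingbird.ml.convert_batch(
        model, "torch", X[:batch_size, :], remainder_size=remainder_size)
    np.testing.assert_allclose(
        model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
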
Example #28
class TestONNXLabelEncoder(unittest.TestCase):

    # Test LabelEncoder with longs
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_model_label_encoder_int_onnxml(self):
        model = LabelEncoder()
        X = np.array([1, 4, 5, 2, 0, 2], dtype=np.int64)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("input",
                                             LongTensorType_onnx(X.shape))
                                        ])

        # Create ONNX model by calling converter
        onnx_model = convert(onnx_ml_model, "onnx", X)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name
            for i in range(len(session.get_outputs()))
        ]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = np.array(session.run(output_names, inputs)).ravel()

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(X).ravel()

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred,
                                   onnx_pred,
                                   rtol=1e-06,
                                   atol=1e-06)

    # Test LabelEncoder with strings on PyTorch >= 1.8.0
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(
        LooseVersion(torch.__version__) < LooseVersion("1.8.0"),
        reason="PyTorch exporter don't support nonzero until version 1.8.0",
    )
    def test_model_label_encoder_str_onnxml(self):
        model = LabelEncoder()
        data = [
            "paris",
            "milan",
            "amsterdam",
            "tokyo",
        ]
        model.fit(data)

        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("input",
                                             StringTensorType_onnx([4]))
                                        ])

        onnx_model = convert(onnx_ml_model, "onnx", data)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name
            for i in range(len(session.get_outputs()))
        ]
        inputs = {session.get_inputs()[0].name: data}
        onnx_ml_pred = session.run(output_names, inputs)

        # Get the predictions for the ONNX model
        onnx_pred = onnx_model.transform(data)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[0],
                                   onnx_pred,
                                   rtol=1e-06,
                                   atol=1e-06)

    # Test LabelEncoder string failure case for torch < 1.8.0
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(
        LooseVersion(torch.__version__) >= LooseVersion("1.8.0"),
        reason="PyTorch exporter supports nonzero only from version 1.8.0 and should fail on older versions",
    )
    def test_le_string_raises_rt_onnx(self):
        warnings.filterwarnings("ignore")
        model = LabelEncoder()
        data = [
            "paris",
            "milan",
            "amsterdam",
            "tokyo",
        ]
        model.fit(data)

        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("input",
                                             StringTensorType_onnx([4]))
                                        ])

        # Create ONNX model by calling converter, should raise error for strings
        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", data)

    # If the model is corrupt, we should get a RuntimeError.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_label_encoder_converter_raises_rt(self):
        warnings.filterwarnings("ignore")
        model = LabelEncoder()
        X = np.array([1, 4, 5, 2, 0, 2], dtype=np.int64)
        model.fit(X)

        # generate test input
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("float_input",
                                             FloatTensorType_onnx(X.shape))
                                        ])
        onnx_ml_model.graph.node[0].attribute[0].name = "".encode()

        self.assertRaises(RuntimeError, convert, onnx_ml_model, "onnx", X)
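
# Hedged sketch (illustration only): the LabelEncoder tests above repeat the
# same ONNX-ML vs. Hummingbird comparison, which could be factored into a
# helper like this. `convert` is hummingbird.ml's convert, as used above; the
# helper name itself is hypothetical.
def _compare_onnxml_vs_hummingbird(onnx_ml_model, data):
    # Run the ONNX-ML model directly through onnxruntime.
    session = ort.InferenceSession(onnx_ml_model.SerializeToString())
    output_names = [o.name for o in session.get_outputs()]
    onnx_ml_pred = session.run(output_names, {session.get_inputs()[0].name: data})

    # Convert with Hummingbird and transform the same inputs.
    onnx_model = convert(onnx_ml_model, "onnx", data)
    onnx_pred = onnx_model.transform(data)

    # The two backends should agree.
    np.testing.assert_allclose(onnx_ml_pred[0], onnx_pred, rtol=1e-06, atol=1e-06)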