class TestSklearnSimpleImputer(unittest.TestCase):
    def _test_simple_imputer(self, model, data, backend):
        model.fit(data)

        hb_model = hummingbird.ml.convert(model, backend, data)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(
            model.transform(data),
            hb_model.transform(data),
            rtol=1e-06,
            atol=1e-06,
        )

    def test_simple_imputer_float_inputs(self):
        model = SimpleImputer(strategy="mean", fill_value="nan")
        data = np.array([[1, 2], [np.nan, 3], [7, 6]], dtype=np.float32)

        for backend in ["torch", "torch.jit"]:
            self._test_simple_imputer(model, data, backend)

    def test_simple_imputer_no_nan_inputs(self):
        model = SimpleImputer(missing_values=0, strategy="most_frequent")
        data = np.array([[1, 2], [1, 3], [7, 6]], dtype=np.float32)

        for backend in ["torch", "torch.jit"]:
            self._test_simple_imputer(model, data, backend)

    def test_simple_imputer_nan_to_0(self):
        model = SimpleImputer(strategy="constant", fill_value=0)
        data = np.array([[1, 2], [1, 3], [7, 6]], dtype=np.float32)

        for backend in ["torch", "torch.jit"]:
            self._test_simple_imputer(model, data, backend)

    # TVM tests
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_simple_imputer_float_inputs_tvm(self):
        model = SimpleImputer(strategy="mean", fill_value="nan")
        data = np.array([[1, 2], [np.nan, 3], [7, 6]], dtype=np.float32)

        self._test_simple_imputer(model, data, "tvm")

    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_simple_imputer_no_nan_inputs_tvm(self):
        model = SimpleImputer(missing_values=0, strategy="most_frequent")
        data = np.array([[1, 2], [1, 3], [7, 6]], dtype=np.float32)

        self._test_simple_imputer(model, data, "tvm")

    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_simple_imputer_nan_to_0_tvm(self):
        model = SimpleImputer(strategy="constant", fill_value=0)
        data = np.array([[1, 2], [1, 3], [7, 6]], dtype=np.float32)

        self._test_simple_imputer(model, data, "tvm")


class TestSklearnMissingIndicator(unittest.TestCase):
    def _test_sklearn_missing_indic(self, model, data, backend):
        data_tensor = torch.from_numpy(data)
        hb_model = hummingbird.ml.convert(model, backend, data)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(
            model.transform(data),
            hb_model.transform(data_tensor),
            rtol=1e-06,
            atol=1e-06,
        )

    def test_missing_indicator_float_inputs(self):
        for features in ["all", "missing-only"]:
            model = MissingIndicator(features=features)
            data = np.array([[1, 2], [np.nan, 3], [7, 6]], dtype=np.float32)
            model.fit(data)

            for backend in ["torch", "torch.jit"]:
                self._test_sklearn_missing_indic(model, data, backend)

    def test_missing_indicator_float_inputs_isnan_false(self):
        for features in ["all", "missing-only"]:
            model = MissingIndicator(features=features, missing_values=0)
            data = np.array([[1, 2], [0, 3], [7, 6]], dtype=np.float32)
            model.fit(data)

            for backend in ["torch", "torch.jit"]:
                self._test_sklearn_missing_indic(model, data, backend)

    # TVM tests
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_missing_indicator_float_inputs_tvm(self):
        for features in ["all", "missing-only"]:
            model = MissingIndicator(features=features)
            data = np.array([[1, 2], [np.nan, 3], [7, 6]], dtype=np.float32)
            model.fit(data)

            self._test_sklearn_missing_indic(model, data, "tvm")

    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_missing_indicator_float_inputs_isnan_false_tvm(self):
        for features in ["all", "missing-only"]:
            model = MissingIndicator(features=features, missing_values=0)
            data = np.array([[1, 2], [0, 3], [7, 6]], dtype=np.float32)
            model.fit(data)

            self._test_sklearn_missing_indic(model, data, "tvm")


    def __init__(self, model, n_threads=None, batch_size=None, extra_config={}):
        super(TVMSklearnContainer, self).__init__(model, n_threads, batch_size, extra_config=extra_config)

        assert tvm_installed(), "TVM Container requires TVM installed."
        import tvm

        self._ctx = self._extra_config[constants.TVM_CONTEXT]
        self._to_tvm_array = lambda x: tvm.nd.array(x, self._ctx)
        self._input_names = self._extra_config[constants.TVM_INPUT_NAMES]
        self._tvm_tensors = {name: self._to_tvm_array(np.array([])) for name in self._input_names}
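        # TVM models are compiled for a fixed batch size; when input padding is
        # enabled, the last (smaller) batch is padded up to that size before
        # being fed to the compiled model.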
        self._pad_input = (
            self._extra_config[constants.TVM_PAD_INPUT] if constants.TVM_PAD_INPUT in self._extra_config else False
        )

        os.environ["TVM_NUM_THREADS"] = str(self._n_threads)

    def __init__(self, model, n_threads=None, batch_size=None, extra_config={}):
        super(TVMSklearnContainer, self).__init__(model, n_threads, batch_size, extra_config=extra_config)

        assert tvm_installed(), "TVM Container requires TVM installed."
        import tvm

        self._ctx = self._extra_config[constants.TVM_CONTEXT]
        self._input_names = self._extra_config[constants.TVM_INPUT_NAMES]
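        # When created through convert_batch, the main TVM model is compiled for
        # a fixed batch size; the optional remainder model below handles the
        # final batch whose size does not divide the compiled batch size.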
        self._remainder_model = None
        if constants.TVM_REMAINDER_MODEL in self._extra_config:
            self._remainder_model = self._extra_config[constants.TVM_REMAINDER_MODEL]
        self._to_tvm_array = lambda x: tvm.nd.array(x, self._ctx)

        os.environ["TVM_NUM_THREADS"] = str(self._n_threads)


class TestLGBMConverter(unittest.TestCase):
    # Check tree implementation
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_implementation(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(10, 1)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=10)

        for model in [
                lgb.LGBMClassifier(n_estimators=1, max_depth=1),
                lgb.LGBMRegressor(n_estimators=1, max_depth=1)
        ]:
            for extra_config_param in ["tree_trav", "perf_tree_trav", "gemm"]:
                model.fit(X, y)

                torch_model = hummingbird.ml.convert(
                    model,
                    "torch",
                    extra_config={"tree_implementation": extra_config_param})
                self.assertIsNotNone(torch_model)
                self.assertEqual(
                    str(type(list(torch_model.model._operators)[0])),
                    gbdt_implementation_map[extra_config_param])

    def _run_lgbm_classifier_converter(self, num_classes, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Binary classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_classifier_converter(self):
        self._run_lgbm_classifier_converter(2)

    # Gemm classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "gemm"})

    # Tree_trav classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "perf_tree_trav"})

    # Multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(3)

    # Gemm multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            3, extra_config={"tree_implementation": "gemm"})

    # Tree_trav multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            3, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            3, extra_config={"tree_implementation": "perf_tree_trav"})

    def _run_lgbm_ranker_converter(self,
                                   num_classes,
                                   extra_config={},
                                   label_gain=None):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRanker(n_estimators=10,
                                   max_depth=max_depth,
                                   label_gain=label_gain)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X,
                      y,
                      group=[X.shape[0]],
                      eval_set=[(X, y)],
                      eval_group=[X.shape[0]])

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Ranker - small, no label gain
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_ranker_converter_no_label(self):
        self._run_lgbm_ranker_converter(30)

    # Ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_ranker_converter(self):
        self._run_lgbm_ranker_converter(1000, label_gain=list(range(1000)))

    # Gemm ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000,
            extra_config={"tree_implementation": "gemm"},
            label_gain=list(range(1000)))

    # Tree_trav ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000,
            extra_config={"tree_implementation": "tree_trav"},
            label_gain=list(range(1000)))

    # Perf_tree_trav ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000,
            extra_config={"tree_implementation": "perf_tree_trav"},
            label_gain=list(range(1000)))

    def _run_lgbm_regressor_converter(self, num_classes, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_regressor_converter(self):
        self._run_lgbm_regressor_converter(1000)

    # Gemm regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_regressor_converter(self):
        self._run_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "gemm"})

    # Tree_trav regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_regressor_converter(self):
        self._run_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_regressor_converter(self):
        self._run_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "perf_tree_trav"})

    # Float 64 classification test helper
    def _run_float64_lgbm_classifier_converter(self,
                                               num_classes,
                                               extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Gemm classifier (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_gemm_classifier_converter(self):
        self._run_float64_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "gemm"})

    # Tree_trav classifier (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_tree_trav_classifier_converter(self):
        self._run_float64_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav classifier (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_perf_tree_trav_classifier_converter(self):
        self._run_float64_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "perf_tree_trav"})

    # Float 64 regression test helper
    def _run_float64_lgbm_regressor_converter(self,
                                              num_classes,
                                              extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Gemm regressor (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_gemm_regressor_converter(self):
        self._run_float64_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "gemm"})

    # Tree_trav regressor (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_tree_trav_regressor_converter(self):
        self._run_float64_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav regressor (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_perf_tree_trav_regressor_converter(self):
        self._run_float64_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "perf_tree_trav"})

    # Random forest in LightGBM: the conversion fails with the latest
    # version of lightgbm. The direct converter to PyTorch should be
    # updated, or the model could be converted to ONNX first and then
    # from ONNX to PyTorch (see the sketch after this test). For more
    # details, see the ONNX converter at
    # https://github.com/onnx/onnxmltools/blob/master/onnxmltools/convert/lightgbm/operator_converters/LightGbm.py#L313.
    @unittest.skipIf(
        True, reason="boosting_type=='rf' produces different probabilities.")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_classifier_random_forest_rf(self):
        warnings.filterwarnings("ignore")

        model = lgb.LGBMClassifier(boosting_type="rf",
                                   n_estimators=128,
                                   max_depth=5,
                                   subsample=0.3,
                                   bagging_freq=1)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)
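
    # A hedged sketch of the ONNX route suggested above: convert the LightGBM
    # model to ONNX-ML with onnxmltools, then hand the resulting model to
    # hummingbird. Kept skipped because it assumes onnxmltools is installed and
    # that its LightGBM converter handles boosting_type="rf" correctly.
    @unittest.skipIf(True, reason="Sketch of the ONNX route; assumes onnxmltools.")
    def test_lgbm_classifier_random_forest_rf_via_onnx_sketch(self):
        from onnxmltools.convert import convert_lightgbm
        from onnxmltools.convert.common.data_types import FloatTensorType

        model = lgb.LGBMClassifier(boosting_type="rf",
                                   n_estimators=128,
                                   max_depth=5,
                                   subsample=0.3,
                                   bagging_freq=1)
        np.random.seed(0)
        X = np.random.rand(100, 200).astype(np.float32)
        y = np.random.randint(2, size=100)
        model.fit(X, y)

        # LightGBM -> ONNX-ML via onnxmltools, then ONNX-ML -> ONNX via hummingbird.
        initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
        onnx_ml_model = convert_lightgbm(model, initial_types=initial_types)
        onnx_model = hummingbird.ml.convert(onnx_ml_model, "onnx", X)
        np.testing.assert_allclose(model.predict_proba(X),
                                   onnx_model.predict_proba(X),
                                   rtol=1e-04,
                                   atol=1e-04)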

    # Random forest in lgbm
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_classifier_random_forest_gbdt(self):
        warnings.filterwarnings("ignore")

        model = lgb.LGBMClassifier(boosting_type="gbdt",
                                   n_estimators=128,
                                   max_depth=5,
                                   subsample=0.3,
                                   bagging_freq=1)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Test Tweedie loss in lgbm
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tweedie(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(objective="tweedie",
                                  n_estimators=2,
                                  max_depth=5)

        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(100, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict(X),
                                   torch_model.predict(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Backend tests.
    # Test TorchScript backend regression.
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_regressor_converter_torchscript(self):
        warnings.filterwarnings("ignore")

        for max_depth in [1, 3, 8, 10, 12]:
            model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(1000, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torchscript",
                                                 X,
                                                 extra_config={})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Test TorchScript backend classification.
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_classifier_converter_torchscript(self):
        warnings.filterwarnings("ignore")

        for max_depth in [1, 3, 8, 10, 12]:
            model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(2, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torchscript",
                                                 X,
                                                 extra_config={})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Check that we can export into ONNX.
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="ONNXML tests require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_onnx(self):
        warnings.filterwarnings("ignore")

        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX model
        onnx_model = hummingbird.ml.convert(model, "onnx", X)

        np.testing.assert_allclose(
            onnx_model.predict(X).flatten(), model.predict(X))
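
        # A hedged sketch, assuming the container's `model` attribute exposes
        # the underlying ONNX proto and that onnxruntime is importable: the
        # exported proto can also be run directly through an onnxruntime
        # session.
        #
        #   import onnxruntime as ort
        #   sess = ort.InferenceSession(onnx_model.model.SerializeToString())
        #   input_name = sess.get_inputs()[0].name
        #   pred = sess.run(None, {input_name: X})[0]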

    # TVM backend tests.
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_lightgbm_tvm_regressor(self):
        warnings.filterwarnings("ignore")

        for tree_implementation in ["gemm", "tree_trav", "perf_tree_trav"]:
            X = [[0, 1], [1, 1], [2, 0]]
            X = np.array(X, dtype=np.float32)
            y = np.array([100, -10, 50], dtype=np.float32)
            model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
            model.fit(X, y)

            # Create TVM model.
            tvm_model = hummingbird.ml.convert(
                model,
                "tvm",
                X,
                extra_config={"tree_implementation": tree_implementation})

            # Check results.
            np.testing.assert_allclose(tvm_model.predict(X), model.predict(X))

    @unittest.skipIf(not tvm_installed(),
                     reason="TVM tests require TVM installed")
    def test_lightgbm_tvm_classifier(self):
        warnings.filterwarnings("ignore")

        for tree_implementation in ["gemm", "tree_trav", "perf_tree_trav"]:
            X = [[0, 1], [1, 1], [2, 0]]
            X = np.array(X, dtype=np.float32)
            y = np.array([0, 1, 0], dtype=np.float32)
            model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
            model.fit(X, y)

            # Create TVM model.
            tvm_model = hummingbird.ml.convert(
                model,
                "tvm",
                X,
                extra_config={"tree_implementation": tree_implementation})

            # Check results.
            np.testing.assert_allclose(tvm_model.predict(X), model.predict(X))
            np.testing.assert_allclose(tvm_model.predict_proba(X),
                                       model.predict_proba(X))

    # Test TVM with large input datasets.
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM tests require TVM installed")
    def test_lightgbm_tvm_classifier_large_dataset(self):
        warnings.filterwarnings("ignore")

        for tree_implementation in ["gemm", "tree_trav", "perf_tree_trav"]:
            size = 200000
            X = np.random.rand(size, 28)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(2, size=size)
            model = lgb.LGBMClassifier(n_estimators=100, max_depth=3)
            model.fit(X, y)
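
            # With this many borderline samples, float32 threshold comparisons
            # can flip a handful of predictions; hence the float64 precision
            # requested below for the tree operations
            # (constants.TREE_OP_PRECISION_DTYPE).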

            # Create TVM model.
            tvm_model = hummingbird.ml.convert(
                model,
                "tvm",
                X,
                extra_config={
                    constants.TREE_IMPLEMENTATION: tree_implementation,
                    constants.TREE_OP_PRECISION_DTYPE: "float64"
                })

            # Check results.
            np.testing.assert_allclose(tvm_model.predict_proba(X),
                                       model.predict_proba(X),
                                       rtol=1e-04,
                                       atol=1e-04)


class TestSklearnMLPClassifier(unittest.TestCase):

    # MLPClassifier test function to be parameterized
    def _test_mlp_classifer(self,
                            num_classes,
                            activation="relu",
                            labels_shift=0,
                            backend="torch",
                            extra_config={}):
        model = MLPClassifier(hidden_layer_sizes=(100, 100, 50),
                              activation=activation)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100) + labels_shift

        model.fit(X, y)
        torch_model = hummingbird.ml.convert(model,
                                             backend,
                                             X,
                                             extra_config=extra_config)
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-6,
                                   atol=1e-6)

    # MLPClassifier binary
    def test_mlp_classifer_bi(self):
        self._test_mlp_classifer(2)

    # MLPClassifier multi-class
    def test_mlp_classifer_multi(self):
        self._test_mlp_classifer(3)

    # MLPClassifier multi-class w/ shifted labels
    def test_mlp_classifer_multi_shifted_labels(self):
        self._test_mlp_classifer(3, labels_shift=3)

    #  MLPClassifier multi-class w/ tanh activation
    def test_mlp_classifer_multi_tanh(self):
        self._test_mlp_classifer(3, activation="tanh")

    #  MLPClassifier multi-class w/ logistic activation
    def test_mlp_classifer_multi_logistic(self):
        self._test_mlp_classifer(3, activation="logistic")

    #  MLPClassifier multi-class w/ identity activation
    def test_mlp_classifer_multi_identity(self):
        self._test_mlp_classifer(3, activation="identity")

    # Test TVM backend
    # MLPClassifier binary
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_mlp_classifer_bi_tvm(self):
        self._test_mlp_classifer(
            2, backend="tvm", extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

    # MLPClassifier multi-class
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_mlp_classifer_multi_tvm(self):
        self._test_mlp_classifer(
            3, backend="tvm", extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

    # MLPClassifier multi-class w/ shifted labels
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_mlp_classifer_multi_shifted_labels_tvm(self):
        self._test_mlp_classifer(
            3,
            labels_shift=3,
            backend="tvm",
            extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

    #  MLPClassifier multi-class w/ tanh activation
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_mlp_classifer_multi_tanh_tvm(self):
        self._test_mlp_classifer(
            3,
            activation="tanh",
            backend="tvm",
            extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

    #  MLPClassifier multi-class w/ logistic activation
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_mlp_classifer_multi_logistic_tvm(self):
        self._test_mlp_classifer(
            3,
            activation="logistic",
            backend="tvm",
            extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

    #  MLPClassifier multi-class w/ identity activation
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_mlp_classifer_multi_identity_tvm(self):
        self._test_mlp_classifer(
            3,
            activation="identity",
            backend="tvm",
            extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

    # MLPRegressor test function to be parameterized
    def _test_mlp_regressor(self, activation="relu"):
        model = MLPRegressor(hidden_layer_sizes=(100, 100, 50),
                             activation=activation)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.rand(100)

        model.fit(X, y)
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict(X),
                                   torch_model.predict(X),
                                   rtol=1e-6,
                                   atol=1e-6)

    # MLPRegressor
    def test_mlp_regressor(self):
        self._test_mlp_regressor()

    #  MLPRegressor w/ tanh activation
    def test_mlp_regressor_tanh(self):
        self._test_mlp_regressor(activation="tanh")

    #  MLPRegressor w/ logistic activation
    def test_mlp_regressor_logistic(self):
        self._test_mlp_regressor(activation="logistic")

    #  MLPRegressor w/ identity activation
    def test_mlp_regressor_identity(self):
        self._test_mlp_regressor(activation="identity")


class TestXGBoostConverter(unittest.TestCase):
    # Check tree implementation
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_implementation(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(1, 1)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=1)

        for model in [xgb.XGBClassifier(n_estimators=1, max_depth=1), xgb.XGBRegressor(n_estimators=1, max_depth=1)]:
            for extra_config_param in ["tree_trav", "perf_tree_trav", "gemm"]:
                model.fit(X, y)

                torch_model = hummingbird.ml.convert(
                    model, "torch", X[0:1], extra_config={"tree_implementation": extra_config_param}
                )
                self.assertIsNotNone(torch_model)
                self.assertEqual(str(type(list(torch_model.model._operators)[0])), gbdt_implementation_map[extra_config_param])

    def _run_xgb_classifier_converter(self, num_classes, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torch", [], extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Binary classifier
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_binary_classifier_converter(self):
        self._run_xgb_classifier_converter(2)

    # Gemm classifier
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_gemm_classifier_converter(self):
        self._run_xgb_classifier_converter(2, extra_config={"tree_implementation": "gemm"})

    # Tree_trav classifier
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_tree_trav_classifier_converter(self):
        self._run_xgb_classifier_converter(2, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav classifier
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_perf_tree_trav_classifier_converter(self):
        self._run_xgb_classifier_converter(2, extra_config={"tree_implementation": "perf_tree_trav"})

    # Multi classifier
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_multi_classifier_converter(self):
        self._run_xgb_classifier_converter(3)

    # Gemm multi classifier
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_gemm_multi_classifier_converter(self):
        self._run_xgb_classifier_converter(3, extra_config={"tree_implementation": "gemm"})

    # Tree_trav multi classifier
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_tree_trav_multi_classifier_converter(self):
        self._run_xgb_classifier_converter(3, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav multi classifier
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_perf_tree_trav_multi_classifier_converter(self):
        self._run_xgb_classifier_converter(3, extra_config={"tree_implementation": "perf_tree_trav"})

    def _run_xgb_ranker_converter(self, num_classes, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBRanker(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y, group=[X.shape[0]])

            torch_model = hummingbird.ml.convert(model, "torch", X, extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-06, atol=1e-06)

    # Ranker
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_binary_ranker_converter(self):
        self._run_xgb_ranker_converter(1000)

    # Gemm ranker
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_gemm_ranker_converter(self):
        self._run_xgb_ranker_converter(1000, extra_config={"tree_implementation": "gemm"})

    # Tree_trav ranker
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_tree_trav_ranker_converter(self):
        self._run_xgb_ranker_converter(1000, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav ranker
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_perf_tree_trav_ranker_converter(self):
        self._run_xgb_ranker_converter(1000, extra_config={"tree_implementation": "perf_tree_trav"})

    def _run_xgb_regressor_converter(self, num_classes, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)
            torch_model = hummingbird.ml.convert(model, "torch", X, extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-06, atol=1e-06)

    # Regressor
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_binary_regressor_converter(self):
        self._run_xgb_regressor_converter(1000)

    # Gemm regressor
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_gemm_regressor_converter(self):
        self._run_xgb_regressor_converter(1000, extra_config={"tree_implementation": "gemm"})

    # Tree_trav regressor
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_tree_trav_regressor_converter(self):
        self._run_xgb_regressor_converter(1000, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav regressor
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_perf_tree_trav_regressor_converter(self):
        self._run_xgb_regressor_converter(1000, extra_config={"tree_implementation": "perf_tree_trav"})

    # Float 64 data tests
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_float64_xgb_classifier_converter(self):
        warnings.filterwarnings("ignore")
        num_classes = 3
        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torch", [])
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_float64_xgb_ranker_converter(self):
        warnings.filterwarnings("ignore")
        num_classes = 3
        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBRanker(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y, group=[X.shape[0]])

            torch_model = hummingbird.ml.convert(model, "torch", X[0:1])
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-06, atol=1e-06)

    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_float64_xgb_regressor_converter(self):
        warnings.filterwarnings("ignore")
        num_classes = 3
        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)
            torch_model = hummingbird.ml.convert(model, "torch", X[0:1])
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-06, atol=1e-06)

    # Small tree.
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_run_xgb_classifier_converter(self):
        warnings.filterwarnings("ignore")
        for extra_config_param in ["tree_trav", "perf_tree_trav", "gemm"]:
            model = xgb.XGBClassifier(n_estimators=1, max_depth=1)
            np.random.seed(0)
            X = np.random.rand(1, 1)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(2, size=1)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torch", [], extra_config={"tree_implementation": extra_config_param})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Missing values test.
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_run_xgb_classifier_w_missing_vals_converter(self):
        warnings.filterwarnings("ignore")
        for extra_config_param in ["gemm", "tree_trav", "perf_tree_trav"]:
            for missing in [None, -99999, np.nan]:
                for model_class, n_classes in zip([xgb.XGBClassifier, xgb.XGBClassifier, xgb.XGBRegressor], [2, 3, None]):
                    model = model_class(missing=missing)
                    # Missing values during both training and inference.
                    if model_class == xgb.XGBClassifier:
                        X, y = make_classification(n_samples=100, n_features=3, n_informative=3, n_redundant=0, n_repeated=0, n_classes=n_classes, random_state=2021)
                    else:
                        X, y = make_regression(n_samples=100, n_features=3, n_informative=3, random_state=2021)
                    X[:25][y[:25] == 0, 0] = np.nan if missing is None else missing
                    model.fit(X, y)
                    torch_model = hummingbird.ml.convert(model, "torch", X, extra_config={"tree_implementation": extra_config_param})
                    self.assertIsNotNone(torch_model)
                    if model_class == xgb.XGBClassifier:
                        np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-06, atol=1e-06)
                    else:
                        np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-06, atol=1e-06)

                    # Missing values during only inference.
                    model = model_class(missing=missing)
                    if model_class == xgb.XGBClassifier:
                        X, y = make_classification(n_samples=100, n_features=3, n_informative=3, n_redundant=0, n_repeated=0, n_classes=n_classes, random_state=2021)
                    else:
                        X, y = make_regression(n_samples=100, n_features=3, n_informative=3, random_state=2021)
                    model.fit(X, y)
                    X[:25][y[:25] == 0, 0] = np.nan if missing is None else missing
                    torch_model = hummingbird.ml.convert(model, "torch", X, extra_config={"tree_implementation": extra_config_param})
                    self.assertIsNotNone(torch_model)
                    if model_class == xgb.XGBClassifier:
                        np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-06, atol=1e-06)
                    else:
                        np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-06, atol=1e-06)

    # Torchscript backends.
    # Test TorchScript backend regression.
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_regressor_converter_torchscript(self):
        warnings.filterwarnings("ignore")
        import torch

        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(1000, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torchscript", X)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test TorchScript backend classification.
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    def test_xgb_classifier_converter_torchscript(self):
        warnings.filterwarnings("ignore")
        import torch

        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(2, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torchscript", X)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # TVM backend tests.
    # TVM backend regression.
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_xgb_regressor_converter_tvm(self):
        warnings.filterwarnings("ignore")
        import torch

        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(1000, size=100)

            model.fit(X, y)

            tvm_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})
            self.assertIsNotNone(tvm_model)
            np.testing.assert_allclose(model.predict(X), tvm_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test TVM backend classification.
    @unittest.skipIf(not xgboost_installed(), reason="XGBoost test requires XGBoost installed")
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_xgb_classifier_converter_tvm(self):
        warnings.filterwarnings("ignore")
        import torch

        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(2, size=100)

            model.fit(X, y)

            tvm_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})
            self.assertIsNotNone(tvm_model)
            np.testing.assert_allclose(model.predict_proba(X), tvm_model.predict_proba(X), rtol=1e-06, atol=1e-06)


def convert(topology, backend, test_input, device, extra_config={}):
    """
    This function is used to convert a `onnxconverter_common.topology.Topology` object into a *backend* model.

    Args:
        topology: The `onnxconverter_common.topology.Topology` object that will be converted into a backend model
        backend: Which backend the model should be run on
        test_input: Inputs for PyTorch model tracing
        device: Which device the translated model will be run on
        extra_config: Extra configurations to be used by individual operator converters

    Returns:
        A model implemented in the selected backend
    """
    assert topology is not None, "Cannot convert a Topology object of type None."
    assert backend is not None, "Cannot convert a Topology object into backend None."
    assert device is not None, "Cannot convert a Topology object into device None."

    tvm_backend = None
    operator_map = {}

    if tvm_installed():
        import tvm

        tvm_backend = tvm.__name__

    for operator in topology.topological_operator_iterator():
        converter = get_converter(operator.type)
        if converter is None:
            raise MissingConverter(
                "Unable to find converter for {} type {} with extra config: {}."
                .format(operator.type,
                        type(getattr(operator, "raw_model", None)),
                        extra_config))

        if backend == onnx.__name__:
            # PyTorch <= 1.6.0 has a bug with exporting GEMM into ONNX,
            # so for the moment only tree_trav is enabled for the ONNX backend.
            extra_config[constants.TREE_IMPLEMENTATION] = "tree_trav"
        operator_map[operator.full_name] = converter(operator, device,
                                                     extra_config)

    # Set the parameters for the model / container
    n_threads = None if constants.N_THREADS not in extra_config else extra_config[
        constants.N_THREADS]

    # We set the number of threads for torch here to avoid errors in case we JIT.
    # We set intra-op concurrency while forcing inter-op execution to be sequential.
    # We can revise this later, but in general we don't have graphs requiring inter-op parallelism.
    if n_threads is not None:
        if torch.get_num_interop_threads() != 1:
            torch.set_num_interop_threads(1)
        torch.set_num_threads(n_threads)

    operators = list(topology.topological_operator_iterator())
    executor = Executor(topology.raw_model.input_names,
                        topology.raw_model.output_names, operator_map,
                        operators, extra_config).eval()

    # If constants.REMAINDER_SIZE is present in extra_config, we are in convert_batch mode.
    remainder_model = None
    remainder_size = None if constants.REMAINDER_SIZE not in extra_config else extra_config[
        constants.REMAINDER_SIZE]

    if backend == onnx.__name__:
        onnx_model_name = output_model_name = None
        target_opset = 11

        # Set optional configuration options for ONNX if any.
        if constants.ONNX_OUTPUT_MODEL_NAME in extra_config:
            onnx_model_name = extra_config[constants.ONNX_OUTPUT_MODEL_NAME]
            output_model_name = onnx_model_name + ".onnx"
        if constants.ONNX_TARGET_OPSET in extra_config:
            target_opset = extra_config[constants.ONNX_TARGET_OPSET]
        if output_model_name is None:
            output_model_name = str(uuid4().hex) + ".onnx"

        # Put the tracing test input into the right format.
        batch_trace_input, _ = _get_trace_input_from_test_input(
            test_input, remainder_size, extra_config)

        # Generate the ONNX models
        torch.onnx.export(
            executor,
            batch_trace_input,
            output_model_name,
            input_names=topology.raw_model.input_names,
            output_names=topology.raw_model.output_names,
            keep_initializers_as_inputs=False,
            opset_version=target_opset,
            do_constant_folding=True,
        )
        hb_model = onnx.load(output_model_name)
        os.remove(output_model_name)

        # Set the ONNX model name if any.
        if onnx_model_name is not None:
            hb_model.graph.name = onnx_model_name

        # Fix the model to use arbitrary batch dimensions
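        # (e.g., an input declared with shape [1, 200] gets a symbolic first
        # dimension "sym", so any batch size is accepted).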
        def fix_dim(dim):
            updated = False
            if dim.HasField("dim_value"):
                dim.Clear()
                updated = True
                dim.dim_param = "sym"

            return updated

        def fix_value_info(value):
            num_fixed = 0
            if value.type.HasField("tensor_type"):
                shape = value.type.tensor_type.shape
                if shape:
                    dim = shape.dim[0]
                    if fix_dim(dim):
                        num_fixed += 1

            return num_fixed

        def fix_graph(graph):
            num_fixed = 0
            for input in graph.input:
                num_fixed += fix_value_info(input)

            for output in graph.output:
                num_fixed += fix_value_info(output)

            for node in graph.node:
                for attr in node.attribute:
                    if attr.HasField("g"):
                        num_fixed += fix_graph(attr.g)

            return num_fixed

        fix_graph(hb_model.graph)
    elif backend == tvm_backend:
        # Pick the proper target.
        if device == "cuda":
            target = tvm.target.cuda()
            ctx = tvm.gpu()
        elif device == "cpu":
            target = "llvm"
            ctx = tvm.cpu()
        elif "llvm" in device:
            target = device
            ctx = tvm.cpu()
        else:
            raise RuntimeError("Device {} not recognized".format(device))

        # Get configuration parameters.
        # 50 is a good depth for operator fusion. More than that will probably hurt performance.
        # https://github.com/microsoft/hummingbird/issues/232#issuecomment-697979508
        config = {"relay.FuseOps.max_depth": 50}

        if constants.TVM_MAX_FUSE_DEPTH in extra_config:
            config["relay.FuseOps.max_depth"] = extra_config[
                constants.TVM_MAX_FUSE_DEPTH]

        # First we need to generate the torchscript model.
        batch_trace_input, remainder_trace_input = _get_trace_input_from_test_input(
            test_input, remainder_size, extra_config)

        tvm_model = _compile_to_tvm(topology, executor, batch_trace_input,
                                    target, ctx, config, extra_config)

        if remainder_trace_input is not None:
            remainder_model = _compile_to_tvm(topology, executor,
                                              remainder_trace_input, target,
                                              ctx, config, extra_config)

        # In the container we will be using the context to properly configure the input tensors.
        extra_config[constants.TVM_CONTEXT] = ctx
        extra_config[
            constants.TVM_INPUT_NAMES] = topology.raw_model.input_names

        hb_model = tvm_model
    else:
        # Set the device for the model.
        if device != "cpu":
            if backend in (torch.__name__, torch.jit.__name__):
                executor = executor.to(device)

        # If the backend is torchscript, jit the model.
        if backend == torch.jit.__name__:
            trace_input, _ = _get_trace_input_from_test_input(
                test_input, remainder_size, extra_config)
            executor = _jit_trace(executor, trace_input, device, extra_config)
            torch.jit.optimized_execution(executor)

        hb_model = executor

    # Return if the container is not needed.
    if constants.CONTAINER in extra_config and not extra_config[
            constants.CONTAINER]:
        return hb_model

    # We scan the operators backwards until we find an operator with a defined type.
    # This is necessary because ONNX models can have arbitrary operators doing casting, reshaping etc.
    idx = len(operators) - 1
    while (idx >= 0 and not operator_map[operators[idx].full_name].regression
           and not operator_map[operators[idx].full_name].classification
           and not operator_map[operators[idx].full_name].anomaly_detection
           and not operator_map[operators[idx].full_name].transformer):
        idx -= 1

    assert idx >= 0, "Cannot detect container type. Please fill an issue at https://github.com/microsoft/hummingbird."

    # If it is a transformer, we need to check whether there is another operator type before it.
    # E.g., normalization after classification.
    tmp_idx = idx
    if operator_map[operators[idx].full_name].transformer:
        while (idx >= 0
               and not operator_map[operators[idx].full_name].regression
               and not operator_map[operators[idx].full_name].classification
               and
               not operator_map[operators[idx].full_name].anomaly_detection):
            idx -= 1
        if idx < 0:
            idx = tmp_idx

    # Get the proper container type.
    if operator_map[operators[idx].full_name].regression:
        # We are doing a regression task.
        if backend == torch.jit.__name__:
            container = TorchScriptSklearnContainerRegression
        elif backend == onnx.__name__:
            container = ONNXSklearnContainerRegression
        elif backend == tvm_backend:
            container = TVMSklearnContainerRegression
        else:
            container = PyTorchSklearnContainerRegression
    elif operator_map[operators[idx].full_name].anomaly_detection:
        # We are doing anomaly detection.
        if backend == torch.jit.__name__:
            container = TorchScriptSklearnContainerAnomalyDetection
        elif backend == onnx.__name__:
            container = ONNXSklearnContainerAnomalyDetection
        elif backend == tvm_backend:
            container = TVMSklearnContainerAnomalyDetection
        else:
            container = PyTorchSklearnContainerAnomalyDetection
    elif operator_map[operators[idx].full_name].transformer:
        # We are just transforming the input data.
        if backend == torch.jit.__name__:
            container = TorchScriptSklearnContainerTransformer
        elif backend == onnx.__name__:
            container = ONNXSklearnContainerTransformer
        elif backend == tvm_backend:
            container = TVMSklearnContainerTransformer
        else:
            container = PyTorchSklearnContainerTransformer
    else:
        # We are doing a classification task.
        if backend == torch.jit.__name__:
            container = TorchScriptSklearnContainerClassification
        elif backend == onnx.__name__:
            container = ONNXSklearnContainerClassification
        elif backend == tvm_backend:
            container = TVMSklearnContainerClassification
        else:
            container = PyTorchSklearnContainerClassification

    n_threads = extra_config.get(constants.N_THREADS)
    batch_size = None
    if constants.TEST_INPUT in extra_config:
        batch_size = _get_batch_size(test_input)
    hb_container = container(hb_model,
                             n_threads,
                             batch_size,
                             extra_config=extra_config)

    if remainder_model:
        aux_container = container(remainder_model,
                                  n_threads,
                                  remainder_size,
                                  extra_config=extra_config)
        return BatchContainer(hb_container, aux_container)
    elif remainder_size is not None and remainder_size > 0:
        # remainder_size is nonzero but no remainder_model was created
        # -> torch backend case
        aux_container = container(hb_model,
                                  n_threads,
                                  remainder_size,
                                  extra_config=extra_config)
        return BatchContainer(hb_container, aux_container)
    elif remainder_size is not None:
        # remainder_size is not None but no remainder_model was created
        # -> remainder_size must be zero (no remainder_model is needed)
        assert remainder_size == 0, "remainder_size is nonzero but no remainder_model has been created"
        # remainder_size is not None only if called by convert_batch(...), so we return BatchContainer
        # for this code path, even though there is no remainder_model created.
        return BatchContainer(hb_container)

    return hb_container
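# A hedged usage sketch of the batching path above. remainder_size is set only
# when conversion goes through convert_batch(...); assuming convert_batch
# mirrors convert() with an extra remainder-size argument, a call such as
#   hummingbird.ml.convert_batch(model, "torch", X, remainder_size)
# returns a BatchContainer pairing the batch model with a remainder model (or
# reusing the same model for the torch backend, as handled above).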
# Example #9
class TestSklearnNBClassifier(unittest.TestCase):

    # BernoulliNB test function to be parameterized
    def _test_bernoulinb_classifer(self,
                                   num_classes,
                                   alpha=1.0,
                                   binarize=None,
                                   fit_prior=False,
                                   class_prior=None,
                                   labels_shift=0,
                                   backend="torch"):
        model = BernoulliNB(alpha=alpha,
                            binarize=binarize,
                            fit_prior=fit_prior,
                            class_prior=class_prior)
        np.random.seed(0)
        if binarize is None:
            X = np.random.randint(2, size=(100, 200))
        else:
            X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100) + labels_shift

        model.fit(X, y)
        torch_model = hummingbird.ml.convert(model, backend, X)
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-6,
                                   atol=1e-5)

    # BernoulliNB binary
    def test_bernoulinb_classifer_bi(self):
        self._test_bernoulinb_classifer(2)

    # BernoulliNB multi-class
    def test_bernoulinb_classifer_multi(self):
        self._test_bernoulinb_classifer(3)

    # BernoulliNB multi-class w/ modified alpha
    def test_bernoulinb_classifer_multi_alpha(self):
        self._test_bernoulinb_classifer(3, alpha=0.5)

    #  BernoulliNB multi-class w/ binarize
    def test_bernoulinb_classifer_multi_binarize(self):
        self._test_bernoulinb_classifer(3, binarize=0.5)

    #  BernoulliNB multi-class w/ fit prior
    def test_bernoulinb_classifer_multi_fit_prior(self):
        self._test_bernoulinb_classifer(3, fit_prior=True)

    #  BernoulliNB multi-class w/ class prior
    def test_bernoulinb_classifer_multi_class_prior(self):
        np.random.seed(0)
        class_prior = np.random.rand(3)
        self._test_bernoulinb_classifer(3, class_prior=class_prior)

    # BernoulliNB multi-class w/ labels shift
    def test_bernoulinb_classifer_multi_labels_shift(self):
        self._test_bernoulinb_classifer(3, labels_shift=3)

    # Test TVM backend
    # BernoulliNB binary
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_bernoulinb_classifer_bi_tvm(self):
        self._test_bernoulinb_classifer(2, backend="tvm")

    # BernoulliNB multi-class
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_bernoulinb_classifer_multi_tvm(self):
        self._test_bernoulinb_classifer(3, backend="tvm")

    # BernoulliNB multi-class w/ modified alpha
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_bernoulinb_classifer_multi_alpha_tvm(self):
        self._test_bernoulinb_classifer(3, alpha=0.5, backend="tvm")

    #  BernoulliNB multi-class w/ binarize
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_bernoulinb_classifer_multi_binarize_tvm(self):
        self._test_bernoulinb_classifer(3, binarize=0.5, backend="tvm")

    #  BernoulliNB multi-class w/ fit prior
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_bernoulinb_classifer_multi_fit_prior_tvm(self):
        self._test_bernoulinb_classifer(3, fit_prior=True, backend="tvm")

    #  BernoulliNB multi-class w/ class prior
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_bernoulinb_classifer_multi_class_prior_tvm(self, backend="tvm"):
        np.random.seed(0)
        class_prior = np.random.rand(3)
        self._test_bernoulinb_classifer(3, class_prior=class_prior)

    # BernoulliNB multi-class w/ labels shift
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_bernoulinb_classifer_multi_labels_shift_tvm(self, backend="tvm"):
        self._test_bernoulinb_classifer(3, labels_shift=3)

    # MultinomialNB test function to be parameterized
    def _test_multinomialnb_classifer(self,
                                      num_classes,
                                      alpha=1.0,
                                      fit_prior=False,
                                      class_prior=None,
                                      labels_shift=0,
                                      backend="torch"):
        model = MultinomialNB(alpha=alpha,
                              fit_prior=fit_prior,
                              class_prior=class_prior)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100) + labels_shift

        model.fit(X, y)
        torch_model = hummingbird.ml.convert(model, backend, X)
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-6,
                                   atol=1e-5)

    # MultinomialNB binary
    def test_multinomialnb_classifer_bi(self):
        self._test_multinomialnb_classifer(2)

    # MultinomialNB multi-class
    def test_multinomialnb_classifer_multi(self):
        self._test_multinomialnb_classifer(3)

    # MultinomialNB multi-class w/ modified alpha
    def test_multinomialnb_classifer_multi_alpha(self):
        self._test_multinomialnb_classifer(3, alpha=0.5)

    #  MultinomialNB multi-class w/ fit prior
    def test_multinomialnb_classifer_multi_fit_prior(self):
        self._test_multinomialnb_classifer(3, fit_prior=True)

    #  MultinomialNB multi-class w/ class prior
    def test_multinomialnb_classifer_multi_class_prior(self):
        np.random.seed(0)
        class_prior = np.random.rand(3)
        self._test_multinomialnb_classifer(3, class_prior=class_prior)

    # MultinomialNB multi-class w/ labels shift
    def test_multinomialnb_classifer_multi_labels_shift(self):
        self._test_multinomialnb_classifer(3, labels_shift=3)

    # TVM Backend
    # MultinomialNB binary
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_multinomialnb_classifer_bi_tvm(self):
        self._test_multinomialnb_classifer(2, backend="tvm")

    # MultinomialNB multi-class
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_multinomialnb_classifer_multi_tvm(self):
        self._test_multinomialnb_classifer(3, backend="tvm")

    # MultinomialNB multi-class w/ modified alpha
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_multinomialnb_classifer_multi_alpha_tvm(self):
        self._test_multinomialnb_classifer(3, alpha=0.5, backend="tvm")

    #  MultinomialNB multi-class w/ fit prior
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_multinomialnb_classifer_multi_fit_prior_tvm(self):
        self._test_multinomialnb_classifer(3, fit_prior=True, backend="tvm")

    #  MultinomialNB multi-class w/ class prior
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_multinomialnb_classifer_multi_class_prior_tvm(self):
        np.random.seed(0)
        class_prior = np.random.rand(3)
        self._test_multinomialnb_classifer(3,
                                           class_prior=class_prior,
                                           backend="tvm")

    # MultinomialNB multi-class w/ labels shift
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_multinomialnb_classifer_multi_labels_shift_tvm(self):
        self._test_multinomialnb_classifer(3, labels_shift=3, backend="tvm")

    # GaussianNB test function to be parameterized
    def _test_gaussiannb_classifer(self,
                                   num_classes,
                                   priors=None,
                                   var_smoothing=1e-9,
                                   labels_shift=0,
                                   backend="torch"):
        model = GaussianNB(priors=priors, var_smoothing=var_smoothing)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100) + labels_shift

        model.fit(X, y)
        torch_model = hummingbird.ml.convert(model, backend, X)
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-5,
                                   atol=1e-5)

    # GaussianNB binary
    def test_gaussiannb_classifer_bi(self):
        self._test_gaussiannb_classifer(2)

    # GaussianNB multi-class
    def test_gaussiannb_classifer_multi(self):
        self._test_gaussiannb_classifer(3)

    #  GaussianNB multi-class w/ class prior
    def test_gaussiannb_classifer_multi_class_prior(self):
        np.random.seed(0)
        priors = np.random.rand(3)
        priors = priors / np.sum(priors)
        self._test_gaussiannb_classifer(3, priors=priors)

    # GaussianNB multi-class w/ modified var_smoothing
    def test_gaussiannb_classifer_multi_alpha(self):
        self._test_gaussiannb_classifer(3, var_smoothing=1e-2)

    # GaussianNB multi-class w/ labels shift
    def test_gaussiannb_classifer_multi_labels_shift(self):
        self._test_gaussiannb_classifer(3, labels_shift=3)

    # TVM Backend
    # GaussianNB binary
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_gaussiannb_classifer_bi_tvm(self):
        self._test_gaussiannb_classifer(2, backend="tvm")

    # GaussianNB multi-class
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_gaussiannb_classifer_multi_tvm(self):
        self._test_gaussiannb_classifer(3, backend="tvm")

    #  GaussianNB multi-class w/ class prior
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_gaussiannb_classifer_multi_class_prior_tvm(self):
        np.random.seed(0)
        priors = np.random.rand(3)
        priors = priors / np.sum(priors)
        self._test_gaussiannb_classifer(3, priors=priors, backend="tvm")

    # GaussianNB multi-class w/ modified var_smoothing
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_gaussiannb_classifer_multi_alpha_tvm(self):
        self._test_gaussiannb_classifer(3, var_smoothing=1e-2, backend="tvm")

    # GaussianNB multi-class w/ labels shift
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_gaussiannb_classifer_multi_labels_shift_tvm(self):
        self._test_gaussiannb_classifer(3, labels_shift=3, backend="tvm")
class TestIsolationForestConverter(unittest.TestCase):
    # Check tree implementation
    def test_iforest_implementation(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(10, 1)
        X = np.array(X, dtype=np.float32)
        model = IsolationForest(n_estimators=1, max_samples=2)
        for extra_config_param in ["tree_trav", "perf_tree_trav", "gemm"]:
            model.fit(X)
            torch_model = hummingbird.ml.convert(
                model,
                "torch",
                extra_config={"tree_implementation": extra_config_param})
            self.assertIsNotNone(torch_model)
            self.assertEqual(
                str(type(list(torch_model.model._operator_map.values())[0])),
                iforest_implementation_map[extra_config_param])

    def _run_isolation_forest_converter(self, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_samples in [2**1, 2**3, 2**8, 2**10, 2**12]:
            model = IsolationForest(n_estimators=10, max_samples=max_samples)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            model.fit(X)
            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.decision_function(X),
                                       torch_model.decision_function(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_allclose(model.score_samples(X),
                                       torch_model.score_samples(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_array_equal(model.predict(X),
                                          torch_model.predict(X))

    # Isolation Forest
    def test_isolation_forest_converter(self):
        self._run_isolation_forest_converter()

    # Gemm Isolation Forest
    def test_isolation_forest_gemm_converter(self):
        self._run_isolation_forest_converter(
            extra_config={"tree_implementation": "gemm"})

    # Tree_trav Isolation Forest
    def test_isolation_forest_tree_trav_converter(self):
        self._run_isolation_forest_converter(
            extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav Isolation Forest
    def test_isolation_forest_perf_tree_trav_converter(self):
        self._run_isolation_forest_converter(
            extra_config={"tree_implementation": "perf_tree_trav"})

    # Float 64 data tests
    def test_float64_isolation_forest_converter(self):
        warnings.filterwarnings("ignore")
        for max_samples in [2**1, 2**3, 2**8, 2**10, 2**12]:
            model = IsolationForest(n_estimators=10, max_samples=max_samples)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            model.fit(X)
            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config={})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.decision_function(X),
                                       torch_model.decision_function(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_allclose(model.score_samples(X),
                                       torch_model.score_samples(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_array_equal(model.predict(X),
                                          torch_model.predict(X))

    # Test TorchScript backend.
    def test_isolation_forest_ts_converter(self):
        warnings.filterwarnings("ignore")
        for max_samples in [2**1, 2**3, 2**8, 2**10, 2**12]:
            model = IsolationForest(n_estimators=10, max_samples=max_samples)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            model.fit(X)
            torch_model = hummingbird.ml.convert(model,
                                                 "torch.jit",
                                                 X,
                                                 extra_config={})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.decision_function(X),
                                       torch_model.decision_function(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_allclose(model.score_samples(X),
                                       torch_model.score_samples(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_array_equal(model.predict(X),
                                          torch_model.predict(X))

    # Test ONNX backend.
    @unittest.skipIf(not (onnx_runtime_installed()),
                     reason="ONNX tests require ORT")
    def test_isolation_forest_onnx_converter(self):
        warnings.filterwarnings("ignore")
        for max_samples in [2**1, 2**3, 2**8, 2**10, 2**12]:
            model = IsolationForest(n_estimators=10, max_samples=max_samples)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            model.fit(X)
            onnx_model = hummingbird.ml.convert(model,
                                                "onnx",
                                                X,
                                                extra_config={})
            self.assertIsNotNone(onnx_model)
            np.testing.assert_allclose(model.decision_function(X),
                                       onnx_model.decision_function(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_allclose(model.score_samples(X),
                                       onnx_model.score_samples(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_array_equal(model.predict(X),
                                          onnx_model.predict(X))

    # Test TVM backend.
    @unittest.skipIf(not (tvm_installed()), reason="TVM test requires TVM")
    def test_isolation_forest_tvm_converter(self):
        warnings.filterwarnings("ignore")
        for max_samples in [2**1, 2**3, 2**8, 2**10, 2**12]:
            model = IsolationForest(n_estimators=10, max_samples=max_samples)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            model.fit(X)
            hb_model = hummingbird.ml.convert(
                model,
                "tvm",
                X,
                extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

            self.assertIsNotNone(hb_model)
            np.testing.assert_allclose(model.decision_function(X),
                                       hb_model.decision_function(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_allclose(model.score_samples(X),
                                       hb_model.score_samples(X),
                                       rtol=1e-06,
                                       atol=1e-06)
            np.testing.assert_array_equal(model.predict(X),
                                          hb_model.predict(X))
class TestSklearnSVC(unittest.TestCase):
    def _test_linear_svc(self, num_classes, labels_shift=0):
        model = LinearSVC()
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100) + labels_shift

        model.fit(X, y)
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict(X),
                                   torch_model.predict(X),
                                   rtol=1e-6,
                                   atol=1e-6)

    # LinearSVC binary
    def test_linear_svc_bi(self):
        self._test_linear_svc(2)

    # LinearSVC multiclass
    def test_linear_svc_multi(self):
        self._test_linear_svc(3)

    # LinearSVC with class labels shifted
    def test_linear_svc_shifted(self):
        self._test_linear_svc(3, labels_shift=2)

    # LinearSVR test function to be parameterized
    def _test_svr(self, y_input):
        model = LinearSVR()

        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = y_input

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict(X),
                                   torch_model.predict(X),
                                   rtol=1e-6,
                                   atol=1e-6)

    # LinearSVR with ints
    def test_svr_int(self):
        np.random.seed(0)
        self._test_svr(np.random.randint(2, size=100))

    # LinearSVR with floats
    def test_svr_float(self):
        np.random.seed(0)
        self._test_svr(np.random.rand(100))

    # SVC test function to be parameterized
    def _test_svc(self,
                  num_classes,
                  kernel="rbf",
                  gamma=None,
                  backend="torch",
                  labels_shift=0,
                  extra_config={}):

        if gamma:
            model = SVC(kernel=kernel, gamma=gamma)
        else:
            model = SVC(kernel=kernel)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100) + labels_shift

        model.fit(X, y)
        torch_model = hummingbird.ml.convert(model,
                                             backend,
                                             X,
                                             extra_config=extra_config)

        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict(X),
                                   torch_model.predict(X),
                                   rtol=1e-6,
                                   atol=1e-6)

    # SVC binary
    def test_svc_bi(self):
        self._test_svc(2)

    # SVC multiclass
    def test_svc_multi(self):
        self._test_svc(3)

    # SVC linear kernel
    def test_svc_linear(self):
        self._test_svc(2, kernel="linear")

    # SVC sigmoid kernel
    def test_svc_sigmoid(self):
        self._test_svc(3, kernel="sigmoid")

    # SVC poly kernel
    def test_svc_poly(self):
        self._test_svc(3, kernel="poly")

    # SVC with class labels shifted
    def test_svc_shifted(self):
        self._test_svc(3, labels_shift=2)

    # SVC with different gamma (default="scale")
    def test_svc_gamma(self):
        self._test_svc(3, gamma="auto")

    # NuSVC test function to be parameterized
    def _test_nu_svc(self, num_classes, backend="torch", extra_config={}):
        model = NuSVC()
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)
        torch_model = hummingbird.ml.convert(model,
                                             backend,
                                             X,
                                             extra_config=extra_config)

        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict(X),
                                   torch_model.predict(X),
                                   rtol=1e-6,
                                   atol=1e-6)

    # NuSVC binary
    def test_nu_svc_bi(self):
        self._test_nu_svc(2)

    # NuSVC multiclass
    def test_nu_svc_multi(self):
        self._test_nu_svc(3)

    # assert fail on unsupported kernel
    def test_sklearn_linear_model_raises_wrong_type(self):

        np.random.seed(0)
        X = np.random.rand(10, 10)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=10)
        model = SVC(kernel="precomputed").fit(X, y)
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "torch")

    # Float 64 data tests
    def test_float64_linear_svc(self):
        np.random.seed(0)
        num_classes = 3
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)

        model = LinearSVC()
        model.fit(X, y)
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict(X),
                                   torch_model.predict(X),
                                   rtol=1e-6,
                                   atol=1e-6)

    # Torchscript backend
    def test_svc_ts(self):
        self._test_svc(2, backend="torch.jit")

    # SVC linear kernel
    def test_svc_linear_ts(self):
        self._test_svc(2, kernel="linear", backend="torch.jit")

    # SVC sigmoid kernel
    def test_svc_sigmoid_ts(self):
        self._test_svc(2, kernel="sigmoid", backend="torch.jit")

    # SVC poly kernel
    def test_svc_poly_ts(self):
        self._test_svc(2, kernel="poly", backend="torch.jit")

    # NuSVC binary
    def test_nu_svc_bi_ts(self):
        self._test_nu_svc(2, backend="torch.jit")

    def test_svc_multi_ts(self):
        self._test_svc(3, backend="torch.jit")

    # TVM backend.
    # SVC binary
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_svc_tvm(self):
        self._test_svc(2,
                       backend="tvm",
                       extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

    # SVC linear kernel
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_svc_linear_tvm(self):
        self._test_svc(2,
                       kernel="linear",
                       backend="tvm",
                       extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

    # SVC sigmoid kernel
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_svc_sigmoid_tvm(self):
        self._test_svc(2,
                       kernel="sigmoid",
                       backend="tvm",
                       extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

    # SVC poly kernel
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_svc_poly_tvm(self):
        self._test_svc(2,
                       kernel="poly",
                       backend="tvm",
                       extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

    # NuSVC binary
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_nu_svc_bi_tvm(self):
        self._test_nu_svc(2,
                          backend="tvm",
                          extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})
# Example #12
class TestXGBoostConverter(unittest.TestCase):
    # Check tree implementation
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_implementation(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(1, 1)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=1)

        for model in [
                xgb.XGBClassifier(n_estimators=1, max_depth=1),
                xgb.XGBRegressor(n_estimators=1, max_depth=1)
        ]:
            for extra_config_param in ["tree_trav", "perf_tree_trav", "gemm"]:
                model.fit(X, y)

                torch_model = hummingbird.ml.convert(
                    model,
                    "torch",
                    X[0:1],
                    extra_config={"tree_implementation": extra_config_param})
                self.assertIsNotNone(torch_model)
                self.assertEqual(
                    str(type(
                        list(torch_model.model._operator_map.values())[0])),
                    gbdt_implementation_map[extra_config_param])

    def _run_xgb_classifier_converter(self, num_classes, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torch", [],
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Binary classifier
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_binary_classifier_converter(self):
        self._run_xgb_classifier_converter(2)

    # Gemm classifier
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_gemm_classifier_converter(self):
        self._run_xgb_classifier_converter(
            2, extra_config={"tree_implementation": "gemm"})

    # Tree_trav classifier
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_tree_trav_classifier_converter(self):
        self._run_xgb_classifier_converter(
            2, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav classifier
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_perf_tree_trav_classifier_converter(self):
        self._run_xgb_classifier_converter(
            2, extra_config={"tree_implementation": "perf_tree_trav"})

    # Multi classifier
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_multi_classifier_converter(self):
        self._run_xgb_classifier_converter(3)

    # Gemm multi classifier
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_gemm_multi_classifier_converter(self):
        self._run_xgb_classifier_converter(
            3, extra_config={"tree_implementation": "gemm"})

    # Tree_trav multi classifier
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_tree_trav_multi_classifier_converter(self):
        self._run_xgb_classifier_converter(
            3, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav multi classifier
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_perf_tree_trav_multi_classifier_converter(self):
        self._run_xgb_classifier_converter(
            3, extra_config={"tree_implementation": "perf_tree_trav"})

    def _run_xgb_ranker_converter(self, num_classes, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBRanker(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y, group=[X.shape[0]])

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 X,
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Ranker
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_binary_ranker_converter(self):
        self._run_xgb_ranker_converter(1000)

    # Gemm ranker
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_gemm_ranker_converter(self):
        self._run_xgb_ranker_converter(
            1000, extra_config={"tree_implementation": "gemm"})

    # Tree_trav ranker
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_tree_trav_ranker_converter(self):
        self._run_xgb_ranker_converter(
            1000, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav ranker
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_perf_tree_trav_ranker_converter(self):
        self._run_xgb_ranker_converter(
            1000, extra_config={"tree_implementation": "perf_tree_trav"})

    def _run_xgb_regressor_converter(self, num_classes, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)
            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 X,
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Regressor
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_binary_regressor_converter(self):
        self._run_xgb_regressor_converter(1000)

    # Gemm regressor
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_gemm_regressor_converter(self):
        self._run_xgb_regressor_converter(
            1000, extra_config={"tree_implementation": "gemm"})

    # Tree_trav regressor
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_tree_trav_regressor_converter(self):
        self._run_xgb_regressor_converter(
            1000, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav regressor
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_perf_tree_trav_regressor_converter(self):
        self._run_xgb_regressor_converter(
            1000, extra_config={"tree_implementation": "perf_tree_trav"})

    # Float 64 data tests
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_float64_xgb_classifier_converter(self):
        warnings.filterwarnings("ignore")
        num_classes = 3
        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torch", [])
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_float64_xgb_ranker_converter(self):
        warnings.filterwarnings("ignore")
        num_classes = 3
        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBRanker(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y, group=[X.shape[0]])

            torch_model = hummingbird.ml.convert(model, "torch", X[0:1])
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_float64_xgb_regressor_converter(self):
        warnings.filterwarnings("ignore")
        num_classes = 3
        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)
            torch_model = hummingbird.ml.convert(model, "torch", X[0:1])
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Small tree.
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_run_xgb_classifier_converter(self):
        warnings.filterwarnings("ignore")
        for extra_config_param in ["tree_trav", "perf_tree_trav", "gemm"]:
            model = xgb.XGBClassifier(n_estimators=1, max_depth=1)
            np.random.seed(0)
            X = np.random.rand(1, 1)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(2, size=1)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(
                model,
                "torch", [],
                extra_config={"tree_implementation": extra_config_param})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Torchscript backends.
    # Test TorchScript backend regression.
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_regressor_converter_torchscript(self):
        warnings.filterwarnings("ignore")
        import torch

        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(1000, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torchscript", X)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Test TorchScript backend classification.
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    def test_xgb_classifier_converter_torchscript(self):
        warnings.filterwarnings("ignore")
        import torch

        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(2, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torchscript", X)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # TVM backend tests.
    # TVM backend regression.
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_xgb_regressor_converter_tvm(self):
        warnings.filterwarnings("ignore")
        import torch

        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(1000, size=100)

            model.fit(X, y)

            tvm_model = hummingbird.ml.convert(
                model,
                "tvm",
                X,
                extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})
            self.assertIsNotNone(tvm_model)
            np.testing.assert_allclose(model.predict(X),
                                       tvm_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Test TVM backend classification.
    @unittest.skipIf(not xgboost_installed(),
                     reason="XGBoost test requires XGBoost installed")
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_xgb_classifier_converter_tvm(self):
        warnings.filterwarnings("ignore")
        import torch

        for max_depth in [1, 3, 8, 10, 12]:
            model = xgb.XGBClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(2, size=100)

            model.fit(X, y)

            tvm_model = hummingbird.ml.convert(
                model,
                "tvm",
                X,
                extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})
            self.assertIsNotNone(tvm_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       tvm_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)
# Example #13
    def load(location, do_unzip_and_model_type_check=True):
        """
        Method used to load a container from the file system.

        Args:
            location: The location on the file system where to load the model.
            do_unzip_and_model_type_check: Whether to unzip the model and check the type.

        Returns:
            The loaded model.
        """
        assert tvm_installed(), "TVM Container requires TVM installed."

        _load_param_dict = tvm._ffi.get_global_func("tvm.relay._load_param_dict")

        # We borrow this function directly from Relay.
        # When imported, Relay tries to download schedule data,
        # but at inference time access to disk or the network could be blocked.
        def load_param_dict(param_bytes):
            if isinstance(param_bytes, (bytes, str)):
                param_bytes = bytearray(param_bytes)
            load_arr = _load_param_dict(param_bytes)
            return {v.name: v.array for v in load_arr}

        container = None

        if do_unzip_and_model_type_check:
            # Unzip the dir.
            zip_location = location
            if not location.endswith("zip"):
                zip_location = location + ".zip"
            else:
                location = zip_location[:-4]
            assert os.path.exists(zip_location), "Zip file {} does not exist.".format(zip_location)
            shutil.unpack_archive(zip_location, location, format="zip")

            assert os.path.exists(location), "Model location {} does not exist.".format(location)

            # Load the model type.
            with open(os.path.join(location, constants.SAVE_LOAD_MODEL_TYPE_PATH), "r") as file:
                model_type = file.readline()
            if model_type != "tvm":
                shutil.rmtree(location)
                raise RuntimeError("Expected TVM model type, got {}".format(model_type))

        # Check the versions of the modules used when saving the model.
        if os.path.exists(os.path.join(location, constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH)):
            with open(os.path.join(location, constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH), "r") as file:
                configuration = file.readlines()
            check_dumped_versions(configuration, hummingbird, torch)
        else:
            warnings.warn(
                "Cannot find the configuration file with versions. You are likely trying to load a model saved with an old version of Hummingbird."
            )

        # Load the actual model.
        path_lib = os.path.join(location, constants.SAVE_LOAD_TVM_LIB_PATH)
        with open(os.path.join(location, constants.SAVE_LOAD_TVM_GRAPH_PATH)) as file:
            graph = file.read()
        lib = tvm.runtime.module.load_module(path_lib)
        with open(os.path.join(location, constants.SAVE_LOAD_TVM_PARAMS_PATH), "rb") as file:
            params = load_param_dict(file.read())

        # Load the container.
        with open(os.path.join(location, constants.SAVE_LOAD_CONTAINER_PATH), "rb") as file:
            container = dill.load(file)
        if container is None:
            shutil.rmtree(location)
            raise RuntimeError("Failed to load the model container.")

        # Setup the container.
        ctx = tvm.cpu() if container._ctx == "cpu" else tvm.gpu()
        container._model = graph_runtime.create(graph, lib, ctx)
        container._model.set_input(**params)

        container._extra_config[constants.TVM_GRAPH] = graph
        container._extra_config[constants.TVM_LIB] = lib
        container._extra_config[constants.TVM_PARAMS] = params
        container._extra_config[constants.TVM_CONTEXT] = ctx
        container._ctx = ctx
        container._tvm_tensors = {name: container._to_tvm_array(np.array([])) for name in container._input_names}

        # Need to set the number of threads to use as set in the original container.
        os.environ["TVM_NUM_THREADS"] = str(container._n_threads)
        shutil.rmtree(location)

        return container
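# A hedged round-trip sketch: assuming the container was persisted with the
# matching save() method (producing "<location>.zip" holding the TVM graph,
# lib, params, and the pickled container), it can be restored and used as
# follows; "my_model" is a made-up location.
#   container = TVMSklearnContainer.load("my_model")  # unzips "my_model.zip"
#   predictions = container.predict(X)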
# Example #14
import shutil
import torch
import warnings

import hummingbird
from hummingbird.ml._utils import tvm_installed, dump_versions, check_dumped_versions
from hummingbird.ml.operator_converters import constants
from hummingbird.ml.containers._sklearn_api_containers import (
    SklearnContainer,
    SklearnContainerTransformer,
    SklearnContainerRegression,
    SklearnContainerClassification,
    SklearnContainerAnomalyDetection,
)

if tvm_installed():
    import tvm
    import tvm._ffi
    from tvm.contrib import graph_runtime


class TVMSklearnContainer(SklearnContainer):
    """
    Base container for TVM models.
    The container mirrors the Sklearn API.
    The test input size must be the same as the batch size this container was created with.
    """

    def __init__(self, model, n_threads=None, batch_size=None, extra_config={}):
        super(TVMSklearnContainer, self).__init__(model, n_threads, batch_size, extra_config=extra_config)
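
    # Note (hedged): TVM compiles kernels for a fixed input shape, so a
    # container created with a given batch_size expects inference inputs with
    # exactly that many rows, e.g. a model converted with X of shape (100, 200)
    # should be scored on batches of 100 rows.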
# Example #15
class TestSklearnLinearClassifiers(unittest.TestCase):

    # LogisticRegression test function to be parameterized
    def _test_logistic_regression(self, num_classes, solver="liblinear", multi_class="auto", labels_shift=0):
        if num_classes > 2:
            model = LogisticRegression(solver=solver, multi_class=multi_class, fit_intercept=True)
        else:
            model = LogisticRegression(solver="liblinear", fit_intercept=True)

        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100) + labels_shift

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-6, atol=1e-6)

    # LogisticRegression binary
    def test_logistic_regression_bi(self):
        self._test_logistic_regression(2)

    # LogisticRegression multiclass with auto
    def test_logistic_regression_multi_auto(self):
        self._test_logistic_regression(3)

    # LogisticRegression with class labels shifted
    def test_logistic_regression_shifted_classes(self):
        self._test_logistic_regression(3, labels_shift=2)

    # LogisticRegression with multi+ovr
    def test_logistic_regression_multi_ovr(self):
        self._test_logistic_regression(3, multi_class="ovr")

    # LogisticRegression with multi+multinomial+sag
    def test_logistic_regression_multi_multin_sag(self):
        warnings.filterwarnings("ignore")
        # this will not converge due to small test size
        self._test_logistic_regression(3, multi_class="multinomial", solver="sag")

    # LogisticRegression binary lbfgs
    def test_logistic_regression_bi_lbfgs(self):
        warnings.filterwarnings("ignore")
        # this will not converge due to small test size
        self._test_logistic_regression(2, solver="lbfgs")

    # LogisticRegression with multi+lbfgs
    def test_logistic_regression_multi_lbfgs(self):
        warnings.filterwarnings("ignore")
        # this will not converge due to small test size
        self._test_logistic_regression(3, solver="lbfgs")

    # LogisticRegression with multi+multinomial+lbfgs
    def test_logistic_regression_multi_multin_lbfgs(self):
        warnings.filterwarnings("ignore")
        # this will not converge due to small test size
        self._test_logistic_regression(3, multi_class="multinomial", solver="lbfgs")

    # LogisticRegression with multi+ovr+lbfgs
    def test_logistic_regression_multi_ovr_lbfgs(self):
        warnings.filterwarnings("ignore")
        # this will not converge due to small test size
        self._test_logistic_regression(3, multi_class="ovr", solver="lbfgs")

    # LinearRegression test function to be parameterized
    def _test_linear_regression(self, y_input):
        model = LinearRegression()

        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = y_input

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-6, atol=1e-6)

    # LinearRegression with ints
    def test_linear_regression_int(self):
        np.random.seed(0)
        self._test_linear_regression(np.random.randint(2, size=100))

    # LinearRegression with floats
    def test_linear_regression_float(self):
        np.random.seed(0)
        self._test_linear_regression(np.random.rand(100))

    # RidgeCV test function to be parameterized
    def _test_ridge_cv(self, y_input):
        model = RidgeCV()

        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = y_input

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-6, atol=1e-6)

    # RidgeCV with ints
    def test_ridge_cv_int(self):
        np.random.seed(0)
        self._test_ridge_cv(np.random.randint(2, size=100))

    # RidgeCV with floats
    def test_ridge_cv_float(self):
        np.random.seed(0)
        self._test_ridge_cv(np.random.rand(100))

    # LogisticRegressionCV test function to be parameterized
    def _test_logistic_regression_cv(self, num_classes, solver="liblinear", multi_class="auto", labels_shift=0):
        if num_classes > 2:
            model = LogisticRegressionCV(solver=solver, multi_class=multi_class, fit_intercept=True)
        else:
            model = LogisticRegressionCV(solver="liblinear", fit_intercept=True)

        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100) + labels_shift

        model.fit(X, y)
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-6, atol=1e-6)

    # LogisticRegressionCV with 2 classes
    def test_logistic_regression_cv_bi(self):
        self._test_logistic_regression_cv(2)

    # LogisticRegressionCV with 3 classes
    def test_logistic_regression_cv_multi(self):
        self._test_logistic_regression_cv(3)

    # LogisticRegressionCV with shifted classes
    def test_logistic_regression_cv_shifted_classes(self):
        self._test_logistic_regression_cv(3, labels_shift=2)

    # LogisticRegressionCV with multi+ovr
    def test_logistic_regression_cv_multi_ovr(self):
        self._test_logistic_regression_cv(3, multi_class="ovr")

    # LogisticRegressionCV with multi+multinomial
    def test_logistic_regression_cv_multi_multin(self):
        warnings.filterwarnings("ignore")
        # this will not converge due to small test size
        self._test_logistic_regression_cv(3, multi_class="multinomial", solver="sag")

    # SGDClassifier test function to be parameterized
    def _test_sgd_classifier(self, num_classes):

        model = SGDClassifier(loss="log")

        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict_proba(X), torch_model.predict_proba(X), rtol=1e-6, atol=1e-6)

    # SGDClassifier with 2 classes
    def test_sgd_classifier_bi(self):
        self._test_sgd_classifier(2)

    # SGDClassifier with 3 classes
    def test_sgd_classifier_multi(self):
        self._test_sgd_classifier(3)

    # SGDClassifier with modified huber loss
    @unittest.skipIf(
        LooseVersion(torch.__version__) < LooseVersion("1.6.0"), reason="Modified Huber loss test requires torch >= 1.6.0"
    )
    def test_modified_huber(self):
        X = np.array([[-0.5, -1], [-1, -1], [-0.1, -0.1], [0.1, -0.2], [0.5, 1], [1, 1], [0.1, 0.1], [-0.1, 0.2]])
        Y = np.array([1, 1, 1, 1, 2, 2, 2, 2])

        model = SGDClassifier(loss="modified_huber", max_iter=1000, tol=1e-3)
        model.fit(X, Y)

        # Use Hummingbird to convert the model to PyTorch
        hb_model = hummingbird.ml.convert(model, "torch")

        inputs = [[-1, -1], [1, 1], [-0.2, 0.1], [0.2, -0.1]]
        np.testing.assert_allclose(model.predict_proba(inputs), hb_model.predict_proba(inputs), rtol=1e-6, atol=1e-6)

    @unittest.skipIf(
        LooseVersion(torch.__version__) < LooseVersion("1.6.0"), reason="Modified Huber loss test requires torch >= 1.6.0"
    )
    def test_modified_huber2(self):
        X = np.array([[-0.5, -1], [-1, -1], [-0.1, -0.1], [0.1, -0.2], [0.5, 1], [1, 1], [0.1, 0.1], [-0.1, 0.2]])
        Y = np.array([1, 1, 1, 1, 2, 2, 2, 2])

        model = SGDClassifier(loss="modified_huber", max_iter=1000, tol=1e-3)
        model.fit(X, Y)

        # Use Hummingbird to convert the model to PyTorch
        hb_model = hummingbird.ml.convert(model, "torch")

        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-6, atol=1e-6)

    # SGDClassifier with modified huber loss multiclass
    @unittest.skipIf(
        LooseVersion(torch.__version__) < LooseVersion("1.6.0"), reason="Modified Huber loss test requires torch >= 1.6.0"
    )
    def test_modified_huber_multi(self):
        X = np.array([[-0.5, -1], [-1, -1], [-0.1, -0.1], [0.1, -0.2], [0.5, 1], [1, 1], [0.1, 0.1], [-0.1, 0.2]])
        Y = np.array([0, 1, 1, 1, 2, 2, 2, 2])

        model = SGDClassifier(loss="modified_huber", max_iter=1000, tol=1e-3)
        model.fit(X, Y)

        # Use Hummingbird to convert the model to PyTorch
        hb_model = hummingbird.ml.convert(model, "torch")

        inputs = [[-1, -1], [1, 1], [-0.2, 0.1], [0.2, -0.1]]
        np.testing.assert_allclose(model.predict_proba(inputs), hb_model.predict_proba(inputs), rtol=1e-6, atol=1e-6)

    # Failure cases
    def test_sklearn_linear_model_raises_wrong_type(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=100).astype(np.float32)  # y must be int, not float; conversion should raise
        model = SGDClassifier().fit(X, y)
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "torch")

    # Float 64 data tests
    def test_float64_linear_regression(self):
        model = LinearRegression()

        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(2, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-6, atol=1e-6)

    def test_float64_sgd_classifier(self):

        model = SGDClassifier(loss="log")

        np.random.seed(0)
        num_classes = 3
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-6, atol=1e-6)

    # Multioutput regression tests
    def test_multioutput_linear_regression(self):
        for n_targets in [1, 2, 7]:
            model = LinearRegression()
            X, y = datasets.make_regression(
                n_samples=100, n_features=10, n_informative=5, n_targets=n_targets, random_state=2021
            )
            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torch")
            self.assertTrue(torch_model is not None)
            np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-5, atol=1e-5)

    # Test TorchScript backend.
    def test_logistic_regression_ts(self):

        model = LogisticRegression(solver="liblinear")

        data = datasets.load_iris()
        X, y = data.data, data.target
        X = X.astype(np.float32)

        model.fit(X, y)

        ts_model = hummingbird.ml.convert(model, "torch.jit", X)
        self.assertTrue(ts_model is not None)
        np.testing.assert_allclose(model.predict(X), ts_model.predict(X), rtol=1e-6, atol=1e-6)
        np.testing.assert_allclose(model.predict_proba(X), ts_model.predict_proba(X), rtol=1e-6, atol=1e-6)

    # Test TVM backends.
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_sgd_classifier_tvm(self):

        model = SGDClassifier(loss="log")

        np.random.seed(0)
        num_classes = 3
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        tvm_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertTrue(tvm_model is not None)
        np.testing.assert_allclose(model.predict(X), tvm_model.predict(X), rtol=1e-6, atol=1e-6)
        np.testing.assert_allclose(model.predict_proba(X), tvm_model.predict_proba(X), rtol=1e-6, atol=1e-6)

    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_lr_tvm(self):

        model = LinearRegression()

        np.random.seed(0)
        num_classes = 1000
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        tvm_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})
        self.assertTrue(tvm_model is not None)

        np.testing.assert_allclose(model.predict(X), tvm_model.predict(X), rtol=1e-6, atol=1e-3)
Example #16
class TestBackends(unittest.TestCase):
    # Test backends are browsable
    def test_backends(self):
        warnings.filterwarnings("ignore")
        self.assertTrue(len(hummingbird.ml.backends) > 0)

    # Test backends are not case sensitive
    def test_backends_case_sensitive(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Test pytorch is still a valid backend name
    def test_backends_pytorch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "pytOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Test pytorch save and load
    def test_pytorch_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        hb_model_loaded = hummingbird.ml.TorchContainer.load("pt-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("pt-tmp.zip")

    # Test pytorch save and generic load
    def test_pytorch_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        hb_model_loaded = hummingbird.ml.load("pt-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("pt-tmp.zip")

    def test_pytorch_save_load_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        hummingbird.ml.load("pt-tmp")
        hummingbird.ml.load("pt-tmp")

        os.remove("pt-tmp.zip")

    def test_pytorch_save_load_more_versions(self):
        from hummingbird.ml.operator_converters import constants

        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        shutil.unpack_archive("pt-tmp.zip", "pt-tmp", format="zip")

        # Adding a new library does not create problems.
        with open(
                os.path.join("pt-tmp",
                             constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH),
                "r") as file:
            configuration = file.readlines()
        configuration.append("\nlibx=1.3")
        os.remove(
            os.path.join("pt-tmp",
                         constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH))
        with open(
                os.path.join("pt-tmp",
                             constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH),
                "w") as file:
            file.writelines(configuration)
        shutil.make_archive("pt-tmp", "zip", "pt-tmp")

        hummingbird.ml.load("pt-tmp")
        os.remove("pt-tmp.zip")

    def test_pytorch_save_load_less_versions(self):
        from hummingbird.ml.operator_converters import constants

        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        shutil.unpack_archive("pt-tmp.zip", "pt-tmp", format="zip")

        # Removing a library does not create problems.
        with open(
                os.path.join("pt-tmp",
                             constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH),
                "r") as file:
            configuration = file.readlines()
        configuration = configuration[-1]
        os.remove(
            os.path.join("pt-tmp",
                         constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH))
        with open(
                os.path.join("pt-tmp",
                             constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH),
                "w") as file:
            file.writelines(configuration)
        shutil.make_archive("pt-tmp", "zip", "pt-tmp")

        hummingbird.ml.load("pt-tmp")
        os.remove("pt-tmp.zip")

    def test_pytorch_save_load_different_versions(self):
        from hummingbird.ml.operator_converters import constants

        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        shutil.unpack_archive("pt-tmp.zip", "pt-tmp", format="zip")

        # Changing the version of a library does not create problems.
        with open(
                os.path.join("pt-tmp",
                             constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH),
                "r") as file:
            configuration = file.readlines()
        configuration[0] = "hummingbird=0.0.0.1\n"
        os.remove(
            os.path.join("pt-tmp",
                         constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH))
        with open(
                os.path.join("pt-tmp",
                             constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH),
                "w") as file:
            file.writelines(configuration)
        shutil.make_archive("pt-tmp", "zip", "pt-tmp")

        hummingbird.ml.load("pt-tmp")
        os.remove("pt-tmp.zip")

    # Test torchscript save and load
    def test_torchscript_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch.jit", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("ts-tmp")

        hb_model_loaded = hummingbird.ml.TorchContainer.load("ts-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("ts-tmp.zip")

    # Test torchscript save and generic load
    def test_torchscript_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch.jit", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("ts-tmp")

        hb_model_loaded = hummingbird.ml.load("ts-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("ts-tmp.zip")

    def test_load_fails_bad_path(self):
        # Asserts for bad path with extension
        self.assertRaises(AssertionError, hummingbird.ml.load, "nonsense.zip")
        self.assertRaises(AssertionError, hummingbird.ml.TorchContainer.load,
                          "nonsense.zip")

        # Asserts for bad path with no extension
        self.assertRaises(AssertionError, hummingbird.ml.load, "nonsense")
        self.assertRaises(AssertionError, hummingbird.ml.TorchContainer.load,
                          "nonsense")

    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_load_fails_bad_path_onnx(self):
        self.assertRaises(AssertionError, hummingbird.ml.ONNXContainer.load,
                          "nonsense.zip")
        self.assertRaises(AssertionError, hummingbird.ml.ONNXContainer.load,
                          "nonsense")

    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_load_fails_bad_path_tvm(self):
        self.assertRaises(AssertionError, hummingbird.ml.TVMContainer.load,
                          "nonsense.zip")
        self.assertRaises(AssertionError, hummingbird.ml.TVMContainer.load,
                          "nonsense")

    # Test not supported backends
    def test_unsupported_backend(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test that the scala backend raises an exception
        self.assertRaises(MissingBackend, hummingbird.ml.convert, model,
                          "scala")

    # Test torchscript requires test_data
    def test_torchscript_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test torchscript requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model,
                          "torch.jit")

    # Test TVM requires test_data
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test tvm requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "tvm")

    # Test tvm save and load
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp")

        hb_model_loaded = hummingbird.ml.TVMContainer.load("tvm-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("tvm-tmp.zip")

    # Test tvm save and generic load
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp")

        hb_model_loaded = hummingbird.ml.load("tvm-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("tvm-tmp.zip")

    # Test tvm save and load zip file
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_load_zip(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp.zip")

        hb_model_loaded = hummingbird.ml.TVMContainer.load("tvm-tmp.zip")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("tvm-tmp.zip")

    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_load_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp.zip")

        hummingbird.ml.TVMContainer.load("tvm-tmp.zip")
        hummingbird.ml.TVMContainer.load("tvm-tmp.zip")

        os.remove("tvm-tmp.zip")

    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_load_no_versions(self):
        from hummingbird.ml.operator_converters import constants

        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp")

        shutil.unpack_archive("tvm-tmp.zip", "tvm-tmp", format="zip")

        # Removing the configuration file with the versions does not create problems.
        os.remove(
            os.path.join("tvm-tmp",
                         constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH))

        hummingbird.ml.load("tvm-tmp")
        os.remove("tvm-tmp.zip")

    # Test onnx requires test_data or initial_types
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_float(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx 0 shape input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_zero_shape_input(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("input",
                                             DoubleTensorType([0, X.shape[1]]))
                                        ],
                                        target_opset=11)

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, double input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_double(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", DoubleTensorType([X.shape[0],
                                                       X.shape[1]]))],
            target_opset=11)

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, long input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_long(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.int64)

        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", Int64TensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, int input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_int(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", Int32TensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, string input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_string(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([["a", "b", "c"]])
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", StringTensorType([X.shape[0],
                                                       X.shape[1]]))],
            target_opset=11)

        # String inputs are not supported and should raise an error
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model,
                          "onnx")

    # Test ONNX save and load
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="ONNX test requires ORT")
    def test_onnx_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        hb_model_loaded = hummingbird.ml.ONNXContainer.load("onnx-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("onnx-tmp.zip")

    # Test ONNX save and generic load
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="ONNX test requires ORT")
    def test_onnx_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        hb_model_loaded = hummingbird.ml.load("onnx-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("onnx-tmp.zip")

    # Test ONNX save and generic load
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="ONNX test requires ORT")
    def test_onnx_save_load_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        hummingbird.ml.load("onnx-tmp")
        hummingbird.ml.load("onnx-tmp")

        os.remove("onnx-tmp.zip")

    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="ONNX test requires ORT")
    def test_onnx_save_load_no_versions(self):
        from hummingbird.ml.operator_converters import constants

        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        shutil.unpack_archive("onnx-tmp.zip", "onnx-tmp", format="zip")

        # Removing the configuration file with the versions does not create problems.
        os.remove(
            os.path.join("onnx-tmp",
                         constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH))

        hummingbird.ml.load("onnx-tmp")
        os.remove("onnx-tmp.zip")

    # Test for when the user forgets to pass the backend string (e.g., convert(model, output) rather than convert(model, 'torch')) due to an API change
    def test_forgotten_backend_string(self):
        from sklearn.preprocessing import LabelEncoder

        model = LabelEncoder()
        data = np.array([1, 4, 5, 2, 0, 2], dtype=np.int32)
        model.fit(data)

        self.assertRaises(ValueError, hummingbird.ml.convert, model,
                          [("input", Int32TensorType([6, 1]))])

    # Test ONNX
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="ONNX test requires ORT")
    def test_onnx(self):
        import numpy as np
        import lightgbm as lgb
        from hummingbird.ml import convert

        # Create some random data for binary classification.
        num_classes = 2
        X = np.array(np.random.rand(10000, 28), dtype=np.float32)
        y = np.random.randint(num_classes, size=10000)

        model = lgb.LGBMClassifier()
        model.fit(X, y)

        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "onnx")

    # Test Spark UDF
    @unittest.skipIf(
        os.name == "nt" or not sparkml_installed()
        or LooseVersion(pyspark.__version__) < LooseVersion("3"),
        reason="UDF Test requires spark >= 3",
    )
    def test_udf_torch(self):
        X, y = load_iris(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            random_state=77,
            test_size=0.2,
        )
        spark_df = sql_context.createDataFrame(pd.DataFrame(data=X_train))
        sql_context.registerDataFrameAsTable(spark_df, "IRIS")

        model = GradientBoostingClassifier(n_estimators=10)
        model.fit(X_train, y_train)

        hb_model = hummingbird.ml.convert(model, "torch")

        # Broadcast the model.
        broadcasted_model = spark.sparkContext.broadcast(hb_model)

        # UDF definition.
        @pandas_udf("long")
        def udf_hb_predict(
                iterator: Iterator[pd.Series]) -> Iterator[pd.Series]:
            model = broadcasted_model.value
            for args in iterator:
                data_unmangled = pd.concat([feature for feature in args],
                                           axis=1)
                predictions = model.predict(data_unmangled)
                yield pd.Series(np.array(predictions))

        # Register the UDF.
        sql_context.udf.register("PREDICT", udf_hb_predict)

        # Run the query.
        sql_context.sql(
            "SELECT SUM(prediction) FROM (SELECT PREDICT(*) as prediction FROM IRIS)"
        ).show()

    @unittest.skipIf(
        os.name == "nt" or not sparkml_installed()
        or LooseVersion(pyspark.__version__) < LooseVersion("3"),
        reason="UDF Test requires spark >= 3",
    )
    def test_udf_torch_jit_broadcast(self):
        import pickle

        X, y = load_iris(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            random_state=77,
            test_size=0.2,
        )
        spark_df = sql_context.createDataFrame(pd.DataFrame(data=X_train))
        sql_context.registerDataFrameAsTable(spark_df, "IRIS")

        model = GradientBoostingClassifier(n_estimators=10)
        model.fit(X_train, y_train)

        hb_model = hummingbird.ml.convert(model, "torch.jit", X_test)

        # Broadcasting the model returns an error.
        self.assertRaises(pickle.PickleError, spark.sparkContext.broadcast,
                          hb_model)

    @unittest.skipIf(
        os.name == "nt" or not sparkml_installed()
        or LooseVersion(pyspark.__version__) < LooseVersion("3"),
        reason="UDF Test requires spark >= 3",
    )
    def test_udf_torch_jit_spark_file(self):
        import dill
        import torch.jit

        X, y = load_iris(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            random_state=77,
            test_size=0.2,
        )
        spark_df = sql_context.createDataFrame(pd.DataFrame(data=X_train))
        sql_context.registerDataFrameAsTable(spark_df, "IRIS")

        model = GradientBoostingClassifier(n_estimators=10)
        model.fit(X_train, y_train)

        hb_model = hummingbird.ml.convert(model, "torch.jit", X_test)

        # Save the file locally.
        if os.path.exists("deployed_model.zip"):
            os.remove("deployed_model.zip")
        torch.jit.save(hb_model.model, "deployed_model.zip")
        hb_model._model = None

        # Share the model using spark file and broadcast the container.
        spark.sparkContext.addFile("deployed_model.zip")
        broadcasted_container = spark.sparkContext.broadcast(hb_model)

        # UDF definition.
        @pandas_udf("long")
        def udf_hb_predict(
                iterator: Iterator[pd.Series]) -> Iterator[pd.Series]:
            location = SparkFiles.get("deployed_model.zip")
            torch_model = torch.jit.load(location)
            container = broadcasted_container.value
            container._model = torch_model
            model = container
            for args in iterator:
                data_unmangled = pd.concat([feature for feature in args],
                                           axis=1)
                predictions = model.predict(data_unmangled.values)
                yield pd.Series(np.array(predictions))

        # Register the UDF.
        sql_context.udf.register("PREDICT", udf_hb_predict)

        # Run the query.
        sql_context.sql(
            "SELECT SUM(prediction) FROM (SELECT PREDICT(*) as prediction FROM IRIS)"
        ).show()

        os.remove("deployed_model.zip")
class TestSklearnNormalizer(unittest.TestCase):
    def test_normalizer_converter(self):
        # Generate a random 2D array with values in [0, 1000)
        np.random.seed(0)
        data = np.random.rand(100, 200) * 1000
        data = np.array(data, dtype=np.float32)
        data_tensor = torch.from_numpy(data)

        for norm in ["l1", "l2", "max"]:
            model = Normalizer(norm=norm)
            model.fit(data)

            torch_model = hummingbird.ml.convert(model, "torch")

            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(
                model.transform(data),
                torch_model.transform(data_tensor),
                rtol=1e-06,
                atol=1e-06,
            )

    def test_normalizer_converter_raises_wrong_type(self):
        # Generate a random 2D array with values in [0, 1000)
        np.random.seed(0)
        data = np.random.rand(100, 200) * 1000
        data = np.array(data, dtype=np.float32)

        model = Normalizer(norm="invalid")
        model.fit(data)

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertRaises(RuntimeError,
                          torch_model.model._operator_map.SklearnNormalizer,
                          torch.from_numpy(data))

    # Float 64 data tests
    def test_float64_normalizer_converter(self):
        # Generate a random 2D array with values in [0, 1000)
        np.random.seed(0)
        data = np.random.rand(100, 200) * 1000
        data_tensor = torch.from_numpy(data)

        for norm in ["l1", "l2", "max"]:
            model = Normalizer(norm=norm)
            model.fit(data)

            torch_model = hummingbird.ml.convert(model, "torch")

            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(
                model.transform(data),
                torch_model.transform(data_tensor),
                rtol=1e-06,
                atol=1e-06,
            )

    # ONNX backend
    @unittest.skipIf(not (onnx_runtime_installed()),
                     reason="ONNX test requires ONNX and  ORT")
    def test_normalizer_converter_onnx(self):
        # Generate a random 2D array with values in [0, 1000)
        np.random.seed(0)
        data = np.random.rand(100, 200) * 1000
        data = np.array(data, dtype=np.float32)
        data_tensor = torch.from_numpy(data)

        for norm in ["l1", "l2", "max"]:
            model = Normalizer(norm=norm)
            model.fit(data)

            hb_model = hummingbird.ml.convert(model, "onnx", data)

            self.assertIsNotNone(hb_model)
            np.testing.assert_allclose(
                model.transform(data),
                hb_model.transform(data_tensor),
                rtol=1e-06,
                atol=1e-06,
            )

    # TVM backend
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_normalizer_converter_tvm(self):
        # Generate a random 2D array with values in [0, 1000)
        np.random.seed(0)
        data = np.random.rand(100, 200) * 1000
        data = np.array(data, dtype=np.float32)
        data_tensor = torch.from_numpy(data)

        for norm in ["l1", "l2", "max"]:
            model = Normalizer(norm=norm)
            model.fit(data)

            torch_model = hummingbird.ml.convert(
                model,
                "tvm",
                data,
                extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(
                model.transform(data),
                torch_model.transform(data_tensor),
                rtol=1e-06,
                atol=1e-06,
            )
Example #18
class TestBackends(unittest.TestCase):
    # Test backends are browsable
    def test_backends(self):
        warnings.filterwarnings("ignore")
        self.assertTrue(len(hummingbird.ml.backends) > 0)

    # Test backends are not case sensitive
    def test_backends_case_sensitive(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test pytorch is still a valid backend name
    def test_backends_pytorch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "pytOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test not supported backends
    def test_unsupported_backend(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test that the scala backend raises an exception
        self.assertRaises(MissingBackend, hummingbird.ml.convert, model, "scala")

    # Test torchscript requires test_data
    def test_torchscript_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test torchscript requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "torch.jit")

    # Test TVM requires test_data
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test tvm requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "tvm")

    # Test onnx requires test_data or initial_types
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML tests require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_no_test_data_float(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, double input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML tests require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_no_test_data_double(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", DoubleTensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, long input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML tests require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_no_test_data_long(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.int64)

        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", Int64TensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, int input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML tests require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_no_test_data_int(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", Int32TensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, string input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML tests require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_no_test_data_string(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([["a", "b", "c"]])
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", StringTensorType([X.shape[0], X.shape[1]]))], target_opset=11
        )

        # String inputs are not supported and should raise an error
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model, "onnx")
Example #19
class TestSklearnTreeConverter(unittest.TestCase):
    # Check tree implementation
    def test_random_forest_implementation(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(1, 1)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=1)

        for model in [
                RandomForestClassifier(n_estimators=1, max_depth=1),
                RandomForestRegressor(n_estimators=1, max_depth=1)
        ]:
            for extra_config_param in ["tree_trav", "perf_tree_trav", "gemm"]:
                model.fit(X, y)

                torch_model = hummingbird.ml.convert(
                    model,
                    "torch",
                    extra_config={
                        constants.TREE_IMPLEMENTATION: extra_config_param
                    })
                self.assertIsNotNone(torch_model)
                self.assertTrue(
                    str(type(list(torch_model.model._operators)[0])) ==
                    dt_implementation_map[extra_config_param])

    # Used for classification tests
    def _run_tree_classification_converter(self,
                                           model_type,
                                           num_classes,
                                           backend="torch",
                                           extra_config={},
                                           labels_shift=0,
                                           **kwargs):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100) + labels_shift

            model = model_type(max_depth=max_depth, **kwargs)
            model.fit(X, y)
            torch_model = hummingbird.ml.convert(model,
                                                 backend,
                                                 X,
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

            from distutils.version import LooseVersion
            import torch

            if LooseVersion(torch.__version__) >= LooseVersion("1.7.0"):
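                # Assumed rationale for the version gate: on older torch
                # versions argmax tie-breaking can differ from sklearn's,
                # so predict() labels are only compared on torch >= 1.7.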
                np.testing.assert_allclose(model.predict(X),
                                           torch_model.predict(X),
                                           rtol=1e-06,
                                           atol=1e-06)

    # Random forest binary classifier
    def test_random_forest_classifier_binary_converter(self):
        self._run_tree_classification_converter(RandomForestClassifier,
                                                2,
                                                n_estimators=10)

    # Random forest gemm classifier
    def test_random_forest_gemm_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            2,
            extra_config={constants.TREE_IMPLEMENTATION: "gemm"},
            n_estimators=10)

    # Random forest tree_trav classifier
    def test_random_forest_tree_trav_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            2,
            extra_config={constants.TREE_IMPLEMENTATION: "tree_trav"},
            n_estimators=10)

    # Random forest perf_tree_trav classifier
    def test_random_forest_perf_tree_trav_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            2,
            extra_config={constants.TREE_IMPLEMENTATION: "perf_tree_trav"},
            n_estimators=10)

    # Random forest multi classifier
    def test_random_forest_multi_classifier_converter(self):
        self._run_tree_classification_converter(RandomForestClassifier,
                                                3,
                                                n_estimators=10)

    # Random forest gemm multi classifier
    def test_random_forest_gemm_multi_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            extra_config={constants.TREE_IMPLEMENTATION: "gemm"},
            n_estimators=10)

    # Random forest tree_trav multi classifier
    def test_random_forest_tree_trav_multi_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            extra_config={constants.TREE_IMPLEMENTATION: "tree_trav"},
            n_estimators=10)

    # Random forest perf_tree_trav multi classifier
    def test_random_forest_perf_tree_trav_multi_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            extra_config={constants.TREE_IMPLEMENTATION: "perf_tree_trav"},
            n_estimators=10)

    # Random forest gemm classifier shifted classes
    def test_random_forest_gemm_classifier_shifted_labels_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            labels_shift=2,
            extra_config={constants.TREE_IMPLEMENTATION: "gemm"},
            n_estimators=10)

    # Random forest tree_trav classifier shifted classes
    def test_random_forest_tree_trav_classifier_shifted_labels_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            labels_shift=2,
            extra_config={constants.TREE_IMPLEMENTATION: "tree_trav"},
            n_estimators=10,
        )

    # Random forest perf_tree_trav classifier shifted classes
    def test_random_forest_perf_tree_trav_classifier_shifted_labels_converter(
            self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            labels_shift=2,
            extra_config={constants.TREE_IMPLEMENTATION: "perf_tree_trav"},
            n_estimators=10,
        )

    # Used for regression tests
    def _run_tree_regressor_converter(self,
                                      model_type,
                                      num_classes,
                                      backend="torch",
                                      extra_config={},
                                      **kwargs):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = model_type(max_depth=max_depth, **kwargs)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)
            torch_model = hummingbird.ml.convert(model,
                                                 backend,
                                                 X,
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Random forest regressor
    def test_random_forest_regressor_converter(self):
        self._run_tree_regressor_converter(RandomForestRegressor,
                                           1000,
                                           n_estimators=10)

    # Random forest gemm regressor
    def test_random_forest_gemm_regressor_converter(self):
        self._run_tree_regressor_converter(
            RandomForestRegressor,
            1000,
            extra_config={constants.TREE_IMPLEMENTATION: "gemm"},
            n_estimators=10)

    # Random forest tree_trav regressor
    def test_random_forest_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            RandomForestRegressor,
            1000,
            extra_config={constants.TREE_IMPLEMENTATION: "tree_trav"},
            n_estimators=10)

    # Random forest perf_tree_trav regressor
    def test_random_forest_perf_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            RandomForestRegressor,
            1000,
            extra_config={constants.TREE_IMPLEMENTATION: "perf_tree_trav"},
            n_estimators=10)

    # Extra trees regressor
    def test_extra_trees_regressor_converter(self):
        self._run_tree_regressor_converter(ExtraTreesRegressor,
                                           1000,
                                           n_estimators=10)

    # Extra trees gemm regressor
    def test_extra_trees_gemm_regressor_converter(self):
        self._run_tree_regressor_converter(
            ExtraTreesRegressor,
            1000,
            extra_config={constants.TREE_IMPLEMENTATION: "gemm"},
            n_estimators=10)

    # Extra trees tree_trav regressor
    def test_extra_trees_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            ExtraTreesRegressor,
            1000,
            extra_config={constants.TREE_IMPLEMENTATION: "tree_trav"},
            n_estimators=10)

    # Extra trees perf_tree_trav regressor
    def test_extra_trees_perf_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            ExtraTreesRegressor,
            1000,
            extra_config={constants.TREE_IMPLEMENTATION: "perf_tree_trav"},
            n_estimators=10)

    # Decision tree regressor
    def test_decision_tree_regressor_converter(self):
        self._run_tree_regressor_converter(DecisionTreeRegressor, 1000)

    # Decision tree gemm regressor
    def test_decision_tree_gemm_regressor_converter(self):
        self._run_tree_regressor_converter(
            DecisionTreeRegressor,
            1000,
            extra_config={constants.TREE_IMPLEMENTATION: "gemm"})

    # Decision tree tree_trav regressor
    def test_decision_tree_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            DecisionTreeRegressor,
            1000,
            extra_config={constants.TREE_IMPLEMENTATION: "tree_trav"})

    # Decision tree perf_tree_trav regressor
    def test_decision_tree_perf_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            DecisionTreeRegressor,
            1000,
            extra_config={constants.TREE_IMPLEMENTATION: "perf_tree_trav"})

    # Decision tree classifier
    def test_decision_tree_classifier_converter(self):
        self._run_tree_classification_converter(DecisionTreeClassifier, 3)

    # Extra trees classifier
    def test_extra_trees_classifier_converter(self):
        self._run_tree_classification_converter(ExtraTreesClassifier,
                                                3,
                                                n_estimators=10)

    # Used for small tree tests
    def _run_random_forest_classifier_single_node_tree_converter(
            self, extra_config={}):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(1, 1)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(1, size=1)
        model = RandomForestClassifier(n_estimators=1).fit(X, y)
        torch_model = hummingbird.ml.convert(model,
                                             "torch",
                                             extra_config=extra_config)
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Small tree gemm implementation
    def test_random_forest_gemm_classifier_single_node_tree_converter(self):
        self._run_random_forest_classifier_single_node_tree_converter(
            extra_config={constants.TREE_IMPLEMENTATION: "gemm"})

    # Small tree tree_trav implementation
    def test_random_forest_tree_trav_classifier_single_node_tree_converter(
            self):
        self._run_random_forest_classifier_single_node_tree_converter(
            extra_config={constants.TREE_IMPLEMENTATION: "tree_trav"})

    # Small tree perf_tree_trav implementation
    def test_random_forest_perf_tree_trav_classifier_single_node_tree_converter(
            self):
        self._run_random_forest_classifier_single_node_tree_converter(
            extra_config={constants.TREE_IMPLEMENTATION: "perf_tree_trav"})

    # Another small-tree test
    def test_random_forest_classifier_small_tree_converter(self):
        seed = 0
        np.random.seed(seed=0)
        N = 9
        X = np.random.randn(N, 8)
        y = np.random.randint(low=0, high=2, size=N)
        model = RandomForestClassifier(random_state=seed)
        model.fit(X, y)
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Float 64 classification test helper
    def _run_float64_tree_classification_converter(self,
                                                   model_type,
                                                   num_classes,
                                                   extra_config={},
                                                   labels_shift=0,
                                                   **kwargs):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            np.random.seed(0)
            X = np.random.rand(100, 200)
            y = np.random.randint(num_classes, size=100) + labels_shift

            model = model_type(max_depth=max_depth, **kwargs)
            model.fit(X, y)
            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Random forest binary classifier (float64 data)
    def test_float64_random_forest_classifier_binary_converter(self):
        self._run_float64_tree_classification_converter(RandomForestClassifier,
                                                        2,
                                                        n_estimators=10)

    # Decision tree classifier (float64 data)
    def test_float64_decision_tree_classifier_converter(self):
        self._run_float64_tree_classification_converter(
            DecisionTreeClassifier, 3)

    # Extra trees classifier (float64 data)
    def test_float64_extra_trees_classifier_converter(self):
        self._run_float64_tree_classification_converter(ExtraTreesClassifier,
                                                        3,
                                                        n_estimators=10)

    # Float 64 regression tests helper
    def _run_float64_tree_regressor_converter(self,
                                              model_type,
                                              num_classes,
                                              extra_config={},
                                              **kwargs):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = model_type(max_depth=max_depth, **kwargs)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)
            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Random forest regressor (float64 data)
    def test_float64_random_forest_regressor_converter(self):
        self._run_float64_tree_regressor_converter(RandomForestRegressor,
                                                   1000,
                                                   n_estimators=10)

    # Decision tree regressor (float64 data)
    def test_float64_decision_tree_regressor_converter(self):
        self._run_float64_tree_regressor_converter(DecisionTreeRegressor, 1000)

    # Extra trees regressor (float64 data)
    def test_float64_extra_trees_regressor_converter(self):
        self._run_float64_tree_regressor_converter(ExtraTreesRegressor,
                                                   1000,
                                                   n_estimators=10)

    # Failure Cases
    def test_random_forest_classifier_raises_wrong_type(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=100).astype(
            np.float32)  # y must be int, not float: conversion should raise
        model = RandomForestClassifier(n_estimators=10).fit(X, y)
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "torch")

    def test_random_forest_classifier_raises_wrong_extra_config(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.array(np.random.rand(100, 200), dtype=np.float32)
        y = np.random.randint(3, size=100)
        model = RandomForestClassifier(n_estimators=10).fit(X, y)
        self.assertRaises(
            MissingConverter,
            hummingbird.ml.convert,
            model,
            "torch",
            extra_config={constants.TREE_IMPLEMENTATION: "nonsense"})
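
    # (The valid TREE_IMPLEMENTATION values exercised throughout this class
    # are "gemm", "tree_trav", and "perf_tree_trav"; anything else yields
    # MissingConverter, as asserted above.)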

    # Test trees with TorchScript backend
    # Random forest binary classifier
    def test_random_forest_ts_classifier_binary_converter(self):
        self._run_tree_classification_converter(RandomForestClassifier,
                                                2,
                                                "torch.jit",
                                                n_estimators=10)

    # Random forest gemm classifier
    def test_random_forest_ts_gemm_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            2,
            "torch.jit",
            extra_config={constants.TREE_IMPLEMENTATION: "gemm"},
            n_estimators=10)

    # Random forest tree_trav classifier
    def test_random_forest_ts_tree_trav_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            2,
            "torch.jit",
            extra_config={constants.TREE_IMPLEMENTATION: "tree_trav"},
            n_estimators=10)

    # Random forest perf_tree_trav classifier
    def test_random_forest_ts_perf_tree_trav_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            2,
            "torch.jit",
            extra_config={constants.TREE_IMPLEMENTATION: "perf_tree_trav"},
            n_estimators=10,
        )

    # Random forest multi classifier
    def test_random_forest_ts_multi_classifier_converter(self):
        self._run_tree_classification_converter(RandomForestClassifier,
                                                3,
                                                "torch.jit",
                                                n_estimators=10)

    # Random forest gemm multi classifier
    def test_random_forest_ts_gemm_multi_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            "torch.jit",
            extra_config={constants.TREE_IMPLEMENTATION: "gemm"},
            n_estimators=10)

    # Random forest tree_trav multi classifier
    def test_random_forest_ts_tree_trav_multi_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            "torch.jit",
            extra_config={constants.TREE_IMPLEMENTATION: "tree_trav"},
            n_estimators=10)

    # Random forest perf_tree_trav multi classifier
    def test_random_forest_ts_perf_tree_trav_multi_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            "torch.jit",
            extra_config={constants.TREE_IMPLEMENTATION: "perf_tree_trav"},
            n_estimators=10,
        )

    # Random forest gemm classifier shifted classes
    def test_random_forest_ts_gemm_classifier_shifted_labels_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            "torch.jit",
            labels_shift=2,
            extra_config={constants.TREE_IMPLEMENTATION: "gemm"},
            n_estimators=10,
        )

    # Random forest tree_trav classifier shifted classes
    def test_random_forest_ts_tree_trav_classifier_shifted_labels_converter(
            self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            "torch.jit",
            labels_shift=2,
            extra_config={constants.TREE_IMPLEMENTATION: "tree_trav"},
            n_estimators=10,
        )

    # Random forest perf_tree_trav classifier shifted classes
    def test_random_forest_ts_perf_tree_trav_classifier_shifted_labels_converter(
            self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            "torch.jit",
            labels_shift=2,
            extra_config={constants.TREE_IMPLEMENTATION: "perf_tree_trav"},
            n_estimators=10,
        )

    # Random forest regressor
    def test_random_forest_ts_regressor_converter(self):
        self._run_tree_regressor_converter(RandomForestRegressor,
                                           1000,
                                           "torch.jit",
                                           n_estimators=10)

    # Random forest gemm regressor
    def test_random_forest_ts_gemm_regressor_converter(self):
        self._run_tree_regressor_converter(
            RandomForestRegressor,
            1000,
            "torch.jit",
            extra_config={constants.TREE_IMPLEMENTATION: "gemm"},
            n_estimators=10)

    # Random forest tree_trav regressor
    def test_random_forest_ts_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            RandomForestRegressor,
            1000,
            "torch.jit",
            extra_config={constants.TREE_IMPLEMENTATION: "tree_trav"},
            n_estimators=10,
        )

    # Random forest perf_tree_trav regressor
    def test_random_forest_ts_perf_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            RandomForestRegressor,
            1000,
            "torch.jit",
            extra_config={constants.TREE_IMPLEMENTATION: "perf_tree_trav"},
            n_estimators=10,
        )

    # Extra trees regressor
    def test_extra_trees_ts_regressor_converter(self):
        self._run_tree_regressor_converter(ExtraTreesRegressor,
                                           1000,
                                           "torch.jit",
                                           n_estimators=10)

    # Extra trees gemm regressor
    def test_extra_trees_ts_gemm_regressor_converter(self):
        self._run_tree_regressor_converter(
            ExtraTreesRegressor,
            1000,
            "torch.jit",
            extra_config={constants.TREE_IMPLEMENTATION: "gemm"},
            n_estimators=10)

    # Extra trees tree_trav regressor
    def test_extra_trees_ts_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            ExtraTreesRegressor,
            1000,
            "torch.jit",
            extra_config={constants.TREE_IMPLEMENTATION: "tree_trav"},
            n_estimators=10)

    # Extra trees perf_tree_trav regressor
    def test_extra_trees_ts_perf_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            ExtraTreesRegressor,
            1000,
            "torch.jit",
            extra_config={constants.TREE_IMPLEMENTATION: "perf_tree_trav"},
            n_estimators=10,
        )

    # Decision tree regressor
    def test_decision_tree_ts_regressor_converter(self):
        self._run_tree_regressor_converter(DecisionTreeRegressor, 1000,
                                           "torch.jit")

    # Decision tree gemm regressor
    def test_decision_tree_ts_gemm_regressor_converter(self):
        self._run_tree_regressor_converter(
            DecisionTreeRegressor,
            1000,
            "torch.jit",
            extra_config={constants.TREE_IMPLEMENTATION: "gemm"})

    # Decision tree tree_trav regressor
    def test_decision_tree_ts_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            DecisionTreeRegressor,
            1000,
            "torch.jit",
            extra_config={constants.TREE_IMPLEMENTATION: "tree_trav"})

    # Decision tree perf_tree_trav regressor
    def test_decision_tree_ts_perf_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            DecisionTreeRegressor,
            1000,
            "torch.jit",
            extra_config={constants.TREE_IMPLEMENTATION: "perf_tree_trav"})

    # Decision tree classifier
    def test_decision_tree_ts_classifier_converter(self):
        self._run_tree_classification_converter(
            DecisionTreeClassifier,
            3,
            "torch.jit",
        )

    # Extra trees classifier
    def test_extra_trees_ts_classifier_converter(self):
        self._run_tree_classification_converter(ExtraTreesClassifier,
                                                3,
                                                "torch.jit",
                                                n_estimators=10)

    # Test trees with TVM backend
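    # These TVM tests also set constants.TVM_MAX_FUSE_DEPTH, which (as I read
    # it) caps TVM's operator-fusion depth so that compiling deep tree models
    # stays tractable; the particular values (10 or 30) are just what the
    # tests below pass.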
    # Random forest gemm classifier
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_random_forest_tvm_gemm_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            2,
            "tvm",
            extra_config={
                constants.TREE_IMPLEMENTATION: "gemm",
                constants.TVM_MAX_FUSE_DEPTH: 30
            },
            n_estimators=10,
        )

    # Random forest tree_trav classifier
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_random_forest_tvm_tree_trav_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            2,
            "tvm",
            extra_config={
                constants.TREE_IMPLEMENTATION: "tree_trav",
                constants.TVM_MAX_FUSE_DEPTH: 30
            },
            n_estimators=10,
        )

    # Random forest perf_tree_trav classifier
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_random_forest_tvm_perf_tree_trav_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            2,
            "tvm",
            extra_config={
                constants.TREE_IMPLEMENTATION: "perf_tree_trav",
                constants.TVM_MAX_FUSE_DEPTH: 30
            },
            n_estimators=10,
        )

    # Random forest gemm multi classifier
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_random_forest_tvm_gemm_multi_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            "tvm",
            extra_config={
                constants.TREE_IMPLEMENTATION: "gemm",
                constants.TVM_MAX_FUSE_DEPTH: 30
            },
            n_estimators=10,
        )

    # Random forest tree_trav multi classifier
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_random_forest_tvm_tree_trav_multi_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            "tvm",
            extra_config={
                constants.TREE_IMPLEMENTATION: "tree_trav",
                constants.TVM_MAX_FUSE_DEPTH: 30
            },
            n_estimators=10,
        )

    # Random forest perf_tree_trav multi classifier
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_random_forest_tvm_perf_tree_trav_multi_classifier_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            "tvm",
            extra_config={
                constants.TREE_IMPLEMENTATION: "perf_tree_trav",
                constants.TVM_MAX_FUSE_DEPTH: 30
            },
            n_estimators=10,
        )

    # Random forest gemm classifier shifted classes
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_random_forest_tvm_gemm_classifier_shifted_labels_converter(self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            "tvm",
            labels_shift=2,
            extra_config={
                constants.TREE_IMPLEMENTATION: "gemm",
                constants.TVM_MAX_FUSE_DEPTH: 30
            },
            n_estimators=10,
        )

    # Random forest tree_trav classifier shifted classes
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_random_forest_tvm_tree_trav_classifier_shifted_labels_converter(
            self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            "tvm",
            labels_shift=2,
            extra_config={
                constants.TREE_IMPLEMENTATION: "tree_trav",
                constants.TVM_MAX_FUSE_DEPTH: 30
            },
            n_estimators=10,
        )

    # Random forest perf_tree_trav classifier shifted classes
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_random_forest_tvm_perf_tree_trav_classifier_shifted_labels_converter(
            self):
        self._run_tree_classification_converter(
            RandomForestClassifier,
            3,
            "tvm",
            labels_shift=2,
            extra_config={
                constants.TREE_IMPLEMENTATION: "perf_tree_trav",
                constants.TVM_MAX_FUSE_DEPTH: 10
            },
            n_estimators=10,
        )

    # Random forest gemm regressor
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_random_forest_tvm_gemm_regressor_converter(self):
        self._run_tree_regressor_converter(
            RandomForestRegressor,
            1000,
            "tvm",
            extra_config={
                constants.TREE_IMPLEMENTATION: "gemm",
                constants.TVM_MAX_FUSE_DEPTH: 30
            },
            n_estimators=10,
        )

    # Random forest tree_trav regressor
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_random_forest_tvm_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            RandomForestRegressor,
            1000,
            "tvm",
            extra_config={
                constants.TREE_IMPLEMENTATION: "tree_trav",
                constants.TVM_MAX_FUSE_DEPTH: 30
            },
            n_estimators=10,
        )

    # Random forest perf_tree_trav regressor
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_random_forest_tvm_perf_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            RandomForestRegressor,
            1000,
            "tvm",
            extra_config={
                constants.TREE_IMPLEMENTATION: "perf_tree_trav",
                constants.TVM_MAX_FUSE_DEPTH: 10
            },
            n_estimators=10,
        )

    # Extra trees gemm regressor
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_extra_trees_tvm_gemm_regressor_converter(self):
        self._run_tree_regressor_converter(
            ExtraTreesRegressor,
            1000,
            "tvm",
            extra_config={
                constants.TREE_IMPLEMENTATION: "gemm",
                constants.TVM_MAX_FUSE_DEPTH: 30
            },
            n_estimators=10,
        )

    # Extra trees tree_trav regressor
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_extra_trees_tvm_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            ExtraTreesRegressor,
            1000,
            "tvm",
            extra_config={
                constants.TREE_IMPLEMENTATION: "tree_trav",
                constants.TVM_MAX_FUSE_DEPTH: 30
            },
            n_estimators=10,
        )

    # Extra trees perf_tree_trav regressor
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_extra_trees_tvm_perf_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            ExtraTreesRegressor,
            1000,
            "tvm",
            extra_config={
                constants.TREE_IMPLEMENTATION: "perf_tree_trav",
                constants.TVM_MAX_FUSE_DEPTH: 10
            },
            n_estimators=10,
        )

    # Decision tree regressor
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_decision_tree_tvm_regressor_converter(self):
        self._run_tree_regressor_converter(
            DecisionTreeRegressor,
            1000,
            "tvm",
            extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

    # Decision tree gemm regressor
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_decision_tree_tvm_gemm_regressor_converter(self):
        self._run_tree_regressor_converter(
            DecisionTreeRegressor,
            1000,
            "tvm",
            extra_config={
                constants.TREE_IMPLEMENTATION: "gemm",
                constants.TVM_MAX_FUSE_DEPTH: 30
            },
        )

    # Decision tree tree_trav regressor
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_decision_tree_tvm_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            DecisionTreeRegressor,
            1000,
            "tvm",
            extra_config={
                constants.TREE_IMPLEMENTATION: "tree_trav",
                constants.TVM_MAX_FUSE_DEPTH: 30
            },
        )

    # Decision tree perf_tree_trav regressor
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_decision_tree_tvm_perf_tree_trav_regressor_converter(self):
        self._run_tree_regressor_converter(
            DecisionTreeRegressor,
            1000,
            "tvm",
            extra_config={
                constants.TREE_IMPLEMENTATION: "perf_tree_trav",
                constants.TVM_MAX_FUSE_DEPTH: 10
            },
        )

    # Decision tree classifier
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_decision_tree_tvm_classifier_converter(self):
        self._run_tree_classification_converter(
            DecisionTreeClassifier,
            3,
            "tvm",
            extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

    # Extra trees classifier
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_extra_trees_tvm_classifier_converter(self):
        self._run_tree_classification_converter(
            ExtraTreesClassifier,
            3,
            "tvm",
            n_estimators=10,
            extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})

    # TreeRegressor multioutput regression
    def test_tree_regressors_multioutput_regression(self):
        for tree_method in ['gemm', 'tree_trav', 'perf_tree_trav']:
            for n_targets in [1, 2, 7]:
                for tree_class in [
                        DecisionTreeRegressor, ExtraTreesRegressor,
                        RandomForestRegressor
                ]:
                    model = tree_class()
                    X, y = datasets.make_regression(n_samples=100,
                                                    n_features=10,
                                                    n_informative=5,
                                                    n_targets=n_targets,
                                                    random_state=2021)
                    model.fit(X, y)

                    torch_model = hummingbird.ml.convert(
                        model,
                        "torch",
                        extra_config={
                            constants.TREE_IMPLEMENTATION: tree_method
                        })
                    self.assertIsNotNone(torch_model)
                    np.testing.assert_allclose(model.predict(X),
                                               torch_model.predict(X),
                                               rtol=1e-5,
                                               atol=1e-5)
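

# A minimal, self-contained sketch of the conversion flow the tests above
# exercise; every name used here (hummingbird.ml.convert,
# constants.TREE_IMPLEMENTATION) comes from the tests themselves,
# nothing new is assumed.
def _example_tree_convert_roundtrip():
    np.random.seed(0)
    X = np.random.rand(100, 200).astype(np.float32)
    y = np.random.randint(2, size=100)
    model = RandomForestClassifier(n_estimators=10).fit(X, y)
    hb_model = hummingbird.ml.convert(
        model, "torch", X,
        extra_config={constants.TREE_IMPLEMENTATION: "gemm"})
    # The converted model should agree with sklearn within tolerance.
    np.testing.assert_allclose(model.predict(X), hb_model.predict(X),
                               rtol=1e-06, atol=1e-06)
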
class TestSklearnScalerConverter(unittest.TestCase):
    def _test_robust_scaler_floats(self,
                                   with_centering,
                                   with_scaling,
                                   backend="torch"):
        # Generate a random 2D array with values in [0, 1000)
        np.random.seed(0)
        data = np.random.rand(100, 200) * 1000
        data = np.array(data, dtype=np.float32)
        data_tensor = torch.from_numpy(data)

        model = RobustScaler(with_centering=with_centering,
                             with_scaling=with_scaling)
        model.fit(data)
        torch_model = hummingbird.ml.convert(model, backend, data)
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.transform(data),
                                   torch_model.transform(data_tensor),
                                   rtol=1e-06,
                                   atol=1e-06)

    def _test_standard_scaler_floats(self,
                                     with_mean,
                                     with_std,
                                     backend="torch"):
        # Generate a random 2D array with values in [0, 1000)
        np.random.seed(0)
        data = np.random.rand(100, 200) * 1000
        data = np.array(data, dtype=np.float32)
        data_tensor = torch.from_numpy(data)

        model = StandardScaler(with_mean=with_mean, with_std=with_std)
        model.fit(data)
        torch_model = hummingbird.ml.convert(model, backend, data)
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.transform(data),
                                   torch_model.transform(data_tensor),
                                   rtol=1e-06,
                                   atol=1e-06)

    def test_robust_scaler_floats_torch_false_false(self):
        self._test_robust_scaler_floats(False, False)

    def test_robust_scaler_floats_torch_true_false(self):
        self._test_robust_scaler_floats(True, False)

    def test_robust_scaler_floats_torch_false_true(self):
        self._test_robust_scaler_floats(False, True)

    def test_robust_scaler_floats_torch_true_true(self):
        self._test_robust_scaler_floats(True, True)

    def test_standard_scaler_floats_torch_false_false(self):
        self._test_standard_scaler_floats(False, False)

    def test_standard_scaler_floats_torch_true_false(self):
        self._test_standard_scaler_floats(True, False)

    def test_standard_scaler_floats_torch_true_true(self):
        self._test_standard_scaler_floats(True, True)

    def test_max_abs_scaler_floats(self):
        # Generate a random 2D array with values in [0, 1000)
        np.random.seed(0)
        data = np.random.rand(100, 200) * 1000
        data = np.array(data, dtype=np.float32)
        data_tensor = torch.from_numpy(data)

        model = MaxAbsScaler()
        model.fit(data)
        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.transform(data),
                                   torch_model.transform(data_tensor),
                                   rtol=1e-06,
                                   atol=1e-06)

    def test_min_max_scaler_floats(self):
        # Generate a random 2D array with values in [0, 1000)
        np.random.seed(0)
        data = np.random.rand(100, 200) * 1000
        data = np.array(data, dtype=np.float32)
        data_tensor = torch.from_numpy(data)

        model = MinMaxScaler()
        model.fit(data)
        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.transform(data),
                                   torch_model.transform(data_tensor),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Float 64 data tests
    def test_float64_robust_scaler_floats(self):
        # Generate a random 2D array with values in [0, 1000)
        np.random.seed(0)
        data = np.random.rand(100, 200) * 1000
        data_tensor = torch.from_numpy(data)

        model = RobustScaler(with_centering=False, with_scaling=False)
        model.fit(data)
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.transform(data),
                                   torch_model.transform(data_tensor),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Tests with TVM backend
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_standard_scaler_floats_tvm_false_false(self):
        self._test_standard_scaler_floats(False, False, "tvm")

    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_standard_scaler_floats_tvm_true_false(self):
        self._test_standard_scaler_floats(True, False, "tvm")

    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_standard_scaler_floats_tvm_true_true(self):
        self._test_standard_scaler_floats(True, True, "tvm")

    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_robust_scaler_floats_tvm_false_false(self):
        self._test_robust_scaler_floats(False, False, "tvm")

    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_robust_scaler_floats_tvm_true_false(self):
        self._test_robust_scaler_floats(True, False, "tvm")

    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_robust_scaler_floats_tvm_false_true(self):
        self._test_robust_scaler_floats(False, True, "tvm")

    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_robust_scaler_floats_tvm_true_true(self):
        self._test_robust_scaler_floats(True, True, "tvm")
Example #21
class TestBackends(unittest.TestCase):
    # Test backends are browsable
    def test_backends(self):
        warnings.filterwarnings("ignore")
        self.assertTrue(len(hummingbird.ml.backends) > 0)

    # Test backends are not case sensitive
    def test_backends_case_sensitive(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Test pytorch is still a valid backend name
    def test_backends_pytorch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "pytOrCh")
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Test pytorch save and load
    def test_pytorch_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")
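        # save() emits a "pt-tmp.zip" archive and load() appears to unpack it
        # into a "pt-tmp" directory, hence both cleanups at the end of this test.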

        hb_model_loaded = hummingbird.ml.TorchContainer.load("pt-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("pt-tmp.zip")
        shutil.rmtree("pt-tmp")

    # Test pytorch save and generic load
    def test_pytorch_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(hb_model)
        hb_model.save("pt-tmp")

        hb_model_loaded = hummingbird.ml.load("pt-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("pt-tmp.zip")
        shutil.rmtree("pt-tmp")

    # Test torchscript save and load
    def test_torchscript_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch.jit", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("ts-tmp")

        hb_model_loaded = hummingbird.ml.TorchContainer.load("ts-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("ts-tmp.zip")
        shutil.rmtree("ts-tmp")

    # Test torchscript save and generic load
    def test_torchscript_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch.jit", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("ts-tmp")

        hb_model_loaded = hummingbird.ml.load("ts-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("ts-tmp.zip")
        shutil.rmtree("ts-tmp")

    # Test that unsupported backends raise
    def test_unsupported_backend(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test that the scala backend raises an exception
        self.assertRaises(MissingBackend, hummingbird.ml.convert, model,
                          "scala")

    # Test torchscript requires test_data
    def test_torchscript_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test torchscript requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model,
                          "torch.jit")

    # Test TVM requires test_data
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_test_data(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Test tvm requires test_input
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "tvm")

    # Test tvm save and load
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp")

        hb_model_loaded = hummingbird.ml.TVMContainer.load("tvm-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("tvm-tmp.zip")
        shutil.rmtree("tvm-tmp")

    # Test tvm save and generic load
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp")

        hb_model_loaded = hummingbird.ml.load("tvm-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("tvm-tmp.zip")
        shutil.rmtree("tvm-tmp")

    # Test tvm save and load zip file
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM test requires TVM installed")
    def test_tvm_save_load_zip(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("tvm-tmp.zip")

        hb_model_loaded = hummingbird.ml.TVMContainer.load("tvm-tmp.zip")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("tvm-tmp.zip")
        shutil.rmtree("tvm-tmp")

    # Test onnx requires test_data or initial_types
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_float(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx 0 shape input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_zero_shape_input(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(model,
                                        initial_types=[
                                            ("input",
                                             DoubleTensorType([0, X.shape[1]]))
                                        ],
                                        target_opset=11)

        # Test onnx requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, double input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_double(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", DoubleTensorType([X.shape[0],
                                                       X.shape[1]]))],
            target_opset=11)

        # Converting the ONNX model requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, long input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_long(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.int64)

        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", Int64TensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # Converting the ONNX model requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, int input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_int(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([[1, 2, 3]], dtype=np.int32)
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", Int32TensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=11)

        # Converting the ONNX model requires no test_data
        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx")
        assert hb_model

    # Test onnx no test_data, string input
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_no_test_data_string(self):
        warnings.filterwarnings("ignore")
        model = OneHotEncoder()
        X = np.array([["a", "b", "c"]])
        model.fit(X)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model,
            initial_types=[("input", StringTensorType([X.shape[0],
                                                       X.shape[1]]))],
            target_opset=11)

        # String inputs without test_data are not supported: conversion should raise
        self.assertRaises(RuntimeError, hummingbird.ml.convert, onnx_ml_model,
                          "onnx")

    # Test ONNX save and load
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_save_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        hb_model_loaded = hummingbird.ml.ONNXContainer.load("onnx-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("onnx-tmp.zip")
        shutil.rmtree("onnx-tmp")

    # Test ONNX save and generic load
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    def test_onnx_save_generic_load(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10,
                                           max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "onnx", X)
        self.assertIsNotNone(hb_model)
        hb_model.save("onnx-tmp")

        hb_model_loaded = hummingbird.ml.load("onnx-tmp")
        np.testing.assert_allclose(hb_model_loaded.predict_proba(X),
                                   hb_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

        os.remove("onnx-tmp.zip")
        shutil.rmtree("onnx-tmp")

    # Test for when the user forgets to add a target (ex: convert(model, output) rather than convert(model, 'torch')) due to API change
    def test_forgotten_backend_string(self):
        from sklearn.preprocessing import LabelEncoder

        model = LabelEncoder()
        data = np.array([1, 4, 5, 2, 0, 2], dtype=np.int32)
        model.fit(data)

        self.assertRaises(ValueError, hummingbird.ml.convert, model,
                          [("input", Int32TensorType([6, 1]))])
class TestSklearnLinearClassifiers(unittest.TestCase):

    # LogisticRegression test function to be parameterized
    def _test_logistic_regression(self,
                                  num_classes,
                                  solver="liblinear",
                                  multi_class="auto",
                                  labels_shift=0):
        if num_classes > 2:
            model = LogisticRegression(solver=solver,
                                       multi_class=multi_class,
                                       fit_intercept=True)
        else:
            model = LogisticRegression(solver="liblinear", fit_intercept=True)

        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100) + labels_shift

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-6,
                                   atol=1e-6)

    # LogisticRegression binary
    def test_logistic_regression_bi(self):
        self._test_logistic_regression(2)

    # LogisticRegression multiclass with auto
    def test_logistic_regression_multi_auto(self):
        self._test_logistic_regression(3)

    # LogisticRegression with class labels shifted
    def test_logistic_regression_shifted_classes(self):
        self._test_logistic_regression(3, labels_shift=2)

    # LogisticRegression with multi+ovr
    def test_logistic_regression_multi_ovr(self):
        self._test_logistic_regression(3, multi_class="ovr")

    # LogisticRegression with multi+multinomial+sag
    def test_logistic_regression_multi_multin_sag(self):
        warnings.filterwarnings("ignore")
        # this will not converge due to small test size
        self._test_logistic_regression(3,
                                       multi_class="multinomial",
                                       solver="sag")

    # LogisticRegression binary lbfgs
    def test_logistic_regression_bi_lbfgs(self):
        warnings.filterwarnings("ignore")
        # this will not converge due to small test size
        self._test_logistic_regression(2, solver="lbfgs")

    # LogisticRegression with multi+lbfgs
    def test_logistic_regression_multi_lbfgs(self):
        warnings.filterwarnings("ignore")
        # this will not converge due to small test size
        self._test_logistic_regression(3, solver="lbfgs")

    # LogisticRegression with multi+multinomial+lbfgs
    def test_logistic_regression_multi_multin_lbfgs(self):
        warnings.filterwarnings("ignore")
        # this will not converge due to small test size
        self._test_logistic_regression(3,
                                       multi_class="multinomial",
                                       solver="lbfgs")

    # LogisticRegression with multi+ovr+lbfgs
    def test_logistic_regression_multi_ovr_lbfgs(self):
        warnings.filterwarnings("ignore")
        # this will not converge due to small test size
        self._test_logistic_regression(3, multi_class="ovr", solver="lbfgs")

    # LinearRegression test function to be parameterized
    def _test_linear_regression(self, y_input):
        model = LinearRegression()

        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = y_input

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict(X),
                                   torch_model.predict(X),
                                   rtol=1e-6,
                                   atol=1e-6)

    # LinearRegression with ints
    def test_linear_regression_int(self):
        np.random.seed(0)
        self._test_linear_regression(np.random.randint(2, size=100))

    # LinearRegression with floats
    def test_linear_regression_float(self):
        np.random.seed(0)
        self._test_linear_regression(np.random.rand(100))

    # LogisticRegressionCV test function to be parameterized
    def _test_logistic_regression_cv(self,
                                     num_classes,
                                     solver="liblinear",
                                     multi_class="auto",
                                     labels_shift=0):
        if num_classes > 2:
            model = LogisticRegressionCV(solver=solver,
                                         multi_class=multi_class,
                                         fit_intercept=True)
        else:
            model = LogisticRegressionCV(solver="liblinear",
                                         fit_intercept=True)

        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100) + labels_shift

        model.fit(X, y)
        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-6,
                                   atol=1e-6)

    # LogisticRegressionCV with 2 classes
    def test_logistic_regression_cv_bi(self):
        self._test_logistic_regression_cv(2)

    # LogisticRegressionCV with 3 classes
    def test_logistic_regression_cv_multi(self):
        self._test_logistic_regression_cv(3)

    # LogisticRegressionCV with shifted classes
    def test_logistic_regression_cv_shifted_classes(self):
        self._test_logistic_regression_cv(3, labels_shift=2)

    # LogisticRegressionCV with multi+ovr
    def test_logistic_regression_cv_multi_ovr(self):
        self._test_logistic_regression_cv(3, multi_class="ovr")

    # LogisticRegressionCV with multi+multinomial
    def test_logistic_regression_cv_multi_multin(self):
        warnings.filterwarnings("ignore")
        # this will not converge due to small test size
        self._test_logistic_regression_cv(3,
                                          multi_class="multinomial",
                                          solver="sag")

    # SGDClassifier test function to be parameterized
    def _test_sgd_classifier(self, num_classes):

        model = SGDClassifier(loss="log")

        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict(X),
                                   torch_model.predict(X),
                                   rtol=1e-6,
                                   atol=1e-6)

    # SGDClassifier with 2 classes
    def test_sgd_classifier_bi(self):
        self._test_sgd_classifier(2)

    # SGDClassifier with 3 classes
    def test_sgd_classifier_multi(self):
        self._test_sgd_classifier(3)

    # Failure Cases
    def test_sklearn_linear_model_raises_wrong_type(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=100).astype(
            np.float32)  # y must be int, not float, should error
        model = SGDClassifier().fit(X, y)
        self.assertRaises(RuntimeError, hummingbird.ml.convert, model, "torch")

    # Float 64 data tests
    def test_float64_linear_regression(self):
        model = LinearRegression()

        np.random.seed(0)
        X = np.random.rand(100, 200)
        y = np.random.randint(2, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict(X),
                                   torch_model.predict(X),
                                   rtol=1e-6,
                                   atol=1e-6)

    def test_float64_sgd_classifier(self):

        model = SGDClassifier(loss="log")

        np.random.seed(0)
        num_classes = 3
        X = np.random.rand(100, 200)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict(X),
                                   torch_model.predict(X),
                                   rtol=1e-6,
                                   atol=1e-6)

    # Test TorchScript backend.
    def test_logistic_regression_ts(self):

        model = LogisticRegression(solver="liblinear")

        data = datasets.load_iris()
        X, y = data.data, data.target
        X = X.astype(np.float32)

        model.fit(X, y)

        ts_model = hummingbird.ml.convert(model, "torch.jit", X)
        self.assertTrue(ts_model is not None)
        np.testing.assert_allclose(model.predict(X),
                                   ts_model.predict(X),
                                   rtol=1e-6,
                                   atol=1e-6)
        np.testing.assert_allclose(model.predict_proba(X),
                                   ts_model.predict_proba(X),
                                   rtol=1e-6,
                                   atol=1e-6)

    # Test TVM backends.
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_sgd_classifier_tvm(self):

        model = SGDClassifier(loss="log")

        np.random.seed(0)
        num_classes = 3
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        tvm_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertTrue(tvm_model is not None)
        np.testing.assert_allclose(model.predict(X),
                                   tvm_model.predict(X),
                                   rtol=1e-6,
                                   atol=1e-6)
        np.testing.assert_allclose(model.predict_proba(X),
                                   tvm_model.predict_proba(X),
                                   rtol=1e-6,
                                   atol=1e-6)

    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_lr_tvm(self):

        model = LinearRegression()

        np.random.seed(0)
        num_classes = 1000
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

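        # TVM_MAX_FUSE_DEPTH caps how deep TVM's operator fusion may go;
        # presumably this keeps compilation tractable for a wide model like
        # this one (200 input features).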
        tvm_model = hummingbird.ml.convert(
            model, "tvm", X, extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})
        self.assertTrue(tvm_model is not None)

        np.testing.assert_allclose(model.predict(X),
                                   tvm_model.predict(X),
                                   rtol=1e-6,
                                   atol=1e-3)
class TestExtraConf(unittest.TestCase):
    # Test default number of threads. It will only work on mac after 1.6 https://github.com/pytorch/pytorch/issues/43036
    @unittest.skipIf(
        sys.platform == "darwin" and LooseVersion(torch.__version__) <= LooseVersion("1.6.0"),
        reason="PyTorch has a bug on mac related to multi-threading",
    )
    def test_torch_default_n_threads(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")

        self.assertIsNotNone(hb_model)
        self.assertTrue(torch.get_num_threads() == psutil.cpu_count(logical=False))
        self.assertTrue(torch.get_num_interop_threads() == 1)

    # Test one thread in pytorch.
    @unittest.skipIf(
        sys.platform == "darwin" and LooseVersion(torch.__version__) > LooseVersion("1.6.0"),
        reason="Setting threading multi times will break on mac",
    )
    def test_torch_one_thread(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch", extra_config={constants.N_THREADS: 1})

        self.assertIsNotNone(hb_model)
        self.assertTrue(torch.get_num_threads() == 1)
        self.assertTrue(torch.get_num_interop_threads() == 1)

    # Test default number of threads onnx.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_default_n_threads(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx", X)

        self.assertIsNotNone(hb_model)
        self.assertTrue(hb_model._session.get_session_options().intra_op_num_threads == psutil.cpu_count(logical=False))
        self.assertTrue(hb_model._session.get_session_options().inter_op_num_threads == 1)

    # Test one thread onnx.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_one_thread(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "onnx", X, extra_config={constants.N_THREADS: 1})

        self.assertIsNotNone(hb_model)
        self.assertTrue(hb_model._session.get_session_options().intra_op_num_threads == 1)
        self.assertTrue(hb_model._session.get_session_options().inter_op_num_threads == 1)

    # Test pytorch regressor with batching.
    def test_torch_regression_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

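        # convert_batch compiles the model against a fixed-size batch of rows
        # and runs prediction batch-by-batch; remainder_size announces the
        # size of the final partial batch (here 100 % 10 == 0, so no remainder).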
        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test pytorch classifier with batching.
    def test_torch_classification_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test pytorch isolation forest with batching.
    def test_torch_iforest_batch(self):
        warnings.filterwarnings("ignore")
        num_classes = 2
        model = IsolationForest(n_estimators=10, max_samples=2)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

    # Test pytorch regressor with batching and uneven rows.
    def test_torch_batch_regression_uneven(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=105)

        model.fit(X, y)

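        # 105 rows with batch_size 10 leaves a final partial batch of 5 rows
        # (105 % 10), which is exactly what remainder_size communicates.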
        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test pytorch classification with batching and uneven rows.
    def test_torch_batch_classification_uneven(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=105)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test pytorch transform with batching and uneven rows.
    def test_torch_batch_transform(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)

        model.fit(X)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

    # Test torchscript regression with batching.
    def test_torchscript_regression_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test torchscript classification with batching.
    def test_torchscript_classification_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test torchscript iforest with batching.
    def test_torchscript_iforest_batch(self):
        warnings.filterwarnings("ignore")
        num_classes = 2
        model = IsolationForest(n_estimators=10, max_samples=2)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

    # Test torchscript transform with batching and uneven rows.
    def test_torchscript_batch_transform(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(101, 200)
        X = np.array(X, dtype=np.float32)

        model.fit(X)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

    # Test onnx transform with batching and uneven rows.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_batch_transform(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(101, 200)
        X = np.array(X, dtype=np.float32)

        model.fit(X)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

    # Test onnx regression with batching.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_regression_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test onnx classification with batching.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_classification_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test onnx iforest with batching.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_iforest_batch(self):
        warnings.filterwarnings("ignore")
        num_classes = 2
        model = IsolationForest(n_estimators=10, max_samples=2)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

    # Test tvm transform with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_tvm_batch_transform(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)

        model.fit(X)

        batch_size = 10
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :])

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

    # Test tvm regression with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_tvm_regression_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test tvm classification with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_tvm_classification_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        batch_size = 10
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :])

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test tvm iforest with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_tvm_iforest_batch(self):
        warnings.filterwarnings("ignore")
        num_classes = 2
        model = IsolationForest(n_estimators=10, max_samples=2)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        batch_size = 10
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :])

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

    # Test tvm transform with batching and an uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_tvm_batch_remainder_transform(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)

        model.fit(X)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

    # Test tvm regression with batching and an uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_tvm_regression_remainder_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=105)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test tvm classification with batching and an uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_tvm_classification_remainder_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=105)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test tvm iforest with batching and an uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_tvm_iforest_remainder_batch(self):
        warnings.filterwarnings("ignore")
        num_classes = 2
        model = IsolationForest(n_estimators=10, max_samples=2)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=105)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

    # Test batch with pandas.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pandas_batch(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:149, :3]
        y = iris.target[:149]
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
            ]
        )

        pipeline.fit(X_train, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        torch_model = hummingbird.ml.convert_batch(
            pipeline, "torch", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
        )

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train), torch_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
        )

    # Test batch with pandas and TorchScript.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pandas_batch_ts(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:149, :3]
        y = iris.target[:149]
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
            ]
        )

        pipeline.fit(X_train, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        torch_model = hummingbird.ml.convert_batch(
            pipeline, "torch.jit", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
        )

        self.assertTrue(torch_model is not None)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train), torch_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
        )

    # Test batch with pandas onnx.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    @unittest.skipIf(not onnx_runtime_installed(), reason="ONNXML test requires ONNX and ORT")
    def test_pandas_batch_onnx(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:149, :3]
        y = iris.target[:149]
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
            ]
        )

        pipeline.fit(X_train, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(
            pipeline, "onnx", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
        )

        self.assertTrue(hb_model is not None)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train), hb_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
        )

    # Test batch with pandas from onnxml.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    def test_pandas_batch_onnxml(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
            ]
        )

        pipeline.fit(X_train, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            pipeline,
            initial_types=[
                ("vA", DoubleTensorType([X.shape[0], 1])),
                ("vB", DoubleTensorType([X.shape[0], 1])),
                ("vC", DoubleTensorType([X.shape[0], 1])),
            ],
            target_opset=9,
        )

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(
            onnx_ml_model, "onnx", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
        )

        self.assertTrue(hb_model is not None)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train), hb_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
        )

    # Test batch with pandas tvm.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_pandas_batch_tvm(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:149, :3]
        y = iris.target[:149]
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
            ]
        )

        pipeline.fit(X_train, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(
            pipeline, "tvm", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
        )

        self.assertTrue(hb_model is not None)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train), hb_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
        )

    # Check converter with model name set as extra_config.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML test requires ONNX, ORT and ONNXMLTOOLS"
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_pytorch_extra_config(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        # Create ONNX model
        model_name = "hummingbird.ml.test.lightgbm"
        onnx_model = hummingbird.ml.convert(onnx_ml_model, "onnx", extra_config={constants.ONNX_OUTPUT_MODEL_NAME: model_name})

        assert onnx_model.model.graph.name == model_name

    # Test max fuse depth configuration in TVM.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_max_fuse(self):
        warnings.filterwarnings("ignore")

        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test that TVM without padding raises an error if sizes don't match.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_no_padding(self):
        warnings.filterwarnings("ignore")

        np.random.seed(0)
        X = np.random.rand(100, 20)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)
        model = lgb.LGBMRegressor(n_estimators=10)
        model.fit(X, y)

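        # TVM engines are compiled for the exact input shape seen at
        # conversion time (100 rows here), so a 98-row batch should fail the
        # shape check.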
        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        self.assertRaises(AssertionError, hb_model.predict, X[:98])

    # Test padding in TVM.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_padding(self):
        warnings.filterwarnings("ignore")

        np.random.seed(0)
        X = np.random.rand(100, 20)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)
        model = lgb.LGBMRegressor(n_estimators=10)
        model.fit(X, y)

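        # With TVM_PAD_INPUT enabled, a batch smaller than the compiled shape
        # (98 < 100 rows) is padded up to it instead of raising as above.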
        hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_PAD_INPUT: True})
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X[:98]), hb_model.predict(X[:98]), rtol=1e-06, atol=1e-06)

    # Test padding in TVM does not create problems when not necessary.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_padding_2(self):
        warnings.filterwarnings("ignore")

        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_PAD_INPUT: True})
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test max string length.
    def test_max_str_length(self):
        model = LabelEncoder()
        data = [
            "paris",
            "tokyo",
            "amsterdam",
            "tokyo",
        ]
        model.fit(data)

        torch_model = hummingbird.ml.convert(model, "torch", extra_config={constants.MAX_STRING_LENGTH: 20})

        np.testing.assert_allclose(model.transform(data), torch_model.transform(data), rtol=1e-06, atol=1e-06)
    # Test that tvm_installed reports False when TVM is not available.
    @unittest.skipIf(tvm_installed(), reason="Test requires TVM to not be installed")
    def test_tvm_installed_false(self):
        warnings.filterwarnings("ignore")
        assert not tvm_installed()