Example #1

import unittest
import warnings

from hummingbird.ml._utils import lightgbm_installed, xgboost_installed

class TestNoExtra(unittest.TestCase):
    """
    These tests are meant to be run on a clean container after doing
    `pip install hummingbird-ml` without any of the `extra` packages
    """

    # Test no LGBM returns false on lightgbm_installed()
    @unittest.skipIf(lightgbm_installed(),
                     reason="Test when LightGBM is not installed")
    def test_lightgbm_installed_false(self):
        warnings.filterwarnings("ignore")
        assert not lightgbm_installed()

    # Test no XGB returns false on xgboost_installed()
    @unittest.skipIf(xgboost_installed(),
                     reason="Test when XGBoost is not installed")
    def test_xgboost_installed_false(self):
        warnings.filterwarnings("ignore")
        assert not xgboost_installed()

    # Test that we can import the converter successfully without installing [extra]
    def test_import_convert_no_extra(self):
        try:
            from hummingbird.ml import convert
        except Exception:  # TODO something more specific?
            self.fail(
                "Unexpected Error on importing convert without extra packages")
Example #2

import sys
import unittest
import warnings
from distutils.version import LooseVersion

import numpy as np
import psutil
import torch
from sklearn import datasets
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, IsolationForest
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler

import hummingbird.ml
from hummingbird.ml import constants
from hummingbird.ml._utils import (
    lightgbm_installed,
    onnx_ml_tools_installed,
    onnx_runtime_installed,
    pandas_installed,
    tvm_installed,
)

if lightgbm_installed():
    import lightgbm as lgb

if onnx_ml_tools_installed():
    from onnxmltools.convert import convert_sklearn, convert_lightgbm
    from onnxmltools.convert.common.data_types import DoubleTensorType, FloatTensorType

class TestExtraConf(unittest.TestCase):
    # Test default number of threads. It will only work on mac after 1.6 https://github.com/pytorch/pytorch/issues/43036
    @unittest.skipIf(
        sys.platform == "darwin" and LooseVersion(torch.__version__) <= LooseVersion("1.6.0"),
        reason="PyTorch has a bug on mac related to multi-threading",
    )
    def test_torch_default_n_threads(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "torch")

        self.assertIsNotNone(hb_model)
        self.assertTrue(torch.get_num_threads() == psutil.cpu_count(logical=False))
        self.assertTrue(torch.get_num_interop_threads() == 1)

    # Test one thread in pytorch.
    @unittest.skipIf(
        sys.platform == "darwin" and LooseVersion(torch.__version__) > LooseVersion("1.6.0"),
        reason="Setting threading multi times will break on mac",
    )
    def test_torch_one_thread(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

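        # constants.N_THREADS caps how many threads the converted model may use at inference time.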
        hb_model = hummingbird.ml.convert(model, "torch", extra_config={constants.N_THREADS: 1})

        self.assertIsNotNone(hb_model)
        self.assertTrue(torch.get_num_threads() == 1)
        self.assertTrue(torch.get_num_interop_threads() == 1)

    # Test default number of threads with ONNX.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML tests require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_default_n_threads(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        hb_model = hummingbird.ml.convert(onnx_ml_model, "onnx", X)

        self.assertIsNotNone(hb_model)
        self.assertTrue(hb_model._session.get_session_options().intra_op_num_threads == psutil.cpu_count(logical=False))
        self.assertTrue(hb_model._session.get_session_options().inter_op_num_threads == 1)

    # Test one thread onnx.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML tests require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_one_thread(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "onnx", X, extra_config={constants.N_THREADS: 1})

        self.assertIsNotNone(hb_model)
        self.assertTrue(hb_model._session.get_session_options().intra_op_num_threads == 1)
        self.assertTrue(hb_model._session.get_session_options().inter_op_num_threads == 1)

    # Test pytorch regressor with batching.
    def test_torch_regression_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        batch_size = 10
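        # The last batch has X.shape[0] % batch_size rows; convert_batch needs this remainder size up front.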
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test pytorch classifier with batching.
    def test_torch_classification_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test pytorch iforest with batching.
    def test_torch_iforest_batch(self):
        warnings.filterwarnings("ignore")
        num_classes = 2
        model = IsolationForest(n_estimators=10, max_samples=2)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

    # Test pytorch regressor with batching and uneven rows.
    def test_torch_batch_regression_uneven(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=105)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test pytorch classification with batching and uneven rows.
    def test_torch_batch_classification_uneven(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=105)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test pytorch transform with batching and uneven rows.
    def test_torch_batch_transform(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)

        model.fit(X)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

    # Test torchscript regression with batching.
    def test_torchscript_regression_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test torchscript classification with batching.
    def test_torchscript_classification_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test torchscript iforest with batching.
    def test_torchscript_iforest_batch(self):
        warnings.filterwarnings("ignore")
        num_classes = 2
        model = IsolationForest(n_estimators=10, max_samples=2)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

    # Test torchscript transform with batching and uneven rows.
    def test_torchscript_batch_transform(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(101, 200)
        X = np.array(X, dtype=np.float32)

        model.fit(X)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "torch.jit", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

    # Test onnx transform with batching and uneven rows.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML tests require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_batch_transform(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(101, 200)
        X = np.array(X, dtype=np.float32)

        model.fit(X)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

    # Test onnx regression with batching.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML tests require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_regression_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test onnx classification with batching.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML tests require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_classification_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test onnx iforest with batching.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML tests require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_onnx_iforest_batch(self):
        warnings.filterwarnings("ignore")
        num_classes = 2
        model = IsolationForest(n_estimators=10, max_samples=2)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "onnx", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

    # Test tvm transform with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_tvm_batch_transform(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)

        model.fit(X)

        batch_size = 10
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :])

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

    # Test tvm regression with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_tvm_regression_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(103, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=103)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size=remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test tvm classification with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_tvm_classification_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        batch_size = 10
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :])

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test tvm iforest with batching.
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_tvm_iforest_batch(self):
        warnings.filterwarnings("ignore")
        num_classes = 2
        model = IsolationForest(n_estimators=10, max_samples=2)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)

        batch_size = 10
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :])

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

    # Test tvm transform with batching and an uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_tvm_batch_remainder_transform(self):
        warnings.filterwarnings("ignore")
        model = StandardScaler(with_mean=True, with_std=True)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)

        model.fit(X)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.transform(X), hb_model.transform(X), rtol=1e-06, atol=1e-06)

    # Test tvm regression with batching and an uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_tvm_regression_remainder_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingRegressor(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=105)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test tvm classification with batching and an uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_tvm_classification_remainder_batch(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
        num_classes = 2
        model = GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=105)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.predict_proba(X), hb_model.predict_proba(X), rtol=1e-06, atol=1e-06)

    # Test tvm iforest with batching and an uneven number of records.
    @unittest.skipIf(not tvm_installed(), reason="TVM tests require TVM")
    def test_tvm_iforest_remainder_batch(self):
        warnings.filterwarnings("ignore")
        num_classes = 2
        model = IsolationForest(n_estimators=10, max_samples=2)
        np.random.seed(0)
        X = np.random.rand(105, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=105)

        model.fit(X, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(model, "tvm", X[:batch_size, :], remainder_size)

        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.decision_function(X), hb_model.decision_function(X), rtol=1e-06, atol=1e-06)
        np.testing.assert_allclose(model.score_samples(X), hb_model.score_samples(X), rtol=1e-06, atol=1e-06)

    # Test batch with pandas.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pandas_batch(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:149, :3]
        y = iris.target[:149]
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
            ]
        )

        pipeline.fit(X_train, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        torch_model = hummingbird.ml.convert_batch(
            pipeline, "torch", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
        )

        self.assertIsNotNone(torch_model)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train), torch_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
        )

    # Test batch with pandas ts.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    def test_pandas_batch_ts(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:149, :3]
        y = iris.target[:149]
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
            ]
        )

        pipeline.fit(X_train, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        torch_model = hummingbird.ml.convert_batch(
            pipeline, "torch.jit", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
        )

        self.assertIsNotNone(torch_model)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train), torch_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
        )

    # Test batch with pandas onnx.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    @unittest.skipIf(not onnx_runtime_installed(), reason="ONNXML tests require ONNX and ORT")
    def test_pandas_batch_onnx(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:149, :3]
        y = iris.target[:149]
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
            ]
        )

        pipeline.fit(X_train, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(
            pipeline, "onnx", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
        )

        self.assertIsNotNone(hb_model)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train), hb_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
        )

    # Test batch with pandas from onnxml.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML tests require ONNX, ORT and ONNXMLTOOLS"
    )
    def test_pandas_batch_onnxml(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:, :3]
        y = iris.target
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
            ]
        )

        pipeline.fit(X_train, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_sklearn(
            pipeline,
            initial_types=[
                ("vA", DoubleTensorType([X.shape[0], 1])),
                ("vB", DoubleTensorType([X.shape[0], 1])),
                ("vC", DoubleTensorType([X.shape[0], 1])),
            ],
            target_opset=9,
        )

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(
            onnx_ml_model, "onnx", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
        )

        self.assertIsNotNone(hb_model)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train), hb_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
        )

    # Test batch with pandas tvm.
    @unittest.skipIf(not pandas_installed(), reason="Test requires pandas installed")
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM")
    def test_pandas_batch_tvm(self):
        import pandas

        max_depth = 10
        iris = datasets.load_iris()
        X = iris.data[:149, :3]
        y = iris.target[:149]
        columns = ["vA", "vB", "vC"]
        X_train = pandas.DataFrame(X, columns=columns)

        pipeline = Pipeline(
            steps=[
                ("preprocessor", ColumnTransformer(transformers=[], remainder="passthrough",)),
                ("classifier", GradientBoostingClassifier(n_estimators=10, max_depth=max_depth)),
            ]
        )

        pipeline.fit(X_train, y)

        batch_size = 10
        remainder_size = X.shape[0] % batch_size
        hb_model = hummingbird.ml.convert_batch(
            pipeline, "tvm", pandas.DataFrame(X[:batch_size], columns=columns), remainder_size
        )

        self.assertIsNotNone(hb_model)

        np.testing.assert_allclose(
            pipeline.predict_proba(X_train), hb_model.predict_proba(X_train), rtol=1e-06, atol=1e-06,
        )

    # Check converter with model name set as extra_config.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()), reason="ONNXML tests require ONNX, ORT and ONNXMLTOOLS"
    )
    @unittest.skipIf(not lightgbm_installed(), reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_pytorch_extra_config(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model, initial_types=[("input", FloatTensorType([X.shape[0], X.shape[1]]))], target_opset=9
        )

        # Create ONNX model
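        # constants.ONNX_OUTPUT_MODEL_NAME sets the graph name recorded in the exported ONNX model.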
        model_name = "hummingbird.ml.test.lightgbm"
        onnx_model = hummingbird.ml.convert(onnx_ml_model, "onnx", extra_config={constants.ONNX_OUTPUT_MODEL_NAME: model_name})

        assert onnx_model.model.graph.name == model_name

    # Test max fuse depth configuration in TVM.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_max_fuse(self):
        warnings.filterwarnings("ignore")

        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_MAX_FUSE_DEPTH: 30})
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test that TVM without padding raises an error if sizes don't match.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_no_padding(self):
        warnings.filterwarnings("ignore")

        np.random.seed(0)
        X = np.random.rand(100, 20)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)
        model = lgb.LGBMRegressor(n_estimators=10)
        model.fit(X, y)

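        # The TVM model is compiled for X's exact shape, so predicting on fewer rows should fail without padding.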
        hb_model = hummingbird.ml.convert(model, "tvm", X)
        self.assertIsNotNone(hb_model)
        self.assertRaises(AssertionError, hb_model.predict, X[:98])

    # Test padding in TVM.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_padding(self):
        warnings.filterwarnings("ignore")

        np.random.seed(0)
        X = np.random.rand(100, 20)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)
        model = lgb.LGBMRegressor(n_estimators=10)
        model.fit(X, y)

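        # constants.TVM_PAD_INPUT pads smaller inputs up to the compiled batch size, so the 98-row slice below still works.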
        hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_PAD_INPUT: True})
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X[:98]), hb_model.predict(X[:98]), rtol=1e-06, atol=1e-06)

    # Test padding in TVM does not create problems when not necessary.
    @unittest.skipIf(not tvm_installed(), reason="TVM test requires TVM installed")
    def test_tvm_padding_2(self):
        warnings.filterwarnings("ignore")

        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        hb_model = hummingbird.ml.convert(model, "tvm", X, extra_config={constants.TVM_PAD_INPUT: True})
        self.assertIsNotNone(hb_model)
        np.testing.assert_allclose(model.predict(X), hb_model.predict(X), rtol=1e-06, atol=1e-06)

    # Test max string length.
    def test_max_str_length(self):
        model = LabelEncoder()
        data = [
            "paris",
            "tokyo",
            "amsterdam",
            "tokyo",
        ]
        model.fit(data)

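        # constants.MAX_STRING_LENGTH caps the string width the converted model allocates for its inputs.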
        torch_model = hummingbird.ml.convert(model, "torch", extra_config={constants.MAX_STRING_LENGTH: 20})

        np.testing.assert_allclose(model.transform(data), torch_model.transform(data), rtol=1e-06, atol=1e-06)
Example #3

import sys
import unittest
import warnings
from distutils.version import LooseVersion

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import torch

import hummingbird.ml
from hummingbird.ml._utils import (
    onnx_ml_tools_installed,
    onnx_runtime_installed,
    pandas_installed,
    lightgbm_installed,
    tvm_installed,
)
from hummingbird.ml import constants

if lightgbm_installed():
    import lightgbm as lgb

if onnx_ml_tools_installed():
    from onnxmltools.convert import convert_sklearn, convert_lightgbm


class TestExtraConf(unittest.TestCase):
    # Test default number of threads. It will only work on mac after 1.6 https://github.com/pytorch/pytorch/issues/43036
    @unittest.skipIf(
        sys.platform == "darwin" and LooseVersion(torch.__version__) <= LooseVersion("1.6.0"),
        reason="PyTorch has a bug on mac related to multi-threading",
    )
    def test_torch_default_n_threads(self):
        warnings.filterwarnings("ignore")
        max_depth = 10
Example #4

import unittest
import warnings

import numpy as np

import hummingbird.ml
from hummingbird.ml._utils import lightgbm_installed

if lightgbm_installed():
    import lightgbm as lgb

# Note: gbdt_implementation_map, used below, maps each tree_implementation name
# to its expected converter class; it is defined elsewhere in the original test module.

class TestLGBMConverter(unittest.TestCase):
    # Check tree implementation
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_implementation(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(10, 1)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=10)

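        # Hummingbird ships three tree-inference strategies: "gemm" (trees compiled
        # into matrix multiplications), "tree_trav" (generic tree traversal), and
        # "perf_tree_trav" (traversal specialized for perfect trees).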
        for model in [
                lgb.LGBMClassifier(n_estimators=1, max_depth=1),
                lgb.LGBMRegressor(n_estimators=1, max_depth=1)
        ]:
            for extra_config_param in ["tree_trav", "perf_tree_trav", "gemm"]:
                model.fit(X, y)

                torch_model = hummingbird.ml.convert(
                    model,
                    "torch",
                    extra_config={"tree_implementation": extra_config_param})
                self.assertIsNotNone(torch_model)
                self.assertEqual(
                    str(type(list(torch_model.operator_map.values())[0])),
                    gbdt_implementation_map[extra_config_param])

    def _run_lgbm_classifier_converter(self, num_classes, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Binary classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_classifier_converter(self):
        self._run_lgbm_classifier_converter(2)

    # Gemm classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "gemm"})

    # Tree_trav classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "perf_tree_trav"})

    # Multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(3)

    # Gemm multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            3, extra_config={"tree_implementation": "gemm"})

    # Tree_trav multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            3, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            3, extra_config={"tree_implementation": "perf_tree_trav"})

    def _run_lgbm_ranker_converter(self,
                                   num_classes,
                                   extra_config={},
                                   label_gain=None):
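        # LightGBM's default label_gain covers only relevance labels below 31, so
        # ranker tests with larger label ranges pass label_gain explicitly.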
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRanker(n_estimators=10,
                                   max_depth=max_depth,
                                   label_gain=label_gain)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

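            # LGBMRanker needs query group sizes; all rows here form a single query group.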
            model.fit(X,
                      y,
                      group=[X.shape[0]],
                      eval_set=[(X, y)],
                      eval_group=[X.shape[0]])

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Ranker - small, no label gain
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_ranker_converter_no_label(self):
        self._run_lgbm_ranker_converter(30)

    # Ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_ranker_converter(self):
        self._run_lgbm_ranker_converter(1000, label_gain=list(range(1000)))

    # Gemm ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000,
            extra_config={"tree_implementation": "gemm"},
            label_gain=list(range(1000)))

    # Tree_trav ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000,
            extra_config={"tree_implementation": "tree_trav"},
            label_gain=list(range(1000)))

    # Perf_tree_trav ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000,
            extra_config={"tree_implementation": "perf_tree_trav"},
            label_gain=list(range(1000)))

    def _run_lgbm_regressor_converter(self, num_classes, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_regressor_converter(self):
        self._run_lgbm_regressor_converter(1000)

    # Gemm regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_regressor_converter(self):
        self._run_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "gemm"})

    # Tree_trav regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_regressor_converter(self):
        self._run_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_regressor_converter(self):
        self._run_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "perf_tree_trav"})
Example #5
def test_lightgbm_installed_false(self):
    warnings.filterwarnings("ignore")
    assert not lightgbm_installed()
Example #6

import unittest
import warnings

import numpy as np

from hummingbird.ml import convert
from hummingbird.ml._utils import (
    lightgbm_installed,
    onnx_ml_tools_installed,
    onnx_runtime_installed,
)

if lightgbm_installed():
    import lightgbm as lgb

if onnx_runtime_installed():
    import onnxruntime as ort

if onnx_ml_tools_installed():
    from onnxmltools.convert import convert_lightgbm
    from onnxmltools.convert.common.data_types import FloatTensorType

class TestONNXLightGBMConverter(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super(TestONNXLightGBMConverter, self).__init__(*args, **kwargs)

    # Base test implementation comparing ONNXML and ONNX models.
    def _test_lgbm(self, X, model, extra_config={}):
        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=9)

        # Create ONNX model
        onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config)

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name
            for i in range(len(session.get_outputs()))
        ]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        pred = session.run(output_names, inputs)
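        # ONNX-ML classifiers emit a label output and a probability output; their order varies, so match by name.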
        for i in range(len(output_names)):
            if "label" in output_names[i]:
                onnx_ml_pred[1] = pred[i]
            else:
                onnx_ml_pred[0] = pred[i]

        # Get the predictions for the ONNX model
        onnx_pred = [[] for i in range(len(output_names))]
        if len(output_names) == 1:  # regression
            onnx_pred = onnx_model.predict(X)
        else:  # classification
            onnx_pred[0] = onnx_model.predict_proba(X)
            onnx_pred[1] = onnx_model.predict(X)

        return onnx_ml_pred, onnx_pred, output_names

    # Utility function for testing regression models.
    def _test_regressor(self,
                        X,
                        model,
                        rtol=1e-06,
                        atol=1e-06,
                        extra_config={}):
        onnx_ml_pred, onnx_pred, output_names = self._test_lgbm(
            X, model, extra_config)

        # Check that predicted values match
        np.testing.assert_allclose(onnx_ml_pred[0].ravel(),
                                   onnx_pred,
                                   rtol=rtol,
                                   atol=atol)

    # Utility function for testing classification models.
    def _test_classifier(self,
                         X,
                         model,
                         rtol=1e-06,
                         atol=1e-06,
                         extra_config={}):
        onnx_ml_pred, onnx_pred, output_names = self._test_lgbm(
            X, model, extra_config)

        np.testing.assert_allclose(onnx_ml_pred[1],
                                   onnx_pred[1],
                                   rtol=rtol,
                                   atol=atol)  # labels
        np.testing.assert_allclose(list(
            map(lambda x: list(x.values()), onnx_ml_pred[0])),
                                   onnx_pred[0],
                                   rtol=rtol,
                                   atol=atol)  # probs

    # Check that ONNXML models can also target other backends.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_onnx_pytorch(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX-ML model
        onnx_ml_model = convert_lightgbm(
            model,
            initial_types=[("input", FloatTensorType([X.shape[0],
                                                      X.shape[1]]))],
            target_opset=9)

        pt_model = convert(onnx_ml_model, "torch", X)
        assert pt_model

        # Get the predictions for the ONNX-ML model
        session = ort.InferenceSession(onnx_ml_model.SerializeToString())
        output_names = [
            session.get_outputs()[i].name
            for i in range(len(session.get_outputs()))
        ]
        onnx_ml_pred = [[] for i in range(len(output_names))]
        inputs = {session.get_inputs()[0].name: X}
        onnx_ml_pred = session.run(output_names, inputs)

        np.testing.assert_allclose(onnx_ml_pred[0].flatten(),
                                   pt_model.predict(X))

    # Basic regression test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_regressor(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(n_total, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMRegressor()
        model.fit(X, y)
        import platform

        # TODO bug on newer macOS versions?
        if platform.system() == "Darwin":
            self._test_regressor(X, model, rtol=1e-05, atol=1e-04)
        else:
            self._test_regressor(X, model)

    # Regression test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_regressor(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 1 estimator (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_regressor1(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(n_estimators=1, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with 2 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_regressor2(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(n_estimators=2,
                                  max_depth=1,
                                  min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model.fit(X, y)
        self._test_regressor(X, model)

    # Regression test with gbdt boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_regressor(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 1.1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "regression",
                "n_estimators": 3,
                "min_child_samples": 1,
                "max_depth": 1
            },
            data,
        )
        self._test_regressor(X, model)

    # Binary classification test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_binary(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with float64.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_binary_float64(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)

        onnx_model = convert(model, "onnx", X)

        np.testing.assert_allclose(model.predict(X), onnx_model.predict(X))

    # Binary classification test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_classifier(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators zipmap (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        model.fit(X, y)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and selecting boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_classifier(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "binary",
                "n_estimators": 3,
                "min_child_samples": 1
            }, data)
        self._test_classifier(X, model)

    # Binary classification test with 3 estimators and an explicit boosting type, zipmap variant (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_classifier_zipmap(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "binary",
                "n_estimators": 3,
                "min_child_samples": 1
            }, data)
        self._test_classifier(X, model)

    # Multiclass classification test.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_onnxml_model_multi(self):
        warnings.filterwarnings("ignore")
        n_features = 28
        n_total = 100
        np.random.seed(0)
        X = np.random.rand(n_total, n_features)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(3, size=n_total)

        # Create LightGBM model
        model = lgb.LGBMClassifier()
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_classifier_multi(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
        X = [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 2, 1, 1, 2]
        model.fit(X, y)
        self._test_classifier(X, model)

    # Multiclass classification test with 3 estimators and an explicit boosting type (taken from ONNXMLTOOLS).
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML test require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_booster_multi_classifier(self):
        warnings.filterwarnings("ignore")
        X = [[0, 1], [1, 1], [2, 0], [1, 2], [-1, 2], [1, -2]]
        X = np.array(X, dtype=np.float32)
        y = [0, 1, 0, 1, 2, 2]
        data = lgb.Dataset(X, label=y)
        model = lgb.train(
            {
                "boosting_type": "gbdt",
                "objective": "multiclass",
                "n_estimators": 3,
                "min_child_samples": 1,
                "num_class": 3
            },
            data,
        )
        self._test_classifier(X, model)
Exemplo n.º 7
0
class TestLGBMConverter(unittest.TestCase):
    # Check tree implementation
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_implementation(self):
        warnings.filterwarnings("ignore")
        np.random.seed(0)
        X = np.random.rand(10, 1)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=10)

        for model in [
                lgb.LGBMClassifier(n_estimators=1, max_depth=1),
                lgb.LGBMRegressor(n_estimators=1, max_depth=1)
        ]:
            for extra_config_param in ["tree_trav", "perf_tree_trav", "gemm"]:
                model.fit(X, y)

                torch_model = hummingbird.ml.convert(
                    model,
                    "torch",
                    extra_config={"tree_implementation": extra_config_param})
                self.assertIsNotNone(torch_model)
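                # The first operator of the converted model should match the
                # requested tree implementation.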
                self.assertEqual(
                    str(type(list(torch_model.model._operators)[0])),
                    gbdt_implementation_map[extra_config_param])

    def _run_lgbm_classifier_converter(self, num_classes, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Binary classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_classifier_converter(self):
        self._run_lgbm_classifier_converter(2)

    # Gemm classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "gemm"})

    # Tree_trav classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "perf_tree_trav"})

    # Multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(3)

    # Gemm multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            3, extra_config={"tree_implementation": "gemm"})

    # Tree_trav multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            3, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav multi classifier
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_multi_classifier_converter(self):
        self._run_lgbm_classifier_converter(
            3, extra_config={"tree_implementation": "perf_tree_trav"})

    def _run_lgbm_ranker_converter(self,
                                   num_classes,
                                   extra_config={},
                                   label_gain=None):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRanker(n_estimators=10,
                                   max_depth=max_depth,
                                   label_gain=label_gain)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

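            # LGBMRanker requires query group sizes; here the whole training
            # set is treated as a single query group.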
            model.fit(X,
                      y,
                      group=[X.shape[0]],
                      eval_set=[(X, y)],
                      eval_group=[X.shape[0]])

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Ranker - small, no label gain
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_ranker_converter_no_label(self):
        self._run_lgbm_ranker_converter(30)

    # Ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_ranker_converter(self):
        self._run_lgbm_ranker_converter(1000, label_gain=list(range(1000)))

    # Gemm ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000,
            extra_config={"tree_implementation": "gemm"},
            label_gain=list(range(1000)))

    # Tree_trav ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000,
            extra_config={"tree_implementation": "tree_trav"},
            label_gain=list(range(1000)))

    # Perf_tree_trav ranker
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_ranker_converter(self):
        self._run_lgbm_ranker_converter(
            1000,
            extra_config={"tree_implementation": "perf_tree_trav"},
            label_gain=list(range(1000)))

    def _run_lgbm_regressor_converter(self, num_classes, extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_binary_regressor_converter(self):
        self._run_lgbm_regressor_converter(1000)

    # Gemm regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_gemm_regressor_converter(self):
        self._run_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "gemm"})

    # Tree_trav regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tree_trav_regressor_converter(self):
        self._run_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav regressor
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_perf_tree_trav_regressor_converter(self):
        self._run_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "perf_tree_trav"})

    # Float 64 classification test helper
    def _run_float64_lgbm_classifier_converter(self,
                                               num_classes,
                                               extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Gemm classifier (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_gemm_classifier_converter(self):
        self._run_float64_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "gemm"})

    # Tree_trav classifier (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_tree_trav_classifier_converter(self):
        self._run_float64_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav classifier (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_perf_tree_trav_classifier_converter(self):
        self._run_float64_lgbm_classifier_converter(
            2, extra_config={"tree_implementation": "perf_tree_trav"})

    # Float 64 regression test helper
    def _run_float64_lgbm_regressor_converter(self,
                                              num_classes,
                                              extra_config={}):
        warnings.filterwarnings("ignore")
        for max_depth in [1, 3, 8, 10, 12, None]:
            model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            y = np.random.randint(num_classes, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torch",
                                                 extra_config=extra_config)
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Gemm regressor (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_gemm_regressor_converter(self):
        self._run_float64_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "gemm"})

    # Tree_trav regressor (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_tree_trav_regressor_converter(self):
        self._run_float64_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "tree_trav"})

    # Perf_tree_trav regressor (float64 data)
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_float64_lgbm_perf_tree_trav_regressor_converter(self):
        self._run_float64_lgbm_regressor_converter(
            1000, extra_config={"tree_implementation": "perf_tree_trav"})

    # Random forest in LightGBM: with boosting_type="rf" the direct
    # conversion to PyTorch fails with the latest version of lightgbm.
    # Either the direct converter should be updated, or the model can be
    # converted to ONNX first and then to PyTorch (a hedged sketch of that
    # round trip follows this test). For details, see the ONNX converter at
    # https://github.com/onnx/onnxmltools/blob/master/onnxmltools/convert/lightgbm/operator_converters/LightGbm.py#L313.
    @unittest.skipIf(
        True, reason="boosting_type=='rf' produces different probabilites.")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_classifier_random_forest_rf(self):
        warnings.filterwarnings("ignore")

        model = lgb.LGBMClassifier(boosting_type="rf",
                                   n_estimators=128,
                                   max_depth=5,
                                   subsample=0.3,
                                   bagging_freq=1)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)
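
    # Hedged sketch of the ONNX round-trip workaround mentioned above: convert
    # the rf-boosted model to ONNX with onnxmltools first, then hand the ONNX
    # model to Hummingbird. This is illustrative rather than part of the
    # original suite: the test name is hypothetical, and it assumes that
    # onnxmltools' convert_lightgbm and Hummingbird's ONNX frontend accept
    # this model.
    @unittest.skipIf(
        not (onnx_ml_tools_installed() and onnx_runtime_installed()),
        reason="ONNXML tests require ONNX, ORT and ONNXMLTOOLS")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_classifier_rf_via_onnx_roundtrip(self):
        warnings.filterwarnings("ignore")
        from onnxmltools.convert import convert_lightgbm
        from onnxmltools.convert.common.data_types import FloatTensorType

        model = lgb.LGBMClassifier(boosting_type="rf",
                                   n_estimators=16,
                                   max_depth=5,
                                   subsample=0.3,
                                   bagging_freq=1)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)
        model.fit(X, y)

        # Step 1: LightGBM -> ONNX via onnxmltools.
        onnx_ml_model = convert_lightgbm(
            model,
            initial_types=[("input", FloatTensorType([None, X.shape[1]]))])

        # Step 2: ONNX -> PyTorch via Hummingbird's ONNX frontend.
        torch_model = hummingbird.ml.convert(onnx_ml_model, "torch", X)
        self.assertIsNotNone(torch_model)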

    # Same configuration as above but with boosting_type="gbdt", which converts correctly.
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_classifier_random_forest_gbdt(self):
        warnings.filterwarnings("ignore")

        model = lgb.LGBMClassifier(boosting_type="gbdt",
                                   n_estimators=128,
                                   max_depth=5,
                                   subsample=0.3,
                                   bagging_freq=1)
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(2, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict_proba(X),
                                   torch_model.predict_proba(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Test Tweedie loss in lgbm
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_tweedie(self):
        warnings.filterwarnings("ignore")
        model = lgb.LGBMRegressor(objective="tweedie",
                                  n_estimators=2,
                                  max_depth=5)

        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(100, size=100)

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")
        self.assertIsNotNone(torch_model)
        np.testing.assert_allclose(model.predict(X),
                                   torch_model.predict(X),
                                   rtol=1e-06,
                                   atol=1e-06)

    # Backend tests.
    # Test TorchScript backend regression.
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_regressor_converter_torchscript(self):
        warnings.filterwarnings("ignore")

        for max_depth in [1, 3, 8, 10, 12]:
            model = lgb.LGBMRegressor(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(1000, size=100)

            model.fit(X, y)

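            # TorchScript conversion traces the model, so a sample input X is required.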
            torch_model = hummingbird.ml.convert(model,
                                                 "torchscript",
                                                 X,
                                                 extra_config={})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict(X),
                                       torch_model.predict(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Test TorchScript backend classification.
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lgbm_classifier_converter_torchscript(self):
        warnings.filterwarnings("ignore")

        for max_depth in [1, 3, 8, 10, 12]:
            model = lgb.LGBMClassifier(n_estimators=10, max_depth=max_depth)
            np.random.seed(0)
            X = np.random.rand(100, 200)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(2, size=100)

            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model,
                                                 "torchscript",
                                                 X,
                                                 extra_config={})
            self.assertIsNotNone(torch_model)
            np.testing.assert_allclose(model.predict_proba(X),
                                       torch_model.predict_proba(X),
                                       rtol=1e-06,
                                       atol=1e-06)

    # Check that we can export into ONNX.
    @unittest.skipIf(not onnx_runtime_installed(),
                     reason="ONNX export test requires ONNX runtime installed")
    @unittest.skipIf(not lightgbm_installed(),
                     reason="LightGBM test requires LightGBM installed")
    def test_lightgbm_onnx(self):
        warnings.filterwarnings("ignore")

        X = [[0, 1], [1, 1], [2, 0]]
        X = np.array(X, dtype=np.float32)
        y = np.array([100, -10, 50], dtype=np.float32)
        model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
        model.fit(X, y)

        # Create ONNX model
        onnx_model = hummingbird.ml.convert(model, "onnx", X)

        np.testing.assert_allclose(
            onnx_model.predict(X).flatten(), model.predict(X))

    # TVM backend tests.
    @unittest.skipIf(not (tvm_installed()), reason="TVM tests require TVM")
    def test_lightgbm_tvm_regressor(self):
        warnings.filterwarnings("ignore")

        for tree_implementation in ["gemm", "tree_trav", "perf_tree_trav"]:
            X = [[0, 1], [1, 1], [2, 0]]
            X = np.array(X, dtype=np.float32)
            y = np.array([100, -10, 50], dtype=np.float32)
            model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1)
            model.fit(X, y)

            # Create TVM model.
            tvm_model = hummingbird.ml.convert(
                model,
                "tvm",
                X,
                extra_config={"tree_implementation": tree_implementation})

            # Check results.
            np.testing.assert_allclose(tvm_model.predict(X), model.predict(X))

    @unittest.skipIf(not tvm_installed(),
                     reason="TVM tests require TVM installed")
    def test_lightgbm_tvm_classifier(self):
        warnings.filterwarnings("ignore")

        for tree_implementation in ["gemm", "tree_trav", "perf_tree_trav"]:
            X = [[0, 1], [1, 1], [2, 0]]
            X = np.array(X, dtype=np.float32)
            y = np.array([0, 1, 0], dtype=np.float32)
            model = lgb.LGBMClassifier(n_estimators=3, min_child_samples=1)
            model.fit(X, y)

            # Create TVM model.
            tvm_model = hummingbird.ml.convert(
                model,
                "tvm",
                X,
                extra_config={"tree_implementation": tree_implementation})

            # Check results.
            np.testing.assert_allclose(tvm_model.predict(X), model.predict(X))
            np.testing.assert_allclose(tvm_model.predict_proba(X),
                                       model.predict_proba(X))

    # Test TVM with large input datasets.
    @unittest.skipIf(not tvm_installed(),
                     reason="TVM tests require TVM installed")
    def test_lightgbm_tvm_classifier_large_dataset(self):
        warnings.filterwarnings("ignore")

        for tree_implementation in ["gemm", "tree_trav", "perf_tree_trav"]:
            size = 200000
            np.random.seed(0)
            X = np.random.rand(size, 28)
            X = np.array(X, dtype=np.float32)
            y = np.random.randint(2, size=size)
            model = lgb.LGBMClassifier(n_estimators=100, max_depth=3)
            model.fit(X, y)

            # Create TVM model. TREE_OP_PRECISION_DTYPE switches the tree
            # operators to float64 math, which keeps large-batch results
            # numerically close to LightGBM.
            tvm_model = hummingbird.ml.convert(
                model,
                "tvm",
                X,
                extra_config={
                    constants.TREE_IMPLEMENTATION: tree_implementation,
                    constants.TREE_OP_PRECISION_DTYPE: "float64"
                })

            # Check results.
            np.testing.assert_allclose(tvm_model.predict_proba(X),
                                       model.predict_proba(X),
                                       rtol=1e-04,
                                       atol=1e-04)