Example #1
    def setup_method(self, method):
        # super().setup_method(method)
        self.model = XGBoost(config={
            'n_estimators': 5,
            'max_depth': 2,
            'tree_method': 'hist'
        })
        feature_cols = ["f", "f2"]
        target_col = "t"
        train_df = pd.DataFrame({
            "f": np.random.randn(20),
            "f2": np.random.randn(20),
            "t": np.random.randint(20)
        })
        val_df = pd.DataFrame({
            "f": np.random.randn(5),
            "f2": np.random.randn(5),
            "t": np.random.randint(5)
        })

        ft = IdentityTransformer(feature_cols=feature_cols,
                                 target_col=target_col)

        self.x, self.y = ft.transform(train_df)
        self.val_x, self.val_y = ft.transform(val_df)
Example #2
    def build(self, config):
        from zoo.orca.automl.xgboost.XGBoost import XGBoost
        model = XGBoost(model_type=self.model_type, config=config)

        if self.n_cpus is not None:
            model.set_params(n_jobs=self.n_cpus)
        return model
Example #3
class TestXgbregressor(ZooTestCase):
    def setup_method(self, method):
        # super().setup_method(method)
        self.model = XGBoost(config={
            'n_estimators': 5,
            'max_depth': 2,
            'tree_method': 'hist'
        })
        feature_cols = ["f", "f2"]
        target_col = "t"
        train_df = pd.DataFrame({
            "f": np.random.randn(20),
            "f2": np.random.randn(20),
            "t": np.random.randint(20)
        })
        val_df = pd.DataFrame({
            "f": np.random.randn(5),
            "f2": np.random.randn(5),
            "t": np.random.randint(5)
        })

        ft = IdentityTransformer(feature_cols=feature_cols,
                                 target_col=target_col)

        self.x, self.y = ft.transform(train_df)
        self.val_x, self.val_y = ft.transform(val_df)

    def teardown_method(self, method):
        pass

    def test_fit_predict_evaluate(self):
        self.model.fit_eval((self.x, self.y), [(self.val_x, self.val_y)])

        # test predict
        result = self.model.predict(self.val_x)

        # test evaluate
        evaluate_result = self.model.evaluate(self.val_x, self.val_y)

    def test_save_restore(self):
        self.model.fit_eval((self.x, self.y), [(self.val_x, self.val_y)])

        result_save = self.model.predict(self.val_x)
        model_file = "tmp.pkl"
        self.model.save(model_file)
        assert os.path.isfile(model_file)
        new_model = XGBoost()
        new_model.restore(model_file)
        assert new_model.model
        result_restore = new_model.predict(self.val_x)
        assert_array_almost_equal(
            result_save, result_restore, decimal=2,
            err_msg="Prediction values are not the same after restore: "
                    "predict before is {}, and predict after is {}"
                    .format(result_save, result_restore))
        os.remove(model_file)
Example #4
    def test_save_restore(self):
        self.model.fit_eval((self.x, self.y), [(self.val_x, self.val_y)])

        result_save = self.model.predict(self.val_x)
        model_file = "tmp.pkl"
        self.model.save(model_file)
        assert os.path.isfile(model_file)
        new_model = XGBoost()
        new_model.restore(model_file)
        assert new_model.model
        result_restore = new_model.predict(self.val_x)
        assert_array_almost_equal(
            result_save, result_restore, decimal=2,
            err_msg="Prediction values are not the same after restore: "
                    "predict before is {}, and predict after is {}"
                    .format(result_save, result_restore))
        os.remove(model_file)
Example #5
def load_xgboost_pipeline(file, model_type="regressor"):
    from zoo.zouwu.feature.identity_transformer import IdentityTransformer
    from zoo.orca.automl.xgboost.XGBoost import XGBoost

    feature_transformers = IdentityTransformer()
    model = XGBoost(model_type=model_type)

    all_config = restore_zip(file, feature_transformers, model)
    ts_pipeline = TimeSequencePipeline(feature_transformers=feature_transformers,
                                       model=model,
                                       config=all_config)
    print("Restore pipeline from", file)
    return ts_pipeline
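
A hedged sketch of how the returned pipeline might be used. The file name, the DataFrame columns, and the predict() call are placeholders/assumptions based on the usual TimeSequencePipeline interface, not verified behaviour:

# Illustrative only: file name and columns are placeholders; predict() is
# assumed to follow the standard TimeSequencePipeline API.
import numpy as np
import pandas as pd

ts_pipeline = load_xgboost_pipeline("saved_xgb_pipeline.zip", model_type="regressor")

# columns must match the feature columns the pipeline was trained with
new_df = pd.DataFrame({"f": np.random.randn(5), "f2": np.random.randn(5)})
print(ts_pipeline.predict(new_df))  # assumed pipeline method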
Example #6
class TestXgbregressor(ZooTestCase):
    def setup_method(self, method):
        # super().setup_method(method)
        self.model = XGBoost(config={
            'n_estimators': 5,
            'max_depth': 2,
            'tree_method': 'hist'
        })
        feature_cols = ["f", "f2"]
        target_col = "t"
        train_df = pd.DataFrame({
            "f": np.random.randn(20),
            "f2": np.random.randn(20),
            "t": np.random.randint(20)
        })
        val_df = pd.DataFrame({
            "f": np.random.randn(5),
            "f2": np.random.randn(5),
            "t": np.random.randint(5)
        })

        ft = IdentityTransformer(feature_cols=feature_cols,
                                 target_col=target_col)

        self.x, self.y = ft.transform(train_df)
        self.val_x, self.val_y = ft.transform(val_df)

    def teardown_method(self, method):
        pass

    def test_fit_predict_evaluate(self):
        self.model.fit_eval((self.x, self.y), [(self.val_x, self.val_y)])

        # test predict
        result = self.model.predict(self.val_x)

        # test evaluate
        evaluate_result = self.model.evaluate(self.val_x, self.val_y)

    def test_save_restore(self):
        self.model.fit_eval((self.x, self.y), [(self.val_x, self.val_y)])

        result_save = self.model.predict(self.val_x)
        model_file = "tmp.pkl"
        self.model.save(model_file)
        assert os.path.isfile(model_file)
        new_model = XGBoost()
        new_model.restore(model_file)
        assert new_model.model
        result_restore = new_model.predict(self.val_x)
        assert_array_almost_equal(
            result_save, result_restore, decimal=2,
            err_msg="Prediction values are not the same after restore: "
                    "predict before is {}, and predict after is {}"
                    .format(result_save, result_restore))
        os.remove(model_file)

    def test_metric(self):
        # metric not in XGB_METRIC_NAME but in Evaluator.metrics_func.keys()
        self.model.fit_eval(data=(self.x, self.y),
                            validation_data=[(self.val_x, self.val_y)],
                            metric="mse")
        # metric in XGB_METRIC_NAME
        self.model.fit_eval(data=(self.x, self.y),
                            validation_data=[(self.val_x, self.val_y)],
                            metric="rmsle")

        with pytest.raises(ValueError):
            self.model.fit_eval(data=(self.x, self.y),
                                validation_data=[(self.val_x, self.val_y)],
                                metric="wrong_metric")

        # metric func
        def pyrmsle(y_true, y_pred):
            y_pred[y_pred < -1] = -1 + 1e-6
            elements = np.power(np.log1p(y_true) - np.log1p(y_pred), 2)
            return float(np.sqrt(np.sum(elements) / len(y_true)))

        result = self.model.fit_eval(data=(self.x, self.y),
                                     validation_data=[(self.val_x, self.val_y)],
                                     metric_func=pyrmsle)
        assert "pyrmsle" in result

    def test_data_creator(self):
        def get_x_y(size, config):
            values = np.random.randn(size, 4)
            df = pd.DataFrame(values, columns=["f1", "f2", "f3", "t"])
            selected_features = config["features"]
            x = df[selected_features].to_numpy()
            y = df["t"].to_numpy()
            return x, y

        from functools import partial
        train_data_creator = partial(get_x_y, 20)
        val_data_creator = partial(get_x_y, 5)
        config = {'n_estimators': 5, 'max_depth': 2, 'tree_method': 'hist'}
        model_builder = XGBoostModelBuilder(model_type="regressor",
                                            cpus_per_trial=1,
                                            **config)
        model = model_builder.build(config={"features": ["f1", "f2"]})
        model.fit_eval(train_data_creator,
                       validation_data=val_data_creator,
                       metric="mae")
Example #7
    def create_model():
        _model = XGBoost(model_type=model_type, config=config)
        if "cpu" in resources_per_trial:
            # pin XGBoost's thread count to the CPUs reserved for this trial
            _model.set_params(n_jobs=resources_per_trial.get("cpu"))
        return _model
Example #8
    def build_from_ckpt(self, checkpoint_filename):
        from zoo.orca.automl.xgboost.XGBoost import XGBoost
        # rebuild the wrapper, then load the trained state from the checkpoint
        model = XGBoost(model_type=self.model_type, config=self.model_config)
        model.restore(checkpoint_filename)
        return model
Example #9
    def build(self, config):
        from zoo.orca.automl.xgboost.XGBoost import XGBoost
        # start from the builder's base config, then build the underlying
        # estimator with the per-trial hyperparameters in `config`
        model = XGBoost(model_type=self.model_type, config=self.model_config)
        model._build(**config)
        return model