示例#1
0
 def test_mtry(self, boston_X, boston_y, mtry):
     """Check the fitted ``mtry_`` attribute against the requested ``mtry``.

     An explicit ``mtry`` must be kept as-is; when ``mtry`` is ``None`` the
     test expects ``mtry_`` to equal 6 for the ``boston_X`` fixture.
     """
     expected_mtry = 6 if mtry is None else mtry
     regressor = GRFBoostedForestRegressor(mtry=mtry)
     regressor.fit(boston_X, boston_y)
     assert regressor.mtry_ == expected_mtry
示例#2
0
 def test_fit(self, boston_X, boston_y):
     """Check the not-fitted / fitted transition of the estimator.

     ``check_is_fitted`` must raise ``NotFittedError`` before ``fit`` and
     pass afterwards, and the fitted estimator must expose
     ``boosted_forests_`` and ``mtry_``.
     """
     regressor = GRFBoostedForestRegressor()
     # A fresh estimator has to be rejected by the fitted-check.
     with pytest.raises(NotFittedError):
         check_is_fitted(regressor)
     regressor.fit(boston_X, boston_y)
     check_is_fitted(regressor)
     for fitted_attr in ("boosted_forests_", "mtry_"):
         assert hasattr(regressor, fitted_attr)
示例#3
0
 def test_with_X_nan(self, boston_X, boston_y):
     """Fit and predict on features in which 100 entries are NaN.

     The NaN positions are drawn from a locally seeded generator so that
     every run of the test exercises the same missing-value pattern.
     """
     n_missing = 100
     boston_X_nan = boston_X.copy()
     # A private, seeded RandomState keeps the test reproducible and leaves
     # the global numpy random state untouched.
     rng = np.random.RandomState(0)
     index = rng.choice(boston_X_nan.size, n_missing, replace=False)
     # Assign through the flat iterator so the write targets the array itself.
     boston_X_nan.flat[index] = np.nan
     assert np.sum(np.isnan(boston_X_nan)) == n_missing
     forest = GRFBoostedForestRegressor()
     forest.fit(boston_X_nan, boston_y)
     pred = forest.predict(boston_X_nan)
     assert len(pred) == boston_X_nan.shape[0]
示例#4
0
 def test_serialize(self, boston_X, boston_y):
     """Round-trip the estimator through pickle before and after fitting.

     The unfitted estimator is pickled, restored and then fitted; the
     fitted estimator is pickled and restored again, and the restored copy
     must still produce one prediction per input row.
     """
     forest = GRFBoostedForestRegressor()
     # not fitted
     # The context manager closes the temporary file once the round trip
     # is done instead of leaving the handle open.
     with tempfile.TemporaryFile() as tf:
         pickle.dump(forest, tf)
         tf.seek(0)
         forest = pickle.load(tf)
     forest.fit(boston_X, boston_y)
     # fitted
     with tempfile.TemporaryFile() as tf:
         pickle.dump(forest, tf)
         tf.seek(0)
         new_forest = pickle.load(tf)
     pred = new_forest.predict(boston_X)
     assert len(pred) == boston_X.shape[0]
示例#5
0
 def test_alpha(self, boston_X, boston_y, alpha):
     """Check that ``fit`` validates ``alpha``.

     The test expects a ``ValueError`` when ``alpha <= 0`` or
     ``alpha >= 0.25`` and a successful fit otherwise.
     """
     regressor = GRFBoostedForestRegressor(alpha=alpha)
     alpha_rejected = alpha <= 0 or alpha >= 0.25
     if not alpha_rejected:
         regressor.fit(boston_X, boston_y)
         return
     with pytest.raises(ValueError):
         regressor.fit(boston_X, boston_y)
示例#6
0
 def test_honesty_fraction(self, boston_X, boston_y, honesty_fraction):
     """Check how ``fit`` reacts to the ``honesty_fraction`` setting.

     With honesty and leaf pruning enabled, the test expects a
     ``RuntimeError`` when ``honesty_fraction <= 0`` or
     ``honesty_fraction >= 1`` and a successful fit otherwise.
     """
     settings = {
         "honesty": True,
         "honesty_fraction": honesty_fraction,
         "honesty_prune_leaves": True,
     }
     regressor = GRFBoostedForestRegressor(**settings)
     fraction_rejected = honesty_fraction <= 0 or honesty_fraction >= 1
     if not fraction_rejected:
         regressor.fit(boston_X, boston_y)
         return
     with pytest.raises(RuntimeError):
         regressor.fit(boston_X, boston_y)
示例#7
0
    def test_equalize_cluster_weights(self, boston_X, boston_y, boston_cluster,
                                      equalize_cluster_weights):
        """Check ``samples_per_cluster_`` for clustered and unclustered fits.

        With clusters supplied, the test expects ``samples_per_cluster_`` to
        be 20 when cluster weights are equalized and ``n_samples - 20``
        otherwise. Equalized cluster weights together with ``sample_weight``
        must raise ``ValueError``. Fitting with ``cluster=None`` must give
        ``samples_per_cluster_ == 0``.
        """
        regressor = GRFBoostedForestRegressor(
            equalize_cluster_weights=equalize_cluster_weights)
        regressor.fit(boston_X, boston_y, cluster=boston_cluster)

        if not equalize_cluster_weights:
            assert regressor.samples_per_cluster_ == boston_y.shape[0] - 20
        else:
            assert regressor.samples_per_cluster_ == 20
            # Sample weights are not accepted when clusters are equalized.
            with pytest.raises(ValueError):
                regressor.fit(boston_X,
                              boston_y,
                              cluster=boston_cluster,
                              sample_weight=boston_y)

        regressor.fit(boston_X, boston_y, cluster=None)
        assert regressor.samples_per_cluster_ == 0
示例#8
0
 def test_tuning(
     self,
     boston_X,
     boston_y,
     tune_params,
     tune_n_estimators,
     tune_n_reps,
     tune_n_draws,
 ):
     """Check ``fit`` across combinations of the tuning options.

     The test expects a ``ValueError`` when ``tune_params`` is
     ``["invalid"]`` or when ``tune_n_draws`` is 1; every other combination
     must fit without error.
     """
     regressor = GRFBoostedForestRegressor(
         tune_params=tune_params,
         tune_n_estimators=tune_n_estimators,
         tune_n_reps=tune_n_reps,
         tune_n_draws=tune_n_draws,
     )
     expect_failure = tune_params == ["invalid"] or tune_n_draws == 1
     if not expect_failure:
         regressor.fit(boston_X, boston_y)
         return
     with pytest.raises(ValueError):
         regressor.fit(boston_X, boston_y)
示例#9
0
 def test_boosting(
     self,
     boston_X,
     boston_y,
     boost_steps,
     boost_error_reduction,
     boost_max_steps,
     boost_trees_tune,
 ):
     """Check ``fit`` across combinations of the boosting options.

     Tuning is fixed to ``mtry`` with 5 draws and 2 repetitions. The test
     expects a ``ValueError`` when ``boost_error_reduction`` is below 0 or
     above 1 and a successful fit otherwise.
     """
     settings = dict(
         tune_params=["mtry"],
         tune_n_draws=5,
         tune_n_reps=2,
         boost_steps=boost_steps,
         boost_error_reduction=boost_error_reduction,
         boost_max_steps=boost_max_steps,
         boost_trees_tune=boost_trees_tune,
     )
     regressor = GRFBoostedForestRegressor(**settings)
     reduction_rejected = (boost_error_reduction < 0
                           or boost_error_reduction > 1)
     if not reduction_rejected:
         regressor.fit(boston_X, boston_y)
         return
     with pytest.raises(ValueError):
         regressor.fit(boston_X, boston_y)
示例#10
0
    def fit(
        self,
        X,
        y,
        w,  # treatment
        y_hat=None,
        w_hat=None,
        sample_weight=None,
        cluster=None,
    ):
        """Fit the grf forest using training data.

        When ``self.orthogonal_boosting`` is set, any of ``y_hat`` / ``w_hat``
        that is not supplied is taken from the ``boosted_forests_["predictions"]``
        of a ``GRFBoostedForestRegressor`` fitted on ``y`` / ``w`` respectively.
        The parent ``fit`` is then called with ``w`` also passed as ``z`` and
        ``w_hat`` also passed as ``z_hat``.

        :param array2d X: training input features
        :param array1d y: training input targets
        :param array1d w: training input treatments
        :param array1d y_hat: estimated expected target responses
        :param array1d w_hat: estimated treatment propensities
        :param array1d sample_weight: optional weights for input samples
        :param array1d cluster: optional cluster assignments for input samples
        :return: whatever the parent class ``fit`` returns
        """
        X, y = self._validate_data(X, y, force_all_finite="allow-nan")
        self._check_num_samples(X)

        # Settings shared by both auxiliary boosted regressors; several values
        # are fixed here rather than copied from this estimator.
        nuisance_kwargs = dict(
            n_estimators=max(50, int(self.n_estimators / 4)),
            equalize_cluster_weights=self.equalize_cluster_weights,
            sample_fraction=self.sample_fraction,
            mtry=self.mtry,
            min_node_size=5,
            honesty=True,
            honesty_fraction=0.5,
            honesty_prune_leaves=self.honesty_prune_leaves,
            alpha=self.alpha,
            imbalance_penalty=self.imbalance_penalty,
            ci_group_size=1,
            tune_params=None,  # TODO ?
            n_jobs=self.n_jobs,
            seed=self.seed,
        )

        def _boosted_predictions(target):
            # Fit one auxiliary boosted regressor on ``target`` and hand back
            # the predictions it stored while fitting.
            booster = GRFBoostedForestRegressor(**nuisance_kwargs)
            booster.fit(X, target, sample_weight=sample_weight, cluster=cluster)
            return booster.boosted_forests_["predictions"]

        if self.orthogonal_boosting and y_hat is None:
            logger.debug("orthogonal boosting y_hat")
            y_hat = _boosted_predictions(y)

        if self.orthogonal_boosting and w_hat is None:
            logger.debug("orthogonal boosting w_hat")
            w_hat = _boosted_predictions(w)

        # The treatment and its estimate are forwarded twice: as w / w_hat and
        # as z / z_hat.
        return super().fit(
            X=X,
            y=y,
            w=w,
            z=w,
            y_hat=y_hat,
            w_hat=w_hat,
            z_hat=w_hat,
            cluster=cluster,
            sample_weight=sample_weight,
        )
示例#11
0
    def test_sample_fraction(self, boston_X, boston_y,
                             sample_fraction):  # and ci_group_size
        """Check ``sample_fraction`` validation for ``ci_group_size`` 1 and 2.

        With ``ci_group_size=1`` the test expects a ``ValueError`` when
        ``sample_fraction <= 0`` or ``sample_fraction >= 1``; with
        ``ci_group_size=2`` it expects one when ``sample_fraction <= 0`` or
        ``sample_fraction > 0.5``. All other values must fit without error.
        """
        for group_size in (1, 2):
            regressor = GRFBoostedForestRegressor(
                sample_fraction=sample_fraction, ci_group_size=group_size)
            # The upper bound on the fraction depends on the group size.
            if group_size == 1:
                fraction_rejected = sample_fraction <= 0 or sample_fraction >= 1
            else:
                fraction_rejected = sample_fraction <= 0 or sample_fraction > 0.5

            if not fraction_rejected:
                regressor.fit(boston_X, boston_y)
                continue
            with pytest.raises(ValueError):
                regressor.fit(boston_X, boston_y)
示例#12
0
 def test_clone(self, boston_X, boston_y):
     """Check that a fitted estimator can be passed to ``clone`` without error."""
     fitted = GRFBoostedForestRegressor()
     fitted.fit(boston_X, boston_y)
     # Only the absence of an exception matters; the copy is discarded.
     _ = clone(fitted)
示例#13
0
 def test_predict(self, boston_X, boston_y, boost_predict_steps):
     """Check that ``predict`` returns one value per input row.

     ``boost_predict_steps`` is forwarded to ``predict`` unchanged.
     """
     regressor = GRFBoostedForestRegressor()
     regressor.fit(boston_X, boston_y)
     n_rows = boston_X.shape[0]
     predictions = regressor.predict(
         boston_X, boost_predict_steps=boost_predict_steps)
     assert len(predictions) == n_rows
示例#14
0
 def test_check_estimator(self):
     """Run ``check_estimator`` on a default-constructed estimator."""
     estimator = GRFBoostedForestRegressor()
     check_estimator(estimator)
示例#15
0
 def test_honesty(self, boston_X, boston_y, honesty):
     """Check that ``fit`` succeeds for the given ``honesty`` setting."""
     regressor = GRFBoostedForestRegressor(honesty=honesty)
     regressor.fit(boston_X, boston_y)
示例#16
0
 def test_init(self):
     """Check that the estimator can be constructed with default arguments."""
     # The instance itself is not needed; construction must simply not raise.
     GRFBoostedForestRegressor()
示例#17
0
 def test_honesty_prune_leaves(self, boston_X, boston_y,
                               honesty_prune_leaves):
     """Check that ``fit`` succeeds with honesty on and the given pruning flag."""
     settings = {
         "honesty": True,
         "honesty_prune_leaves": honesty_prune_leaves,
     }
     regressor = GRFBoostedForestRegressor(**settings)
     regressor.fit(boston_X, boston_y)