import copy
import math

from river import base
from river.stats import Cov, Var

# BaseForestMember, BaseTreeRegressor, RegressionMetric, and GradHess are
# assumed to come from the surrounding River code base; their exact module
# paths depend on the River version.


class ForestMemberRegressor(BaseForestMember, base.Regressor):
    """Forest member class for regression."""
    def __init__(
        self,
        index_original: int,
        model: BaseTreeRegressor,
        created_on: int,
        drift_detector: base.DriftDetector,
        warning_detector: base.DriftDetector,
        is_background_learner: bool,
        metric: RegressionMetric,
    ):
        super().__init__(
            index_original=index_original,
            model=model,
            created_on=created_on,
            drift_detector=drift_detector,
            warning_detector=warning_detector,
            is_background_learner=is_background_learner,
            metric=metric,
        )
        self._var = Var()  # Used to track drift

    def _drift_detector_input(self, y_true: float, y_pred: float):
        drift_input = y_true - y_pred
        self._var.update(drift_input)

        if self._var.mean.n == 1:
            # Only one error observed so far: return the midpoint of the
            # normalized error range
            return 0.5

        sd = math.sqrt(self._var.get())

        # We assume the error follows a normal distribution centered at zero.
        # By the empirical rule, 99.73% of the values then lie within
        # [-3 * sd, 3 * sd], so we min-max normalize over that range to obtain
        # the value in [0, 1] that ADWIN expects (see the standalone sketch
        # after this class).
        return (drift_input + 3 * sd) / (6 * sd) if sd > 0 else 0.5

    def reset(self, n_samples_seen):
        super().reset(n_samples_seen)
        # Reset the stats for the drift detector
        self._var = Var()

    def predict_one(self, x):
        return self.model.predict_one(x)
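

# Standalone sketch (not part of the original class) of the min-max
# normalization used in `_drift_detector_input`: errors assumed to lie in
# [-3 * sd, 3 * sd] are mapped onto [0, 1], the input range ADWIN works with.
# Plain Python, no River dependency; `_normalize_error` is a hypothetical
# helper introduced here only for illustration.
def _normalize_error(error: float, sd: float) -> float:
    # A zero-centered, roughly normal error is assumed; values beyond
    # +/- 3 * sd simply fall outside [0, 1].
    return (error + 3 * sd) / (6 * sd) if sd > 0 else 0.5


assert _normalize_error(0.0, sd=2.0) == 0.5   # no error -> middle of the range
assert _normalize_error(-6.0, sd=2.0) == 0.0  # error at -3 * sd -> lower bound
assert _normalize_error(6.0, sd=2.0) == 1.0   # error at +3 * sd -> upper bound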
class GradHessStats:
    """Class used to monitor and update the gradient/hessian information in Stochastic Gradient
    Trees.

    Represents the aggregated gradient/hessian data in a node (global node statistics), category,
    or numerical feature's discretized bin.
    """
    def __init__(self):
        self.g_var = Var()
        self.h_var = Var()
        self.gh_cov = Cov()

    def __iadd__(self, other):
        self.g_var += other.g_var
        self.h_var += other.h_var
        self.gh_cov += other.gh_cov

        return self

    def __isub__(self, other):
        self.g_var -= other.g_var
        self.h_var -= other.h_var
        self.gh_cov -= other.gh_cov

        return self

    def __add__(self, other):
        new = copy.deepcopy(self)
        new += other

        return new

    def __sub__(self, other):
        new = copy.deepcopy(self)
        new -= other

        return new

    def update(self, gh: GradHess, w: float = 1.0):
        self.g_var.update(gh.gradient, w)
        self.h_var.update(gh.hessian, w)
        self.gh_cov.update(gh.gradient, gh.hessian, w)

    @property
    def mean(self) -> GradHess:
        return GradHess(self.g_var.mean.get(), self.h_var.mean.get())

    @property
    def variance(self) -> GradHess:
        return GradHess(self.g_var.get(), self.h_var.get())

    @property
    def covariance(self) -> float:
        return self.gh_cov.get()

    @property
    def total_weight(self) -> float:
        return self.g_var.mean.n

    # This method ignores correlations between delta_pred and the gradients/hessians! Since
    # delta_pred is derived from the gradient and hessian sample, that assumption is clearly
    # violated. However, as empirically shown in the original SGT, this does not seem to
    # significantly impact the obtained results. A standalone numerical check of the variance
    # propagation used here follows this class.
    def delta_loss_mean_var(self, delta_pred: float) -> Var:
        m = self.mean
        n = self.total_weight
        mean = delta_pred * m.gradient + 0.5 * m.hessian * delta_pred * delta_pred

        variance = self.variance
        covariance = self.covariance

        grad_term_var = delta_pred * delta_pred * variance.gradient
        hess_term_var = 0.25 * variance.hessian * (delta_pred**4.0)
        sigma = max(0.0, grad_term_var + hess_term_var + delta_pred**3 * covariance)
        return Var._from_state(n, mean, sigma)  # noqa
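

# Standalone numerical check (not part of the library) of the variance
# propagation used in `delta_loss_mean_var`. For delta_loss = a * G + b * H
# with a = delta_pred and b = 0.5 * delta_pred**2,
#     Var(delta_loss) = a**2 * Var(G) + b**2 * Var(H) + 2 * a * b * Cov(G, H),
# and 2 * a * b = delta_pred**3, which is the coefficient of the covariance
# term above. The names below are hypothetical and only the standard library
# is used.
import math
import random
import statistics

random.seed(42)
delta_pred = 0.3
grads = [random.gauss(0.0, 1.0) for _ in range(1000)]
hesss = [0.5 * g + random.gauss(0.0, 0.2) for g in grads]  # correlated with the gradients
losses = [delta_pred * g + 0.5 * h * delta_pred**2 for g, h in zip(grads, hesss)]


def _sample_cov(xs, ys):
    # Sample covariance with the same (n - 1) denominator as statistics.variance
    mx, my = sum(xs) / len(xs), sum(ys) / len(ys)
    return sum((x - mx) * (y - my) for x, y in zip(xs, ys)) / (len(xs) - 1)


predicted = (
    delta_pred**2 * statistics.variance(grads)
    + 0.25 * delta_pred**4 * statistics.variance(hesss)
    + delta_pred**3 * _sample_cov(grads, hesss)
)
assert math.isclose(predicted, statistics.variance(losses), rel_tol=1e-6)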