Example #1
    def __init__(
        self,
        template_splitter,
        split_criterion,
        pred_model,
        drift_detector,
    ):
        super().__init__(
            template_splitter=template_splitter,
            split_criterion=split_criterion,
        )
        self.pred_model = pred_model
        self.drift_detector = drift_detector

        self._target_stats = stats.Var()
        self._feat_stats = collections.defaultdict(functools.partial(stats.Var))
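This initializer keeps one running variance per input feature by mapping each feature name to its own `stats.Var`. A minimal sketch of that pattern using only the public `river.stats` API (the feature dicts below are made up for illustration):

import collections
import functools

from river import stats

feat_stats = collections.defaultdict(functools.partial(stats.Var))
for x in ({"a": 1.0, "b": 2.0}, {"a": 3.0, "b": 5.0}, {"a": 4.0}):
    for name, value in x.items():
        feat_stats[name].update(value)  # one running variance per feature

print(feat_stats["a"].get())  # sample variance of the values seen for "a"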
Example #2
    def _eval_numerical_splits(
        self, feature_idx, candidate, sgt
    ) -> typing.Tuple[BranchFactory, bool]:
        skip_candidate = True
        quantizer = self._split_stats[feature_idx]

        # Get updated quantizer params
        self.split_params[feature_idx].update(quantizer._get_params())

        n_bins = len(quantizer)
        if n_bins == 1:  # Insufficient number of bins to perform splits
            return candidate, skip_candidate

        skip_candidate = False
        candidate.merit.loss_mean = math.inf
        candidate.merit.delta_pred = {}

        # Auxiliary gradient and hessian statistics
        left_ghs = GradHessStats()
        left_dlms = stats.Var()
        for thresh, ghs in quantizer:
            left_ghs += ghs
            left_delta_pred = self.delta_prediction(left_ghs.mean, sgt.lambda_value)
            left_dlms += left_ghs.delta_loss_mean_var(left_delta_pred)

            right_ghs = self._update_stats - left_ghs
            right_delta_pred = self.delta_prediction(right_ghs.mean, sgt.lambda_value)
            right_dlms = right_ghs.delta_loss_mean_var(right_delta_pred)

            all_dlms = left_dlms + right_dlms

            loss_mean = all_dlms.mean.get()
            loss_var = all_dlms.get()

            if loss_mean < candidate.merit.loss_mean:
                candidate.merit.loss_mean = (
                    loss_mean + 2.0 * sgt.gamma / self.total_weight
                )
                candidate.merit.loss_var = loss_var
                candidate.merit.delta_pred[0] = left_delta_pred
                candidate.merit.delta_pred[1] = right_delta_pred

                candidate.split_info = thresh

        return candidate, skip_candidate
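The scan above pools the left and right loss statistics with `left_dlms + right_dlms`: the `stats.Var` objects used throughout these examples support addition, which merges two running variances as if one statistic had seen both streams. A small sketch of that property (values made up):

from river import stats

left, right, both = stats.Var(), stats.Var(), stats.Var()
for x in (1.0, 2.0, 3.0):
    left.update(x)
    both.update(x)
for x in (10.0, 11.0):
    right.update(x)
    both.update(x)

pooled = left + right     # merge the two running variances
print(pooled.mean.get())  # pooled mean, as read via all_dlms.mean.get() above
print(pooled.get())       # pooled variance, matching both.get() up to float error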
Example #3
 def __init__(
     self,
     index_original: int,
     model: BaseTreeRegressor,
     created_on: int,
     drift_detector: base.DriftDetector,
     warning_detector: base.DriftDetector,
     is_background_learner,
     metric: metrics.RegressionMetric,
 ):
     super().__init__(
         index_original=index_original,
         model=model,
         created_on=created_on,
         drift_detector=drift_detector,
         warning_detector=warning_detector,
         is_background_learner=is_background_learner,
         metric=metric,
     )
     self._var = stats.Var()  # Used to track drift
Example #4
 def reset(self, n_samples_seen):
     super().reset(n_samples_seen)
     # Reset the stats for the drift detector
     self._var = stats.Var()
Example #5
File: normal.py  Project: online-ml/river
 def __init__(self, seed=None):
     super().__init__(seed=seed)
     self.variance = stats.Var()
     self.mean = stats.Mean()
     self.seed = seed
Example #6
    def find_best_split(self, sgt):
        best = SGTSplit()

        # Null split: update the prediction using the new gradient information
        best.delta_pred = delta_prediction(self._update_stats.mean(),
                                           sgt.lambda_value)
        dlms = self._update_stats.delta_loss_mean_var(best.delta_pred)
        best.loss_mean = dlms.mean.get()
        best.loss_var = dlms.get()

        for feature_idx in self._split_stats:
            candidate = SGTSplit()
            candidate.feature_idx = feature_idx

            if sgt.nominal_attributes is not None and feature_idx in sgt.nominal_attributes:
                # Nominal attribute has already been used in a previous split
                if feature_idx in sgt._split_features:
                    continue

                candidate.is_nominal = True
                candidate.delta_pred = {}
                all_dlms = stats.Var()

                cat_collection = self._split_stats[feature_idx]
                for category in cat_collection:
                    dp = delta_prediction(cat_collection[category].mean(),
                                          sgt.lambda_value)

                    dlms = cat_collection[category].delta_loss_mean_var(dp)
                    candidate.delta_pred[category] = dp

                    all_dlms += dlms

                candidate.loss_mean = (
                    all_dlms.mean.get() +
                    len(cat_collection) * sgt.gamma / self.total_weight)
                candidate.loss_var = all_dlms.get()
            else:  # Numerical features
                quantizer = self._split_stats[feature_idx]
                half_radius = quantizer.radius / 2.
                n_bins = len(quantizer)
                if n_bins == 1:  # Insufficient number of bins to perform splits
                    continue

                candidate.loss_mean = math.inf
                candidate.delta_pred = {}

                # Auxiliary gradient and hessian statistics
                left_ghs = GradHessStats()
                left_dlms = stats.Var()
                for i, ghs in enumerate(quantizer):
                    left_ghs += ghs
                    left_delta_pred = delta_prediction(left_ghs.mean(),
                                                       sgt.lambda_value)
                    left_dlms += left_ghs.delta_loss_mean_var(left_delta_pred)

                    right_ghs = self._update_stats - left_ghs
                    right_delta_pred = delta_prediction(
                        right_ghs.mean(), sgt.lambda_value)
                    right_dlms = right_ghs.delta_loss_mean_var(
                        right_delta_pred)

                    all_dlms = left_dlms + right_dlms

                    loss_mean = all_dlms.mean.get()
                    loss_var = all_dlms.get()

                    if loss_mean < candidate.loss_mean:
                        candidate.loss_mean = (
                            loss_mean + 2.0 * sgt.gamma / self.total_weight)
                        candidate.loss_var = loss_var
                        candidate.delta_pred[0] = left_delta_pred
                        candidate.delta_pred[1] = right_delta_pred

                        # Define split point
                        if i == n_bins - 1:  # Last bin
                            candidate.feature_val = ghs.get_x()
                        else:  # Use middle point between bins
                            candidate.feature_val = ghs.get_x() + half_radius

            if candidate.loss_mean < best.loss_mean:
                best = candidate

        return best
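The numerical branch above is a one-pass prefix scan: the left-hand statistics absorb one quantizer bin per iteration, and the right-hand side is recovered by subtracting the running left aggregate from the precomputed total (`self._update_stats - left_ghs`), so every candidate threshold is evaluated in a single sweep. A schematic sketch of the idea with stand-in values, not river's actual API:

bins = [3, 1, 4, 1, 5]    # stand-ins for per-bin aggregated statistics
total = sum(bins)         # aggregate over all bins, computed once
left = 0
for i, b in enumerate(bins):
    left += b             # statistics for instances to the left of bin i
    right = total - left  # right-hand statistics obtained by subtraction
    # ... evaluate the candidate split (left, right) and keep the best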
Example #7
 def __init__(self):
     super().__init__()
     self._y_var = stats.Var()
     self._total_sum_of_squares = 0
     self._residual_sum_of_squares = 0
     self.sample_correction = {}
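Examples #7 and #12 keep the ingredients of an online R² score: a running variance of the targets (for the total sum of squares) and an accumulator for the residual sum of squares. A hedged sketch of how a `get` method might combine them, assuming the textbook R² = 1 - RSS/TSS with a guard for the degenerate case; the actual source may differ:

def get(self):
    # Assumed convention: return 0 until some target variance has been seen
    if self._total_sum_of_squares > 0:
        return 1 - self._residual_sum_of_squares / self._total_sum_of_squares
    return 0.0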
Example #8
 def __init__(self):
     self.x_m = stats.Mean()
     self.g_var = stats.Var()
     self.h_var = stats.Var()
     self.gh_cov = stats.Cov()
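Example #8 bundles the moments needed for gradient/hessian bookkeeping: a mean of the input location, variances of the gradients and hessians, and their covariance. A self-contained sketch of feeding all four from one observation, using the public `Mean.update`, `Var.update`, and `Cov.update` calls:

from river import stats

x_m, g_var, h_var, gh_cov = stats.Mean(), stats.Var(), stats.Var(), stats.Cov()

def update(x, g, h):
    x_m.update(x)        # running mean of the feature value
    g_var.update(g)      # running variance of the gradients
    h_var.update(h)      # running variance of the hessians
    gh_cov.update(g, h)  # running covariance between gradient and hessian

update(0.5, -1.2, 2.0)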
Example #9
 def __init__(self, regressor: base.Regressor):
     self.var = stats.Var()
     super().__init__(regressor=regressor,
                      func=self._scale,
                      inverse_func=self._unscale)
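Example #9 plugs a running variance into a target-transforming regressor, but `_scale` and `_unscale` are not shown. A hedged sketch of what standardization with a single `Var` object could look like (a `Var` embeds a `Mean`, so `self.var.mean.get()` supplies the running mean); this is an illustration, not necessarily the source's implementation:

def _scale(self, y):
    sigma = self.var.get() ** 0.5
    return (y - self.var.mean.get()) / sigma if sigma else 0.0

def _unscale(self, y):
    sigma = self.var.get() ** 0.5
    return y * sigma + self.var.mean.get()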
Example #10
    # Check the statistic has a working __str__ and name method
    assert isinstance(str(stat), str)

    if isinstance(stat, stats.Univariate):
        assert isinstance(stat.name, str)


@pytest.mark.parametrize(
    'stat, func',
    [
        (stats.Kurtosis(bias=True), sp_stats.kurtosis),
        (stats.Kurtosis(bias=False), functools.partial(sp_stats.kurtosis, bias=False)),
        (stats.Mean(), statistics.mean),
        (stats.Skew(bias=True), sp_stats.skew),
        (stats.Skew(bias=False), functools.partial(sp_stats.skew, bias=False)),
        (stats.Var(ddof=0), np.var),
        (stats.Var(), functools.partial(np.var, ddof=1)),
    ])
def test_univariate(stat, func):

    # Silence the warnings numpy/scipy may raise during the comparisons
    np.warnings.filterwarnings('ignore')

    X = [random.random() for _ in range(30)]

    for i, x in enumerate(X):
        stat.update(x)
        if i >= 1:
            assert math.isclose(stat.get(), func(X[:i + 1]), abs_tol=1e-10)


Example #11
 def __init__(self, seed=None):
     super().__init__()
     self.variance = stats.Var()
     self.mean = stats.Mean()
     self.seed = seed
     self._rng = random.Random(seed)
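Examples #5 and #11 come from a baseline that models the target as a normal distribution fitted online. Given the running `mean` and `variance` plus the seeded `random.Random`, a plausible prediction step would sample from that Gaussian; this method is hypothetical, inferred from the attributes above:

def predict_one(self):
    # Hypothetical: draw from N(mean, std) fitted to the targets seen so far
    return self._rng.gauss(self.mean.get(), self.variance.get() ** 0.5)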
Example #12
File: r2.py  Project: Leo-VK/creme
 def __init__(self):
     self._y_var = stats.Var()
     self._total_sum_of_squares = 0
     self._residual_sum_of_squares = 0
Example #13
import copy
import functools
import math
import random

import numpy as np
import pytest

from river import stats


@pytest.mark.parametrize(
    "stat",
    [
        pytest.param(stat, id=stat.__class__.__name__)
        for stat in [stats.Mean(), stats.Var(ddof=0), stats.Var(ddof=1)]
    ],
)
def test_add(stat):
    A = copy.deepcopy(stat)
    B = copy.deepcopy(stat)
    C = copy.deepcopy(stat)

    X = [random.random() for _ in range(30)]
    Y = [random.random() for _ in range(30)]
    W = [random.random() for _ in range(30)]

    for x, y, w in zip(X, Y, W):
        A.update(x, w)
        B.update(y, w)
        C.update(x, w).update(y, w)
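The snippet is truncated here. The assertion that presumably closes the loop checks the additive property being exercised: merging two independently updated statistics should match a single statistic that saw both weighted streams, e.g.:

        assert math.isclose((A + B).get(), C.get(), abs_tol=1e-10)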