Example #1
    def __init__(self,
                 optimizer: optim.Optimizer = None,
                 loss: optim.losses.Loss = None,
                 l2=0.,
                 initializer: optim.initializers.Initializer = None,
                 clip_gradient=1e12):
        self.optimizer = optim.SGD() if optimizer is None else copy.deepcopy(optimizer)
        self.u_optimizer = optim.SGD() if optimizer is None else copy.deepcopy(optimizer)
        self.i_optimizer = optim.SGD() if optimizer is None else copy.deepcopy(optimizer)
        self.loss = optim.losses.Squared() if loss is None else loss
        self.l2 = l2

        if initializer is None:
            initializer = optim.initializers.Zeros()
        self.initializer = initializer

        self.clip_gradient = clip_gradient
        self.global_mean = stats.Mean()
        # The initializer is called lazily to create a float bias for each unseen key
        self.u_biases: typing.DefaultDict[int, float] = collections.defaultdict(initializer)
        self.i_biases: typing.DefaultDict[int, float] = collections.defaultdict(initializer)
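
The snippet relies on creme initializers being callable: collections.defaultdict invokes the initializer with no arguments to create a bias for each unseen key. A minimal sketch of that mechanism (assuming, as in creme, that an initializer returns a scalar when called without a shape):

from collections import defaultdict
from creme import optim

biases = defaultdict(optim.initializers.Zeros())
print(biases['user_42'])  # 0.0 -- created lazily on first access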
Example #2
def roll_dataframe_stats(
    frame: pd.DataFrame,
    window=14,
    min_steps: int = 1,
    callback: Optional[Callable] = None,
    model=None,
    metric: Optional[metrics.ClassificationReport] = None):
    # A fresh report per call avoids the shared-mutable-default pitfall
    metric = metrics.ClassificationReport() if metric is None else metric
    windower = Windowing(frame,
                         window_size=window,
                         adaptive_window=True,
                         adapted_window_size=0)

    step_count = 0
    history = []

    model_copy = copy(model)

    _mean_down = stats.Mean()
    _mean_up = stats.Mean()
    while windower.has_next_observation:
        res = windower.step()
        x = res.to_dict(orient="record")[0]

        y = x.pop("y")
        if model is not None:
            y_pred = boolean_flip(model.predict_one(x))
            model.fit_one(x, y)
            if y_pred != y:
                prob_up = model.predict_proba_one(x)
                # assumes a binary classifier: values are (P(False), P(True))
                prob_values = list(prob_up.values())
                is_false_pct = _mean_down.update(prob_values[0]).get()
                is_true_pct = _mean_up.update(prob_values[1]).get()
                down_msg = f"Probability going DOWNWARDS for incorrect classifications: {is_false_pct}"
                up_msg = f"Probability going UPWARDS for incorrect classifications: {is_true_pct}"
                logger.error(up_msg)
                logger.warning(down_msg)
            metric.update(y, y_pred)
            mod_acc = metric.accuracy
            logger.debug(f"Overall model accuracy: {mod_acc} \n\n")
        if callback is not None:
            history.append(callback(res))
        step_count += 1

    return step_count >= min_steps, history
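
Note the chained _mean_down.update(...).get() calls above: creme statistics return themselves from update(), so an update-then-read one-liner works. A small sketch:

from creme import stats

m = stats.Mean()
print(m.update(0.2).get())  # 0.2
print(m.update(0.4).get())  # ~0.3, the running mean of 0.2 and 0.4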
Example #3
def main():
    def add_hour(x):
        x['hour'] = x['moment'].hour
        return x

    benchmark.benchmark(
        get_X_y=datasets.fetch_bikes,
        n=182470,
        get_pp=lambda: (
            compose.Whitelister('clouds', 'humidity', 'pressure', 'temperature', 'wind') +
            (add_hour | feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())) |
            preprocessing.StandardScaler()
        ),
        models=[
            # ('creme', 'LinReg', linear_model.LinearRegression(
            #     optimizer=optim.VanillaSGD(0.01),
            #     l2=0.
            # )),
            ('creme', 'GLM',
             linear_model.GLMRegressor(optimizer=optim.VanillaSGD(0.01),
                                       l2=0.)),
            ('creme', 'GLM detrended',
             meta.Detrender(
                 linear_model.GLMRegressor(optimizer=optim.VanillaSGD(0.01),
                                           l2=0.))),

            # ('sklearn', 'SGD', compat.CremeRegressorWrapper(
            #     sklearn_estimator=sk_linear_model.SGDRegressor(
            #         learning_rate='constant',
            #         eta0=0.01,
            #         fit_intercept=True,
            #         penalty='none'
            #     ),
            # )),
            # ('sklearn', 'SGD no intercept', compat.CremeRegressorWrapper(
            #     sklearn_estimator=sk_linear_model.SGDRegressor(
            #         learning_rate='constant',
            #         eta0=0.01,
            #         fit_intercept=False,
            #         penalty='none'
            #     ),
            # )),
        ],
        get_metric=metrics.MSE)
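
The TargetAgg steps in these pipelines maintain a running statistic of the target per group, so each observation is enriched with, say, the mean bike count for its station and hour. A hedged sketch of the behaviour (the emitted feature name follows creme's <target>_<stat>_by_<keys> pattern; treat the exact key as an assumption):

from creme import feature_extraction, stats

agg = feature_extraction.TargetAgg(by='station', how=stats.Mean())
agg = agg.fit_one({'station': 'metro'}, y=12)
agg = agg.fit_one({'station': 'metro'}, y=8)
print(agg.transform_one({'station': 'metro'}))  # running group mean: 10.0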
Example #4
def main():
    import datetime as dt
    from creme import compose
    from creme import datasets
    from creme import feature_extraction
    from creme import linear_model
    from creme import metrics
    from creme import preprocessing
    from creme import stats
    from creme import stream

    X_y = datasets.Bikes()
    X_y = stream.simulate_qa(X_y,
                             moment='moment',
                             delay=dt.timedelta(minutes=30))

    def add_time_features(x):
        return {**x, 'hour': x['moment'].hour, 'day': x['moment'].weekday()}

    model = add_time_features
    model |= (compose.Select('clouds', 'humidity', 'pressure', 'temperature',
                             'wind') + feature_extraction.TargetAgg(
                                 by=['station', 'hour'], how=stats.Mean()) +
              feature_extraction.TargetAgg(by='station', how=stats.EWMean()))
    model |= preprocessing.StandardScaler()
    model |= linear_model.LinearRegression()

    metric = metrics.MAE()

    questions = {}

    for i, x, y in X_y:
        # Question
        is_question = y is None
        if is_question:
            y_pred = model.predict_one(x)
            questions[i] = y_pred

        # Answer
        else:
            metric.update(y, questions[i])
            model = model.fit_one(x, y)

            if i >= 30000 and i % 30000 == 0:
                print(i, metric)
Example #5
def load_stats():
    for _, obj in inspect.getmembers(importlib.import_module('creme.stats'),
                                     inspect.isclass):
        try:
            if issubclass(obj, stats.Link):
                yield obj(stats.Shift(1), stats.Mean())
                continue

            sig = inspect.signature(obj)
            yield obj(**{
                param.name: param.default if param.default is not param.empty else 1
                for param in sig.parameters.values()
            })
        except ValueError:
            yield obj()
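
load_stats yields one instance of every statistic in creme.stats, substituting 1 for any parameter without a default. A hedged usage sketch that exercises only the univariate ones (bivariate statistics need two arguments per update):

from creme import stats

for stat in load_stats():
    if isinstance(stat, stats.Univariate):
        for x in (1.0, 2.0, 3.0):
            stat.update(x)
        print(stat.name, stat.get())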
Example #6
    def __init__(self,
                 n_factors=10,
                 bias_optimizer: optim.Optimizer = None,
                 latent_optimizer: optim.Optimizer = None,
                 loss: optim.losses.Loss = None,
                 l2_bias=0.,
                 l2_latent=0.,
                 weight_initializer: optim.initializers.Initializer = None,
                 latent_initializer: optim.initializers.Initializer = None,
                 clip_gradient=1e12,
                 seed: int = None):

        self.n_factors = n_factors
        self.u_bias_optimizer = optim.SGD() if bias_optimizer is None else copy.deepcopy(bias_optimizer)
        self.i_bias_optimizer = optim.SGD() if bias_optimizer is None else copy.deepcopy(bias_optimizer)
        self.u_latent_optimizer = optim.SGD() if latent_optimizer is None else copy.deepcopy(latent_optimizer)
        self.i_latent_optimizer = optim.SGD() if latent_optimizer is None else copy.deepcopy(latent_optimizer)
        self.loss = optim.losses.Squared() if loss is None else loss
        self.l2_bias = l2_bias
        self.l2_latent = l2_latent

        if weight_initializer is None:
            weight_initializer = optim.initializers.Zeros()
        self.weight_initializer = weight_initializer

        if latent_initializer is None:
            latent_initializer = optim.initializers.Normal(sigma=.1, seed=seed)
        self.latent_initializer = latent_initializer

        self.clip_gradient = clip_gradient
        self.seed = seed
        self.global_mean = stats.Mean()

        self.u_biases = collections.defaultdict(weight_initializer)
        self.i_biases = collections.defaultdict(weight_initializer)

        random_latents = functools.partial(self.latent_initializer,
                                           shape=self.n_factors)
        self.u_latents = collections.defaultdict(random_latents)
        self.i_latents = collections.defaultdict(random_latents)
Example #7
    def __init__(self, regressor: base.Regressor, window_size: int = None):
        self.regressor = regressor
        self.mean = stats.Mean() if window_size is None else stats.RollingMean(window_size)
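
The window_size switch above trades a global mean for a windowed one. A quick sketch of the difference:

from creme import stats

global_mean = stats.Mean()
rolling = stats.RollingMean(2)
for x in (1, 2, 3, 4):
    global_mean.update(x)
    rolling.update(x)
print(global_mean.get())  # 2.5 -- mean of everything seen
print(rolling.get())      # 3.5 -- mean of the last two values only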
Example #8
def get_all_estimators():

    ignored = (Creme2SKLBase, SKL2CremeBase, compat.PyTorch2CremeRegressor,
               compose.FuncTransformer, compose.Pipeline,
               ensemble.StackingBinaryClassifier, feature_extraction.Agg,
               feature_extraction.TargetAgg, feature_extraction.Differ,
               feature_selection.PoissonInclusion, imblearn.RandomOverSampler,
               imblearn.RandomUnderSampler, imblearn.RandomSampler,
               impute.PreviousImputer, impute.StatImputer,
               linear_model.FFMClassifier, linear_model.FFMRegressor,
               linear_model.FMClassifier, linear_model.FMRegressor,
               linear_model.HOFMClassifier, linear_model.HOFMRegressor,
               linear_model.SoftmaxRegression, meta.PredClipper,
               meta.TransformedTargetRegressor, multioutput.ClassifierChain,
               multioutput.RegressorChain, preprocessing.OneHotEncoder,
               reco.Baseline, reco.BiasedMF, reco.FunkMF, reco.RandomNormal,
               time_series.Detrender, time_series.GroupDetrender,
               time_series.SNARIMAX)

    def is_estimator(obj):
        return inspect.isclass(obj) and issubclass(obj, base.Estimator)

    for submodule in importlib.import_module('creme').__all__:

        if submodule == 'base':
            continue

        for _, obj in inspect.getmembers(
                importlib.import_module(f'creme.{submodule}'), is_estimator):

            if issubclass(obj, ignored):
                continue

            elif issubclass(obj, dummy.StatisticRegressor):
                inst = obj(statistic=stats.Mean())

            elif issubclass(obj, meta.BoxCoxRegressor):
                inst = obj(regressor=linear_model.LinearRegression())

            elif issubclass(obj, tree.RandomForestClassifier):
                inst = obj()

            elif issubclass(obj, ensemble.BaggingClassifier):
                inst = obj(linear_model.LogisticRegression())

            elif issubclass(obj, ensemble.BaggingRegressor):
                inst = obj(linear_model.LinearRegression())

            elif issubclass(obj, ensemble.AdaBoostClassifier):
                inst = obj(linear_model.LogisticRegression())

            elif issubclass(obj, ensemble.HedgeRegressor):
                inst = obj([
                    preprocessing.StandardScaler()
                    | linear_model.LinearRegression(intercept_lr=.1),
                    preprocessing.StandardScaler()
                    | linear_model.PARegressor(),
                ])

            elif issubclass(obj, feature_selection.SelectKBest):
                inst = obj(similarity=stats.PearsonCorrelation())

            elif issubclass(obj, linear_model.LinearRegression):
                inst = preprocessing.StandardScaler() | obj(intercept_lr=.1)

            elif issubclass(obj, linear_model.PARegressor):
                inst = preprocessing.StandardScaler() | obj()

            elif issubclass(obj, multiclass.OneVsRestClassifier):
                inst = obj(binary_classifier=linear_model.LogisticRegression())

            else:
                inst = obj()

            yield inst
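
Every yielded object is a ready-to-use estimator (pipelines included), so the generator plugs straight into a smoke test. A minimal sketch:

for estimator in get_all_estimators():
    print(type(estimator).__name__)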
Example #9
    assert isinstance(pickle.loads(pickle.dumps(stat)), stat.__class__)
    assert isinstance(copy.deepcopy(stat), stat.__class__)

    # Check the statistic has a working __str__ and name method
    assert isinstance(str(stat), str)

    if isinstance(stat, stats.Univariate):
        assert isinstance(stat.name, str)


@pytest.mark.parametrize(
    'stat, func',
    [
        (stats.Kurtosis(bias=True), sp_stats.kurtosis),
        (stats.Kurtosis(bias=False), functools.partial(sp_stats.kurtosis, bias=False)),
        (stats.Mean(), statistics.mean),
        (stats.Skew(bias=True), sp_stats.skew),
        (stats.Skew(bias=False), functools.partial(sp_stats.skew, bias=False)),
        (stats.Var(ddof=0), np.var),
        (stats.Var(), functools.partial(np.var, ddof=1))
    ]
)
def test_univariate(stat, func):

    # Silence warnings raised by the reference implementations
    np.warnings.filterwarnings('ignore')

    X = [random.random() for _ in range(30)]

    for i, x in enumerate(X):
        stat.update(x)
        try:
            assert math.isclose(stat.get(), func(X[:i + 1]), abs_tol=1e-10)
        except AssertionError:
            raise  # re-raise; the original handler body is elided in this snippet
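
The property under test: after every update, the streaming value must agree with the batch computation over the prefix seen so far. A self-contained sketch of the same check for stats.Mean:

import math
import random
import statistics

from creme import stats

X = [random.random() for _ in range(30)]
m = stats.Mean()
for i, x in enumerate(X):
    m.update(x)
    assert math.isclose(m.get(), statistics.mean(X[:i + 1]), abs_tol=1e-10)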
Example #10
    def __init__(self):
        self._mean = stats.Mean()
Example #11
def get_all_estimators():

    ignored = (CremeBaseWrapper, SKLBaseWrapper, base.Wrapper,
               compose.FuncTransformer, ensemble.StackingBinaryClassifier,
               feature_extraction.Agg, feature_extraction.TargetAgg,
               feature_extraction.Differ, linear_model.FMRegressor,
               linear_model.SoftmaxRegression, multioutput.ClassifierChain,
               multioutput.RegressorChain, naive_bayes.BernoulliNB,
               naive_bayes.ComplementNB, preprocessing.OneHotEncoder,
               tree.DecisionTreeClassifier)

    def is_estimator(obj):
        return inspect.isclass(obj) and issubclass(obj, base.Estimator)

    for submodule in importlib.import_module('creme').__all__:

        if submodule == 'base':
            continue

        for _, obj in inspect.getmembers(
                importlib.import_module(f'creme.{submodule}'), is_estimator):

            if issubclass(obj, ignored):
                continue

            if issubclass(obj, dummy.StatisticRegressor):
                inst = obj(statistic=stats.Mean())

            elif issubclass(obj, ensemble.BaggingClassifier):
                inst = obj(linear_model.LogisticRegression())

            elif issubclass(obj, ensemble.BaggingRegressor):
                inst = obj(linear_model.LinearRegression())

            elif issubclass(obj, ensemble.HedgeRegressor):
                inst = obj([
                    preprocessing.StandardScaler()
                    | linear_model.LinearRegression(intercept_lr=0.1),
                    preprocessing.StandardScaler()
                    | linear_model.PARegressor(),
                ])

            elif issubclass(obj, feature_selection.RandomDiscarder):
                inst = obj(n_to_keep=5)

            elif issubclass(obj, feature_selection.SelectKBest):
                inst = obj(similarity=stats.PearsonCorrelation())

            elif issubclass(obj, linear_model.LinearRegression):
                inst = preprocessing.StandardScaler() | obj(intercept_lr=0.1)

            elif issubclass(obj, linear_model.PARegressor):
                inst = preprocessing.StandardScaler() | obj()

            elif issubclass(obj, multiclass.OneVsRestClassifier):
                inst = obj(binary_classifier=linear_model.LogisticRegression())

            else:
                inst = obj()

            yield inst
Example #12
    def __init__(self, seed=None):
        super().__init__()
        self.variance = stats.Var()
        self.mean = stats.Mean()
        self.seed = seed
        self._rng = random.Random(seed)
Example #13
def roll_dataframe_stats(
    frame: pd.DataFrame,
    window=14,
    min_steps: int = 1,
    callback: Optional[Callable] = None,
    metric: Optional[metrics.ClassificationReport] = None):
    # A fresh report per call avoids the shared-mutable-default pitfall
    metric = metrics.ClassificationReport() if metric is None else metric
    windower = Windowing(frame,
                         window_size=window,
                         adaptive_window=False,
                         adapted_window_size=0)

    # while windower.has_next_observation:
    #     res = windower.step()
    #     x = res.to_dict(orient="record")[0]
    #     y = x.pop("y")

    #     if model_copy is not None:
    #         y_pred = boolean_flip(model.predict_one(x))
    #         model.fit_one(x, y)
    #         if y_pred != y:
    #             prob_up = model.predict_proba_one(x)
    #             prob_values = list(prob_up.values())

    step_count = 0
    history = []

    model = None
    model_copy = copy(model)

    _mean_down = stats.Mean()
    _mean_up = stats.Mean()
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    has_train = False
    model = ExactGPModel(torch.tensor([[]]), torch.tensor([]), likelihood)
    optimizer = torch.optim.Adam(
        [
            # Includes GaussianLikelihood parameters
            {
                'params': model.parameters()
            },
        ],
        lr=0.1)
    prior_training = None
    while windower.has_next_observation:
        if not windower.is_between_bounds:
            windower.step(incr_only=True)
            continue
        res = windower.step()
        y = res.pop("y")  # pop the target first so it never leaks into the features
        train_x = torch.tensor(res.values.astype(np.float32))
        train_y = torch.tensor(y.values.astype(np.float32))
        print(train_x.size())
        print(train_y.size())
        if has_train is False:
            model = ExactGPModel(train_x, train_y, likelihood)
            model.train()
            likelihood.train()
            has_train = True
        else:
            model.eval()
            likelihood.eval()
            logger.warning(model)
            predicted = model(train_x)
            model = model.get_fantasy_model(train_x, train_y)

        model.train()
        likelihood.train()
        optimizer = torch.optim.Adam(
            [
                # Includes GaussianLikelihood parameters
                {
                    'params': model.parameters()
                },
            ],
            lr=0.1)

        prior_information = train_x
        training_iter = 5
        for i in range(training_iter):
            optimizer.zero_grad()
            output = model(train_x)
            mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
            loss = -mll(output, train_y)
            loss.backward()
            print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' %
                  (i + 1, training_iter, loss.item(),
                   model.covar_module.base_kernel.lengthscale.item(),
                   model.likelihood.noise.item()))
            optimizer.step()

        if callback is not None:
            history.append(callback(res))
        step_count += 1  # count completed windows so the min_steps check is meaningful

    return step_count >= min_steps, history
Example #14
    assert isinstance(pickle.loads(pickle.dumps(stat)), stat.__class__)
    assert isinstance(copy.deepcopy(stat), stat.__class__)

    # Check the statistic has a working __str__ and name method
    assert isinstance(str(stat), str)

    if isinstance(stat, stats.Univariate):
        assert isinstance(stat.name, str)


@pytest.mark.parametrize(
    'stat, func',
    [
        (stats.Kurtosis(bias=True), sp_stats.kurtosis),
        (stats.Kurtosis(bias=False), functools.partial(sp_stats.kurtosis, bias=False)),
        (stats.Mean(), statistics.mean),
        (stats.Skew(bias=True), sp_stats.skew),
        (stats.Skew(bias=False), functools.partial(sp_stats.skew, bias=False)),
        (stats.Var(ddof=0), np.var),
        (stats.Var(), functools.partial(np.var, ddof=1))
    ]
)
def test_univariate(stat, func):

    # Silence warnings raised by the reference implementations
    np.warnings.filterwarnings('ignore')

    X = [random.random() for _ in range(30)]

    for i, x in enumerate(X):
        stat.update(x)
Example #15
    def _default_params(cls):
        return {'statistic': stats.Mean()}
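
_default_params supplies a ready statistic for estimators such as dummy.StatisticRegressor, mirroring what Example #8 does by hand. A hedged usage sketch of that baseline (fit_one/predict_one semantics assumed from creme's conventions):

from creme import dummy, stats

model = dummy.StatisticRegressor(statistic=stats.Mean())
model = model.fit_one({'x': 1}, y=10)
model = model.fit_one({'x': 2}, y=20)
print(model.predict_one({'x': 3}))  # 15.0 -- the running mean of the targets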