Example #1
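# Yields unit-test parameter sets: candidate scaler + linear regression pipelines
# that differ only in their SGD learning rate, together with a MAE metric.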
 def _unit_test_params(cls):
     yield {
         "models": [
             compose.Pipeline(
                 preprocessing.StandardScaler(),
                 linear_model.LinearRegression(optimizer=optim.SGD(
                     lr=1e-2)),
             ),
             compose.Pipeline(
                 preprocessing.StandardScaler(),
                 linear_model.LinearRegression(optimizer=optim.SGD(
                     lr=1e-1)),
             ),
         ],
         "metric":
         metrics.MAE(),
     }
     yield {
         "models": [
             compose.Pipeline(
                 preprocessing.StandardScaler(),
                 linear_model.LinearRegression(optimizer=optim.SGD(lr=lr)),
             ) for lr in [1e-4, 1e-3, 1e-2, 1e-1]
         ],
         "metric":
         metrics.MAE(),
     }
Example #2
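# Constructor for a baseline-style recommender: a running global mean plus
# per-user and per-item biases, each trained with its own deep copy of the
# optimizer (SGD by default) and stored in defaultdicts keyed by user/item id.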
    def __init__(
        self,
        optimizer: optim.Optimizer = None,
        loss: optim.losses.Loss = None,
        l2=0.0,
        initializer: optim.initializers.Initializer = None,
        clip_gradient=1e12,
        seed=None,
    ):
        super().__init__(seed=seed)

        self.optimizer = optim.SGD() if optimizer is None else copy.deepcopy(optimizer)
        self.u_optimizer = optim.SGD() if optimizer is None else copy.deepcopy(optimizer)
        self.i_optimizer = optim.SGD() if optimizer is None else copy.deepcopy(optimizer)
        self.loss = optim.losses.Squared() if loss is None else loss
        self.l2 = l2

        if initializer is None:
            initializer = optim.initializers.Zeros()
        self.initializer = initializer

        self.clip_gradient = clip_gradient
        self.global_mean = stats.Mean()
        self.u_biases: typing.DefaultDict[
            int, optim.initializers.Initializer
        ] = collections.defaultdict(initializer)
        self.i_biases: typing.DefaultDict[
            int, optim.initializers.Initializer
        ] = collections.defaultdict(initializer)
Example #3
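# Constructor for a matrix-factorization recommender: per-user and per-item
# latent vectors drawn from a Normal initializer, with separate deep-copied
# SGD optimizers for the user and item sides.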
    def __init__(self,
                 n_factors=10,
                 optimizer: optim.Optimizer = None,
                 loss: optim.losses.Loss = None,
                 l2=0.,
                 initializer: optim.initializers.Initializer = None,
                 clip_gradient=1e12,
                 seed: int = None):

        self.n_factors = n_factors
        self.u_optimizer = optim.SGD() if optimizer is None else copy.deepcopy(optimizer)
        self.i_optimizer = optim.SGD() if optimizer is None else copy.deepcopy(optimizer)
        self.loss = optim.losses.Squared() if loss is None else loss
        self.l2 = l2

        if initializer is None:
            initializer = optim.initializers.Normal(mu=0., sigma=.1, seed=seed)
        self.initializer = initializer

        self.clip_gradient = clip_gradient
        self.seed = seed

        random_latents = functools.partial(self.initializer, shape=self.n_factors)
        self.u_latents: typing.DefaultDict[
            int, optim.initializers.Initializer
        ] = collections.defaultdict(random_latents)
        self.i_latents: typing.DefaultDict[
            int, optim.initializers.Initializer
        ] = collections.defaultdict(random_latents)
Example #4
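# Constructor for a biased matrix factorization: separate optimizers,
# regularization strengths and initializers for the bias terms (zeros) and
# the latent factors (Normal with sigma=0.1).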
    def __init__(
        self,
        n_factors=10,
        bias_optimizer: optim.Optimizer = None,
        latent_optimizer: optim.Optimizer = None,
        loss: optim.losses.Loss = None,
        l2_bias=0.0,
        l2_latent=0.0,
        weight_initializer: optim.initializers.Initializer = None,
        latent_initializer: optim.initializers.Initializer = None,
        clip_gradient=1e12,
        seed: int = None,
    ):

        self.n_factors = n_factors
        self.u_bias_optimizer = (
            optim.SGD() if bias_optimizer is None else copy.deepcopy(bias_optimizer)
        )
        self.i_bias_optimizer = (
            optim.SGD() if bias_optimizer is None else copy.deepcopy(bias_optimizer)
        )
        self.u_latent_optimizer = (
            optim.SGD() if latent_optimizer is None else copy.deepcopy(latent_optimizer)
        )
        self.i_latent_optimizer = (
            optim.SGD() if latent_optimizer is None else copy.deepcopy(latent_optimizer)
        )
        self.loss = optim.losses.Squared() if loss is None else loss
        self.l2_bias = l2_bias
        self.l2_latent = l2_latent

        if weight_initializer is None:
            weight_initializer = optim.initializers.Zeros()
        self.weight_initializer = weight_initializer

        if latent_initializer is None:
            latent_initializer = optim.initializers.Normal(sigma=0.1, seed=seed)
        self.latent_initializer = latent_initializer

        self.clip_gradient = clip_gradient
        self.seed = seed
        self.global_mean = stats.Mean()

        self.u_biases: typing.DefaultDict[
            int, optim.initializers.Initializer
        ] = collections.defaultdict(weight_initializer)
        self.i_biases: typing.DefaultDict[
            int, optim.initializers.Initializer
        ] = collections.defaultdict(weight_initializer)

        random_latents = functools.partial(
            self.latent_initializer, shape=self.n_factors
        )
        self.u_latents: typing.DefaultDict[
            int, optim.initializers.Initializer
        ] = collections.defaultdict(random_latents)
        self.i_latents: typing.DefaultDict[
            int, optim.initializers.Initializer
        ] = collections.defaultdict(random_latents)
Example #5
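# Builds a SNARIMAX forecasting pipeline: date-based features feed a weekly
# seasonal model (m=7, sp=3) whose regressor is a scaled linear regression
# trained with SGD.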
def get_model():
    extract_features = compose.TransformerUnion(
        get_ordinal_date,
        get_day_distances,
    )

    model = (
        extract_features |
        time_series.SNARIMAX(
            p=0,
            d=0,
            q=0,
            m=7,
            sp=3,
            sq=0,
            regressor=(
                preprocessing.StandardScaler() |
                linear_model.LinearRegression(
                    intercept_init=0,
                    intercept_lr=0.3,
                    optimizer=optim.SGD(0.01),
                )
            ),
        )
    )
    return model
Example #6
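# Forwards all hyperparameters to a parent factorization-machine base class,
# then adds a dedicated optimizer for the pairwise interaction weights, each
# of which starts at 1.0.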
    def __init__(self, n_factors, weight_optimizer, latent_optimizer, int_weight_optimizer, loss,
                 sample_normalization, l1_weight, l2_weight, l1_latent, l2_latent, intercept,
                 intercept_lr, weight_initializer, latent_initializer, clip_gradient, seed):
        super().__init__(
            n_factors=n_factors,
            weight_optimizer=weight_optimizer,
            latent_optimizer=latent_optimizer,
            loss=loss,
            sample_normalization=sample_normalization,
            l1_weight=l1_weight,
            l2_weight=l2_weight,
            l1_latent=l1_latent,
            l2_latent=l2_latent,
            intercept=intercept,
            intercept_lr=intercept_lr,
            weight_initializer=weight_initializer,
            latent_initializer=latent_initializer,
            clip_gradient=clip_gradient,
            seed=seed
        )
        if int_weight_optimizer is None:
            self.int_weight_optimizer = optim.SGD(0.01)
        else:
            self.int_weight_optimizer = int_weight_optimizer

        one = functools.partial(float, 1)
        self.interaction_weights = collections.defaultdict(one)
Example #7
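# Bundles a scaler + logistic regression pipeline with an accuracy metric and
# a sample counter.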
 def __init__(self):
     optimizer = optim.SGD(0.1)
     self.model = compose.Pipeline(
         preprocessing.StandardScaler(),
         linear_model.LogisticRegression(optimizer))
     self.metric = metrics.Accuracy()
     self.count = 0
Example #8
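# Same idea as Example #1, but returning a single parameter set instead of
# yielding several.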
 def _unit_test_params(cls):
     return {
         "models": [
             compose.Pipeline(
                 preprocessing.StandardScaler(),
                 linear_model.LinearRegression(optimizer=optim.SGD(
                     lr=0.01)),
             ),
             compose.Pipeline(
                 preprocessing.StandardScaler(),
                 linear_model.LinearRegression(optimizer=optim.SGD(lr=0.1)),
             ),
         ],
         "metric":
         metrics.MAE(),
     }
Example #9
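# Perceptron-style configuration: hinge loss with a zero threshold and a unit
# learning rate for both the weights and the intercept.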
 def __init__(self,
              l2=.0,
              clip_gradient=1e12,
              initializer: optim.initializers.Initializer = None):
     super().__init__(optimizer=optim.SGD(1),
                      intercept_lr=1,
                      loss=optim.losses.Hinge(threshold=0.),
                      l2=l2,
                      clip_gradient=clip_gradient,
                      initializer=initializer)
Example #10
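# Multiclass linear classifier: one weight vector per class, each class using
# its own deep copy of the shared optimizer; cross-entropy loss by default.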
 def __init__(self,
              optimizer: optim.Optimizer = None,
              loss: optim.losses.MultiClassLoss = None,
              l2=0):
     if optimizer is None:
         optimizer = optim.SGD(0.01)
     new_optimizer = functools.partial(copy.deepcopy, optimizer)
      self.optimizers = collections.defaultdict(new_optimizer)  # type: ignore
     self.loss = optim.losses.CrossEntropy() if loss is None else loss
     self.l2 = l2
     self.weights = collections.defaultdict(
         functools.partial(collections.defaultdict, float))  # type: ignore
Example #11
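# Linear regression defaults: SGD(.01) optimizer, squared loss and
# zero-initialized weights.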
 def __init__(self, optimizer: optim.Optimizer = None, loss: optim.losses.RegressionLoss = None,
              l2=.0, intercept=0.,
              intercept_lr: typing.Union[optim.schedulers.Scheduler, float] = .01,
              clip_gradient=1e+12, initializer: optim.initializers.Initializer = None):
     super().__init__(
         optimizer=optim.SGD(.01) if optimizer is None else optimizer,
         loss=optim.losses.Squared() if loss is None else loss,
         intercept=intercept,
         intercept_lr=intercept_lr,
         l2=l2,
         clip_gradient=clip_gradient,
         initializer=initializer if initializer else optim.initializers.Zeros()
     )
Example #12
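# Feed-forward network wrapper: falls back to a squared loss and SGD(0.01)
# when no loss or optimizer is given.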
 def __init__(
     self,
     hidden_dims,
     activations,
     loss: optim.losses.Loss = None,
     optimizer: optim.Optimizer = None,
     seed: int = None,
 ):
     super().__init__(
         hidden_dims=hidden_dims,
         activations=activations,
         loss=loss or optim.losses.Squared(),
         optimizer=optimizer or optim.SGD(0.01),
         seed=seed,
     )
Example #13
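# Logistic regression defaults: SGD(.01) optimizer, log loss and
# zero-initialized weights.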
    def __init__(self,
                 optimizer: optim.Optimizer = None,
                 loss: optim.losses.BinaryLoss = None,
                 l2=.0,
                 intercept_init=0.,
                 intercept_lr: typing.Union[float,
                                            optim.schedulers.Scheduler] = .01,
                 clip_gradient=1e12,
                 initializer: optim.initializers.Initializer = None):

        super().__init__(
            optimizer=optim.SGD(.01) if optimizer is None else optimizer,
            loss=optim.losses.Log() if loss is None else loss,
            intercept_init=intercept_init,
            intercept_lr=intercept_lr,
            l2=l2,
            clip_gradient=clip_gradient,
            initializer=initializer
            if initializer else optim.initializers.Zeros())
Example #14
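# Trains river's LogisticRegression and sklearn's SGDClassifier on the same
# stream and asserts that weights and intercept match. (Newer scikit-learn
# versions spell the loss 'log_loss' rather than 'log'.)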
def test_log_reg_sklearn_coherence():
    """Checks that the sklearn and river implementations produce the same results."""

    ss = preprocessing.StandardScaler()
    cr = lm.LogisticRegression(optimizer=optim.SGD(.01))
    sk = sklm.SGDClassifier(learning_rate='constant',
                            eta0=.01,
                            alpha=.0,
                            loss='log')

    for x, y in datasets.Bananas():
        x = ss.learn_one(x).transform_one(x)
        cr.learn_one(x, y)
        sk.partial_fit([list(x.values())], [y], classes=[False, True])

    for i, w in enumerate(cr.weights.values()):
        assert math.isclose(w, sk.coef_[0][i])

    assert math.isclose(cr.intercept, sk.intercept_[0])
Example #15
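# One-class SVM-style setup: hinge loss, intercept initialized to 1 and an
# L2 penalty tied to nu (nu / 2).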
 def __init__(
     self,
     nu=0.1,
     optimizer: optim.Optimizer = None,
     intercept_lr: typing.Union[optim.schedulers.Scheduler, float] = 0.01,
     clip_gradient=1e12,
     initializer: optim.initializers.Initializer = None,
 ):
     super().__init__(
         optimizer=optim.SGD(0.01) if optimizer is None else optimizer,
         loss=optim.losses.Hinge(),
         intercept_init=1.0,
         intercept_lr=intercept_lr,
         l2=nu / 2,
         clip_gradient=clip_gradient,
         initializer=initializer
         if initializer else optim.initializers.Zeros(),
     )
     self.nu = nu
Example #16
def test_lin_reg_sklearn_coherence():
    """Checks that the sklearn and river implementations produce the same results."""
    class SquaredLoss:
        """sklearn removes the leading 2 from the gradient of the squared loss."""
        def gradient(self, y_true, y_pred):
            return y_pred - y_true

    ss = preprocessing.StandardScaler()
    cr = lm.LinearRegression(optimizer=optim.SGD(.01), loss=SquaredLoss())
    sk = sklm.SGDRegressor(learning_rate='constant', eta0=.01, alpha=.0)

    for x, y in datasets.TrumpApproval():
        x = ss.learn_one(x).transform_one(x)
        cr.learn_one(x, y)
        sk.partial_fit([list(x.values())], [y])

    for i, w in enumerate(cr.weights.values()):
        assert math.isclose(w, sk.coef_[i])

    assert math.isclose(cr.intercept, sk.intercept_[0])
Example #17
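# Tail of a @pytest.mark.parametrize block whose opening lines were truncated
# in the listing: it generates one test case per (model, optimizer,
# initializer) combination.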
                     id=f'{lm.__name__} - {optimizer} - {initializer}')
        for lm, dataset in [(lm.LinearRegression, datasets.TrumpApproval()
                             ), (lm.LogisticRegression, datasets.Bananas())]
        for optimizer, initializer in itertools.product(
            [
                optim.AdaBound(),
                optim.AdaDelta(),
                optim.AdaGrad(),
                optim.AdaMax(),
                optim.Adam(),
                optim.AMSGrad(),
                # TODO: check momentum optimizers
                # optim.Momentum(),
                # optim.NesterovMomentum(),
                optim.RMSProp(),
                optim.SGD()
            ],
            [
                optim.initializers.Zeros(),
                optim.initializers.Normal(mu=0, sigma=1, seed=42)
            ])
    ])
@pytest.mark.slow
def test_finite_differences(lm, dataset):
    """Checks the gradient of a linear model via finite differences.

    References
    ----------
    [^1]: [How to test gradient implementations](https://timvieira.github.io/blog/post/2017/04/21/how-to-test-gradient-implementations/)
    [^2]: [Stochastic Gradient Descent Tricks](https://cilvr.cs.nyu.edu/diglib/lsml/bottou-sgd-tricks-2012.pdf)
Example #18
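# Progressive validation of a logistic regression on the breast-cancer data
# streamed from scikit-learn: scale, predict on the unseen sample, then learn.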
from river import linear_model
from river import optim
from river import preprocessing
from sklearn import datasets
from sklearn import metrics
from river import stream

scaler = preprocessing.StandardScaler()
optimizer = optim.SGD(lr=0.01)
log_reg = linear_model.LogisticRegression(optimizer)

y_true = []
y_pred = []

for xi, yi in stream.iter_sklearn_dataset(datasets.load_breast_cancer(), shuffle=True, seed=42):

    # Scale the features
    xi_scaled = scaler.learn_one(xi).transform_one(xi)

    # Test the current model on the new "unobserved" sample
    yi_pred = log_reg.predict_proba_one(xi_scaled)
    # Train the model with the new sample
    log_reg.learn_one(xi_scaled, yi)

    # Store the truth and the prediction
    y_true.append(yi)
    y_pred.append(yi_pred[True])

print(f'ROC AUC: {metrics.roc_auc_score(y_true, y_pred):.3f}')
Example #19
import matplotlib.pyplot as plt

from river import compose
from river import linear_model
from river import metrics
from river import evaluate
from river import preprocessing
from river import optim
from river import stream

# Note: Xa, Xc, a0, contexts and reshape_vals are assumed to be defined
# earlier in the surrounding notebook; they are not part of this snippet.

model = preprocessing.StandardScaler()
model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))

s2 = stream.iter_array(Xa)
audit = [model.predict_one(i[0]) for i in s2]
plt.imshow(reshape_vals(audit))

s1 = stream.iter_array(Xc, a0.ev(contexts))
for c, v in s1:
    model.learn_one(c, v)

s2 = stream.iter_array(Xa)
audit = [model.predict_one(i[0]) for i in s2]
plt.imshow(reshape_vals(audit))

metric = metrics.RMSE()
evaluate.progressive_val_score(stream.iter_array(Xc, a0.ev(contexts)), model, metric, print_every=int(Xc.shape[0]/20))
Example #20
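# Tail of an evaluate_model helper whose opening lines were truncated in the
# listing: a progressive predict-then-learn loop followed by a plot of ground
# truth vs. predictions, here applied to a detrended linear model.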
        y_pred = model.predict_one(x)
        model.learn_one(x, y)

        # Update the error metric
        metric.update(y, y_pred)

        # Store the true value and the prediction
        # dates.append(x['secs_elapsed'])
        y_trues.append(y)
        y_preds.append(y_pred)

    # Plot the results
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.grid(alpha=0.75)
    ax.plot(y_trues, lw=3, color='#2ecc71', alpha=0.8, label='Ground truth')
    ax.plot(y_preds, lw=3, color='#e74c3c', alpha=0.8, label='Prediction')
    ax.legend()
    ax.set_title(metric)
    plt.show()


model = compose.Pipeline(
    ('ordinal_date', compose.FuncTransformer(get_ordinal_date)),
    ('scale', preprocessing.MinMaxScaler()),
    ('lin_reg',
     linear_model.LinearRegression(intercept_lr=0, optimizer=optim.SGD(0.03))),
)

model = time_series.Detrender(regressor=model, window_size=10)

evaluate_model(model)
Example #21
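# Same parametrize tail as Example #17, formatted differently; the opening
# lines of the decorator were truncated in the listing.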
        ) for lm, dataset in [
            (lm.LinearRegression, datasets.TrumpApproval()),
            (lm.LogisticRegression, datasets.Bananas()),
        ] for optimizer, initializer in itertools.product(
            [
                optim.AdaBound(),
                optim.AdaDelta(),
                optim.AdaGrad(),
                optim.AdaMax(),
                optim.Adam(),
                optim.AMSGrad(),
                # TODO: check momentum optimizers
                # optim.Momentum(),
                # optim.NesterovMomentum(),
                optim.RMSProp(),
                optim.SGD(),
            ],
            [
                optim.initializers.Zeros(),
                optim.initializers.Normal(mu=0, sigma=1, seed=42),
            ],
        )
    ],
)
@pytest.mark.slow
def test_finite_differences(lm, dataset):
    """Checks the gradient of a linear model via finite differences.

    References
    ----------
    [^1]: [How to test gradient implementations](https://timvieira.github.io/blog/post/2017/04/21/how-to-test-gradient-implementations/)
Example #22
    def build_model_4snarimax(self):
        # If a model backup exists, load it and update the model from start1 to start2.
        if os.path.exists(self.pck_filename):
            src_bck = pickle.load(open(self.pck_filename, 'rb'))
            model = src_bck.snarimax_model
            metric = src_bck.snarimax_metric
            self.snarimax_para = src_bck.snarimax_para
            self.snarimax_model = model
            self.snarimax_metric = metric

            start1 = src_bck.data.index[-1]
            start2 = self.data.index[-1]  # self.data.index[-self.data.index[-1].weekday()]

        else:  # if the backup does not exist, rebuild the model from the start
            p, d, q, m, sp, sd, sq = self.snarimax_para
            extract_features = compose.TransformerUnion(get_ordinal_date)
            model = (
                extract_features | time_series.SNARIMAX(
                    p=p,
                    d=d,
                    q=q,
                    m=m,
                    sp=sp,
                    sd=sd,
                    sq=sq,
                    regressor=(
                        # preprocessing.Normalizer() |
                        preprocessing.AdaptiveStandardScaler(alpha=0.1) |
                        preprocessing.StandardScaler() |
                        # preprocessing.RobustScaler(with_scaling=True) |
                        linear_model.LinearRegression(
                            intercept_init=0,
                            optimizer=optim.SGD(0.0001),  # important parameter
                            # optimizer=optim.AdaDelta(0.8, 0.00001),
                            # optimizer=optim.AMSGrad(lr=0.01, beta_1=0.8, beta_2=0.1),
                            intercept_lr=0.001,
                        )
                    ),
                )
            )

            metric = metrics.Rolling(metrics.MSE(), self.dd_historic)
            #metric = metrics.MSE()

            start1 = self.data.index[0]
            start2 = self.data.index[-1]  # self.data.index[-self.data.index[-1].weekday()]

        if start1 < start2:
            for t in pd.date_range(start1, start2, freq='D'):
                x, y = self.snarimax_data.loc[t][['ds', 'temp']].values
                y_pred = model.forecast(horizon=1, xs=[x])
                #print(x,y,y_pred[0],y-y_pred[0])
                model = model.learn_one(x, y)
                metric = metric.update(y, y_pred[0])

            self.snarimax_model = model
            self.snarimax_metric = metric
            with open(self.pck_filename, 'wb') as fh:
                pickle.dump(self, fh)

            #for t in pd.date_range(start1, start2):
            #    x = self.snarimax_data.loc[pd.date_range(t-timedelta(self.dd_historic),t)][['ds']].values
            #    y = self.snarimax_data.loc[pd.date_range(t-timedelta(self.dd_historic),t)][['temp']].values
            #    x = np.hstack(x)
            #    y = np.hstack(y)
            #    y_pred = model.forecast(horizon=self.dd_historic+1, xs=x)
            #    for i in range(0,self.dd_historic):
            #        model = model.learn_one(x[i], y[i])
            #        metric = metric.update(y[i], y_pred[i])

        return
Example #23
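# Progressive evaluation of a scaler + logistic regression pipeline on the
# Phishing dataset, tracking ROC AUC (from probabilities) and precision
# (from hard labels).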
from river import compose
from river import preprocessing
from river import linear_model
from river import metrics
from river import datasets
from river import optim

optimizer = optim.SGD(0.1)
model = compose.Pipeline(preprocessing.StandardScaler(),
                         linear_model.LogisticRegression(optimizer))

metric = metrics.ROCAUC()
precision = metrics.Precision()

for x, y in datasets.Phishing():
    y_proba = model.predict_proba_one(x)
    y_pred = model.predict_one(x)
    model.learn_one(x, y)
    metric.update(y, y_proba)      # ROC AUC is computed from probabilities
    precision.update(y, y_pred)    # Precision needs a hard label

print(metric)
print(precision)
Example #24
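# Parameter pairs for coherence tests between river's one-class SVM anomaly
# detector and, presumably, scikit-learn's SGDOneClassSVM; the snippet breaks
# off mid-dictionary.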
import math

import pytest
from sklearn import linear_model as sklm

from river import anomaly, datasets, optim

tests = {
    "Vanilla": (
        {
            "optimizer": optim.SGD(1e-2),
            "nu": 0.5
        },
        {
            "learning_rate": "constant",
            "eta0": 1e-2,
            "nu": 0.5
        },
    ),
    "No intercept": (
        {
            "optimizer": optim.SGD(1e-2),
            "nu": 0.5,
            "intercept_lr": 0.0
        },
        {
            "learning_rate": "constant",
            "eta0": 1e-2,
            "nu": 0.5,
            "fit_intercept": False
        },