def __init__(self, n_factors=10, optimizer: optim.Optimizer = None,
             loss: optim.losses.Loss = None, l2=0.,
             initializer: optim.initializers.Initializer = None,
             clip_gradient=1e12, seed: int = None):
    self.n_factors = n_factors
    self.u_optimizer = optim.SGD() if optimizer is None else copy.deepcopy(optimizer)
    self.i_optimizer = optim.SGD() if optimizer is None else copy.deepcopy(optimizer)
    self.loss = optim.losses.Squared() if loss is None else loss
    self.l2 = l2
    if initializer is None:
        initializer = optim.initializers.Normal(mu=0., sigma=.1, seed=seed)
    self.initializer = initializer
    self.clip_gradient = clip_gradient
    self.seed = seed
    random_latents = functools.partial(self.initializer, shape=self.n_factors)
    self.u_latents: typing.DefaultDict[int, optim.initializers.Initializer] = collections.defaultdict(random_latents)
    self.i_latents: typing.DefaultDict[int, optim.initializers.Initializer] = collections.defaultdict(random_latents)
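# A minimal standalone sketch (assumed names, not part of the snippet above) of the
# lazy-initialisation pattern behind self.u_latents / self.i_latents: wrapping the
# initializer in functools.partial and handing it to collections.defaultdict means a
# fresh random latent vector is drawn the first time an unseen user or item id is looked up.
import collections
import functools
import random

def normal_vector(shape, sigma=.1):
    # stand-in for optim.initializers.Normal(...) called with shape=n_factors
    return [random.gauss(0., sigma) for _ in range(shape)]

n_factors = 10
random_latents = functools.partial(normal_vector, shape=n_factors)
u_latents = collections.defaultdict(random_latents)

print(len(u_latents['user_42']))  # first access creates a 10-dimensional latent vector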
def __init__(self, optimizer: optim.Optimizer = None, loss: optim.losses.Loss = None,
             l2=0., initializer: optim.initializers.Initializer = None,
             clip_gradient=1e12):
    self.optimizer = optim.SGD() if optimizer is None else copy.deepcopy(optimizer)
    self.u_optimizer = optim.SGD() if optimizer is None else copy.deepcopy(optimizer)
    self.i_optimizer = optim.SGD() if optimizer is None else copy.deepcopy(optimizer)
    self.loss = optim.losses.Squared() if loss is None else loss
    self.l2 = l2
    if initializer is None:
        initializer = optim.initializers.Zeros()
    self.initializer = initializer
    self.clip_gradient = clip_gradient
    self.global_mean = stats.Mean()
    self.u_biases: typing.DefaultDict[int, optim.initializers.Initializer] = collections.defaultdict(initializer)
    self.i_biases: typing.DefaultDict[int, optim.initializers.Initializer] = collections.defaultdict(initializer)
def __init__(self, n_factors, weight_optimizer, latent_optimizer, int_weight_optimizer,
             loss, sample_normalization, l1_weight, l2_weight, l1_latent, l2_latent,
             intercept, intercept_lr, weight_initializer, latent_initializer,
             clip_gradient, seed):
    super().__init__(
        n_factors=n_factors,
        weight_optimizer=weight_optimizer,
        latent_optimizer=latent_optimizer,
        loss=loss,
        sample_normalization=sample_normalization,
        l1_weight=l1_weight,
        l2_weight=l2_weight,
        l1_latent=l1_latent,
        l2_latent=l2_latent,
        intercept=intercept,
        intercept_lr=intercept_lr,
        weight_initializer=weight_initializer,
        latent_initializer=latent_initializer,
        clip_gradient=clip_gradient,
        seed=seed
    )
    if int_weight_optimizer is None:
        self.int_weight_optimizer = optim.SGD(0.01)
    else:
        self.int_weight_optimizer = int_weight_optimizer
    one = functools.partial(float, 1)
    self.interaction_weights = collections.defaultdict(one)
def __init__(self, n_factors=10, bias_optimizer: optim.Optimizer = None,
             latent_optimizer: optim.Optimizer = None, loss: optim.losses.Loss = None,
             l2_bias=0., l2_latent=0.,
             weight_initializer: optim.initializers.Initializer = None,
             latent_initializer: optim.initializers.Initializer = None,
             clip_gradient=1e12, seed: int = None):
    self.n_factors = n_factors
    self.u_bias_optimizer = optim.SGD() if bias_optimizer is None else copy.deepcopy(bias_optimizer)
    self.i_bias_optimizer = optim.SGD() if bias_optimizer is None else copy.deepcopy(bias_optimizer)
    self.u_latent_optimizer = optim.SGD() if latent_optimizer is None else copy.deepcopy(latent_optimizer)
    self.i_latent_optimizer = optim.SGD() if latent_optimizer is None else copy.deepcopy(latent_optimizer)
    self.loss = optim.losses.Squared() if loss is None else loss
    self.l2_bias = l2_bias
    self.l2_latent = l2_latent
    if weight_initializer is None:
        weight_initializer = optim.initializers.Zeros()
    self.weight_initializer = weight_initializer
    if latent_initializer is None:
        latent_initializer = optim.initializers.Normal(sigma=.1, seed=seed)
    self.latent_initializer = latent_initializer
    self.clip_gradient = clip_gradient
    self.seed = seed
    self.global_mean = stats.Mean()
    self.u_biases = collections.defaultdict(weight_initializer)
    self.i_biases = collections.defaultdict(weight_initializer)
    random_latents = functools.partial(self.latent_initializer, shape=self.n_factors)
    self.u_latents = collections.defaultdict(random_latents)
    self.i_latents = collections.defaultdict(random_latents)
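# The __init__ above looks like a biased matrix-factorisation recommender. A minimal
# instantiation sketch, assuming the class is exposed as reco.BiasedMF (the class name and
# import path are assumptions; the keyword arguments mirror the signature shown above):
from creme import optim, reco  # assumed import path

model = reco.BiasedMF(
    n_factors=10,
    bias_optimizer=optim.SGD(.025),    # deep-copied into separate user/item bias optimizers
    latent_optimizer=optim.SGD(.05),   # deep-copied into separate user/item latent optimizers
    l2_bias=.01,
    l2_latent=.01,
    seed=42
)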
def __init__(self, l2=.0, clip_gradient=1e12,
             initializer: optim.initializers.Initializer = None):
    super().__init__(
        optimizer=optim.SGD(1),
        intercept_lr=1,
        loss=optim.losses.Hinge(threshold=0.),
        l2=l2,
        clip_gradient=clip_gradient,
        initializer=initializer
    )
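# With optimizer=optim.SGD(1) and loss=optim.losses.Hinge(threshold=0.), the resulting
# update is the classic perceptron rule: the weights only move when an example is
# misclassified. A plain-Python sketch of that rule (illustrative only, not the library's code):
def perceptron_update(w, b, x, y):
    """x is a dict of features, y is +1 or -1."""
    y_pred = sum(w.get(i, 0.) * xi for i, xi in x.items()) + b
    if y * y_pred <= 0:                   # hinge loss with threshold 0 is active
        for i, xi in x.items():
            w[i] = w.get(i, 0.) + y * xi  # learning rate of 1
        b += y
    return w, b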
def __init__(self, optimizer: optim.Optimizer = None,
             loss: optim.losses.RegressionLoss = None, l2=.0, intercept=0.,
             intercept_lr: typing.Union[optim.schedulers.Scheduler, float] = .01,
             clip_gradient=1e+12,
             initializer: optim.initializers.Initializer = None):
    super().__init__(
        optimizer=optim.SGD(.01) if optimizer is None else optimizer,
        loss=optim.losses.Squared() if loss is None else loss,
        intercept=intercept,
        intercept_lr=intercept_lr,
        l2=l2,
        clip_gradient=clip_gradient,
        initializer=initializer if initializer else optim.initializers.Zeros()
    )
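# A short usage sketch for the linear regression defined above, mirroring how it is
# instantiated in the sklearn-coherence test further down (fit_one and predict_one are
# creme's single-sample methods; the feature values here are made up):
from creme import linear_model, optim

lin_reg = linear_model.LinearRegression(optimizer=optim.SGD(.01), l2=.001)
x = {'ordinal_date': 736389, 'gallup': 43.8}
lin_reg = lin_reg.fit_one(x, 44.1)
print(lin_reg.predict_one(x))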
def __init__(self, optimizer: optim.Optimizer = None,
             loss: optim.losses.MultiClassLoss = None, l2=0):
    if optimizer is None:
        optimizer = optim.SGD(0.01)
    new_optimizer = functools.partial(copy.deepcopy, optimizer)
    self.optimizers = collections.defaultdict(new_optimizer)  # type: ignore
    self.loss = optim.losses.CrossEntropy() if loss is None else loss
    self.l2 = l2
    self.weights = collections.defaultdict(
        functools.partial(collections.defaultdict, float))  # type: ignore
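# The defaultdicts above give every class its own optimizer copy and its own weight vector.
# A standalone illustration of that pattern (generic Python, not the library's code):
import collections
import copy
import functools

class DummyOptimizer:
    def __init__(self, lr):
        self.lr = lr

base = DummyOptimizer(lr=.01)
optimizers = collections.defaultdict(functools.partial(copy.deepcopy, base))
weights = collections.defaultdict(functools.partial(collections.defaultdict, float))

optimizers['cat']                  # first access deep-copies the base optimizer
weights['cat']['whiskers'] += 1.   # nested defaultdict: missing weights start at 0.
print(optimizers['cat'] is base)   # False, each class gets an independent copy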
def test_log_reg_sklearn_coherence():
    """Checks that the sklearn and creme implementations produce the same results."""

    ss = preprocessing.StandardScaler()
    cr = lm.LogisticRegression(optimizer=optim.SGD(.01))
    sk = sklm.SGDClassifier(learning_rate='constant', eta0=.01, alpha=.0, loss='log')

    for x, y in datasets.Bananas():
        x = ss.fit_one(x).transform_one(x)
        cr.fit_one(x, y)
        sk.partial_fit([list(x.values())], [y], classes=[False, True])

    for i, w in enumerate(cr.weights.values()):
        assert math.isclose(w, sk.coef_[0][i])

    assert math.isclose(cr.intercept, sk.intercept_[0])
def test_lin_reg_sklearn_coherence():
    """Checks that the sklearn and creme implementations produce the same results."""

    class SquaredLoss:
        """sklearn removes the leading 2 from the gradient of the squared loss."""

        def gradient(self, y_true, y_pred):
            return y_pred - y_true

    ss = preprocessing.StandardScaler()
    cr = lm.LinearRegression(optimizer=optim.SGD(.01), loss=SquaredLoss())
    sk = sklm.SGDRegressor(learning_rate='constant', eta0=.01, alpha=.0)

    for x, y in datasets.TrumpApproval():
        x = ss.fit_one(x).transform_one(x)
        cr.fit_one(x, y)
        sk.partial_fit([list(x.values())], [y])

    for i, w in enumerate(cr.weights.values()):
        assert math.isclose(w, sk.coef_[i])

    assert math.isclose(cr.intercept, sk.intercept_[0])
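# Why the local SquaredLoss class is needed: per its docstring, creme's squared loss keeps
# the leading 2 in its gradient, 2 * (y_pred - y_true), while sklearn drops it, so with the
# same constant learning rate the two implementations would otherwise diverge. A quick
# numeric check of the convention difference (values chosen arbitrarily):
y_true, y_pred = 3., 2.5
creme_grad = 2 * (y_pred - y_true)   # -1.0, creme's convention
sklearn_grad = y_pred - y_true       # -0.5, sklearn's convention
assert creme_grad == 2 * sklearn_grad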
        y = [[y]]
        self.model.train_on_batch(x, y)
        return self


class KerasRegressor(KerasModel, base.Regressor):

    def predict_one(self, x):
        x = [[list(x.values())]]
        return self.model.predict_on_batch(x)[0][0]


KERAS_EPS = K.epsilon()
LR = .01

OPTIMIZERS = {
    'SGD': (
        optim.SGD(lr=LR),
        functools.partial(torch.optim.SGD, lr=LR),
        optimizers.SGD(lr=LR)
    ),
    'Adam': (
        optim.Adam(lr=LR, beta_1=.9, beta_2=.999, eps=KERAS_EPS),
        functools.partial(torch.optim.Adam, lr=LR, betas=(.9, .999), eps=KERAS_EPS),
        optimizers.Adam(lr=LR, beta_1=.9, beta_2=.999)
    ),
    'AdaDelta': (
        optim.AdaDelta(rho=.95, eps=KERAS_EPS),
        functools.partial(torch.optim.Adadelta, rho=.95, eps=KERAS_EPS),
        optimizers.Adadelta(rho=.95)
    ),
    'AdaGrad': (
        optim.AdaGrad(lr=LR, eps=KERAS_EPS),
        functools.partial(torch.optim.Adagrad, lr=LR),
        optimizers.Adagrad(lr=LR)
    ),
    'Momentum': (
        optim.Momentum(lr=LR, rho=.1),
            (lm.LinearRegression, datasets.TrumpApproval()),
            (lm.LogisticRegression, datasets.Bananas())
        ]
        for optimizer, initializer in itertools.product(
            [
                optim.AdaBound(),
                optim.AdaDelta(),
                optim.AdaGrad(),
                optim.AdaMax(),
                optim.Adam(),
                optim.AMSGrad(),
                # TODO: check momentum optimizers
                # optim.Momentum(),
                # optim.NesterovMomentum(),
                optim.RMSProp(),
                optim.SGD()
            ],
            [
                optim.initializers.Zeros(),
                optim.initializers.Normal(mu=0, sigma=1, seed=42)
            ]
        )
    ]
)
@pytest.mark.slow
def test_finite_differences(lm, dataset):
    """Checks the gradient of a linear model via finite differences.

    References
    ----------
    [^1]: [How to test gradient implementations](https://timvieira.github.io/blog/post/2017/04/21/how-to-test-gradient-implementations/)
    [^2]: [Stochastic Gradient Descent Tricks](https://cilvr.cs.nyu.edu/diglib/lsml/bottou-sgd-tricks-2012.pdf)
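# The test above validates the analytic gradients of the linear models against central
# finite differences. The generic check looks roughly like this (a toy 1-d sketch, not the
# actual test body):
def finite_difference_check(loss, grad, w, h=1e-6, tol=1e-6):
    # numeric estimate of the derivative: (L(w + h) - L(w - h)) / (2h)
    numeric = (loss(w + h) - loss(w - h)) / (2 * h)
    return abs(numeric - grad(w)) < tol

# example with L(w) = (w - 3)^2, whose analytic gradient is 2 * (w - 3)
assert finite_difference_check(lambda w: (w - 3) ** 2, lambda w: 2 * (w - 3), w=1.5)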
def __init__(self, data_collector):
    dc = data_collector
    data = dc.get_data_frame()
    metric = metrics.MAE()

    # delete NA examples
    data = data.dropna()

    # shuffle data
    X_y = data.sample(frac=1).reset_index(drop=True)

    data = X_y[['x', 'y', 'theta']].to_dict('records')
    target_1 = X_y[['sensor_1']]
    target_2 = X_y[['sensor_3']]
    target_3 = X_y[['sensor_5']]
    target_4 = X_y[['sensor_7']]

    print('constructing models')

    # construct our pipeline
    model_1 = Pipeline([
        ("scale", StandardScaler()),
        ("learn", ensemble.HedgeRegressor([
            linear_model.LinearRegression(optim.SGD()),
            linear_model.LinearRegression(optim.RMSProp()),
            linear_model.LinearRegression(optim.Adam())
        ]))
    ])

    # construct our pipeline
    model_2 = Pipeline([
        ("scale", StandardScaler()),
        ("learn", ensemble.HedgeRegressor([
            linear_model.LinearRegression(optim.SGD()),
            linear_model.LinearRegression(optim.RMSProp()),
            linear_model.LinearRegression(optim.Adam())
        ]))
    ])

    # construct our pipeline
    model_3 = Pipeline([
        ("scale", StandardScaler()),
        ("learn", ensemble.HedgeRegressor([
            linear_model.LinearRegression(optim.SGD()),
            linear_model.LinearRegression(optim.RMSProp()),
            linear_model.LinearRegression(optim.Adam())
        ]))
    ])

    # construct our pipeline
    model_4 = Pipeline([
        ("scale", StandardScaler()),
        ("learn", ensemble.HedgeRegressor([
            linear_model.LinearRegression(optim.SGD()),
            linear_model.LinearRegression(optim.RMSProp()),
            linear_model.LinearRegression(optim.Adam())
        ]))
    ])

    print('start training')

    for x, y_1, y_2, y_3, y_4 in zip(
        data,
        target_1.values,
        target_2.values,
        target_3.values,
        target_4.values,
    ):
        model_1, y_pred_1 = self._update_model(model_1, x, y_1)
        model_2, y_pred_2 = self._update_model(model_2, x, y_2)
        model_3, y_pred_3 = self._update_model(model_3, x, y_3)
        model_4, y_pred_4 = self._update_model(model_4, x, y_4)

    self.models = [model_1, model_2, model_3, model_4]
    print('done...')
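# _update_model is referenced above but not shown. Judging by how it is called (it returns
# both the model and a prediction), it presumably performs progressive validation: predict
# before fitting. A hypothetical sketch of such a helper (name and body are assumptions):
def _update_model(self, model, x, y):
    y_pred = model.predict_one(x)   # predict before the model has seen this label
    model = model.fit_one(x, y)     # then learn from the example
    return model, y_pred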
                type=int, required=True,
                help="# of feature columns in the CSV file (excluding class column)")
args = vars(ap.parse_args())

# construct our data dictionary which maps the data types of the
# columns in the CSV file to built-in data types
print("[INFO] building column names...")
types = {"feat_{}".format(i): float for i in range(0, args["cols"])}
types["class"] = int

# create a CSV data generator for the extracted Keras features
dataset = stream.iter_csv(args["csv"], target="class", converters=types)

# construct our pipeline (maybe set to .0000003)
model = Pipeline(
    StandardScaler(),
    LogisticRegression(optimizer=optim.SGD(.0000001)))

# initialize our metric
print("[INFO] starting training...")
metric = Accuracy()

# loop over the dataset
for (i, (X, y)) in enumerate(dataset):
    # make predictions on the current set of features, train the
    # model on the features, and then update our metric
    preds = model.predict_one(X)
    model = model.fit_one(X, y)
    metric = metric.update(y, preds)
    print("[INFO] update {} - {}".format(i, metric))
    if i == 2500:
        break
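# A small sketch of what the converters mapping and one parsed row might look like for,
# say, three feature columns (values are made up; stream.iter_csv applies each converter
# to its column and yields (features, target) pairs):
types = {"feat_0": float, "feat_1": float, "feat_2": float, "class": int}
# one yielded pair would then resemble:
X = {"feat_0": 0.12, "feat_1": -1.4, "feat_2": 0.88}
y = 1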