import numpy as np
from math import exp, fabs, sqrt

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils import check_array, check_X_y
from sklearn.utils.multiclass import check_classification_targets, unique_labels
from sklearn.utils.validation import check_is_fitted

# sigmoid, logloss, compute_class_weight and ClassificationTrainTracker are
# project-local helpers and are assumed to be importable from this package.


class LogisticRegressionWithAdadelta(BaseEstimator, ClassifierMixin):

    def __init__(self, rho=0.9, regw=.01, epochs=1, rate=1000, class_weight=None):
        self.rho = rho
        self.regw = regw
        self.epochs = epochs
        self.class_weight = class_weight
        self.classes_ = None
        self.X_ = None
        self.y_ = None
        self.E_ = None
        self.Edx_ = None
        self.w_ = None
        self.fit_flag_ = False
        self.train_tracker_ = ClassificationTrainTracker(rate)

    def _clear_params(self):
        # All model parameters are reset to their initial values (see the __init__ description).
        self.classes_ = None
        self.class_weight_ = None
        self.X_ = None
        self.y_ = None
        self.E_ = None
        self.Edx_ = None
        self.w_ = None
        self.fit_flag_ = False
        self.train_tracker_.clear()

    def _update_class_weight(self, _X, _y):
        if self.class_weight is None:
            self.class_weight_ = compute_class_weight(_y)
        else:
            self.class_weight_ = self.class_weight

    def _update(self, y, p, x):
        # Adadelta per-coordinate update: decay-average the squared gradients (E_)
        # and squared parameter deltas (Edx_), then step with the ratio of their roots.
        for idxi, xi in enumerate(x):
            if xi != 0.0:
                grad = self.class_weight_[y] * ((p - y) * xi + self.regw * self.w_[idxi])
                self.E_[idxi] = self.rho * self.E_[idxi] + (1.0 - self.rho) * grad * grad
                deltax = - (sqrt(self.Edx_[idxi] + 1e-8) / sqrt(self.E_[idxi] + 1e-8)) * grad
                self.Edx_[idxi] = self.rho * self.Edx_[idxi] + (1.0 - self.rho) * deltax * deltax
                self.w_[idxi] += deltax

    def _train(self, X, y, n_samples, _):
        iter_idx = np.arange(n_samples)
        np.random.shuffle(iter_idx)
        for t, data_idx in enumerate(iter_idx):
            curr_x = X[data_idx, :]
            curr_y = y[data_idx]
            wtx = np.dot(curr_x, self.w_)
            curr_p = sigmoid(wtx)
            log_likelihood = logloss(curr_p, curr_y)
            self.train_tracker_.track(log_likelihood)
            self._update(curr_y, curr_p, curr_x)

    def fit(self, X, y):
        if self.fit_flag_:
            self._clear_params()
        X, y = check_X_y(X, y)
        check_classification_targets(y)
        self.classes_ = unique_labels(y)
        self.X_ = X
        self.y_ = y

        # setup parameters
        n_samples, n_features = X.shape
        if self.E_ is None and self.w_ is None:
            self.E_ = np.zeros(n_features)
            self.Edx_ = np.zeros(n_features)
            self.w_ = np.zeros(n_features)

        self._update_class_weight(X, y)
        self.train_tracker_.start_train()
        for epoch in range(self.epochs):
            self.train_tracker_.start_epoch(epoch)
            self._train(X, y, n_samples, n_features)
            self.train_tracker_.end_epoch()
        self.train_tracker_.end_train()
        self.fit_flag_ = True
        return self

    def predict(self, X):
        check_is_fitted(self, ['X_', 'y_'])
        X = check_array(X)
        n_samples = X.shape[0]
        y_test_predict = np.zeros(n_samples)
        for t in range(n_samples):
            wtx = np.dot(X[t, :], self.w_)
            p = sigmoid(wtx)
            y_test_predict[t] = 0. if p < 0.5 else 1.
        return y_test_predict

    def raw_predict(self, X):
        check_is_fitted(self, ['X_', 'y_'])
        X = check_array(X)
        y = np.dot(X, self.w_)
        for idxi in range(y.size):
            y[idxi] = sigmoid(y[idxi])
        return y
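# ---------------------------------------------------------------------------
# A minimal usage sketch for LogisticRegressionWithAdadelta on a synthetic
# binary problem. Assumptions: the project-local helpers above are available,
# and class weights are passed explicitly (as a dict keyed by label) rather
# than relying on compute_class_weight. Not part of the original module.
# ---------------------------------------------------------------------------
def _demo_adadelta():
    rng = np.random.RandomState(42)
    X = rng.randn(200, 5)
    # Toy target: roughly linearly separable in the first two features.
    y = (X[:, 0] + 0.5 * X[:, 1] > 0).astype(float)

    clf = LogisticRegressionWithAdadelta(rho=0.9, regw=0.01, epochs=3,
                                         class_weight={0.0: 1.0, 1.0: 1.0})
    clf.fit(X, y)
    preds = clf.predict(X)       # hard 0/1 labels
    scores = clf.raw_predict(X)  # sigmoid scores in (0, 1)
    print("train accuracy:", np.mean(preds == y), "mean score:", scores.mean())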
class FMWithSGD(BaseEstimator, ClassifierMixin):

    def __init__(self, eta=0.001, k0=True, k1=True, reg0=.0, regw=.0, regv=.0,
                 n_factors=2, epochs=1, rate=10000, class_weight=None):
        self.eta = eta
        self.k0 = k0
        self.k1 = k1
        self.reg0 = reg0
        self.regw = regw
        self.regv = regv
        self.n_factors = n_factors
        self.epochs = epochs
        self.class_weight = class_weight
        self.classes_ = None
        self.X_ = None
        self.y_ = None
        self.w0_ = None
        self.w_ = None
        self.V_ = None
        self.fit_flag_ = False
        self.train_tracker_ = ClassificationTrainTracker(rate)

    def _clear_params(self):
        # All model parameters are reset to their initial values (see the __init__ description).
        self.classes_ = None
        self.class_weight_ = None
        self.log_likelihood_ = 0
        self.loss_ = []
        # self.target_ratio_ = 0.
        self.X_ = None
        self.y_ = None
        self.w0_ = None
        self.w_ = None
        self.V_ = None
        self.fit_flag_ = False
        self.train_tracker_.clear()

    def _update_class_weight(self, _X, _y):
        if self.class_weight is None:
            self.class_weight_ = compute_class_weight(_y)
        else:
            self.class_weight_ = self.class_weight

    def _update(self, curr_x, g_sum, multiplier):
        # SGD step for the bias (w0_), the linear weights (w_) and the factor matrix (V_).
        if self.k0:
            self.w0_ -= self.eta * (multiplier + 2. * self.reg0 * self.w0_)
        if self.k1:
            for idx in range(curr_x.size):
                if curr_x[idx] != 0.0:
                    self.w_[idx] -= self.eta * (multiplier * curr_x[idx] + self.regw * self.w_[idx])
        for f in range(self.n_factors):
            for idx in range(curr_x.size):
                if curr_x[idx] != 0.0:
                    grad = g_sum[f] * curr_x[idx] - self.V_[f, idx] * (curr_x[idx] * curr_x[idx])
                    self.V_[f, idx] -= self.eta * (multiplier * grad + self.regv * self.V_[f, idx])

    def _predict_with_feedback(self, curr_x, g_sum, g_sum_sqr):
        result = 0.
        if self.k0:
            result += self.w0_
        if self.k1:
            result += np.dot(self.w_, curr_x)
        for f in range(self.n_factors):
            # v = self.V_[f, :]
            # g_sum[f] = float(0.)
            # g_sum_sqr[f] = float(0.)
            #
            # for idx in range(curr_x.size):
            #     d = v[idx] * curr_x[idx]
            #     g_sum[f] += d
            #     g_sum_sqr[f] += (d * d)
            #
            d = self.V_[f, :] * curr_x
            g_sum[f] = np.sum(d)
            g_sum_sqr[f] = np.dot(d, d)
            result += 0.5 * (g_sum[f] * g_sum[f] - g_sum_sqr[f])
        return result

    def _predict(self, curr_x):
        result = 0.
        if self.k0:
            result += self.w0_
        if self.k1:
            result += np.dot(self.w_, curr_x)
        for f in range(self.n_factors):
            d = self.V_[f, :] * curr_x
            g_sum = np.sum(d)
            g_sum_sqr = np.dot(d, d)
            result += 0.5 * (g_sum * g_sum - g_sum_sqr)
        return result

    def _train(self, X, y, n_samples, _):
        iter_idx = np.arange(n_samples)
        np.random.shuffle(iter_idx)
        g_sum = np.zeros(self.n_factors)
        g_sum_sqr = np.zeros(self.n_factors)
        for t, data_idx in enumerate(iter_idx):
            curr_x = X[data_idx, :]
            curr_y = y[data_idx]
            curr_y_adj = -1. if curr_y == 0. else 1.
            p = self._predict_with_feedback(curr_x, g_sum, g_sum_sqr)
            # TODO: multiplier can go out of control if the learning rate is too big, why?
            multiplier = -curr_y_adj * (1. - 1. / (1. + exp(-curr_y_adj * p))) * self.class_weight_[curr_y]
            log_likelihood = logloss(p, curr_y)
            self.train_tracker_.track(log_likelihood)
            self._update(curr_x, g_sum, multiplier)

    def fit(self, X, y):
        if self.fit_flag_:
            self._clear_params()
        X, y = check_X_y(X, y)
        check_classification_targets(y)
        self.classes_ = unique_labels(y)
        self.X_ = X
        self.y_ = y

        # setup parameters
        n_samples, n_features = X.shape
        if self.w_ is None:
            self.w0_ = 0
            self.w_ = np.zeros(n_features)
            self.V_ = np.random.normal(0, 0.001, (self.n_factors, n_features))

        self._update_class_weight(X, y)
        self.train_tracker_.start_train()
        for n_epoch in range(self.epochs):
            self.train_tracker_.start_epoch(n_epoch)
            self._train(X, y, n_samples, n_features)
            self.train_tracker_.end_epoch()
        self.train_tracker_.end_train()
        self.fit_flag_ = True
        return self

    def predict(self, X):
        check_is_fitted(self, ['X_', 'y_'])
        X = check_array(X)
        n_samples = X.shape[0]
        y_test_predict = np.zeros(n_samples)
        g_sum = np.zeros(self.n_factors)
        g_sum_sqr = np.zeros(self.n_factors)
        for t in range(n_samples):
            p = sigmoid(self._predict_with_feedback(X[t, :], g_sum, g_sum_sqr))
            y_test_predict[t] = 0. if p < 0.5 else 1.
        return y_test_predict

    def raw_predict(self, X):
        check_is_fitted(self, ['X_', 'y_'])
        X = check_array(X)
        n_samples = X.shape[0]
        y_test_predict = np.zeros(n_samples)
        g_sum = np.zeros(self.n_factors)
        g_sum_sqr = np.zeros(self.n_factors)
        for t in range(n_samples):
            y_test_predict[t] = sigmoid(self._predict_with_feedback(X[t, :], g_sum, g_sum_sqr))
        return y_test_predict
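# ---------------------------------------------------------------------------
# A minimal usage sketch for FMWithSGD on a synthetic problem whose target
# depends on a pairwise feature interaction, which is what the factorized
# second-order term is meant to capture. Assumptions: the project-local
# helpers are available, class weights are passed explicitly, and the eta
# and n_factors values are illustrative. Not part of the original module.
# ---------------------------------------------------------------------------
def _demo_fm():
    rng = np.random.RandomState(0)
    X = rng.randint(0, 2, size=(300, 8)).astype(float)      # binary indicator features
    y = ((X[:, 0] * X[:, 1] + X[:, 2]) > 0).astype(float)   # target with an interaction term

    clf = FMWithSGD(eta=0.01, n_factors=4, epochs=5,
                    class_weight={0.0: 1.0, 1.0: 1.0})
    clf.fit(X, y)
    print("train accuracy:", np.mean(clf.predict(X) == y))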
class LogisticRegressionFTRL(BaseEstimator, ClassifierMixin):

    def __init__(self, alpha=0.05, beta=0.05, l1=.01, l2=.01, epochs=1, rate=50000, class_weight=None):
        self.alpha = alpha
        self.beta = beta
        self.l1 = l1
        self.l2 = l2
        self.epochs = epochs
        self.class_weight = class_weight
        self.classes_ = None
        self.X_ = None
        self.y_ = None
        self.z_ = None
        self.n_ = None
        self.fit_flag_ = False
        self.train_tracker_ = ClassificationTrainTracker(rate)

    def _clear_params(self):
        # All model parameters are reset to their initial values (see the __init__ description).
        self.classes_ = None
        self.class_weight_ = None
        self.log_likelihood_ = 0
        self.loss_ = []
        self.target_ratio_ = 0.
        self.X_ = None
        self.y_ = None
        self.z_ = None
        self.n_ = None
        self.fit_flag_ = False

    def _update_class_weight(self, _X, _y):
        if self.class_weight is None:
            self.class_weight_ = compute_class_weight(_y)
        else:
            self.class_weight_ = self.class_weight

    def _update(self, y, p, x, w):
        # FTRL-Proximal statistics: z_ accumulates the adjusted gradients,
        # n_ accumulates the squared gradients per coordinate.
        d = (p - y)
        for idxi in range(len(x)):  # for idxi, xi in enumerate(x):
            g = d * x[idxi]
            s = (sqrt(self.n_[idxi] + g * g) - sqrt(self.n_[idxi])) / self.alpha
            self.z_[idxi] += self.class_weight_[y] * (g - s * w[idxi])
            self.n_[idxi] += self.class_weight_[y] * (g * g)

    def _get_w(self, idxi):
        # Closed-form per-coordinate weight; coordinates with |z| <= l1 stay exactly zero.
        if fabs(self.z_[idxi]) <= self.l1:
            return 0.
        else:
            sign = 1. if self.z_[idxi] >= 0 else -1.
            return - (self.z_[idxi] - sign * self.l1) / (self.l2 + (self.beta + sqrt(self.n_[idxi])) / self.alpha)

    def _train(self, X, y, n_samples, n_features):
        iter_idx = np.arange(n_samples)
        np.random.shuffle(iter_idx)
        for t, data_idx in enumerate(iter_idx):
            curr_x = X[data_idx, :]
            curr_y = y[data_idx]
            wtx = 0.
            curr_w = {}
            for idxi in range(n_features):
                curr_w[idxi] = self._get_w(idxi)
                wtx += (curr_w[idxi] * curr_x[idxi])
            curr_p = sigmoid(wtx)
            log_likelihood = logloss(curr_p, curr_y)
            self.train_tracker_.track(log_likelihood)
            self._update(curr_y, curr_p, curr_x, curr_w)

    def fit(self, X, y):
        if self.fit_flag_:
            self._clear_params()
        X, y = check_X_y(X, y)
        check_classification_targets(y)
        self.classes_ = unique_labels(y)
        self.X_ = X
        self.y_ = y

        # setup parameters
        n_samples, n_features = X.shape
        if self.z_ is None and self.n_ is None:
            self.z_ = np.zeros(n_features)
            self.n_ = np.zeros(n_features)

        self._update_class_weight(X, y)
        self.train_tracker_.start_train()
        for epoch in range(self.epochs):
            self.train_tracker_.start_epoch(epoch)
            self._train(X, y, n_samples, n_features)
            self.train_tracker_.end_epoch()
        self.train_tracker_.end_train()
        self.fit_flag_ = True
        return self

    def predict(self, X):
        check_is_fitted(self, ['X_', 'y_'])
        X = check_array(X)
        n_samples, n_features = X.shape
        y_test_predict = np.zeros(n_samples)
        w = np.zeros(n_features)
        for idxi in range(n_features):
            w[idxi] = self._get_w(idxi)
        for t in range(n_samples):
            x = X[t, :]
            wtx = np.dot(w, x)
            p = sigmoid(wtx)
            y_test_predict[t] = 0. if p < 0.5 else 1.
        return y_test_predict

    def raw_predict(self, X):
        check_is_fitted(self, ['X_', 'y_'])
        X = check_array(X)
        n_samples, n_features = X.shape
        w = np.zeros(n_features)
        for idxi in range(n_features):
            w[idxi] = self._get_w(idxi)
        y = np.dot(X, w)
        for idxi in range(y.size):
            y[idxi] = sigmoid(y[idxi])
        return y
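# ---------------------------------------------------------------------------
# A minimal usage sketch for LogisticRegressionFTRL. Assumptions: the
# project-local helpers are available, class weights are passed explicitly,
# and the alpha/beta/l1/l2 values are illustrative. The last lines peek at
# the per-coordinate weights via _get_w to show the sparsity induced by the
# L1 term. Not part of the original module.
# ---------------------------------------------------------------------------
def _demo_ftrl():
    rng = np.random.RandomState(7)
    X = rng.randn(200, 10)
    y = (X[:, 0] - X[:, 3] > 0).astype(float)

    clf = LogisticRegressionFTRL(alpha=0.1, beta=1.0, l1=0.01, l2=0.01, epochs=2,
                                 class_weight={0.0: 1.0, 1.0: 1.0})
    clf.fit(X, y)
    print("train accuracy:", np.mean(clf.predict(X) == y))
    n_zero = sum(clf._get_w(i) == 0.0 for i in range(X.shape[1]))
    print("weights zeroed out by L1:", n_zero)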