def test_l1_regularized_saga():
    """PySAGAClassifier and SAGAClassifier agree under a pure L1 penalty.

    Both estimators are built with identical settings (``alpha=0.0``,
    ``penalty="l1"``), fitted on ``X_bin``/``y_bin``, and their ``coef_``
    arrays are compared with ``assert_array_almost_equal``.
    """
    beta = 1e-3
    shared = dict(eta=1e-3, alpha=0.0, beta=beta, max_iter=10,
                  penalty="l1", random_state=0)

    reference = PySAGAClassifier(**shared)
    candidate = SAGAClassifier(**shared)

    reference.fit(X_bin, y_bin)
    candidate.fit(X_bin, y_bin)

    np.testing.assert_array_almost_equal(reference.coef_, candidate.coef_)
def test_enet_regularized_saga():
    """PySAGAClassifier and SAGAClassifier agree over a grid of alpha/beta.

    The comparison runs on two datasets: the module-level ``X_bin``/``y_bin``
    pair and a random CSR matrix with random binary labels. For every
    combination of ``alpha`` and ``beta`` both estimators are fitted with the
    same settings and their ``coef_`` arrays must be almost equal.
    """
    X_sparse = sparse.rand(100, 50, density=.5, random_state=0).tocsr()
    # Use a dedicated seeded generator instead of the global NumPy RNG so the
    # labels (and therefore any failure) are reproducible from run to run.
    y_sparse = np.random.RandomState(0).randint(0, high=2, size=100)
    eta = 1e-3

    for (X, y) in ((X_bin, y_bin), (X_sparse, y_sparse)):
        for alpha in np.logspace(-3, 0, 5):
            for beta in np.logspace(-3, 3, 5):
                pysaga = PySAGAClassifier(eta=eta, alpha=alpha, beta=beta,
                                          max_iter=5, penalty='l1',
                                          random_state=0)
                saga = SAGAClassifier(eta=eta, alpha=alpha, beta=beta,
                                      max_iter=5, penalty='l1',
                                      random_state=0, tol=1e-24)
                pysaga.fit(X, y)
                saga.fit(X, y)
                np.testing.assert_array_almost_equal(pysaga.coef_,
                                                     saga.coef_)
def test_no_reg_saga():
    """PySAGAClassifier and SAGAClassifier agree with no regularisation.

    Both ``alpha`` and ``beta`` are zero and ``penalty`` is ``None``.
    """
    unregularized = dict(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10,
                         penalty=None, random_state=0)

    reference = PySAGAClassifier(**unregularized)
    candidate = SAGAClassifier(**unregularized)

    reference.fit(X_bin, y_bin)
    candidate.fit(X_bin, y_bin)

    np.testing.assert_array_almost_equal(reference.coef_, candidate.coef_)
def test_elastic_saga():
    """PySAGAClassifier and SAGAClassifier agree when alpha equals beta.

    Each strength in the list is used for both ``alpha`` and ``beta``; the
    two estimators run a single iteration and their ``coef_`` arrays are
    compared.
    """
    strengths = [1e-5, 1e-2, 1e-1, 1.0]
    for strength in strengths:
        # alpha and beta always take the same value in this test.
        alpha = beta = strength
        shared = dict(eta=1e-3, alpha=alpha, beta=beta, max_iter=1,
                      penalty="l1", random_state=0)

        reference = PySAGAClassifier(**shared)
        candidate = SAGAClassifier(tol=0, **shared)

        reference.fit(X_bin, y_bin)
        candidate.fit(X_bin, y_bin)

        np.testing.assert_array_almost_equal(reference.coef_,
                                             candidate.coef_)
def test_l2_regularized_saga():
    """PySAGAClassifier and SAGAClassifier agree with ``alpha=1.0`` only.

    ``penalty`` is ``None`` and ``beta`` is left at each estimator's default.
    """
    shared = dict(eta=1e-3, alpha=1.0, max_iter=10, penalty=None,
                  random_state=0)

    reference = PySAGAClassifier(**shared)
    candidate = SAGAClassifier(**shared)

    reference.fit(X_bin, y_bin)
    candidate.fit(X_bin, y_bin)

    np.testing.assert_array_almost_equal(reference.coef_, candidate.coef_)
def test_saga_score():
    """PySAGAClassifier and SAGAClassifier report the same ``score``.

    Both are fitted for one iteration without regularisation on a
    1000-sample ``make_classification`` dataset and scored on that same data.
    """
    X, y = make_classification(1000, random_state=0)
    shared = dict(eta=1e-3, alpha=0.0, beta=0.0, max_iter=1, penalty=None,
                  random_state=0)

    reference = PySAGAClassifier(**shared)
    candidate = SAGAClassifier(**shared)

    reference.fit(X, y)
    candidate.fit(X, y)

    reference_score = reference.score(X, y)
    candidate_score = candidate.score(X, y)
    assert_equal(reference_score, candidate_score)
def test_elastic_saga():
    """Compare both SAGA classifiers with equal L2 and L1 strengths.

    For each value, ``alpha`` and ``beta`` are set to that value, both
    estimators are fitted for one iteration on ``X_bin``/``y_bin``, and the
    resulting ``coef_`` arrays must be almost equal.
    """
    common = dict(eta=1e-3, max_iter=1, penalty='l1', random_state=0)

    for value in (1e-5, 1e-2, 1e-1, 1.):
        py_model = PySAGAClassifier(alpha=value, beta=value, **common)
        fast_model = SAGAClassifier(alpha=value, beta=value, tol=0, **common)

        py_model.fit(X_bin, y_bin)
        fast_model.fit(X_bin, y_bin)

        np.testing.assert_array_almost_equal(py_model.coef_,
                                             fast_model.coef_)
def test_enet_regularized_saga():
    """Compare both SAGA classifiers on an alpha/beta grid, dense and sparse.

    Two datasets are used: the module-level ``X_bin``/``y_bin`` and a random
    CSR matrix with random binary labels. Every ``(alpha, beta)`` pair is
    fitted with both estimators under identical settings, and the ``coef_``
    arrays must be almost equal.
    """
    X_sparse = sparse.rand(100, 50, density=.5, random_state=0).tocsr()
    # Seeded local generator: the global ``np.random`` state would make the
    # labels differ on every run and failures impossible to reproduce.
    label_rng = np.random.RandomState(0)
    y_sparse = label_rng.randint(0, high=2, size=100)
    eta = 1e-3

    datasets = ((X_bin, y_bin), (X_sparse, y_sparse))
    alphas = np.logspace(-3, 0, 5)
    betas = np.logspace(-3, 3, 5)

    for (X, y) in datasets:
        for alpha in alphas:
            for beta in betas:
                pysaga = PySAGAClassifier(
                    eta=eta, alpha=alpha, beta=beta, max_iter=5,
                    penalty='l1', random_state=0)
                saga = SAGAClassifier(
                    eta=eta, alpha=alpha, beta=beta, max_iter=5,
                    penalty='l1', random_state=0, tol=1e-24)
                pysaga.fit(X, y)
                saga.fit(X, y)
                np.testing.assert_array_almost_equal(pysaga.coef_,
                                                     saga.coef_)
def test_sag_sample_weights():
    """Check how ``sample_weight`` affects the fitted SAGA coefficients.

    Three properties are verified on the module-level ``X``/``y``:

    1. Unit weights give exactly the same classifier coefficients as no
       weights.
    2. For the regressor, weighting every sample by 2 while doubling
       ``alpha`` gives exactly the same coefficients.
    3. Extra noise samples with zero weight do not change the regressor's
       coefficients (to 6 decimals) once ``alpha`` is halved for the doubled
       dataset.
    """
    clf1 = SAGAClassifier(loss='log', max_iter=5, verbose=0, random_state=0)
    clf2 = SAGAClassifier(loss='log', max_iter=5, verbose=0, random_state=0)
    clf1.fit(X, y)
    sample_weights = [1] * y.size
    clf2.fit(X, y, sample_weight=sample_weights)
    np.testing.assert_array_equal(clf1.coef_.ravel(), clf2.coef_.ravel())

    # Same thing but for a regression object.
    alpha = 1.0
    clf1 = SAGARegressor(loss='squared', alpha=alpha, max_iter=5,
                         random_state=0)
    clf1.fit(X, y)
    sample_weights = [2] * y.size
    # alpha needs to be multiplied accordingly.
    clf2 = SAGARegressor(loss='squared', alpha=2 * alpha, max_iter=5,
                         random_state=0)
    clf2.fit(X, y, sample_weight=sample_weights)
    np.testing.assert_array_equal(clf1.coef_.ravel(), clf2.coef_.ravel())

    # Check that samples with a zero weight do not have an influence on the
    # resulting coefficients by adding noise to the original samples.
    # The noise comes from a seeded generator so the test is reproducible
    # and independent of the global NumPy RNG state.
    noise = np.random.RandomState(0).randn(*X.shape)
    X2 = np.concatenate((X, noise), axis=0)  # augment with noise
    y2 = np.concatenate((y, y), axis=0)
    sample_weights = np.ones(y2.size, dtype=float)
    sample_weights[X.shape[0]:] = 0.

    clf1 = SAGARegressor(loss='squared', alpha=alpha, max_iter=100,
                         random_state=0, tol=1e-24)
    clf1.fit(X, y)
    clf2 = SAGARegressor(loss='squared', alpha=0.5 * alpha, max_iter=100,
                         random_state=0, tol=1e-24)
    clf2.fit(X2, y2, sample_weight=sample_weights)
    np.testing.assert_array_almost_equal(clf1.coef_.ravel(),
                                         clf2.coef_.ravel(), decimal=6)
class BaselineStruct(BaseArgumentMixin):
    """Baseline built from two separately fitted SAGA classifiers.

    One classifier scores links and the other scores propositions; each is
    trained with an elastic-net style split of its own regularisation
    strength controlled by ``l1_ratio``.
    """

    def __init__(self, alpha_link, alpha_prop, l1_ratio):
        """Store the hyper-parameters; ``compat_features`` is always False."""
        self.alpha_link = alpha_link
        self.alpha_prop = alpha_prop
        self.l1_ratio = l1_ratio
        self.compat_features = False

    def initialize_labels(self, y_props_flat, y_links_flat):
        """Fit the proposition and link label encoders and count classes."""
        self.prop_encoder_ = LabelEncoder().fit(y_props_flat)
        self.link_encoder_ = LabelEncoder().fit(y_links_flat)

        prop_classes = self.prop_encoder_.classes_
        link_classes = self.link_encoder_.classes_
        self.n_prop_states = len(prop_classes)
        self.n_link_states = len(link_classes)

    def fit(self, X_link, y_link, X_prop, y_prop):
        """Fit the link and proposition classifiers and return ``self``.

        The link classifier is trained with 'balanced' sample weights; the
        proposition classifier is trained unweighted.
        """
        self.initialize_labels(y_prop, y_link)
        link_targets = self.link_encoder_.transform(y_link)
        prop_targets = self.prop_encoder_.transform(y_prop)

        self.link_clf_ = SAGAClassifier(
            loss='smooth_hinge',
            penalty='l1',
            tol=1e-4,
            max_iter=500,
            random_state=0,
            verbose=0,
        )
        self.prop_clf_ = clone(self.link_clf_)

        # Link model: alpha gets the (1 - l1_ratio) share, beta the l1_ratio
        # share of alpha_link.
        link_alpha = self.alpha_link * (1 - self.l1_ratio)
        link_beta = self.alpha_link * self.l1_ratio
        balanced_weights = compute_sample_weight('balanced', link_targets)
        self.link_clf_.set_params(alpha=link_alpha, beta=link_beta)
        self.link_clf_.fit(X_link, link_targets,
                           sample_weight=balanced_weights)

        # Proposition model: same split applied to alpha_prop.
        prop_alpha = self.alpha_prop * (1 - self.l1_ratio)
        prop_beta = self.alpha_prop * self.l1_ratio
        self.prop_clf_.set_params(alpha=prop_alpha, beta=prop_beta)
        self.prop_clf_.fit(X_prop, prop_targets)

        return self

    def decision_function(self, X_link, X_prop, docs):
        """Return one ``DocLabel`` of raw scores per document.

        The flat score arrays from both classifiers are cut into
        per-document slices using ``len(doc.features)`` for links and
        ``len(doc.prop_features)`` for propositions.
        """

        def split_rows(rows, offsets):
            # Consecutive slices rows[0:o1], rows[o1:o2], ...
            starts = np.append(0, offsets)
            return [rows[lo:hi] for lo, hi in zip(starts, offsets)]

        link_offsets = np.cumsum([len(doc.features) for doc in docs])
        link_scores = self.link_clf_.decision_function(X_link)

        # Only the column of the encoded ``True`` label receives the link
        # scores; every other column stays zero.
        n_link_classes = len(self.link_encoder_.classes_)
        link_marginals = np.zeros((len(link_scores), n_link_classes))
        [link_on] = self.link_encoder_.transform([True])
        link_marginals[:, link_on] = link_scores.ravel()
        Y_link = split_rows(link_marginals, link_offsets)

        prop_offsets = np.cumsum([len(doc.prop_features) for doc in docs])
        prop_marginals = self.prop_clf_.decision_function(X_prop)
        Y_prop = split_rows(prop_marginals, prop_offsets)

        Y_pred = [DocLabel(prop_part, link_part)
                  for link_part, prop_part in zip(Y_link, Y_prop)]

        assert len(Y_pred) == len(docs)
        return Y_pred
def fit_single(solver, X, y, penalty='l2', single_target=True, C=1,
               max_iter=10, skip_slow=False):
    """Benchmark one logistic-regression solver over increasing ``max_iter``.

    The data is split with ``train_test_split`` (stratified, fixed seed) and
    a fresh model is fitted for ``max_iter`` values 1, 3, 5, ... up to
    ``max_iter``. For each fit the wall-clock training time, the regularised
    objective on train and test data, and the test accuracy are recorded.

    Parameters
    ----------
    solver : str
        ``'lightning'`` selects ``SAGAClassifier``; any other value is passed
        to ``LogisticRegression(solver=...)``.
    X, y : data and targets to split and fit on.
    penalty : str
        ``'l2'`` puts the regularisation in ``alpha``; anything else puts it
        in ``beta`` and uses the ``'l1'`` penalty for lightning.
    single_target : bool
        Controls the printed label and, for ``'sag'``/``'saga'``, whether
        ``multi_class`` is ``'ovr'`` or ``'multinomial'``.
    C : float
        Inverse regularisation strength; ``1 / (C * n_samples)`` is used for
        ``alpha`` or ``beta``.
    max_iter : int
        Upper bound of the iteration budgets tried.
    skip_slow : bool
        When true, the lightning + l1 combination is skipped.

    Returns
    -------
    tuple or None
        ``(lr, times, train_scores, test_scores, accuracies)`` where ``lr``
        is the last fitted model, or ``None`` when the run was skipped. Each
        list starts with a placeholder entry for "zero iterations".
    """
    if skip_slow and solver == 'lightning' and penalty == 'l1':
        print('skip_slowping l1 logistic regression with solver lightning.')
        return

    print('Solving %s logistic regression with penalty %s, solver %s.'
          % ('binary' if single_target else 'multinomial',
             penalty, solver))

    if solver == 'lightning':
        from lightning.classification import SAGAClassifier

    if single_target or solver not in ['sag', 'saga']:
        multi_class = 'ovr'
    else:
        multi_class = 'multinomial'

    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42,
                                                        stratify=y)
    n_samples = X_train.shape[0]
    n_classes = np.unique(y_train).shape[0]
    test_scores = [1]
    train_scores = [1]
    accuracies = [1 / n_classes]
    times = [0]

    if penalty == 'l2':
        alpha = 1. / (C * n_samples)
        beta = 0
        lightning_penalty = None
    else:
        alpha = 0.
        beta = 1. / (C * n_samples)
        lightning_penalty = 'l1'

    for this_max_iter in range(1, max_iter + 1, 2):
        print('[%s, %s, %s] Max iter: %s' %
              ('binary' if single_target else 'multinomial',
               penalty, solver, this_max_iter))
        if solver == 'lightning':
            lr = SAGAClassifier(loss='log', alpha=alpha, beta=beta,
                                penalty=lightning_penalty,
                                tol=-1, max_iter=this_max_iter)
        else:
            lr = LogisticRegression(solver=solver,
                                    multi_class=multi_class,
                                    C=C,
                                    penalty=penalty,
                                    fit_intercept=False, tol=1e-24,
                                    max_iter=this_max_iter,
                                    random_state=42,
                                    )
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for timing short intervals.
        t0 = time.perf_counter()
        lr.fit(X_train, y_train)
        train_time = time.perf_counter() - t0

        scores = []
        # Distinct loop names so the X / y parameters are not shadowed.
        for (X_eval, y_eval) in [(X_train, y_train), (X_test, y_test)]:
            try:
                y_pred = lr.predict_proba(X_eval)
            except NotImplementedError:
                # Lightning predict_proba is not implemented for n_classes > 2
                y_pred = _predict_proba(lr, X_eval)
            # NOTE(review): the test-set loss is also divided by the
            # training-set size; kept exactly as in the original code.
            score = log_loss(y_eval, y_pred, normalize=False) / n_samples
            score += (0.5 * alpha * np.sum(lr.coef_ ** 2) +
                      beta * np.sum(np.abs(lr.coef_)))
            scores.append(score)
        train_score, test_score = tuple(scores)

        y_pred = lr.predict(X_test)
        accuracy = np.sum(y_pred == y_test) / y_test.shape[0]
        test_scores.append(test_score)
        train_scores.append(train_score)
        accuracies.append(accuracy)
        times.append(train_time)
    return lr, times, train_scores, test_scores, accuracies
def fit_single(
    solver,
    X,
    y,
    penalty="l2",
    single_target=True,
    C=1,
    max_iter=10,
    skip_slow=False,
    dtype=np.float64,
):
    """Benchmark one logistic-regression solver over increasing ``max_iter``.

    ``X`` and ``y`` are cast to ``dtype``, split with ``train_test_split``
    (stratified, fixed seed), and a fresh model is fitted for ``max_iter``
    values 1, 3, 5, ... up to ``max_iter``. For each fit the wall-clock
    training time, the regularised objective on train and test data, and the
    test accuracy are recorded.

    Parameters
    ----------
    solver : str
        ``"lightning"`` selects ``SAGAClassifier``; any other value is passed
        to ``LogisticRegression(solver=...)``.
    X, y : data and targets; both must support ``.astype``.
    penalty : str
        ``"l2"`` puts the regularisation in ``alpha``; anything else puts it
        in ``beta`` and uses the ``"l1"`` penalty for lightning.
    single_target : bool
        Controls the printed label and, for ``"sag"``/``"saga"``, whether
        ``multi_class`` is ``"ovr"`` or ``"multinomial"``.
    C : float
        Inverse regularisation strength; ``1 / (C * n_samples)`` is used for
        ``alpha`` or ``beta``.
    max_iter : int
        Upper bound of the iteration budgets tried.
    skip_slow : bool
        When true, the lightning + l1 combination is skipped.
    dtype : numpy dtype
        Type both ``X`` and ``y`` are converted to before splitting.

    Returns
    -------
    tuple or None
        ``(lr, times, train_scores, test_scores, accuracies)`` where ``lr``
        is the last fitted model, or ``None`` when the run was skipped. Each
        list starts with a placeholder entry for "zero iterations".
    """
    if skip_slow and solver == "lightning" and penalty == "l1":
        print("skip_slowping l1 logistic regression with solver lightning.")
        return

    print("Solving %s logistic regression with penalty %s, solver %s." %
          ("binary" if single_target else "multinomial", penalty, solver))

    if solver == "lightning":
        from lightning.classification import SAGAClassifier

    if single_target or solver not in ["sag", "saga"]:
        multi_class = "ovr"
    else:
        multi_class = "multinomial"

    X = X.astype(dtype)
    y = y.astype(dtype)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        random_state=42,
                                                        stratify=y)
    n_samples = X_train.shape[0]
    n_classes = np.unique(y_train).shape[0]
    test_scores = [1]
    train_scores = [1]
    accuracies = [1 / n_classes]
    times = [0]

    if penalty == "l2":
        alpha = 1.0 / (C * n_samples)
        beta = 0
        lightning_penalty = None
    else:
        alpha = 0.0
        beta = 1.0 / (C * n_samples)
        lightning_penalty = "l1"

    for this_max_iter in range(1, max_iter + 1, 2):
        print("[%s, %s, %s] Max iter: %s" % (
            "binary" if single_target else "multinomial",
            penalty,
            solver,
            this_max_iter,
        ))
        if solver == "lightning":
            lr = SAGAClassifier(
                loss="log",
                alpha=alpha,
                beta=beta,
                penalty=lightning_penalty,
                tol=-1,
                max_iter=this_max_iter,
            )
        else:
            lr = LogisticRegression(
                solver=solver,
                multi_class=multi_class,
                C=C,
                penalty=penalty,
                fit_intercept=False,
                tol=0,
                max_iter=this_max_iter,
                random_state=42,
            )

        # Makes cpu cache even for all fit calls
        X_train.max()
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for timing short intervals.
        t0 = time.perf_counter()
        lr.fit(X_train, y_train)
        train_time = time.perf_counter() - t0

        scores = []
        # Distinct loop names so the X / y parameters are not shadowed.
        for (X_eval, y_eval) in [(X_train, y_train), (X_test, y_test)]:
            try:
                y_pred = lr.predict_proba(X_eval)
            except NotImplementedError:
                # Lightning predict_proba is not implemented for n_classes > 2
                y_pred = _predict_proba(lr, X_eval)
            # NOTE(review): the test-set loss is also divided by the
            # training-set size; kept exactly as in the original code.
            score = log_loss(y_eval, y_pred, normalize=False) / n_samples
            score += 0.5 * alpha * np.sum(lr.coef_**2) + beta * np.sum(
                np.abs(lr.coef_))
            scores.append(score)
        train_score, test_score = tuple(scores)

        y_pred = lr.predict(X_test)
        accuracy = np.sum(y_pred == y_test) / y_test.shape[0]
        test_scores.append(test_score)
        train_scores.append(train_score)
        accuracies.append(accuracy)
        times.append(train_time)
    return lr, times, train_scores, test_scores, accuracies
class BaselineStruct(BaseArgumentMixin):
    """Baseline built from two separately fitted SAGA classifiers.

    One classifier scores links and the other scores propositions. Raw
    scores can be decoded either by simple rounding or, when constraints are
    given, through the mixin's ``_inference`` routine.
    """

    def __init__(self, alpha_link, alpha_prop, l1_ratio, exact_test=False):
        """Store the hyper-parameters; ``compat_features`` is always False."""
        self.alpha_link = alpha_link
        self.alpha_prop = alpha_prop
        self.l1_ratio = l1_ratio
        self.compat_features = False
        self.exact_test = exact_test

    def initialize_labels(self, y_props_flat, y_links_flat):
        """Fit the proposition and link label encoders and count classes."""
        self.prop_encoder_ = LabelEncoder().fit(y_props_flat)
        self.link_encoder_ = LabelEncoder().fit(y_links_flat)

        prop_classes = self.prop_encoder_.classes_
        link_classes = self.link_encoder_.classes_
        self.n_prop_states = len(prop_classes)
        self.n_link_states = len(link_classes)

    def fit(self, X_link, y_link, X_prop, y_prop):
        """Fit the link and proposition classifiers and return ``self``.

        The link classifier is trained with 'balanced' sample weights; the
        proposition classifier is trained unweighted.
        """
        self.initialize_labels(y_prop, y_link)
        link_targets = self.link_encoder_.transform(y_link)
        prop_targets = self.prop_encoder_.transform(y_prop)

        self.link_clf_ = SAGAClassifier(
            loss='smooth_hinge',
            penalty='l1',
            tol=1e-4,
            max_iter=500,
            random_state=0,
            verbose=0,
        )
        self.prop_clf_ = clone(self.link_clf_)

        # Link model: alpha gets the (1 - l1_ratio) share, beta the l1_ratio
        # share of alpha_link.
        link_alpha = self.alpha_link * (1 - self.l1_ratio)
        link_beta = self.alpha_link * self.l1_ratio
        balanced_weights = compute_sample_weight('balanced', link_targets)
        self.link_clf_.set_params(alpha=link_alpha, beta=link_beta)
        self.link_clf_.fit(X_link, link_targets,
                           sample_weight=balanced_weights)

        # Proposition model: same split applied to alpha_prop.
        prop_alpha = self.alpha_prop * (1 - self.l1_ratio)
        prop_beta = self.alpha_prop * self.l1_ratio
        self.prop_clf_.set_params(alpha=prop_alpha, beta=prop_beta)
        self.prop_clf_.fit(X_prop, prop_targets)

        return self

    def decision_function(self, X_link, X_prop, docs):
        """Return one ``DocLabel`` of raw scores per document.

        The flat score arrays from both classifiers are cut into
        per-document slices using ``len(doc.features)`` for links and
        ``len(doc.prop_features)`` for propositions.
        """

        def split_rows(rows, offsets):
            # Consecutive slices rows[0:o1], rows[o1:o2], ...
            starts = np.append(0, offsets)
            return [rows[lo:hi] for lo, hi in zip(starts, offsets)]

        link_offsets = np.cumsum([len(doc.features) for doc in docs])
        link_scores = self.link_clf_.decision_function(X_link)

        # Only the column of the encoded ``True`` label receives the link
        # scores; every other column stays zero.
        n_link_classes = len(self.link_encoder_.classes_)
        link_marginals = np.zeros((len(link_scores), n_link_classes))
        [link_on] = self.link_encoder_.transform([True])
        link_marginals[:, link_on] = link_scores.ravel()
        Y_link = split_rows(link_marginals, link_offsets)

        prop_offsets = np.cumsum([len(doc.prop_features) for doc in docs])
        prop_marginals = self.prop_clf_.decision_function(X_prop)
        Y_prop = split_rows(prop_marginals, prop_offsets)

        Y_pred = [DocLabel(prop_part, link_part)
                  for link_part, prop_part in zip(Y_link, Y_prop)]

        assert len(Y_pred) == len(docs)
        return Y_pred

    def fast_decode(self, Y_marg, docs, constraints):
        """Turn per-document scores into predictions.

        Without constraints each document is decoded with ``self._round``.
        With constraints each document goes through ``self._inference`` using
        an all-zero compatibility tensor and empty lists for the remaining
        potentials.
        """
        if not constraints:
            return [
                self._round(marg.nodes, marg.links, inverse_transform=True)
                for marg in Y_marg
            ]

        compat_shape = (self.n_prop_states,
                        self.n_prop_states,
                        self.n_link_states)
        zero_compat = np.zeros(compat_shape)

        decoded_docs = []
        for doc, marg in zip(docs, Y_marg):
            # Fresh empty lists per document, as in the original tuple.
            potentials = (marg.nodes, marg.links, zero_compat, [], [], [])
            decoded, _ = self._inference(doc,
                                         potentials,
                                         relaxed=False,
                                         exact=self.exact_test,
                                         constraints=constraints)
            decoded_docs.append(decoded)
        return decoded_docs

    def predict(self, X_link, X_prop, docs, constraints=""):
        """Score the documents and decode the scores into predictions."""
        marginals = self.decision_function(X_link, X_prop, docs)
        return self.fast_decode(marginals, docs, constraints)
def test_sag_sample_weights():
    """Check how ``sample_weight`` affects the fitted SAGA coefficients.

    Three properties are verified on the module-level ``X``/``y``:

    1. Unit weights give exactly the same classifier coefficients as no
       weights.
    2. For the regressor, weighting every sample by 2 while doubling
       ``alpha`` gives exactly the same coefficients.
    3. Extra noise samples with zero weight do not change the regressor's
       coefficients (to 6 decimals) once ``alpha`` is halved for the doubled
       dataset.
    """
    clf1 = SAGAClassifier(loss='log', max_iter=5, verbose=0, random_state=0)
    clf2 = SAGAClassifier(loss='log', max_iter=5, verbose=0, random_state=0)
    clf1.fit(X, y)
    sample_weights = [1] * y.size
    clf2.fit(X, y, sample_weight=sample_weights)
    np.testing.assert_array_equal(clf1.coef_.ravel(), clf2.coef_.ravel())

    # Same thing but for a regression object.
    alpha = 1.0
    clf1 = SAGARegressor(loss='squared', alpha=alpha, max_iter=5,
                         random_state=0)
    clf1.fit(X, y)
    sample_weights = [2] * y.size
    # alpha needs to be multiplied accordingly.
    clf2 = SAGARegressor(loss='squared', alpha=2 * alpha, max_iter=5,
                         random_state=0)
    clf2.fit(X, y, sample_weight=sample_weights)
    np.testing.assert_array_equal(clf1.coef_.ravel(), clf2.coef_.ravel())

    # Check that samples with a zero weight do not have an influence on the
    # resulting coefficients by adding noise to the original samples.
    # The noise comes from a seeded generator so the test is reproducible
    # and independent of the global NumPy RNG state.
    noise = np.random.RandomState(0).randn(*X.shape)
    X2 = np.concatenate((X, noise), axis=0)  # augment with noise
    y2 = np.concatenate((y, y), axis=0)
    # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is the
    # exact same dtype.
    sample_weights = np.ones(y2.size, dtype=float)
    sample_weights[X.shape[0]:] = 0.

    clf1 = SAGARegressor(loss='squared', alpha=alpha, max_iter=100,
                         random_state=0, tol=1e-24)
    clf1.fit(X, y)
    clf2 = SAGARegressor(loss='squared', alpha=0.5 * alpha, max_iter=100,
                         random_state=0, tol=1e-24)
    clf2.fit(X2, y2, sample_weight=sample_weights)
    np.testing.assert_array_almost_equal(clf1.coef_.ravel(),
                                         clf2.coef_.ravel(), decimal=6)
def saga_cv(which, alphas, l1_ratio):
    """Cross-validate the link and proposition SAGA classifiers.

    For every fold and every value in ``alphas`` a link classifier (with
    'balanced' sample weights) and a proposition classifier are fitted on the
    fold's training files and scored on its validation files.

    Parameters
    ----------
    which : str
        ``'cdcp'`` (3 folds) or ``'ukp'`` (5 folds); selects the fold
        directory under ``data/process``.
    alphas : sequence of float
        Overall regularisation strengths to try.
    l1_ratio : float
        Share of each strength assigned to ``beta``; the remaining
        ``1 - l1_ratio`` share goes to ``alpha``.

    Returns
    -------
    link_scores, prop_scores : ndarray of shape (n_folds, len(alphas))
        Binary F1 for links and macro F1 for propositions.

    Raises
    ------
    ValueError
        If ``which`` is neither ``'cdcp'`` nor ``'ukp'``.
    """
    if which == 'cdcp':
        n_folds = 3
        path = os.path.join("data", "process", "erule", "folds", "{}", "{}")
    elif which == 'ukp':
        n_folds = 5
        path = os.path.join("data", "process", "ukp-essays", "folds", "{}",
                            "{}")
    else:
        # Name the offending value so a typo is easy to spot.
        raise ValueError(
            "unknown dataset {!r}; expected 'cdcp' or 'ukp'".format(which))

    clf_link = SAGAClassifier(loss='smooth_hinge', penalty='l1', tol=1e-4,
                              max_iter=100, random_state=0, verbose=0)
    clf_prop = clone(clf_link)

    link_scores = np.zeros((n_folds, len(alphas)))
    prop_scores = np.zeros_like(link_scores)

    for k in range(n_folds):
        X_tr_link, y_tr_link = load_csr(path.format(k, 'train.npz'),
                                        return_y=True)
        X_te_link, y_te_link = load_csr(path.format(k, 'val.npz'),
                                        return_y=True)
        X_tr_prop, y_tr_prop = load_csr(path.format(k, 'prop-train.npz'),
                                        return_y=True)
        X_te_prop, y_te_prop = load_csr(path.format(k, 'prop-val.npz'),
                                        return_y=True)

        le = LabelEncoder()
        y_tr_prop_enc = le.fit_transform(y_tr_prop)
        y_te_prop_enc = le.transform(y_te_prop)

        link_sw = compute_sample_weight('balanced', y_tr_link)

        for j, strength in enumerate(alphas):
            # Split the overall strength without mutating the loop variable.
            beta = strength * l1_ratio
            alpha = strength * (1 - l1_ratio)
            clf_link.set_params(alpha=alpha, beta=beta)
            clf_prop.set_params(alpha=alpha, beta=beta)

            clf_link.fit(X_tr_link, y_tr_link, sample_weight=link_sw)
            y_pred_link = clf_link.predict(X_te_link)

            clf_prop.fit(X_tr_prop, y_tr_prop_enc)
            y_pred_prop = clf_prop.predict(X_te_prop)

            # Silence any warnings raised while computing the F1 scores.
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                link_f = f1_score(y_te_link, y_pred_link, average='binary')
                prop_f = f1_score(y_te_prop_enc, y_pred_prop,
                                  average='macro')

            link_scores[k, j] = link_f
            prop_scores[k, j] = prop_f

    return link_scores, prop_scores