def test_weight(): # Test class weights X_, y_ = make_classification(n_samples=200, n_features=100, weights=[0.833, 0.167], random_state=0) X_ = sparse.csr_matrix(X_) for clf in (linear_model.LogisticRegression(), svm.LinearSVC(random_state=0), svm.SVC()): clf.set_params(class_weight={0: 5}) clf.fit(X_[:180], y_[:180]) y_pred = clf.predict(X_[180:]) assert np.sum(y_pred == y_[180:]) >= 11
def test_weight(): # Test class weights clf = svm.SVC(class_weight={1: 0.1}) # we give a small weights to class 1 clf.fit(X, Y) # so all predicted values belong to class 2 assert_array_almost_equal(clf.predict(X), [2] * 6) X_, y_ = make_classification(n_samples=200, n_features=10, weights=[0.833, 0.167], random_state=2) for clf in (linear_model.LogisticRegression(), svm.LinearSVC(random_state=0), svm.SVC()): clf.set_params(class_weight={0: .1, 1: 10}) clf.fit(X_[:100], y_[:100]) y_pred = clf.predict(X_[100:]) assert f1_score(y_[100:], y_pred) > .3
X = X[y != 2] y = y[y != 2] X /= X.max() # Normalize X to speed-up convergence # ############################################################################# # Demo path functions cs = l1_min_c(X, y, loss='log') * np.logspace(0, 7, 16) print("Computing regularization path ...") start = time() clf = linear_model.LogisticRegression(penalty='l1', solver='saga', tol=1e-6, max_iter=int(1e6), warm_start=True) coefs_ = [] for c in cs: clf.set_params(C=c) clf.fit(X, y) coefs_.append(clf.coef_.ravel().copy()) print("This took %0.3fs" % (time() - start)) coefs_ = np.array(coefs_) plt.plot(np.log10(cs), coefs_, marker='o') ymin, ymax = plt.ylim() plt.xlabel('log(C)') plt.ylabel('Coefficients') plt.title('Logistic Regression Path')
from mrex import linear_model from scipy.special import expit # General a toy dataset:s it's just a straight line with some Gaussian noise: xmin, xmax = -5, 5 n_samples = 100 np.random.seed(0) X = np.random.normal(size=n_samples) y = (X > 0).astype(np.float) X[X > 0] *= 4 X += .3 * np.random.normal(size=n_samples) X = X[:, np.newaxis] # Fit the classifier clf = linear_model.LogisticRegression(C=1e5) clf.fit(X, y) # and plot the result plt.figure(1, figsize=(4, 3)) plt.clf() plt.scatter(X.ravel(), y, color='black', zorder=20) X_test = np.linspace(-5, 10, 300) loss = expit(X_test * clf.coef_ + clf.intercept_).ravel() plt.plot(X_test, loss, color='red', linewidth=3) ols = linear_model.LinearRegression() ols.fit(X, y) plt.plot(X_test, ols.coef_ * X_test + ols.intercept_, linewidth=1) plt.axhline(.5, color='.5')
Digits Classification Exercise ================================ A tutorial exercise regarding the use of classification techniques on the Digits dataset. This exercise is used in the :ref:`clf_tut` part of the :ref:`supervised_learning_tut` section of the :ref:`stat_learn_tut_index`. """ print(__doc__) from mrex import datasets, neighbors, linear_model X_digits, y_digits = datasets.load_digits(return_X_y=True) X_digits = X_digits / X_digits.max() n_samples = len(X_digits) X_train = X_digits[:int(.9 * n_samples)] y_train = y_digits[:int(.9 * n_samples)] X_test = X_digits[int(.9 * n_samples):] y_test = y_digits[int(.9 * n_samples):] knn = neighbors.KNeighborsClassifier() logistic = linear_model.LogisticRegression(max_iter=1000) print('KNN score: %f' % knn.fit(X_train, y_train).score(X_test, y_test)) print('LogisticRegression score: %f' % logistic.fit(X_train, y_train).score(X_test, y_test))
return X, Y # Load Data X, y = datasets.load_digits(return_X_y=True) X = np.asarray(X, 'float32') X, Y = nudge_dataset(X, y) X = (X - np.min(X, 0)) / (np.max(X, 0) + 0.0001) # 0-1 scaling X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0) # Models we will use logistic = linear_model.LogisticRegression(solver='newton-cg', tol=1) rbm = BernoulliRBM(random_state=0, verbose=True) rbm_features_classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)]) # ############################################################################# # Training # Hyper-parameters. These were set by cross-validation, # using a GridSearchCV. Here we are not performing cross-validation to # save time. rbm.learning_rate = 0.06 rbm.n_iter = 10 # More components tend to give better prediction performance, but larger # fitting time