Example #1
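The snippets below are excerpts from one course script, so each assumes shared setup that the fragments do not repeat. A minimal sketch of that setup (load_crime_dataset, plot_class_regions_for_classifier and plot_two_class_knn come from the course's adspy_shared_utilities module; the cmap_bold and X_C2 definitions are taken from Example #5):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.datasets import (load_breast_cancer, make_blobs,
                              make_classification, make_friedman1,
                              make_regression)
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import MinMaxScaler
from adspy_shared_utilities import (load_crime_dataset,
                                    plot_class_regions_for_classifier)

# Shared colormap and the two-feature synthetic classification data
cmap_bold = ListedColormap(['#FFFF00', '#00FF00', '#0000FF', '#000000'])
X_C2, y_C2 = make_classification(n_samples=100, n_features=2, n_redundant=0,
                                 n_informative=2, n_clusters_per_class=1,
                                 flip_y=0.1, class_sep=0.5, random_state=0)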
def ridge():

    X_crime, y_crime = load_crime_dataset()
    scaler = MinMaxScaler()
    X_train, X_test, y_train, y_test = train_test_split(X_crime,
                                                        y_crime,
                                                        random_state=0)
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    linridge = Ridge(alpha=20.0).fit(X_train_scaled, y_train)
    print('Crime dataset')
    print('ridge regression linear model intercept: {}'.format(
        linridge.intercept_))
    print('ridge regression linear model coeff:\n{}'.format(linridge.coef_))
    print('R-squared score (training): {:.3f}'.format(
        linridge.score(X_train_scaled, y_train)))
    print('R-squared score (test): {:.3f}'.format(
        linridge.score(X_test_scaled, y_test)))
    print('Number of non-zero features: {}'.format(
        np.sum(linridge.coef_ != 0)))

    print('Ridge regression: effect of alpha regularization parameter\n')
    for this_alpha in [0, 1, 10, 20, 50, 100, 1000]:
        linridge = Ridge(alpha=this_alpha).fit(X_train_scaled, y_train)
        r2_train = linridge.score(X_train_scaled, y_train)
        r2_test = linridge.score(X_test_scaled, y_test)
        num_coeff_bigger = np.sum(abs(linridge.coef_) > 1.0)
        print(
            'Alpha = {:.2f}\nnum abs(coeff) > 1.0: {}, r-squared training: {:.2f}, r-squared test: {:.2f}'
            .format(this_alpha, num_coeff_bigger, r2_train, r2_test))
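
The manual alpha sweep above picks the regularization strength by inspecting test scores, which risks overfitting the test set; a minimal sketch (assuming the same scaled crime-data split as inside ridge()) that instead selects alpha by cross-validation with scikit-learn's RidgeCV:

from sklearn.linear_model import RidgeCV

# RidgeCV uses efficient leave-one-out CV on the training data by default
ridge_cv = RidgeCV(alphas=[1, 10, 20, 50, 100, 1000]).fit(
    X_train_scaled, y_train)
print('chosen alpha: {}'.format(ridge_cv.alpha_))
print('R-squared score (test): {:.3f}'.format(
    ridge_cv.score(X_test_scaled, y_test)))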
Example #2
def linear():

    X_R1, y_R1 = make_regression(n_samples=100,
                                 n_features=1,
                                 n_informative=1,
                                 bias=150.0,
                                 noise=30,
                                 random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X_R1,
                                                        y_R1,
                                                        random_state=0)
    linreg = LinearRegression().fit(X_train, y_train)
    print('linear model coeff (w): {}'.format(linreg.coef_))
    print('linear model intercept (b): {:.3f}'.format(linreg.intercept_))
    print('R-squared score (training): {:.3f}'.format(
        linreg.score(X_train, y_train)))
    print('R-squared score (test): {:.3f}'.format(linreg.score(X_test,
                                                               y_test)))

    plt.figure(figsize=(5, 4))
    plt.scatter(X_R1, y_R1, marker='o', s=50, alpha=0.8)
    plt.plot(X_R1, linreg.coef_ * X_R1 + linreg.intercept_, 'r-')
    plt.title('Least-squares linear regression')
    plt.xlabel('Feature value (x)')
    plt.ylabel('Target value (y)')
    plt.show()

    X_crime, y_crime = load_crime_dataset()
    X_train, X_test, y_train, y_test = train_test_split(X_crime,
                                                        y_crime,
                                                        random_state=0)
    linreg = LinearRegression().fit(X_train, y_train)
    print('Crime dataset')
    print('linear model intercept: {}'.format(linreg.intercept_))
    print('linear model coeff:\n{}'.format(linreg.coef_))
    print('R-squared score (training): {:.3f}'.format(
        linreg.score(X_train, y_train)))
    print('R-squared score (test): {:.3f}'.format(linreg.score(X_test,
                                                               y_test)))
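
LinearRegression's coef_ and intercept_ are just the ordinary least-squares solution, which can be reproduced directly with numpy; a minimal sketch on the crime data, repeating the same split as above:

X_crime, y_crime = load_crime_dataset()
X_train, X_test, y_train, y_test = train_test_split(X_crime,
                                                    y_crime,
                                                    random_state=0)
# Append a column of ones so the intercept is fit along with the weights,
# then solve min ||Xw - y||^2 directly.
X_design = np.hstack([X_train, np.ones((len(X_train), 1))])
w, *_ = np.linalg.lstsq(X_design, y_train, rcond=None)
# should match linreg.intercept_ up to numerical precision
print('lstsq intercept: {:.3f}'.format(w[-1]))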
Example #3
def lasso():

    X_crime, y_crime = load_crime_dataset()
    scaler = MinMaxScaler()
    X_train, X_test, y_train, y_test = train_test_split(X_crime,
                                                        y_crime,
                                                        random_state=0)
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    linlasso = Lasso(alpha=2.0, max_iter=10000).fit(X_train_scaled, y_train)
    print('Crime dataset')
    print('lasso regression linear model intercept: {}'.format(
        linlasso.intercept_))
    print('lasso regression linear model coeff:\n{}'.format(linlasso.coef_))
    print('Non-zero features: {}'.format(np.sum(linlasso.coef_ != 0)))
    print('R-squared score (training): {:.3f}'.format(
        linlasso.score(X_train_scaled, y_train)))
    print('R-squared score (test): {:.3f}'.format(
        linlasso.score(X_test_scaled, y_test)))
    print('Features with non-zero weight (sorted by absolute magnitude):')

    for e in sorted(list(zip(list(X_crime), linlasso.coef_)),
                    key=lambda e: -abs(e[1])):
        if e[1] != 0:
            print('\t{}, {:.3f}'.format(e[0], e[1]))

    print(
        'Lasso regression: effect of alpha regularization parameter on number of features kept in final model\n'
    )
    for alpha in [0.5, 1, 2, 3, 5, 10, 20, 50]:
        linlasso = Lasso(alpha, max_iter=10000).fit(X_train_scaled, y_train)
        r2_train = linlasso.score(X_train_scaled, y_train)
        r2_test = linlasso.score(X_test_scaled, y_test)

        print(
            'Alpha = {:.2f}\nFeatures kept: {}, r-squared training: {:.2f}, r-squared test: {:.2f}'
            .format(alpha, np.sum(linlasso.coef_ != 0), r2_train, r2_test))
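
The scale-then-fit pattern in ridge() and lasso() is easy to get wrong (the scaler must be fit on training data only); a minimal sketch that bundles both steps with scikit-learn's make_pipeline so one object applies them consistently:

from sklearn.pipeline import make_pipeline

X_crime, y_crime = load_crime_dataset()
X_train, X_test, y_train, y_test = train_test_split(X_crime,
                                                    y_crime,
                                                    random_state=0)
# fit() scales with training statistics only; score() reuses them on test
lasso_pipe = make_pipeline(MinMaxScaler(), Lasso(alpha=2.0, max_iter=10000))
lasso_pipe.fit(X_train, y_train)
print('R-squared score (test): {:.3f}'.format(
    lasso_pipe.score(X_test, y_test)))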
Example #4
# more difficult synthetic dataset for classification (binary)
# with classes that are not linearly separable
X_D2, y_D2 = make_blobs(n_samples=100,
                        n_features=2,
                        centers=8,
                        cluster_std=1.3,
                        random_state=4)
y_D2 = y_D2 % 2
plt.figure()
plt.title(
    'Sample binary classification problem with non-linearly separable classes')
plt.scatter(X_D2[:, 0], X_D2[:, 1], c=y_D2, marker='o', s=50, cmap=cmap_bold)
plt.show()

# Breast cancer dataset for classification
(X_cancer, y_cancer) = load_breast_cancer(return_X_y=True)

# Communities and Crime dataset
(X_crime, y_crime) = load_crime_dataset()

# K-Nearest Neighbors: Classification

from adspy_shared_utilities import plot_two_class_knn

X_train, X_test, y_train, y_test = train_test_split(X_C2, y_C2, random_state=0)

plot_two_class_knn(X_train, y_train, 1, 'uniform', X_test, y_test)
plot_two_class_knn(X_train, y_train, 3, 'uniform', X_test, y_test)
plot_two_class_knn(X_train, y_train, 11, 'uniform', X_test, y_test)
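
plot_two_class_knn is a course plotting helper; the same k comparison can be read off numerically with scikit-learn's KNeighborsClassifier, a minimal sketch on the split above:

from sklearn.neighbors import KNeighborsClassifier

for k in [1, 3, 11]:
    knn = KNeighborsClassifier(n_neighbors=k, weights='uniform')
    knn.fit(X_train, y_train)
    print('k = {:2d}: accuracy (training) {:.2f}, accuracy (test) {:.2f}'.format(
        k, knn.score(X_train, y_train), knn.score(X_test, y_test)))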
Example #5
def naive_bayes():

    cmap_bold = ListedColormap(['#FFFF00', '#00FF00', '#0000FF', '#000000'])
    fruits = pd.read_table('fruit_data_with_colors.txt')
    feature_names_fruits = ['height', 'width', 'mass', 'color_score']
    X_fruits = fruits[feature_names_fruits]
    y_fruits = fruits['fruit_label']
    target_names_fruits = ['apple', 'mandarin', 'orange', 'lemon']

    X_fruits_2d = fruits[['height', 'width']]
    y_fruits_2d = fruits['fruit_label']

    plt.figure()
    plt.title('Sample regression problem with one input variable')
    X_R1, y_R1 = make_regression(n_samples=100,
                                 n_features=1,
                                 n_informative=1,
                                 bias=150.0,
                                 noise=30,
                                 random_state=0)
    plt.scatter(X_R1, y_R1, marker='o', s=50)
    plt.show()

    plt.figure()
    plt.title('Complex regression problem with one input variable')
    X_F1, y_F1 = make_friedman1(n_samples=100, n_features=7, random_state=0)
    plt.scatter(X_F1[:, 2], y_F1, marker='o', s=50)
    plt.show()

    plt.figure()
    plt.title(
        'Sample binary classification problem with two informative features')
    X_C2, y_C2 = make_classification(n_samples=100,
                                     n_features=2,
                                     n_redundant=0,
                                     n_informative=2,
                                     n_clusters_per_class=1,
                                     flip_y=0.1,
                                     class_sep=0.5,
                                     random_state=0)
    plt.scatter(X_C2[:, 0], X_C2[:, 1], marker='o', c=y_C2, s=50,
                cmap=cmap_bold)
    plt.show()

    X_train, X_test, y_train, y_test = train_test_split(X_C2,
                                                        y_C2,
                                                        random_state=0)
    nbclf = GaussianNB().fit(X_train, y_train)
    plot_class_regions_for_classifier(nbclf, X_train, y_train, X_test, y_test,
                                      'Gaussian Naive Bayes classifier: Dataset 1')

    # more difficult synthetic dataset for classification (binary)
    # with classes that are not linearly separable
    X_D2, y_D2 = make_blobs(n_samples=100,
                            n_features=2,
                            centers=8,
                            cluster_std=1.3,
                            random_state=4)
    y_D2 = y_D2 % 2
    plt.figure()
    plt.title(
        'Sample binary classification problem with non-linearly separable classes')
    plt.scatter(X_D2[:, 0], X_D2[:, 1], c=y_D2, marker='o', s=50,
                cmap=cmap_bold)
    plt.show()

    X_train, X_test, y_train, y_test = train_test_split(X_D2,
                                                        y_D2,
                                                        random_state=0)
    nbclf = GaussianNB().fit(X_train, y_train)
    plot_class_regions_for_classifier(nbclf, X_train, y_train, X_test, y_test,
                                      'Gaussian Naive Bayes classifier: Dataset 2')
    # Breast cancer dataset for classification; split it before fitting so
    # the scores below actually refer to the cancer data
    (X_cancer, y_cancer) = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X_cancer,
                                                        y_cancer,
                                                        random_state=0)
    nbclf = GaussianNB().fit(X_train, y_train)
    print('Breast cancer dataset')
    print('Accuracy of GaussianNB classifier on training set: {:.2f}'.format(
        nbclf.score(X_train, y_train)))
    print('Accuracy of GaussianNB classifier on test set: {:.2f}'.format(
        nbclf.score(X_test, y_test)))
    # Communities and Crime dataset has a continuous target, so the
    # GaussianNB classifier does not apply; it is only loaded here
    (X_crime, y_crime) = load_crime_dataset()
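
Accuracy from a single train/test split can be noisy; a minimal sketch (loading the same breast cancer data) that cross-validates the GaussianNB score instead:

from sklearn.model_selection import cross_val_score

X_cancer, y_cancer = load_breast_cancer(return_X_y=True)
scores = cross_val_score(GaussianNB(), X_cancer, y_cancer, cv=5)
print('Accuracy (5-fold CV): {:.2f} +/- {:.2f}'.format(scores.mean(),
                                                       scores.std()))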