Example #1
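# Assumed setup (sketch, not part of the original snippet): the names used below
# (cancer, X_scaled, dsets, verbose, plt) are taken to be prepared roughly as follows,
# with the breast-cancer data standardized before PCA.
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
import datasets_mglearn as dsets

cancer = load_breast_cancer()
X_scaled = StandardScaler().fit_transform(cancer.data)  # zero mean, unit variance per feature
verbose = True  # toggle the scatter plot below
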
from sklearn.decomposition import PCA
pca = PCA(n_components=2)  # Here we keep only 2 components
pca.fit(X_scaled)
# Or equivalently: pca = PCA(n_components=2).fit(X_scaled)

X_pca = pca.transform(X_scaled)
print("Original shape {}".format(str(X_scaled.shape)))
print("PCA shape {}".format(str(X_pca.shape)))
#In [70]: run UNS_PCA_cancer.py
#Original shape (569, 30)
#PCA shape (569, 2) meaning that we only kept 2 features

# plot first vs. second principal component
if verbose:
    plt.figure(figsize=(8, 8))
    dsets.discrete_scatter(X_pca[:, 0], X_pca[:, 1], cancer.target)
    plt.legend(cancer.target_names, loc="best")
    plt.gca().set_aspect("equal")
    plt.xlabel("First Principal component")
    plt.ylabel("Second Principal component")

print("PCA components:\n {}".format(pca.components_))

## Heat map
plt.matshow(pca.components_, cmap='viridis')
plt.yticks([0, 1], ['First component', 'Second component'])
plt.colorbar()
plt.xticks(range(len(cancer.feature_names)),
           cancer.feature_names,
           rotation=60,
           ha='left')
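
# Optional check (sketch): explained_variance_ratio_ quantifies how much of the total
# variance the two retained components capture, complementing the shape printout above.
print("Explained variance ratio: {}".format(pca.explained_variance_ratio_))
print("Total variance kept by 2 components: {:.2f}".format(pca.explained_variance_ratio_.sum()))
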
Example #2
#Output
#Predicted probabilities:
#[[ 0.01573626  0.98426374]
# [ 0.84335828  0.15664172]
# [ 0.98112869  0.01887131]
# [ 0.97407199  0.02592801]
# [ 0.01352142  0.98647858]
# [ 0.02504637  0.97495363]]
# Lets us see which predictions are more or less certain
# However, beware: with a model that overfits, the predictions may more often be false positives or false negatives
# The model then needs to be calibrated.
# We then check on a plot whether the predictions match the probabilities
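
# Minimal calibration check (sketch): sklearn's calibration_curve compares the predicted
# probabilities with the observed frequencies; gbrt, X_test and y_test are assumed to be
# the binary classifier and held-out split used in this example.
from sklearn.calibration import calibration_curve

prob_pos = gbrt.predict_proba(X_test)[:, 1]  # probability of the positive class
frac_pos, mean_pred = calibration_curve(y_test, prob_pos, n_bins=5)

plt.figure("Calibration")
plt.plot(mean_pred, frac_pos, "o-", label="GBRT")
plt.plot([0, 1], [0, 1], "--", label="perfectly calibrated")
plt.xlabel("Mean predicted probability")
plt.ylabel("Fraction of positives")
plt.legend(loc="best")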

fig, axes = plt.subplots(1,2,figsize=(13,5))

dsets.plot_2d_separator(gbrt, X, ax=axes[0], fill=True, alpha=.4, cm=dsets.cm2)
score_images = dsets.plot_2d_scores(gbrt, X, ax=axes[1], cm=dsets.ReBl, function='predict_proba')

for ax in axes:
    dsets.discrete_scatter(X_test[:,0], X_test[:,1], y_test, markers='^', ax=ax)
    dsets.discrete_scatter(X_train[:,0], X_train[:,1], y_train, markers='o', ax=ax)
    
    ax.set_xlabel("Feature 0")
    ax.set_ylabel("Feature 1")
cbar = plt.colorbar(score_images, ax=axes.tolist())
axes[0].legend(["Test Class 0", "Test Class 1", "Train Class 0", "Train Class 1"], ncol=4, loc=(.1, 1.1))




Example #3
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier as GBC

import datasets_mglearn as dsets
from importlib import reload  # Python 3: reload lives in importlib
dsets = reload(dsets)

iris = load_iris()

X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

gbrt = GBC(max_depth=3, learning_rate=0.01, random_state=0).fit(X_train, y_train)

print("Train Accuracy prediction: {}".format(gbrt.score(X_train, y_train)))
print("Test Accuracy prediction: {}".format(gbrt.score(X_test, y_test)))

print("Prediction Probabilities to detect flase positives or true negatives: \n{}".format(gbrt.predict_proba(X_test)[:6,:]))
print("Sum of these probabilities should be equal to one, each time: \n{}".format(gbrt.predict_proba(X_test)[:6].sum(axis=1)))

print("We compare y_pred that we know with y_test predicted by the ML: \n{}".format(y_test == gbrt.predict(X_test)))

print("{}".format({k:v for k,v in zip (["False","True"], np.bincount(y_test == gbrt.predict(X_test)))}))

dsets.discrete_scatter(X[:,0], X[:,1], y, markers=['o','^', 'v'])
plt.legend(["{}".format(iris.feature_names[0]), "{}".format(iris.feature_names[1]),"{}".format(iris.feature_names[2])], loc=(0.1,1.1), ncol=4)

plt.ion()
dsets.plot_feature_importances(gbrt, iris)

 
Example #4
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm

import datasets_mglearn as dsets
from importlib import reload  # Python 3: reload lives in importlib
dsets = reload(dsets)

plt.ion()
#generate dataset
X, y = dsets.make_forge()

plt.figure("Blolbs-Classification example")
dsets.discrete_scatter(X[:, 0], X[:, 1], y)
plt.legend(["Class 0", "Class 1"], loc=4)
plt.xlabel("First Feature")
plt.ylabel("Second feature")
print "X.shape: {}".format(X.shape)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

from sklearn.neighbors import KNeighborsClassifier

clf = KNeighborsClassifier(n_neighbors=3)

clf.fit(X_train, y_train)
print("Test set prediction: {}".format(clf.predict(X_test)))
Example #5
import matplotlib.pyplot as plt
plt.ion()

from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

import datasets_mglearn as dsets
dsets = reload(dsets)

X,y = make_moons(n_samples=100, noise=0.25, random_state=3)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

mlp = MLPClassifier(solver='lbfgs', random_state=0, hidden_layer_sizes=[10,10]).fit(X_train, y_train)
plt.figure("[10,10]")
dsets.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
dsets.discrete_scatter(X_train[:,0], X_train[:,1], y_train)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")

print("hidden_layer_sizes : [10] : 1 hidden avec 10 nœuds \n \t \t [10,10] : 2 hidden 10 nœuds chacune")

mlp = MLPClassifier(solver='lbfgs', random_state=0, hidden_layer_sizes=[10,10,10]).fit(X_train, y_train)
plt.figure("{}".format(mlp.__getattribute__("hidden_layer_sizes"    )))
dsets.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
dsets.discrete_scatter(X_train[:,0], X_train[:,1], y_train)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")


mlp = MLPClassifier(solver='lbfgs', activation='tanh', random_state=0, hidden_layer_sizes=[10,10,10]).fit(X_train, y_train)
plt.figure("{}, {}".format(mlp.hidden_layer_sizes, mlp.activation))