Example #1
# Project the standardized test data onto the LDA discriminants w
X_test_lda = X_test_std.dot(w)

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)
y_hat = lr.predict(X_test_lda)
f1 = f1_score(y_test, y_hat, average='micro')
print('f1 score (LDA) =', "%.2f" % f1)
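# Note (added sketch): for single-label multiclass data, micro-averaged F1 is
# mathematically identical to plain accuracy, so the score above can be
# cross-checked against accuracy_score if desired.
from sklearn.metrics import accuracy_score
print('accuracy (LDA) =', "%.2f" % accuracy_score(y_test, y_hat))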

colors = ['r', 'b', 'g']
markers = ['s', 'x', 'o']

for l, c, m in zip(np.unique(y_train), colors, markers):
    plt.scatter(X_train_lda[y_train == l, 0],
                X_train_lda[y_train == l, 1],
                c=c,
                label=l,
                marker=m)

plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower right')
plt.tight_layout()
plt.show()

# Plot decision regions of the logistic regression classifier in the LDA space
plot_decision_regions(X_test_lda, y_test, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower right')
plt.tight_layout()
plt.show()
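# plot_decision_regions is called throughout these snippets but never defined in
# them. A minimal sketch compatible with the calls above (a classifier with a
# .predict method, optional test_idx highlighting) could look like this; it is an
# assumption, not necessarily the original helper:
from matplotlib.colors import ListedColormap

def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # evaluate the classifier on a grid spanning the two plotted features
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # plot the samples of each class
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=colors[idx],
                    marker=markers[idx], label=cl, edgecolor='black')

    # circle the samples belonging to the test set, if given
    if test_idx is not None:
        X_test_pts = X[test_idx, :]
        plt.scatter(X_test_pts[:, 0], X_test_pts[:, 1],
                    facecolors='none', edgecolor='black', alpha=1.0,
                    linewidth=1, marker='o', s=100, label='test set')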
Example #2
# Train Adaline with a very small learning rate on the raw (non-standardized)
# features and plot the cost per epoch. The training call itself is missing from
# this fragment; the two lines below are a reconstruction (ada_slow and n_iter=10
# are assumptions).
ada_slow = AdalineGD(n_iter=10, eta=0.0001)
ada_slow.fit(X, y)
plt.plot(range(1, len(ada_slow.cost_) + 1), ada_slow.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.title('GD Adaline - Learning rate 0.0001')
plt.show()

# standardize features
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()
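# The same standardization can be done with scikit-learn (a sketch; the manual
# version above is kept because it mirrors the formula z = (x - mean) / std,
# and the two results should match since both use the population std):
from sklearn.preprocessing import StandardScaler
X_std_sk = StandardScaler().fit_transform(X)   # standardizes every column of X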

# Train an adaptive linear neuron (Adaline) on the Iris dataset
ada = AdalineGD(n_iter=10, eta=0.01)
ada.fit(X_std, y)

plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.title('GD Adaline - Learning rate 0.01 with standardization')
plt.tight_layout()
plt.show()

# Plotting decision regions
plot_decision_regions(X_std, y, classifier=ada)
plt.title('Adaline - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
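# AdalineGD itself is not defined in these snippets. A book-style full-batch
# gradient-descent implementation matching the eta / n_iter / cost_ interface
# used above could look like this (a sketch, assuming binary labels encoded as
# -1 / 1, not necessarily the original class):
class AdalineGD:
    """ADAptive LInear NEuron trained with full-batch gradient descent."""

    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            output = self.net_input(X)            # identity activation
            errors = y - output
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()
            self.cost_.append((errors ** 2).sum() / 2.0)
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, -1)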

Example #3
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Loading the Iris dataset from scikit-learn
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
print('Class labels:', np.unique(y))

# Splitting data into 70% training and 30% test data
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=1,
                                                    stratify=y)
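# stratify=y preserves the class proportions in both splits; a quick sanity
# check (added as an illustration, not part of the original snippet):
print('Label counts in y:      ', np.bincount(y))
print('Label counts in y_train:', np.bincount(y_train))
print('Label counts in y_test: ', np.bincount(y_test))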

from sklearn.tree import DecisionTreeClassifier
tree = DecisionTreeClassifier(criterion='gini', max_depth=7, random_state=1)
tree.fit(X_train, y_train)

X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X_combined,
                      y_combined,
                      classifier=tree,
                      test_idx=range(105, 150))
plt.xlabel('petal length [cm]')
plt.ylabel('petal width [cm]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
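# A quick held-out evaluation and a rendering of the fitted tree itself could be
# added like this (a sketch; plot_tree is available in scikit-learn >= 0.21):
from sklearn.metrics import accuracy_score
from sklearn.tree import plot_tree
print('Tree test accuracy: %.2f' % accuracy_score(y_test, tree.predict(X_test)))
plot_tree(tree, feature_names=['petal length [cm]', 'petal width [cm]'], filled=True)
plt.show()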
## Standardize the features for the MLP below
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X)
X_std = sc.transform(X)

## classifier MLP
from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(hidden_layer_sizes=(50,),  # one hidden layer with 50 units
                    max_iter=10,
                    alpha=1e-4,
                    solver='sgd',
                    verbose=False,
                    tol=1e-4,
                    random_state=1,
                    learning_rate_init=0.1)

mlp.fit(X_std, y)
print('MLP')
print("Training set score: %f" % mlp.score(X_std, y))
plot_decision_regions(
    X=X_std,
    y=y,
    classifier=mlp,
)
plt.xlabel('x1')
plt.ylabel('x2')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
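# With solver='sgd' the classifier records the training loss per epoch in
# mlp.loss_curve_; plotting it shows whether max_iter=10 was enough (a sketch;
# expect a ConvergenceWarning for so few iterations):
plt.plot(range(1, len(mlp.loss_curve_) + 1), mlp.loss_curve_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Training loss')
plt.tight_layout()
plt.show()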
# Project the held-out test data with the previously fitted kernel PCA
X_kpca_test = kpca.transform(X_test)

plt.scatter(X_kpca_train[y_train == 0, 0],
            X_kpca_train[y_train == 0, 1],
            color='red',
            marker='^',
            alpha=0.5)
plt.scatter(X_kpca_train[y_train == 1, 0],
            X_kpca_train[y_train == 1, 1],
            color='blue',
            marker='o',
            alpha=0.5)
plt.xlabel('KPC 1')
plt.ylabel('KPC 2')
plt.tight_layout()
plt.show()

## Use the kernel PCA projection as input to a logistic regression classifier
lr = LogisticRegression()
lr = lr.fit(X_kpca_train, y_train)
y_hat = lr.predict(X_kpca_test)
f1 = f1_score(y_test, y_hat, average='micro')
print('f1 score =', "%.2f" % f1)

plot_decision_regions(X_kpca_test, y_test, classifier=lr)
plt.xlabel('KPCA 1')
plt.ylabel('KPCA 2')
plt.legend(loc='lower left')
plt.tight_layout()
plt.show()
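# The kernel-PCA snippet above starts from an already-fitted `kpca` and an
# existing X_kpca_train; a minimal construction consistent with the two plotted
# components could look like this (the kernel choice and gamma value are
# illustrative assumptions, not taken from the source):
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=2, kernel='rbf', gamma=15)
X_kpca_train = kpca.fit_transform(X_train)
X_kpca_test = kpca.transform(X_test)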
# Evaluate the trained perceptron on the standardized test set
y_pred = ppn.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
print('Precision: %.2f' % precision_score(y_test, y_pred))
print('Recall: %.2f' % recall_score(y_test, y_pred))
print('F1-score: %.2f' % f1_score(y_test, y_pred))
confmat = confusion_matrix(y_test, y_pred)

# Plot confusion matrix
fig, ax = plt.subplots(figsize=(2.5, 2.5))
ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3)
for i in range(confmat.shape[0]):
    for j in range(confmat.shape[1]):
        ax.text(x=j, y=i, s=confmat[i, j], va='center', ha='center')
plt.xlabel('predicted label')
plt.ylabel('true label')
plt.show()
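# scikit-learn also ships a ready-made confusion-matrix plot; the manual matshow
# loop above could be replaced by ConfusionMatrixDisplay (available since 0.22):
from sklearn.metrics import ConfusionMatrixDisplay
ConfusionMatrixDisplay(confusion_matrix=confmat).plot(cmap=plt.cm.Blues)
plt.show()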

# Plot samples and classifications
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X=X_combined_std,
                      y=y_combined,
                      classifier=ppn,
                      test_idx=range(70, 100))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
# convert string entries to float (np.float is removed in recent NumPy versions)
X = X.astype(float)


# standardize features
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()



lrgd = LogisticRegressionGD(n_iter=10, eta=0.1, random_state=1)
lrgd.fit(X_std, y)

plt.plot(range(1, len(lrgd.cost_) + 1), lrgd.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.title('LogisticRegressionGD - Learning rate 0.1 with standardization')
plt.tight_layout()
plt.show()

print('final cost=', lrgd.cost_[-1])

plot_decision_regions(X_std, y, classifier=lrgd)
plt.title('LogisticRegressionGD - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')

plt.tight_layout()
plt.show()
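# LogisticRegressionGD is not defined in these snippets. A book-style batch
# gradient-descent logistic regression with the eta / n_iter / random_state /
# cost_ interface used above could look like this (a sketch, assuming binary
# labels encoded as 0 / 1, not necessarily the original class):
class LogisticRegressionGD:
    def __init__(self, eta=0.05, n_iter=100, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            net_input = np.dot(X, self.w_[1:]) + self.w_[0]
            output = 1.0 / (1.0 + np.exp(-np.clip(net_input, -250, 250)))
            errors = y - output
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()
            # negative log-likelihood, the cost that is plotted above
            cost = -y.dot(np.log(output)) - (1 - y).dot(np.log(1 - output))
            self.cost_.append(cost)
        return self

    def predict(self, X):
        return np.where(np.dot(X, self.w_[1:]) + self.w_[0] >= 0.0, 1, 0)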
# convert string entries to float
X = X.astype(float)

# Plotting the Iris data
plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='setosa')
plt.scatter(X[50:100, 0],
            X[50:100, 1],
            color='blue',
            marker='x',
            label='versicolor')

plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.show()

# Train a perceptron model on the Iris dataset
max_iter = 10
ppn = Perceptron(eta=0.01, n_iter=max_iter)
ppn.fit(X, y)

# Plotting decision regions
plot_decision_regions(X, y, classifier=ppn)
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.show()

plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of updates')
plt.show()
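# The Perceptron used in this last snippet (eta / n_iter constructor, errors_
# attribute) is a custom class rather than sklearn's. A minimal sketch matching
# that interface, assuming binary labels encoded as -1 / 1, could look like this:
class Perceptron:
    def __init__(self, eta=0.01, n_iter=10, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.errors_ = []
        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                # perceptron rule: update weights only on misclassified samples
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi
                self.w_[0] += update
                errors += int(update != 0.0)
            self.errors_.append(errors)
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, -1)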