def _finish_lda_figure():
    """Label both discriminant axes, add the legend, and display the figure."""
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower right')
    plt.tight_layout()
    plt.show()


# Map the standardized test samples through the matrix `w`
# (presumably the LDA projection matrix built earlier -- confirm).
X_test_lda = X_test_std.dot(w)

# Fit logistic regression on the projected training data and report the
# micro-averaged F1 score on the projected test data.
lr = LogisticRegression().fit(X_train_lda, y_train)
y_hat = lr.predict(X_test_lda)
f1 = f1_score(y_test, y_hat, average='micro')
print('f1 score (LDA) =', "%.2f" % f1)

# Scatter the projected training samples, one colour/marker per class.
# zip() stops at the shortest input, so at most three classes are drawn.
colors = ['r', 'b', 'g']
markers = ['s', 'x', 'o']
for class_label, color, marker in zip(np.unique(y_train), colors, markers):
    in_class = y_train == class_label
    plt.scatter(X_train_lda[in_class, 0],
                X_train_lda[in_class, 1],
                c=color, label=class_label, marker=marker)
_finish_lda_figure()

# Plotting: decision regions of the classifier over the projected test set.
plot_decision_regions(X_test_lda, y_test, classifier=lr)
_finish_lda_figure()
# Label and show the figure that was being built before this point.
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.title('GD Adaline - Learning rate 0.0001')
plt.show()

# Standardize the first two feature columns: subtract the column mean and
# divide by the column standard deviation. Work on a copy so X is untouched.
X_std = np.copy(X)
for column in (0, 1):
    values = X[:, column]
    X_std[:, column] = (values - values.mean()) / values.std()

# Train an adaptive linear model on the standardized features.
ada = AdalineGD(n_iter=10, eta=0.01)
ada.fit(X_std, y)

# Plot the recorded cost per epoch; epochs are numbered from 1.
epoch_numbers = range(1, len(ada.cost_) + 1)
plt.plot(epoch_numbers, ada.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.title('GD Adaline - Learning rate 0.01 with standardization')
plt.tight_layout()
plt.show()

# Draw the decision regions of the trained model.
plot_decision_regions(X_std, y, classifier=ada)
plt.title('Adaline - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load the Iris dataset from scikit-learn, keeping only feature columns 2
# and 3 (plotted below as petal length and petal width).
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
print('Class labels:', np.unique(y))

# Hold out 30% of the samples for testing; stratify so both splits keep the
# class proportions of y, and fix the seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
    stratify=y,
)

# Fit a Gini-impurity decision tree limited to depth 7.
tree = DecisionTreeClassifier(
    criterion='gini',
    max_depth=7,
    random_state=1,
).fit(X_train, y_train)

# Stack training rows first, then test rows, so the test samples occupy the
# tail of the combined arrays (indices 105-149 are highlighted below).
X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))
highlighted_test_rows = range(105, 150)
plot_decision_regions(X_combined,
                      y_combined,
                      classifier=tree,
                      test_idx=highlighted_test_rows)
plt.xlabel('petal length [cm]')
plt.ylabel('petal width [cm]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
## Standardize the features (fit the scaler on X and transform X in one step)
sc = StandardScaler()
X_std = sc.fit_transform(X)

## MLP classifier
from sklearn.neural_network import MLPClassifier

# NOTE: `(50)` is just the integer 50 -- parentheses alone do not make a
# tuple. Use an explicit one-element tuple so the "one hidden layer of 50
# units" intent is unambiguous and matches the documented array-like form.
mlp = MLPClassifier(
    hidden_layer_sizes=(50,),
    max_iter=10,
    alpha=1e-4,
    solver='sgd',
    verbose=False,
    tol=1e-4,
    random_state=1,
    learning_rate_init=0.1,
)
mlp.fit(X_std, y)

# Report accuracy on the same data the model was trained on.
print('MLP')
print("Training set score: %f" % mlp.score(X_std, y))

# Draw the decision regions of the trained network.
plot_decision_regions(
    X=X_std,
    y=y,
    classifier=mlp,
)
plt.xlabel('x1')
plt.ylabel('x2')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
# Project the test samples with the already-fitted kernel PCA transformer.
X_kpca_test = kpca.transform(X_test)

# Scatter the projected training samples: class 0 as red triangles,
# class 1 as blue circles.
for class_value, color, marker in ((0, 'red', '^'), (1, 'blue', 'o')):
    members = X_kpca_train[y_train == class_value]
    plt.scatter(members[:, 0], members[:, 1],
                color=color, marker=marker, alpha=0.5)
plt.xlabel('KPC 1')
plt.ylabel('KPC 2')
plt.tight_layout()
plt.show()

## Feed the kernel PCA projection into a classifier and report the
## micro-averaged F1 score on the projected test samples.
lr = LogisticRegression().fit(X_kpca_train, y_train)
y_hat = lr.predict(X_kpca_test)
f1 = f1_score(y_test, y_hat, average='micro')
print('f1 score =', "%.2f" % f1)

# Decision regions of the classifier over the projected test samples.
plot_decision_regions(X_kpca_test, y_test, classifier=lr)
plt.xlabel('KPCA 1')
plt.ylabel('KPCA 2')
plt.legend(loc='lower left')
plt.tight_layout()
plt.show()
# Testing: predict on the standardized test set and print summary metrics.
y_pred = ppn.predict(X_test_std)
misclassified = (y_test != y_pred).sum()
print('Misclassified samples: %d' % misclassified)
for report_format, metric in (
    ('Accuracy: %.2f', accuracy_score),
    ('Precision: %.2f', precision_score),
    ('Recall: %.2f', recall_score),
    ('F1-score: %.2f', f1_score),
):
    print(report_format % metric(y_test, y_pred))
confmat = confusion_matrix(y_test, y_pred)

# Plot the confusion matrix and write each count into its cell.
# matshow puts rows on the y axis and columns on the x axis, hence x=j, y=i.
fig, ax = plt.subplots(figsize=(2.5, 2.5))
ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3)
for (i, j), count in np.ndenumerate(confmat):
    ax.text(x=j, y=i, s=count, va='center', ha='center')
plt.xlabel('predicted label')
plt.ylabel('true label')
plt.show()

# Plot samples and classifications. Training rows are stacked first, so the
# test samples are the tail of the combined arrays (indices 70-99 are
# passed as test_idx below).
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(
    X=X_combined_std,
    y=y_combined,
    classifier=ppn,
    test_idx=range(70, 100),
)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
# Cast the feature matrix to float (the values presumably arrive as strings).
# `np.float` was only an alias for the builtin `float`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so use the builtin directly.
X = X.astype(float)

# Standardize the first two feature columns: subtract the column mean and
# divide by the column standard deviation. Work on a copy so X is untouched.
X_std = np.copy(X)
for column in (0, 1):
    X_std[:, column] = (X[:, column] - X[:, column].mean()) / X[:, column].std()

# Train logistic regression by gradient descent on the standardized data.
lrgd = LogisticRegressionGD(n_iter=10, eta=0.1, random_state=1)
lrgd.fit(X_std, y)

# Plot the recorded cost per epoch; epochs are numbered from 1.
plt.plot(range(1, len(lrgd.cost_) + 1), lrgd.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.title('LogisticRegressionGD - Learning rate 0.1 with standardization')
plt.tight_layout()
plt.show()
print('final cost=', lrgd.cost_[-1])

# Draw the decision regions of the trained model.
plot_decision_regions(X_std, y, classifier=lrgd)
plt.title('LogisticRegressionGD - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
# Cast the feature matrix to float. `np.float` was only an alias for the
# builtin `float`; it was deprecated in NumPy 1.20 and removed in 1.24, so
# use the builtin directly.
X = X.astype(float)

# Plotting the Iris data: rows 0-49 are drawn as setosa, rows 50-99 as
# versicolor.
plt.scatter(X[:50, 0], X[:50, 1],
            color='red', marker='o', label='setosa')
plt.scatter(X[50:100, 0], X[50:100, 1],
            color='blue', marker='x', label='versicolor')
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.show()

# Training phase: fit a perceptron model on the Iris dataset.
# NOTE(review): `max_inter` looks like a typo for "max_iter"; the name is
# kept because code outside this section may refer to it.
max_inter = 10
ppn = Perceptron(eta=0.01, n_iter=max_inter)
ppn.fit(X, y)

# Plotting decision regions of the trained perceptron.
plot_decision_regions(X, y, classifier=ppn)
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.show()

# Plot the per-epoch values recorded in `ppn.errors_`; epochs are numbered
# from 1.
plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of updates')
plt.show()