import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from utils.graph import plot_decision_regions

# Load the iris data, keeping petal length and petal width only
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Standardize features using the training-set statistics
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# Fit an entropy-based decision tree, capped at depth 3
tree = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
tree.fit(X_train_std, y_train)

# Plot decision regions over the combined data; indices 105-149 mark the test set
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X=X_combined_std, y=y_combined, classifier=tree,
                      test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()
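
# --- Optional follow-up (an addition, not part of the original script) ---
# A minimal check of how the depth-3 tree generalizes, assuming tree, X_test_std,
# and y_test from above are still in scope.
from sklearn.metrics import accuracy_score

y_pred = tree.predict(X_test_std)
print('Decision tree test accuracy: %.3f' % accuracy_score(y_test, y_pred))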
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from models.Perceptron import Perceptron
from utils.graph import plot_decision_regions

# Load the iris data; rows 0-99 are setosa and versicolor
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    header=None)
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)
X = df.iloc[0:100, [0, 2]].values  # sepal length and petal length

# Optional scatter plot of the raw data:
# plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='setosa')
# plt.scatter(X[50:100, 0], X[50:100, 1], color='blue', marker='x', label='versicolor')
# plt.xlabel('sepal length')
# plt.ylabel('petal length')
# plt.legend(loc='upper left')
# plt.show()

# Train the perceptron and plot misclassifications per epoch
ppn = Perceptron()
ppn.fit(X, y)
plt.plot(range(1, len(ppn._errors) + 1), ppn._errors, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of misclassifications')
plt.show()

plot_decision_regions(X, y, classifier=ppn)
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.show()
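
# --- Optional sanity check (an addition, not part of the original script) ---
# Setosa and versicolor are linearly separable in this feature pair, so with enough
# epochs the per-epoch misclassification count should fall to zero; a quick check
# assuming ppn from the fit above is still in scope.
print('Misclassifications in final epoch: %d' % ppn._errors[-1])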
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from models.AdalineGD import AdalineGD  # module path assumed, mirroring models.Perceptron
from utils.graph import plot_decision_regions

# Load the iris data as in the perceptron script
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    header=None)
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)
X = df.iloc[0:100, [0, 2]].values

# Optional comparison of two learning rates on the raw (unstandardized) features:
# fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
# ada1 = AdalineGD(n_iter=10, eta=0.01).fit(X, y)
# ax[0].plot(range(1, len(ada1._cost) + 1), np.log10(ada1._cost), marker='o')
# ax[0].set_xlabel('Epochs')
# ax[0].set_ylabel('log(Sum-squared-error)')
# ax[0].set_title('Adaline - learning rate 0.01')
# ada2 = AdalineGD(n_iter=10, eta=0.0001).fit(X, y)
# ax[1].plot(range(1, len(ada2._cost) + 1), ada2._cost, marker='o')
# ax[1].set_xlabel('Epochs')
# ax[1].set_ylabel('Sum-squared-error')
# ax[1].set_title('Adaline - learning rate 0.0001')
# plt.show()

# Standardize each feature to zero mean and unit variance
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

ada = AdalineGD(n_iter=15, eta=0.01).fit(X_std, y)
plot_decision_regions(X_std, y, classifier=ada)
plt.title('Adaline - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.show()

plt.plot(range(1, len(ada._cost) + 1), ada._cost, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.show()
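
# --- Optional cross-check (an addition, not part of the original script) ---
# The manual per-column z-scoring above matches scikit-learn's StandardScaler;
# a minimal sketch assuming X and X_std from above are still in scope.
from sklearn.preprocessing import StandardScaler

X_std_sk = StandardScaler().fit_transform(X)
print(np.allclose(X_std, X_std_sk))  # True: both compute (x - mean) / std per column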
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from utils.graph import plot_decision_regions

# NOTE: this script is truncated at the top in the source. `dataset` (the standardized
# Wine data split), `DIMENSION` (13 features), `mean_vecs`, `mean_overall`, and the
# within-class scatter matrix S_W are computed earlier (omitted here); the loop header
# and S_B initialization below are reconstructed from context.

# Between-class scatter matrix
S_B = np.zeros((DIMENSION, DIMENSION))
for i, mean_vec in enumerate(mean_vecs):
    n = dataset.X_std_train[dataset.y_train == i + 1, :].shape[0]
    mean_vec = mean_vec.reshape(DIMENSION, 1)
    S_B += n * (mean_vec - mean_overall).dot((mean_vec - mean_overall).T)
print(S_B.shape)

# Solve the generalized eigenvalue problem for inv(S_W) . S_B and sort the eigenpairs
# by decreasing eigenvalue; the first two linear discriminants (eigenvectors) capture
# most of the class-discriminatory information
eigen_vals, eigen_vecs = np.linalg.eig(np.linalg.inv(S_W).dot(S_B))
eigen_pairs = [(np.abs(eigen_vals[i]), eigen_vecs[:, i]) for i in range(len(eigen_vals))]
eigen_pairs = sorted(eigen_pairs, key=lambda k: k[0], reverse=True)

tot = sum(eigen_vals.real)
discr = [(i / tot) for i in sorted(eigen_vals.real, reverse=True)]
cum_discr = np.cumsum(discr)
plt.bar(range(1, 14), discr, alpha=0.5, align='center',
        label='individual "discriminability"')
plt.step(range(1, 14), cum_discr, where='mid',
         label='cumulative "discriminability"')
plt.ylabel('"discriminability" ratio')
plt.xlabel('Linear Discriminants')
plt.ylim([-0.1, 1.1])
plt.legend(loc='best')
plt.show()

# Project the training data onto the top two discriminants and fit a classifier
w = np.hstack((eigen_pairs[0][1][:, np.newaxis].real,
               eigen_pairs[1][1][:, np.newaxis].real))
X_lda_train = dataset.X_std_train.dot(w)
lr = LogisticRegression()
lr.fit(X_lda_train, dataset.y_train)
plot_decision_regions(X=X_lda_train, y=dataset.y_train, classifier=lr)
plt.xlabel('ld1')
plt.ylabel('ld2')
plt.legend(loc='lower left')
plt.show()
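
# --- Optional cross-check (an addition, not part of the original script) ---
# scikit-learn's LDA with the 'eigen' solver performs the same scatter-matrix
# eigendecomposition; its 2-component projection should span the same subspace as
# the manual w above (columns may differ in sign and scale). Assumes `dataset`
# from above is still in scope.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

lda = LinearDiscriminantAnalysis(solver='eigen', n_components=2)
X_lda_sk = lda.fit_transform(dataset.X_std_train, dataset.y_train)
print(X_lda_sk.shape)  # (n_train_samples, 2)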
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from utils.graph import plot_decision_regions

np.random.seed(0)
X_xor = np.random.randn(200, 2)
y_xor = np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0)
y_xor = np.where(y_xor, 1, -1)

plt.scatter(X_xor[y_xor == 1, 0], X_xor[y_xor == 1, 1],
            c='b', marker='x', label='1')
plt.scatter(X_xor[y_xor == -1, 0], X_xor[y_xor == -1, 1],
            c='r', marker='s', label='-1')
plt.ylim(-3, 3)
plt.legend()
plt.show()

svm = SVC(kernel='rbf', random_state=0, gamma=0.10, C=10.0)
svm.fit(X_xor, y_xor)
plot_decision_regions(X_xor, y_xor, classifier=svm)
plt.legend(loc='upper left')
plt.show()
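
# --- Optional experiment (an addition, not part of the original script) ---
# Raising gamma shrinks the RBF kernel's radius of influence, so the decision
# boundary hugs individual training points and overfits; a sketch reusing
# X_xor / y_xor from above.
svm_tight = SVC(kernel='rbf', random_state=0, gamma=100.0, C=10.0)
svm_tight.fit(X_xor, y_xor)
plot_decision_regions(X_xor, y_xor, classifier=svm_tight)
plt.legend(loc='upper left')
plt.show()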