def test_scikit_perceptron():
    iris = datasets.load_iris()
    X = iris.data[:, [2, 3]]
    y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                        random_state=0)
    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)
    # n_iter was removed from scikit-learn; max_iter is the modern equivalent
    # (n_iter_no_change is an early-stopping parameter, not a replacement)
    ppn = Perceptron(max_iter=40, eta0=0.1, random_state=0)
    ppn.fit(X_train_std, y_train)
    y_pred = ppn.predict(X_test_std)
    print('Misclassified samples: %d' % (y_test != y_pred).sum())
    print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))
    plot_decision_regions(X=X_combined_std, y=y_combined, classifier=ppn,
                          test_idx=range(105, 150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.show()
def test_adaline_sgd():
    df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
                     header=None)
    df.tail()
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    X = df.iloc[0:100, [0, 2]].values
    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()
    ada = AdalineSGD(n_iter=15, eta=0.01, random_state=1)
    ada.fit(X_std, y)
    plot_decision_regions(X_std, y, classifier=ada)
    plt.title('Adaline - Stochastic Gradient Descent')
    plt.xlabel('sepal length [standardized]')
    plt.ylabel('petal length [standardized]')
    plt.legend(loc='upper left')
    plt.show()
    plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Average cost')
    plt.show()
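# The AdalineSGD class exercised above is imported from a local module that is
# not part of this section. A minimal sketch, assuming only the interface used
# here (eta, n_iter, shuffle, random_state, fit, and a cost_ list holding the
# average cost per epoch):
import numpy as np

class AdalineSGD:
    def __init__(self, eta=0.01, n_iter=15, shuffle=True, random_state=None):
        self.eta = eta
        self.n_iter = n_iter
        self.shuffle = shuffle
        self.rgen = np.random.RandomState(random_state)

    def fit(self, X, y):
        # one weight per feature plus a bias unit at w_[0]
        self.w_ = np.zeros(1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            if self.shuffle:
                r = self.rgen.permutation(len(y))
                X, y = X[r], y[r]
            cost = []
            for xi, target in zip(X, y):
                # update the weights after every single sample
                error = target - self.net_input(xi)
                self.w_[1:] += self.eta * xi * error
                self.w_[0] += self.eta * error
                cost.append(0.5 * error ** 2)
            self.cost_.append(np.mean(cost))
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, -1)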
def test_perceptron():
    df = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)
    df.tail()
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    X = df.iloc[0:100, [0, 2]].values
    plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='setosa')
    plt.scatter(X[50:100, 0], X[50:100, 1], color='blue', marker='x',
                label='versicolor')
    plt.xlabel('sepal length')
    plt.ylabel('petal length')
    plt.legend(loc='upper left')
    plt.show()
    # plt.savefig("mygraph.png")
    ppn = Perceptron(eta=0.1, n_iter=10)
    ppn.fit(X, y)
    plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Number of misclassifications')
    plt.show()
    plot_decision_regions(X, y, classifier=ppn)
    plt.xlabel('sepal length [cm]')
    plt.ylabel('petal length [cm]')
    plt.legend(loc='upper left')
    plt.show()
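# The Perceptron used by test_perceptron() is a custom class (note the eta /
# n_iter / errors_ interface), not scikit-learn's. A minimal sketch consistent
# with that usage:
import numpy as np

class Perceptron:
    def __init__(self, eta=0.01, n_iter=10):
        self.eta = eta
        self.n_iter = n_iter

    def fit(self, X, y):
        self.w_ = np.zeros(1 + X.shape[1])  # weights, incl. bias at w_[0]
        self.errors_ = []  # misclassifications per epoch, plotted above
        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi
                self.w_[0] += update
                errors += int(update != 0.0)
            self.errors_.append(errors)
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, -1)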
def testTraining():
    df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
                     header=None)
    df.tail()
    # Extract the target variable from the first 100 rows
    y = df.iloc[0:100, 4].values
    # Convert Iris-setosa to -1 and the rest (Iris-versicolor) to 1
    y = np.where(y == 'Iris-setosa', -1, 1)
    # Extract columns 1 and 3 (sepal length, petal length) of the first 100 rows
    X = df.iloc[0:100, [0, 2]].values
    # Plot the setosa and versicolor samples
    plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='setosa')
    plt.scatter(X[50:100, 0], X[50:100, 1], color='blue', marker='x',
                label='versicolor')
    plt.xlabel('sepal length [cm]')
    plt.ylabel('petal length [cm]')
    plt.legend(loc='upper left')
    plt.figure(0)
    ppn = Perceptron(eta=0.1, n_iter=10)
    ppn.fit(X, y)
    plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Number of misclassifications')
    plt.figure(1)
    pdr.plot_decision_regions(X, y, classifier=ppn)
    plt.show()
def learn_xor(classifier, data_num=200, title='Result'):
    x_xor, y_xor = read_xor(data_num)
    classifier.fit(x_xor, y_xor)
    plot_decision_regions(x_xor, y_xor, classifier=classifier)
    plt.title(title)
    plt.legend(loc='upper left')
    plt.show()
def plot_decision_regions_iris(X, y, classifier, test_idx=None, resolution=0.02):
    plot_decision_regions(X, y, classifier=classifier, test_idx=test_idx,
                          resolution=resolution)
    plt.xlabel('sepal length [cm]')
    plt.ylabel('petal length [cm]')
    plt.legend(loc='upper left')
    plt.show()
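# plot_decision_regions is called throughout this section but never defined in
# it. A minimal sketch matching the (X, y, classifier, test_idx, resolution)
# signature used here:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])
    # evaluate the classifier over a grid spanning both features
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())
    # overlay the actual samples, one marker per class
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(X[y == cl, 0], X[y == cl, 1], alpha=0.8,
                    c=colors[idx], marker=markers[idx], label=cl)
    # circle the held-out test samples, if given
    if test_idx is not None:
        X_test = X[test_idx, :]
        plt.scatter(X_test[:, 0], X_test[:, 1], facecolors='none',
                    edgecolors='black', alpha=1.0, linewidths=1,
                    marker='o', s=100, label='test set')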
def plotTraining():
    X, y = sdt.setSample_IrisSV()
    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()
    ada = AdalineGD(n_iter=15, eta=0.01)
    ada.fit(X_std, y)
    pdr.plot_decision_regions(X_std, y, classifier=ada)
    plt.title('Adaline - Gradient Descent')
    plt.xlabel('sepal length [standardized]')
    plt.ylabel('petal length [standardized]')
    plt.legend(loc='upper left')
    plt.show()
    plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Sum-squared-error')
    plt.show()
def RunSkMethod(s='ppn'):
    isTree = False
    if s == 'ppn':
        # n_iter was removed from scikit-learn; max_iter is the modern equivalent
        method = Perceptron(max_iter=40, eta0=0.1, random_state=0, shuffle=True)
    elif s == 'lr':
        method = LogisticRegression(C=100.0, random_state=0)
    elif s == 'svc':
        method = SVC(kernel='linear', C=1.0, random_state=0)
    elif s == 'svm':
        method = SVC(kernel='rbf', random_state=0,
                     gamma=float(args[2]), C=float(args[3]))
    elif s == 'tree':
        method = DTC(criterion='entropy', max_depth=3, random_state=0)
        isTree = True
    elif s == 'forest':
        method = RFC(criterion='entropy', n_estimators=10,
                     random_state=1, n_jobs=2)
    elif s == 'knn':
        method = KNC(n_neighbors=5, p=2, metric='minkowski')
    elif s == 'pca':
        # PCA is not a classifier, so there is nothing to plot here
        method = PCA(n_components=2)
        return
    dd = ir.IrisDataSets()
    dd.useFit(method)
    pdr.plot_decision_regions(X=dd.X_combined_std, y=dd.y_combined,
                              classifier=method, test_idx=range(105, 150))
    dd.drawGraph()
    if s == 'lr':
        print(method.predict_proba(dd.X_test_std[0, :].reshape(1, -1)))
    # after this function, execute the following command on a terminal:
    #   dot -Tpng tree.dot -o tree.png
    if isTree:
        export_graphviz(method, out_file='tree.dot',
                        feature_names=['petal length', 'petal width'])
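# The dot command mentioned in the comment above can also be invoked from
# Python; a sketch assuming Graphviz is installed and on the PATH:
import subprocess

# same as typing `dot -Tpng tree.dot -o tree.png` in a terminal
subprocess.run(['dot', '-Tpng', 'tree.dot', '-o', 'tree.png'], check=True)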
def plotTraining():
    X, y = sdt.setSample_IrisSV()
    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()
    ada = AdalineSGD(n_iter=15, eta=0.01, random_state=1)
    ada.fit(X_std, y)
    pdr.plot_decision_regions(X_std, y, classifier=ada)
    plt.title('Adaline - Stochastic Gradient Descent')
    plt.xlabel('sepal length [standardized]')
    plt.ylabel('petal length [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.figure(0)
    plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Average Cost')
    plt.figure(1)
    plt.show()
def test_adaline_gd():
    df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
                     header=None)
    df.tail()
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    X = df.iloc[0:100, [0, 2]].values
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
    ada1 = AdalineGD(n_iter=10, eta=0.01).fit(X, y)
    ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
    ax[0].set_xlabel('Epochs')
    ax[0].set_ylabel('log(Sum-squared-error)')
    ax[0].set_title('Adaline - Learning rate 0.01')
    ada2 = AdalineGD(n_iter=10, eta=0.0001).fit(X, y)
    ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
    ax[1].set_xlabel('Epochs')
    ax[1].set_ylabel('Sum-squared-error')
    ax[1].set_title('Adaline - Learning rate 0.0001')
    plt.show()
    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()
    ada = AdalineGD(n_iter=15, eta=0.01)
    ada.fit(X_std, y)
    plot_decision_regions(X_std, y, classifier=ada)
    plt.title('Adaline - Gradient Descent')
    plt.xlabel('sepal length [standardized]')
    plt.ylabel('petal length [standardized]')
    plt.legend(loc='upper left')
    plt.show()
    plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Sum-squared-error')
    plt.show()
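# AdalineGD is another local class not shown in this section. A minimal
# batch-gradient-descent sketch, assuming the (eta, n_iter, fit, cost_)
# interface used above:
import numpy as np

class AdalineGD:
    def __init__(self, eta=0.01, n_iter=50):
        self.eta = eta
        self.n_iter = n_iter

    def fit(self, X, y):
        self.w_ = np.zeros(1 + X.shape[1])
        self.cost_ = []  # sum-squared-error per epoch, plotted above
        for _ in range(self.n_iter):
            # one weight update per epoch, computed over the whole batch
            errors = y - self.net_input(X)
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()
            self.cost_.append((errors ** 2).sum() / 2.0)
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, -1)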
from AdalineGD import AdalineGD
import matplotlib.pyplot as plt
import numpy as np
from SampleSetReader import ReadIris
from plot_decision_regions import plot_decision_regions

X, y = ReadIris()
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

ada = AdalineGD(n_iter=15, eta=0.01)
ada.fit(X_std, y)

plot_decision_regions(X_std, y, classifier=ada)
plt.title('Adaline - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.show()

plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import perceptron
import plot_decision_regions as pdr

df = pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",
                 header=None)
y = df.iloc[0:100, 4].values
y = np.where(y == "Iris-setosa", -1, 1)
X = df.iloc[0:100, [0, 2]].values

ppn = perceptron.Perceptron(eta=0.1, n_iter=10)
ppn.fit(X, y)

pdr.plot_decision_regions(X, y, classifier=ppn)
plt.xlabel("sepal length [cm]")
plt.ylabel("petal length [cm]")
plt.legend(loc="upper left")
plt.show()
# this snippet assumes the Iris data has not been loaded earlier in the file
import numpy as np
from sklearn.datasets import load_iris

iris = load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=1, stratify=y)

import matplotlib.pyplot as plt
from plot_decision_regions import plot_decision_regions
from sklearn.tree import DecisionTreeClassifier

tree = DecisionTreeClassifier(criterion='gini', max_depth=4, random_state=1)
tree.fit(X_train, y_train)

X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X_combined, y_combined, classifier=tree,
                      test_idx=range(105, 150))
plt.xlabel('petal length [cm]')
plt.ylabel('petal width [cm]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import plot_decision_regions as pdr
import wine_data
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

w = wine_data.WineDataSets()
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(w.X_train_std, w.y_train)

lr = LogisticRegression()
lr = lr.fit(X_train_lda, w.y_train)
pdr.plot_decision_regions(X_train_lda, w.y_train, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
plt.show()

X_test_lda = lda.transform(w.X_test_std)
pdr.plot_decision_regions(X_test_lda, w.y_test, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
plt.show()
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
from plot_decision_regions import plot_decision_regions

iris = load_iris()
X, y = iris.data[:, [0, 2]], iris.target  # two features only
# X, y = iris.data[:, :], iris.target  # all features
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                    random_state=1)

rf = RandomForestClassifier(n_estimators=40, criterion="gini")
rf.fit(X_train, y_train)
predictions = rf.predict(X_test)

X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))
# the feature-name indices must match the columns selected above ([0, 2]:
# sepal length and petal length), not [2, 3]
plot_decision_regions(X_combined, y_combined, rf,
                      np.array(iris.feature_names)[[0, 2]],
                      test_idx=range(90, 150), fileName="rf.png")

accuracy = accuracy_score(y_test, predictions)
print("Accuracy: {:.2f}".format(accuracy))
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import AdalineSGD as ppn
import plot_decision_regions as pdr

df = pd.read_csv('iris.data', header=None)
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-versicolor', -1, 1)
X = df.iloc[0:100, [0, 2]].values  # first 100 rows, columns 0 and 2
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

# positional arguments: eta=0.01, n_iter=15, shuffle=True, random_state=1
ppn3 = ppn.AdalineSGD(0.01, 15, True, 1)
ppn3.fit(X_std, y)

pdr.plot_decision_regions(X_std, y, classifier=ppn3)
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.title('Adaline - Stochastic Gradient Descent')
plt.legend(loc="upper left")
plt.show()

plt.plot(range(1, len(ppn3.cost_) + 1), ppn3.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Average Cost')
plt.show()
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    header=None)
df.tail()
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)
X = df.iloc[0:100, [0, 2]].values

plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='setosa')
plt.scatter(X[50:100, 0], X[50:100, 1], color='blue', marker='x',
            label='versicolor')
plt.xlabel('sepal length')
plt.ylabel('petal length')
plt.legend(loc='upper left')
# plt.show()

ppn = Perceptron(eta=0.1, n_iter=10)
ppn.fit(X, y)
plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of misclassifications')
# plt.show()

plot_decision_regions(X, y, classifier=ppn)
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.show()
eigen_pairs = [(np.abs(eigen_vals[i]), eigen_vecs[:, i])
               for i in range(len(eigen_vals))]
eigen_pairs = sorted(eigen_pairs, key=lambda k: k[0], reverse=True)
print('Eigenvalues in decreasing order:\n')
for ev in eigen_pairs:
    print(ev[0])
'''

# LDA in sklearn
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)
lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)
plot_decision_regions(X_train_lda, y_train, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
plt.show()

# On test set:
X_test_lda = lda.transform(X_test_std)
plot_decision_regions(X_test_lda, y_test, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
plt.show()
plt.scatter(X_train_pca[:, 0], X_train_pca[:, 1])
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.show()

# %%-----------------------------------------------------------------------
# Apply logistic regression after PCA
lr = LogisticRegression()
lr = lr.fit(X_train_pca, y_train)

# %%-----------------------------------------------------------------------
# Plot decision regions on train data
pp.plot_decision_regions(X_train_pca, y_train, classifier=lr)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend(loc='lower left')
plt.tight_layout()
plt.show()

# %%-----------------------------------------------------------------------
# Plot decision regions on test data
pp.plot_decision_regions(X_test_pca, y_test, classifier=lr)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend(loc='lower left')
plt.tight_layout()
### 006 calculate the classification accuracy
from sklearn.metrics import accuracy_score
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

### 007 plot decision regions
from plot_decision_regions import plot_decision_regions
import matplotlib.pyplot as plt
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X=X_combined_std, y=y_combined, classifier=ppn,
                      test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()

### 008 plot the sigmoid function
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

z = np.arange(-7, 7, 0.1)
phi_z = sigmoid(z)
plt.plot(z, phi_z)
plt.axvline(0.0, color='k')
plt.axhspan(0.0, 1.0, facecolor='1.0', edgecolor='0.0', alpha=1.0, ls='dotted')
@author: liuya
"""
from sklearn.ensemble import RandomForestClassifier
from sklearn_datasets import getIris, combinedTrainTest
from plot_decision_regions import plot_decision_regions
import matplotlib.pyplot as plt

X_train, X_test, y_train, y_test = getIris()
X_combined, y_combined, testidx_start, testidx_end = combinedTrainTest(
    X_train, y_train, X_test, y_test)

# criterion: the impurity measure used for the splits
forest = RandomForestClassifier(
    criterion='entropy',
    n_estimators=10,  # 10 decision trees
    random_state=1,
    n_jobs=20)  # number of processor cores to use
forest.fit(X_train, y_train)

plot_decision_regions(X_combined, y_combined, classifier=forest,
                      test_idx=range(testidx_start, testidx_end))
plt.xlabel('petal length [cm]')
plt.ylabel('petal width [cm]')
plt.legend(loc='upper left')
plt.show()
@author: surya
"""
from sklearn import datasets
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import LogisticRegression
import plot_decision_regions

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=1, stratify=y)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

X_train_01_subset = X_train[(y_train == 0) | (y_train == 1)]
y_train_01_subset = y_train[(y_train == 0) | (y_train == 1)]
lrgd = LogisticRegression.LogisticRegressionGD(eta=0.01, n_iter=1000,
                                               random_state=1)
lrgd.fit(X_train_01_subset, y_train_01_subset)
plot_decision_regions.plot_decision_regions(X=X_train_01_subset,
                                            y=y_train_01_subset,
                                            classifier=lrgd)
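# LogisticRegression above is a local module whose LogisticRegressionGD class
# is not shown in this section. A minimal sketch assuming the (eta, n_iter,
# random_state) interface and the {0, 1} class labels used above:
import numpy as np

class LogisticRegressionGD:
    def __init__(self, eta=0.05, n_iter=100, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            output = self.activation(self.net_input(X))
            errors = y - output
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()
            # negative log-likelihood as the cost
            self.cost_.append(-y.dot(np.log(output))
                              - (1 - y).dot(np.log(1 - output)))
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, z):
        # logistic sigmoid, clipped for numerical stability
        return 1.0 / (1.0 + np.exp(-np.clip(z, -250, 250)))

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, 0)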
                                                    random_state=0)

# Standardize the feature columns (transform them toward a standard normal distribution)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(x_train)
x_train_std = sc.transform(x_train)
x_test_std = sc.transform(x_test)
# KNN is very sensitive to the magnitudes of the feature values,
# so standardization is required

# KNN classifier
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
knn.fit(x_train_std, y_train)
# only training data is plotted here, so no test points are highlighted
pdr.plot_decision_regions(x_train_std, y_train, classifier=knn)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()

from sklearn.metrics import accuracy_score
print('KNN classification')
print('Accuracy: %.2f' % accuracy_score(y_test, knn.predict(x_test_std)))
print('Misclassified samples: %d' % (y_test != knn.predict(x_test_std)).sum())
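# To see why the comment above insists on standardization, one can refit the
# same KNN on the raw features and compare; a sketch assuming the x_train,
# x_test, y_train, y_test split from above:
knn_raw = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
knn_raw.fit(x_train, y_train)
print('Accuracy without scaling: %.2f'
      % accuracy_score(y_test, knn_raw.predict(x_test)))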
import sys, os
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../'))
from plot_decision_regions import plot_decision_regions

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
(X_train, X_test, y_train, y_test) = train_test_split(
    X, y, test_size=0.3, random_state=0)

sc = StandardScaler()  # Scale data
sc.fit(X_train)  # Use same fit to transform both train and test sets
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

svm = SVC(kernel='linear', C=1.0, random_state=0)
svm.fit(X_train_std, y_train)

plot_decision_regions(
    X_combined_std, y_combined, classifier=svm, test_idx=range(105, 150))
plt.xlabel('Petal length (Z)')
plt.ylabel('Petal width (Z)')
plt.legend(loc='upper left')
plt.show()
X_xor = np.random.randn(200, 2)
y_xor = np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0)
y_xor = np.where(y_xor, 1, -1)

plt.scatter(X_xor[y_xor == 1, 0], X_xor[y_xor == 1, 1],
            c='b', marker='x', label='1')
plt.scatter(X_xor[y_xor == -1, 0], X_xor[y_xor == -1, 1],
            c='r', marker='s', label='-1')
plt.xlim([-3, 3])
plt.ylim([-3, 3])
plt.legend(loc='best')
plt.tight_layout()
plt.show()

# Using the kernel trick to find separating hyperplanes in higher dimensional space
svm = SVC(kernel='rbf', random_state=1, gamma=0.10, C=10.0)
svm.fit(X_xor, y_xor)
plot_decision_regions(X_xor, y_xor, classifier=svm)
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
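# The gamma parameter controls how tightly the RBF kernel wraps around each
# training point. A sketch on the same data with a much larger gamma, which
# typically yields a far more jagged, overfit boundary:
svm_tight = SVC(kernel='rbf', random_state=1, gamma=100.0, C=10.0)
svm_tight.fit(X_xor, y_xor)
plot_decision_regions(X_xor, y_xor, classifier=svm_tight)
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()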
# Training logistic regression classifier using the first 2 principal components.
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from std_wine_data import X_train_std, y_train, X_test_std
from plot_decision_regions import plot_decision_regions

pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_std)
X_test_pca = pca.transform(X_test_std)

lr = LogisticRegression()
lr = lr.fit(X_train_pca, y_train)

plot_decision_regions(X_train_pca, y_train, classifier=lr)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend(loc='lower left')
plt.tight_layout()
# plt.savefig('images/05_04.png', dpi=300)
plt.show()
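# It can be worth checking how much variance the two retained components
# actually explain; explained_variance_ratio_ is a standard attribute of a
# fitted sklearn PCA object:
print('Explained variance ratios:', pca.explained_variance_ratio_)
print('Total variance retained: {:.2f}'.format(
    pca.explained_variance_ratio_.sum()))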
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

#-----------------------------------------------------------------------------
from sklearn.linear_model import LogisticRegression
import plot_decision_regions as pp
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=1, p=2, metric='minkowski')
knn.fit(X_train_std, y_train)
pp.plot_decision_regions(X_combined_std, y_combined, classifier=knn,
                         test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.show()
# print(knn.score(X_test_std, y_test))

from sklearn.neighbors import NearestNeighbors
knn2 = NearestNeighbors(n_neighbors=3)
knn2.fit(X_train_std)
# print(sum(sum(knn2.kneighbors(X_train_std)[0])))
print(knn2.kneighbors(X_train_std)[0])

'''
def loss_function(x, k):
y = iris.target

# train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3,
                                                    random_state=0)

# scale covariates
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# fit support vector machine
svm = SVC(kernel='linear', C=1, random_state=0)
svm.fit(X_train_std, y_train)

# plot svm decision regions
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X=X_combined_std, y=y_combined, classifier=svm,
                      test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()
from sklearn.neighbors import KNeighborsClassifier
import iris_sk as iris
import matplotlib.pyplot as plt
import plot_decision_regions as pdr

knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
knn.fit(iris.X_train_std, iris.y_train)

pdr.plot_decision_regions(iris.X_combined_std, iris.y_combined,
                          classifier=knn, test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.show()
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from std_wine_data import X_train_std, y_train, X_test_std, y_test
from plot_decision_regions import plot_decision_regions

pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_std)
X_test_pca = pca.transform(X_test_std)

lr = LogisticRegression()
lr = lr.fit(X_train_pca, y_train)

plot_decision_regions(X_test_pca, y_test, classifier=lr)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend(loc='lower left')
plt.tight_layout()
# plt.savefig('images/05_05.png', dpi=300)
plt.show()
# Standardize the feature columns (transform them toward a standard normal distribution)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(x_train)
x_train_std = sc.transform(x_train)
x_test_std = sc.transform(x_test)

# Logistic regression classification and error analysis
from sklearn.linear_model import LogisticRegression
import plot_decision_regions as pdr
import matplotlib.pyplot as plt
x_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))
lr = LogisticRegression(C=1000.0, random_state=0)
lr_model = lr.fit(x_train_std, y_train)
print(lr.coef_[0], lr.coef_[1], lr.coef_[2])
print(lr_model)
# highlight the test samples (assumes the usual 105/45 train/test split)
test_idx = range(105, 150)
pdr.plot_decision_regions(x_combined_std, y_combined, classifier=lr,
                          test_idx=test_idx)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()

from sklearn.metrics import accuracy_score
print('Accuracy: %.2f' % accuracy_score(y_test, lr.predict(x_test_std)))
print('Misclassified samples: %d' % (y_test != lr.predict(x_test_std)).sum())
            x_xor[y_xor == -1, 1], c='r', marker='s', label='-1')
plt.ylim(-3.0)
plt.legend()
plt.show()

# Nonlinear fit with an RBF-kernel SVM, building the separating hyperplane
from sklearn.svm import SVC
import plot_decision_regions as pdr
svm_hook = SVC(kernel='rbf', random_state=0, gamma=0.1, C=10.0)
svm_hook.fit(x_xor, y_xor)
pdr.plot_decision_regions(x_xor, y_xor, classifier=svm_hook)
plt.legend(loc='upper left')
plt.show()

from sklearn.metrics import accuracy_score
print('Evaluation of the RBF-kernel SVM fit:')
print('Accuracy: %.2f' % accuracy_score(y_xor, svm_hook.predict(x_xor)))
print('Misclassified samples: %d' % (y_xor != svm_hook.predict(x_xor)).sum())
print(svm_hook.predict([[1, -1], [5, 5]]))

########################################################################
# Linear-kernel SVM, building the separating hyperplane
from sklearn.svm import SVC
import plot_decision_regions as pdr
# gamma has no effect with a linear kernel
svm_linear = SVC(kernel='linear', random_state=0, gamma=0.1, C=10.0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=0, stratify=y)

stdsc = StandardScaler()
X_train_std = stdsc.fit_transform(X_train)
X_test_std = stdsc.transform(X_test)

lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)

lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)
plot_decision_regions(X=X_train_lda, y=y_train, classifier=lr)
plt.xlabel("LD 1")
plt.ylabel("LD 2")
plt.legend(loc="best")
plt.pause(5)
plt.close()

# Let us take a look at the results on the test set:
X_test_lda = lda.transform(X_test_std)
plot_decision_regions(X_test_lda, y_test, classifier=lr)
plt.xlabel("LD 1")
plt.ylabel("LD 2")
plt.legend(loc="best")
plt.pause(5)
plt.close()

# As we see, the logistic regression classifier is able to get a perfect