# For more info please visit:
# https://github.com/PhenixI/machine-learning/tree/master/17-RandomForest

# Load dataset
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt  # fix: plt was used below but never imported

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]  # petal length, petal width
y = iris.target

# Split the dataset into separate training and test datasets.
# fix: sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=0)

# Random forest: an ensemble of 10 entropy-criterion decision trees,
# fit in parallel on two cores.
from sklearn.ensemble import RandomForestClassifier
forest = RandomForestClassifier(criterion='entropy', n_estimators=10,
                                random_state=1, n_jobs=2)
forest.fit(X_train, y_train)

# Stack train and test samples so the decision surface is drawn over both;
# test_idx marks the 45 test samples (indices 105-149) for highlighting.
X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))

import DecisionBoundary
DecisionBoundary.plot_decision_regions(X_combined, y_combined,
                                       classifier=forest,
                                       test_idx=range(105, 150))
plt.xlabel('petal length [cm]')
plt.ylabel('petal width [cm]')
plt.legend(loc='upper left')
plt.show()
import os
# NOTE(review): hard-coded Windows working directory — adjust for your machine.
os.chdir('E:\\machine-learning\\6-Perceptron and Neural Networks\\Perceptron_python')

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the Iris dataset straight from the UCI repository (no header row).
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
                 header=None)

# Use the first 100 rows (setosa + versicolor) for a binary problem:
# label setosa as -1, everything else (versicolor) as +1.
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)
# Features: columns 0 and 2 (sepal length and petal length).
X = df.iloc[0:100, [0, 2]].values

import Perceptron
ppn = Perceptron.Perceptron(eta=0.1, n_iter=10)
ppn.fit(X, y)

import DecisionBoundary as DB
DB.plot_decision_regions(X, y, classifier=ppn)

# Standardization: zero mean, unit variance per feature.
# fix: column 1 was divided by X[:, 0].std() instead of its own std.
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

# Training AdalineSGD.
# fix: AdalineSGD was referenced without ever being imported.
import AdalineSGD
ada = AdalineSGD.AdalineSGD(n_iter=15, eta=0.01, random_state=1)
ada.fit(X_std, y)
# coding:utf-8
"""
Description: decision-tree training and visualization.

Fits a DecisionTreeClassifier on two Iris features (petal length/width),
tunes max_depth through the project's grid-search helper, plots the
decision regions, then exports the refitted tree as a Graphviz .dot file.
"""
from sklearn.tree import DecisionTreeClassifier
import DecisionBoundary as db
from sklearn import datasets
from sklearn.tree import export_graphviz

if __name__ == '__main__':
    # path = '/Users/fannian/Downloads/'
    path = '/home/fannian/downloads/'

    iris = datasets.load_iris()
    X = iris.data[:, [2, 3]]
    y = iris.target

    # Candidate tree depths for the grid search.
    param_grid = [{'clf__max_depth': [2, 3, 4]}]
    tree = DecisionTreeClassifier()
    f = db.fitScore(X=X, y=y, model=tree, param_grid=param_grid)
    f.getScore()
    db.plot_decision_regions(X=f.X_combined_std, y=f.y_combined,
                             classifier=f.linear,
                             test_idx=range(105, 150),
                             path_file=path + 'decision_figure.png')

    # export_graphviz verifies the model is a decision tree and raises for
    # any other estimator, so after the grid search we refit a plain tree
    # with the best max_depth before exporting.
    tre = DecisionTreeClassifier(
        max_depth=f.linear.best_params_['clf__max_depth'])
    tre.fit(X, y)
    export_graphviz(tre, out_file=path + 'tree.dot',
                    feature_names=['petal length', 'petal width'])
# Apply the scaler fitted on the training data to the test data.
X_test_std = sc.transform(X_test)

# Most algorithms in scikit-learn support multiclass classification
# by default via the One-vs-Rest method.
from sklearn.linear_model import Perceptron

# fix: Perceptron's n_iter parameter was deprecated in scikit-learn 0.19
# and removed in 0.21; the number of epochs is now max_iter.
ppn = Perceptron(max_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)

y_pred = ppn.predict(X_test_std)
# fix: typo in the printed message ("Misclssified" -> "Misclassified").
print("Misclassified samples: %d" % (y_test != y_pred).sum())

# Metric performance
from sklearn.metrics import accuracy_score
print("Accuracy: %.2f " % accuracy_score(y_test, y_pred))

# Draw decision surface over the combined (train + test) samples;
# test_idx highlights the 45 test points (indices 105-149).
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

import DecisionBoundary
DecisionBoundary.plot_decision_regions(X=X_combined_std, y=y_combined,
                                       classifier=ppn,
                                       test_idx=range(105, 150))

import matplotlib.pyplot as plt
plt.xlabel("petal length [standardized]")
plt.ylabel("petal width [standardized]")
plt.legend(loc="upper left")
plt.show()
# Features are columns 1.. of the wine frame; the class label is column 0.
X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=0)

# Standardize: fit the scaler on the training set only and reuse those
# statistics for the test set.
from sklearn.preprocessing import StandardScaler
stdsc = StandardScaler()
X_train_std = stdsc.fit_transform(X_train)
X_test_std = stdsc.transform(X_test)

# 1. LDA: project the wine features onto the 2 most discriminative axes.
# fix: the sklearn.lda module was removed from scikit-learn; the class
# now lives in sklearn.discriminant_analysis.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)

# Use a logistic regression classifier on the lower-dimensional training set.
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)

import DecisionBoundary
DecisionBoundary.plot_decision_regions(X_train_lda, y_train, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
plt.show()

# Apply the same (already-fitted) projection to the test set.
X_test_lda = lda.transform(X_test_std)
DecisionBoundary.plot_decision_regions(X_test_lda, y_test, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
plt.show()
# import matplotlib.pyplot as plt
# plt.scatter(X_xor[y_xor==1, 0], X_xor[y_xor==1, 1], c='b', marker='x', label='1')
# plt.scatter(X_xor[y_xor==-1, 0], X_xor[y_xor==-1, 1], c='r', marker='s', label='-1')
# plt.ylim(-3.0)
# plt.legend()
# plt.show()

# fix: plt is used below, but its import was only present in the
# commented-out block above.
import matplotlib.pyplot as plt

# Replace the SVC parameter kernel='linear' with kernel='rbf' so the
# classifier can separate the non-linear XOR data.
from sklearn.svm import SVC
svm = SVC(kernel='rbf', random_state=0, gamma=0.10, C=10.0)
svm.fit(X_xor, y_xor)

# Draw decision boundary
import DecisionBoundary
DecisionBoundary.plot_decision_regions(X_xor, y_xor, classifier=svm)
plt.legend(loc='upper left')
plt.show()

# ----------------------------------------------------------------
# Sample 2: using the Iris data

# Load dataset
from sklearn import datasets
import numpy as np

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]  # petal length, petal width
y = iris.target

# Split the dataset into separate training and test datasets.
# fix: sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=0)