Пример #1
0
# for more info please visit: https://github.com/PhenixI/machine-learning/tree/master/17-RandomForest
# Train a random forest on two iris features and plot its decision regions.

# load dataset
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt  # FIX: was missing — plt is used below

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]  # petal length / petal width only
y = iris.target

# split the dataset into separate training and test datasets
# FIX: sklearn.cross_validation was removed in scikit-learn 0.20;
# model_selection has provided the same train_test_split since 0.18
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# random forest: 10 entropy-based trees, trained on 2 cores
from sklearn.ensemble import RandomForestClassifier
forest = RandomForestClassifier(criterion='entropy', n_estimators=10,
                                random_state=1, n_jobs=2)
forest.fit(X_train, y_train)

# stack train + test so the plot can highlight the test samples (rows 105-149)
X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))

import DecisionBoundary
DecisionBoundary.plot_decision_regions(X_combined, y_combined, classifier=forest,
                                       test_idx=range(105, 150))
plt.xlabel('petal length [cm]')
plt.ylabel('petal width [cm]')
plt.legend(loc='upper left')
plt.show()
Пример #2
0
# Train a perceptron, then an Adaline-with-SGD, on two iris features.
import os
# working directory containing the local Perceptron / AdalineSGD / DecisionBoundary modules
os.chdir('E:\\machine-learning\\6-Perceptron and Neural Networks\\Perceptron_python')
import pandas as pd
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None)
import matplotlib.pyplot as plt
import numpy as np

# first 100 rows = setosa + versicolor; encode setosa as -1, everything else as +1
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)
# columns 0 and 2 of the CSV as the two input features
X = df.iloc[0:100, [0, 2]].values

import Perceptron
ppn = Perceptron.Perceptron(eta=0.1, n_iter=10)
ppn.fit(X, y)
import DecisionBoundary as DB
DB.plot_decision_regions(X, y, classifier=ppn)


# standardization: zero mean, unit variance per feature
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
# FIX: the original divided column 1 by X[:, 0].std() — wrong feature's std
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()


# training AdalineSGD
import AdalineSGD  # FIX: was missing — AdalineSGD is used below
ada = AdalineSGD.AdalineSGD(n_iter=15, eta=0.01, random_state=1)
ada.fit(X_std, y)
Пример #3
0
#coding:utf-8
"""
Description: decision tree and its visualization
"""
from sklearn.tree import DecisionTreeClassifier
import DecisionBoundary as db
from sklearn import datasets
from sklearn.tree import export_graphviz

if __name__ == '__main__':
    #path = '/Users/fannian/Downloads/'
    path = '/home/fannian/downloads/'
    # petal length / petal width columns of the iris dataset
    iris = datasets.load_iris()
    X = iris.data[:, [2, 3]]
    y = iris.target

    # candidate tree depths; key format 'clf__...' is presumably what
    # db.fitScore's internal pipeline expects — TODO confirm against that module
    param_grid = [{'clf__max_depth': [2, 3, 4]}]
    tree = DecisionTreeClassifier()

    # fit via the project helper, then plot decision regions to a PNG file;
    # f.linear appears to hold the fitted grid-search object (see best_params_ below)
    f = db.fitScore(X=X, y=y, model=tree, param_grid=param_grid)
    f.getScore()
    db.plot_decision_regions(X=f.X_combined_std,y=f.y_combined,classifier=f.linear,\
        test_idx=range(105,150),path_file = path + 'decision_figure.png')

    # export_graphviz checks that the model is a decision tree and raises for any
    # other model type, so after the grid search we retrain a plain tree with the
    # best depth found before exporting it.
    tre = DecisionTreeClassifier(
        max_depth=f.linear.best_params_['clf__max_depth'])
    tre.fit(X, y)
    export_graphviz(tre,out_file=path +'tree.dot',\
        feature_names=['petal length','petal width'])
Пример #4
0
# Standardize the test features with the scaler fitted on the training data.
# NOTE(review): `sc`, X_train_std, X_test, y_train, y_test are assumed to be
# defined earlier in the full script — confirm against the surrounding code.
X_test_std = sc.transform(X_test)

# Most algorithms in scikit-learn support multiclass classification by default
# via the One-vs-Rest method.
from sklearn.linear_model import Perceptron

# NOTE(review): `n_iter` was renamed to `max_iter` in newer scikit-learn
# releases; kept as-is for the old sklearn API the rest of this repo targets.
ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)

y_pred = ppn.predict(X_test_std)
# FIX: typo in the original message ("Misclssified")
print("Misclassified samples: %d" % (y_test != y_pred).sum())

# metric performance
from sklearn.metrics import accuracy_score

print("Accuracy: %.2f " % accuracy_score(y_test, y_pred))

# draw decision surface over train + test combined
import numpy as np  # FIX: was missing in this snippet — np is used below

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
import DecisionBoundary

DecisionBoundary.plot_decision_regions(X=X_combined_std, y=y_combined,
                                       classifier=ppn, test_idx=range(105, 150))

import matplotlib.pyplot as plt

plt.xlabel("petal length [standardized]")
plt.ylabel("petal width [standardized]")
plt.legend(loc="upper left")
plt.show()
Пример #5
0
# Project the wine data onto two linear discriminants, classify with logistic
# regression, and plot decision regions for both the train and test sets.
# NOTE(review): assumes `df_wine` was loaded earlier in the full script.
from sklearn.model_selection import train_test_split  # FIX: was missing
import matplotlib.pyplot as plt  # FIX: was missing — plt is used below

X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
from sklearn.preprocessing import StandardScaler
stdsc = StandardScaler()
X_train_std = stdsc.fit_transform(X_train)
X_test_std = stdsc.transform(X_test)

# 1. LDA
# FIX: sklearn.lda was removed in scikit-learn 0.19; the replacement class
# sklearn.discriminant_analysis.LinearDiscriminantAnalysis exists since 0.17
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)

# using logistic regression classifier handles the lower-dimensional training dataset
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)
import DecisionBoundary
DecisionBoundary.plot_decision_regions(X_train_lda, y_train, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
plt.show()

# use it on the test set
X_test_lda = lda.transform(X_test_std)
DecisionBoundary.plot_decision_regions(X_test_lda, y_test, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
plt.show()
Пример #6
0
# Fit an RBF-kernel SVM on XOR toy data, then load iris data for a second sample.
# NOTE(review): assumes X_xor / y_xor were generated earlier in the full script.
import matplotlib.pyplot as plt  # FIX: was missing — plt is used below

#plt.scatter(X_xor[y_xor==1, 0], X_xor[y_xor==1, 1],c='b', marker='x', label='1')
#plt.scatter(X_xor[y_xor==-1, 0], X_xor[y_xor==-1, 1],c='r', marker='s', label='-1')
#plt.ylim(-3.0)
#plt.legend()
#plt.show()

# replace the parameter of SVC kernel = 'linear' with kernel = 'rbf'
from sklearn.svm import SVC
svm = SVC(kernel='rbf', random_state=0, gamma=0.10, C=10.0)
svm.fit(X_xor, y_xor)

# draw decision boundary
import DecisionBoundary
DecisionBoundary.plot_decision_regions(X_xor, y_xor, classifier=svm)
plt.legend(loc='upper left')
plt.show()

# ----------------------------------------------------------------
# sample 2, using Iris data
# load dataset
from sklearn import datasets
import numpy as np
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]  # petal length / petal width only
y = iris.target

# split the dataset into separate training and test datasets
# FIX: sklearn.cross_validation was removed in scikit-learn 0.20;
# model_selection has provided the same train_test_split since 0.18
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)