from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
import numpy as np
import matplotlib.pyplot as plt
from plot_decision_regions import plot_decision_regions


def test_scikit_perceptron():
    iris = datasets.load_iris()
    X = iris.data[:, [2, 3]]
    y = iris.target

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.3,
                                                        random_state=0)

    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    # n_iter was renamed max_iter in scikit-learn 0.21; n_iter_no_change would
    # set the early-stopping patience instead, which is not what is meant here
    ppn = Perceptron(max_iter=40, eta0=0.1, random_state=0)
    ppn.fit(X_train_std, y_train)

    y_pred = ppn.predict(X_test_std)
    print('Misclassified samples: %d' % (y_test != y_pred).sum())

    print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))
    plot_decision_regions(X=X_combined_std,
                          y=y_combined,
                          classifier=ppn,
                          test_idx=range(105, 150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.show()
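Every snippet on this page calls a plot_decision_regions helper that each project defines elsewhere. A minimal sketch of that helper, assuming the signature used throughout these examples (X, y, classifier, test_idx=None, resolution=0.02) and following the widely-copied version from Raschka's Python Machine Learning; the marker and color choices are illustrative:

from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import numpy as np


def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    # one marker/color per class label
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # evaluate the classifier on a grid spanning the two features
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)

    # color each region by predicted class, then overlay the samples
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=colors[idx],
                    marker=markers[idx], label=cl)

    # circle the test-set samples when their indices are given
    if test_idx:
        X_test = X[test_idx, :]
        plt.scatter(X_test[:, 0], X_test[:, 1],
                    facecolors='none', edgecolors='black',
                    linewidths=1, marker='o', s=100, label='test set')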
Example #2
def test_adaline_sgd():

    df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None)
    df.tail()

    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    X = df.iloc[0:100, [0, 2]].values

    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

    ada = AdalineSGD(n_iter=15, eta=0.01, random_state=1)
    ada.fit(X_std, y)

    plot_decision_regions(X_std, y, classifier=ada)

    plt.title('Adaline - Stochastic Gradient Descent')
    plt.xlabel('sepal length [standardized]')
    plt.ylabel('petal length [standardized]')
    plt.legend(loc='upper left')
    plt.show()

    plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Average cost')
    plt.show()
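AdalineSGD here is a hand-rolled class rather than a scikit-learn estimator. A minimal sketch of the interface this example relies on (eta, n_iter, shuffle, random_state; fit; predict; a cost_ list holding the average cost per epoch), modeled on the Adaline-with-stochastic-gradient-descent design from Raschka's book; the weight-initialization details are assumptions:

import numpy as np


class AdalineSGD(object):
    """ADAptive LInear NEuron trained with stochastic gradient descent."""

    def __init__(self, eta=0.01, n_iter=10, shuffle=True, random_state=None):
        self.eta = eta                  # learning rate
        self.n_iter = n_iter            # passes over the training set
        self.shuffle = shuffle          # reshuffle the samples each epoch
        self.random_state = random_state

    def fit(self, X, y):
        rgen = np.random.RandomState(self.random_state)
        # small random initial weights; w_[0] is the bias unit
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            if self.shuffle:
                r = rgen.permutation(len(y))
                X, y = X[r], y[r]
            cost = []
            for xi, target in zip(X, y):
                # one weight update per sample (stochastic gradient descent)
                error = target - self.net_input(xi)
                self.w_[1:] += self.eta * xi * error
                self.w_[0] += self.eta * error
                cost.append(0.5 * error ** 2)
            self.cost_.append(sum(cost) / len(y))  # average cost this epoch
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, -1)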
Example #3
def test_perceptron():
    df = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)
    df.tail()

    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    X = df.iloc[0:100, [0, 2]].values

    plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='setosa')
    plt.scatter(X[50:100, 0],
                X[50:100, 1],
                color='blue',
                marker='x',
                label='versicolor')
    plt.xlabel('sepal length')
    plt.ylabel('petal length')
    plt.legend(loc='upper left')
    plt.show()
    # plt.savefig("mygraph.png")

    ppn = Perceptron(eta=0.1, n_iter=10)
    ppn.fit(X, y)
    plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Number of misclassifications')
    plt.show()

    plot_decision_regions(X, y, classifier=ppn)
    plt.xlabel('sepal length [cm]')
    plt.ylabel('petal length [cm]')
    plt.legend(loc='upper left')
    plt.show()
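Unlike Example #1, which uses sklearn.linear_model.Perceptron, this Perceptron (lowercase eta and n_iter arguments, an errors_ attribute) is a hand-rolled class. A minimal sketch of the interface the example relies on; the weight-initialization details are assumptions:

import numpy as np


class Perceptron(object):
    """Rosenblatt perceptron with the interface used in these examples."""

    def __init__(self, eta=0.01, n_iter=10, random_state=1):
        self.eta = eta        # learning rate
        self.n_iter = n_iter  # epochs over the training set
        self.random_state = random_state

    def fit(self, X, y):
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.errors_ = []  # misclassifications per epoch
        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                # the weights move only when the sample is misclassified
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi
                self.w_[0] += update
                errors += int(update != 0.0)
            self.errors_.append(errors)
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, -1)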
Example #4
def testTraining():
    df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None)
    df.tail()
    # extract the target variable from the first 100 rows
    y = df.iloc[0:100, 4].values
    # map Iris-setosa to -1 and Iris-versicolor to 1
    y = np.where(y == 'Iris-setosa', -1, 1)
    # extract sepal length and petal length (columns 0 and 2) from the first 100 rows
    X = df.iloc[0:100, [0, 2]].values
    # plot setosa
    plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='setosa')
    plt.scatter(X[50:100, 0], X[50:100, 1], color='blue', marker='x', label='versicolor')

    plt.xlabel('sepal length [cm]')
    plt.ylabel('petal length [cm]')
    plt.legend(loc='upper left')
    plt.show()

    ppn = Perceptron(eta=0.1, n_iter=10)
    ppn.fit(X, y)
    plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Number of misclassifications')
    plt.show()

    pdr.plot_decision_regions(X, y, classifier=ppn)
    plt.show()
Example #5
def learn_xor(classifier, data_num=200, title='Result'):
    x_xor, y_xor = read_xor(data_num)
    classifier.fit(x_xor, y_xor)

    plot_decision_regions(x_xor, y_xor, classifier=classifier)
    plt.title(title)
    plt.legend(loc='upper left')
    plt.show()
Example #6
def plot_decision_regions_iris(X,
                               y,
                               classifier,
                               test_idx=None,
                               resolution=0.02):
    plot_decision_regions(X,
                          y,
                          classifier=classifier,
                          test_idx=test_idx,
                          resolution=resolution)
    plt.xlabel('sepal length [cm]')
    plt.ylabel('petal length [cm]')
    plt.legend(loc='upper left')
    plt.show()
Example #7
def plotTraining():
    X, y = sdt.setSample_IrisSV()
    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()
    ada = AdalineGD(n_iter=15, eta=0.01)
    ada.fit(X_std, y)
    pdr.plot_decision_regions(X_std, y, classifier=ada)
    plt.title('Adaline - Gradient Descent')
    plt.xlabel('sepal length [standardized]')
    plt.ylabel('petal length [standardized]')
    plt.legend(loc='upper left')
    plt.show()
    plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Sum-squared-error')
    plt.show()
Example #8
def RunSkMethod(s='ppn'):
    isTree = False
    if s == 'ppn':
        # n_iter was renamed max_iter in scikit-learn 0.21
        method = Perceptron(max_iter=40, eta0=0.1, random_state=0, shuffle=True)
    elif s == 'lr':
        method = LogisticRegression(C=100.0, random_state=0)
    elif s == 'svc':
        method = SVC(kernel='linear', C=1.0, random_state=0)
    elif s == 'svm':
        method = SVC(kernel='rbf',
                     random_state=0,
                     gamma=float(args[2]),
                     C=float(args[3]))
    elif s == 'tree':
        method = DTC(criterion='entropy', max_depth=3, random_state=0)
        isTree = True
    elif s == 'forest':
        method = RFC(criterion='entropy',
                     n_estimators=10,
                     random_state=1,
                     n_jobs=2)
    elif s == 'knn':
        method = KNC(n_neighbors=5, p=2, metric='minkowski')
    elif s == 'pca':
        method = PCA(n_components=2)
        return

    dd = ir.IrisDataSets()
    dd.useFit(method)
    pdr.plot_decision_regions(X=dd.X_combined_std,
                              y=dd.y_combined,
                              classifier=method,
                              test_idx=range(105, 150))
    dd.drawGraph()

    if s == 'lr':
        print(method.predict_proba(dd.X_test_std[0, :].reshape(1, -1)))

    # After this function runs, convert the exported tree with:
    #     dot -Tpng tree.dot -o tree.png
    if isTree:
        export_graphviz(method,
                        out_file='tree.dot',
                        feature_names=['petal length', 'petal width'])
Example #9
def plotTraining():
    X, y = sdt.setSample_IrisSV()
    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()
    ada = AdalineSGD(n_iter=15, eta=0.01, random_state=1)
    ada.fit(X_std, y)
    pdr.plot_decision_regions(X_std, y, classifier=ada)
    plt.title('Adaline - Stochastic Gradient Descent')
    plt.xlabel('sepal length [standardized]')
    plt.ylabel('petal length [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.show()
    plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Average Cost')
    plt.show()
Example #10
def test_adaline_gd():

    df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None)
    df.tail()

    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    X = df.iloc[0:100, [0, 2]].values

    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))

    ada1 = AdalineGD(n_iter=10, eta=0.01).fit(X, y)
    ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
    ax[0].set_xlabel('Epochs')
    ax[0].set_ylabel('log(Sum-squared-error)')
    ax[0].set_title('Adaline - Learning rate 0.01')

    ada2 = AdalineGD(n_iter=10, eta=0.0001).fit(X, y)
    ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
    ax[1].set_xlabel('Epochs')
    ax[1].set_ylabel('Sum-squared-error')
    ax[1].set_title('Adaline - Learning rate 0.0001')

    plt.show()

    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

    ada = AdalineGD(n_iter=15, eta=0.01)
    ada.fit(X_std, y)
    plot_decision_regions(X_std, y, classifier=ada)

    plt.title('Adaline - Gradient Descent')
    plt.xlabel('sepal length [standardized]')
    plt.ylabel('petal length [standardized]')
    plt.legend(loc='upper left')
    plt.show()

    plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Sum-squared-error')
    plt.show()
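AdalineGD is the full-batch counterpart of the AdalineSGD class sketched after Example #2: each epoch performs a single weight update computed from all samples, and cost_ records the sum-of-squared-errors curve plotted above. A minimal sketch under those assumptions:

import numpy as np


class AdalineGD(object):
    """Adaline trained with full-batch gradient descent."""

    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            output = self.net_input(X)  # linear activation
            errors = y - output
            # one batch update from the full gradient
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()
            self.cost_.append((errors ** 2).sum() / 2.0)  # SSE cost
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, -1)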
Example #11
from AdalineGD import AdalineGD
import matplotlib.pyplot as plt
import numpy as np
from SampleSetReader import ReadIris
from plot_decision_regions import plot_decision_regions


X, y = ReadIris()
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

ada = AdalineGD(n_iter=15, eta=0.01)

ada.fit(X_std, y)
plot_decision_regions(X_std, y, classifier=ada)
plt.title('Adaline - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.show()
plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')

plt.show()
Example #12
File: plot.py  Project: tajima12/study
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import perceptron
import plot_decision_regions as pdr

df = pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", header=None)
y = df.iloc[0:100, 4].values
y = np.where(y == "Iris-setosa", -1, 1)
X = df.iloc[0:100, [0, 2]].values

ppn = perceptron.Perceptron(eta=0.1, n_iter=10)
ppn.fit(X, y)

pdr.plot_decision_regions(X, y, classifier=ppn)

plt.xlabel("sepal length [cm]")
plt.ylabel("petal length [cm]")
plt.legend(loc="upper left")

plt.show()
Example #13
from sklearn import datasets
import numpy as np

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=1,
                                                    stratify=y)

import matplotlib.pyplot as plt
from plot_decision_regions import plot_decision_regions
from sklearn.tree import DecisionTreeClassifier

tree = DecisionTreeClassifier(criterion='gini', max_depth=4, random_state=1)
tree.fit(X_train, y_train)

X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X_combined,
                      y_combined,
                      classifier=tree,
                      test_idx=range(105, 150))

plt.xlabel('petal length [cm]')
plt.ylabel('petal width [cm]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
Example #14
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import plot_decision_regions as pdr
import wine_data
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

w = wine_data.WineDataSets()
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(w.X_train_std, w.y_train)

lr = LogisticRegression()
lr = lr.fit(X_train_lda, w.y_train)
pdr.plot_decision_regions(X_train_lda, w.y_train, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
plt.show()

X_test_lda = lda.transform(w.X_test_std)
pdr.plot_decision_regions(X_test_lda, w.y_test, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')

plt.legend(loc='lower left')
plt.show()
Example #15
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
from plot_decision_regions import plot_decision_regions

iris = load_iris()
X,y = iris.data[:,[0,2]], iris.target	# two features only
# X,y = iris.data[:,:], iris.target	# all features

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)

rf = RandomForestClassifier(n_estimators=40, criterion="gini")
rf.fit(X_train, y_train)
predictions = rf.predict(X_test)

X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))

plot_decision_regions(X_combined, y_combined, rf,
                      np.array(iris.feature_names)[[0, 2]],  # names of the two features actually used
                      test_idx=range(90, 150), fileName="rf.png")

accuracy = accuracy_score(y_test, predictions)
print("Accuracy: {:.2f}".format(accuracy))
Example #16
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import AdalineSGD as ppn

import plot_decision_regions as pdr

df = pd.read_csv('iris.data', header=None)
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-versicolor', -1, 1)
X = df.iloc[0:100, [0, 2]].values  # get first 100 rows, columns 0 and 2
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()
ppn3 = ppn.AdalineSGD(eta=0.01, n_iter=15, shuffle=True, random_state=1)
ppn3.fit(X_std, y)

pdr.plot_decision_regions(X_std, y, classifier=ppn3)
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.title('Adaline - Stochastic Gradient Descent')
plt.legend(loc="upper left")
plt.show()

plt.plot(range(1, len(ppn3.cost_) + 1), ppn3.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Average Cost')
plt.show()




Example #17
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    header=None)
df.tail()

y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)
X = df.iloc[0:100, [0, 2]].values
plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='setosa')
plt.scatter(X[50:100, 0],
            X[50:100, 1],
            color='blue',
            marker='x',
            label='versicolor')
plt.xlabel('sepal length')
plt.ylabel('petal length')
plt.legend(loc='upper left')
#plt.show()

ppn = Perceptron(eta=0.1, n_iter=10)
ppn.fit(X, y)
plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of misclassifications')
#plt.show()

plot_decision_regions(X, y, classifier=ppn)
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.show()
Example #18
eigen_pairs = [(np.abs(eigen_vals[i]), eigen_vecs[:, i])
               for i in range(len(eigen_vals))]
eigen_pairs = sorted(eigen_pairs, key=lambda k: k[0], reverse=True)

print('Eigenvalues in decreasing order:\n')
for ev in eigen_pairs:
    print(ev[0])

# LDA in sklearn
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)

lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)

plot_decision_regions(X_train_lda, y_train, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
plt.show()

# On test set:
X_test_lda = lda.transform(X_test_std)

plot_decision_regions(X_test_lda, y_test, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
plt.show()
Example #19
plt.scatter(X_train_pca[:, 0], X_train_pca[:, 1])
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.show()

# %%-----------------------------------------------------------------------
# Apply logistic regression after PCA

lr = LogisticRegression()
lr = lr.fit(X_train_pca, y_train)

# %%-----------------------------------------------------------------------
# Plot decision regions on train data

pp.plot_decision_regions(X_train_pca, y_train, classifier=lr)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend(loc='lower left')
plt.tight_layout()
plt.show()


# %%-----------------------------------------------------------------------
# Plot decision regions on test data

pp.plot_decision_regions(X_test_pca, y_test, classifier=lr)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend(loc='lower left')
plt.tight_layout()

Example #20
### 006 calculate the classification accuracy
from sklearn.metrics import accuracy_score

print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))



### 007 plot decision regions
from plot_decision_regions import plot_decision_regions
import matplotlib.pyplot as plt

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X=X_combined_std, y=y_combined, classifier=ppn, test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()



### 008 plot the sigmoid function
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))
z = np.arange(-7, 7, 0.1)
phi_z = sigmoid(z)
plt.plot(z, phi_z)
plt.axvline(0.0, color='k')
plt.axhspan(0.0, 1.0, facecolor='1.0', edgecolor='0.0', alpha=1.0, ls='dotted')
Example #21
"""
@author: liuya
"""

from sklearn.ensemble import RandomForestClassifier
from sklearn_datasets import getIris, combinedTrainTest
from plot_decision_regions import plot_decision_regions
import matplotlib.pyplot as plt

X_train, X_test, y_train, y_test = getIris()
X_combined, y_combined, testidx_start, testidx_end = combinedTrainTest(
    X_train, y_train, X_test, y_test)

# criterion: the impurity measure
forest = RandomForestClassifier(
    criterion='entropy',
    n_estimators=10,  # 10 decision trees
    random_state=1,
    n_jobs=20)  # number of processor cores to use
forest.fit(X_train, y_train)

plot_decision_regions(X_combined,
                      y_combined,
                      classifier=forest,
                      test_idx=range(testidx_start, testidx_end))

plt.xlabel('petal length [cm]')
plt.ylabel('petal width [cm]')
plt.legend(loc='upper left')
plt.show()
Example #22
"""
@author: surya
"""

from sklearn import datasets
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import LogisticRegression
import plot_decision_regions
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=1,
                                                    stratify=y)
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
X_train_01_subset = X_train[(y_train == 0) | (y_train == 1)]
y_train_01_subset = y_train[(y_train == 0) | (y_train == 1)]
lrgd = LogisticRegression.LogisticRegressionGD(eta=0.01,
                                               n_iter=1000,
                                               random_state=1)
lrgd.fit(X_train_01_subset, y_train_01_subset)
plot_decision_regions.plot_decision_regions(X=X_train_01_subset,
                                            y=y_train_01_subset,
                                            classifier=lrgd)
Example #23
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

# Standardize the feature columns (transform them to a standard normal distribution)
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
sc.fit(x_train)
x_train_std = sc.transform(x_train)
x_test_std = sc.transform(x_test)

# k-NN is sensitive to the scale of the feature values, so standardization is required
# k-nearest-neighbors with scikit-learn
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
knn.fit(x_train_std, y_train)
pdr.plot_decision_regions(x_train_std,
                          y_train,
                          classifier=knn)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()

from sklearn.metrics import accuracy_score

print('k-NN classification:')
print('Accuracy: %.2f' % accuracy_score(y_test, knn.predict(x_test_std)))
print('Misclassified samples: %d' % (y_test != knn.predict(x_test_std)).sum())
Example #24
import sys, os
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             '../'))
from plot_decision_regions import plot_decision_regions

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import numpy as np
import matplotlib.pyplot as plt

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
(X_train, X_test, y_train, y_test) = train_test_split(
    X, y, test_size=0.3, random_state=0)

sc = StandardScaler()
# Scale data
sc.fit(X_train) # Use same to transform both train and test sets
X_train_std = sc.transform(X_train)
X_test_std  = sc.transform(X_test)

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))


svm = SVC(kernel='linear', C=1.0, random_state=0)
svm.fit(X_train_std, y_train)

plot_decision_regions(
    X_combined_std, y_combined, classifier=svm, test_idx=range(105, 150))
plt.xlabel('Petal length (Z)')
plt.ylabel('Petal width (Z)')
plt.legend(loc='upper left')
plt.show()
Example #25
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from plot_decision_regions import plot_decision_regions

np.random.seed(1)  # fix the seed so the XOR sample is reproducible
X_xor = np.random.randn(200, 2)
y_xor = np.logical_xor(X_xor[:, 0] > 0,
                       X_xor[:, 1] > 0)
y_xor = np.where(y_xor, 1, -1)

plt.scatter(X_xor[y_xor == 1, 0],
            X_xor[y_xor == 1, 1],
            c='b', marker='x',
            label='1')
plt.scatter(X_xor[y_xor == -1, 0],
            X_xor[y_xor == -1, 1],
            c='r',
            marker='s',
            label='-1')

plt.xlim([-3, 3])
plt.ylim([-3, 3])
plt.legend(loc='best')
plt.tight_layout()
plt.show()

# Using the kernel trick to find separating hyperplanes in higher dimensional space
svm = SVC(kernel='rbf', random_state=1, gamma=0.10, C=10.0)
svm.fit(X_xor, y_xor)
plot_decision_regions(X_xor, y_xor,
                      classifier=svm)

plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
Example #26
# Training a logistic regression classifier using the first 2 principal components.
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression

from std_wine_data import X_train_std, y_train, X_test_std
from plot_decision_regions import plot_decision_regions

pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_std)
X_test_pca = pca.transform(X_test_std)

lr = LogisticRegression()
lr = lr.fit(X_train_pca, y_train)

plot_decision_regions(X_train_pca, y_train, classifier=lr)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend(loc='lower left')
plt.tight_layout()
# plt.savefig('images/05_04.png', dpi=300)
plt.show()
Example #27
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
#-----------------------------------------------------------------------------

from sklearn.linear_model import LogisticRegression
import plot_decision_regions as pp
import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=1, p=2, metric='minkowski')
knn.fit(X_train_std, y_train)
pp.plot_decision_regions(X_combined_std,
                         y_combined,
                         classifier=knn,
                         test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.show()

# print(knn.score(X_test_std,y_test))

from sklearn.neighbors import NearestNeighbors

knn2 = NearestNeighbors(n_neighbors=3)
knn2.fit(X_train_std)
# print(sum(sum(knn2.kneighbors(X_train_std)[0])))
print(knn2.kneighbors(X_train_std)[0])
Example #28
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import numpy as np
import matplotlib.pyplot as plt
from plot_decision_regions import plot_decision_regions

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

# train test split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=.3,
                                                    random_state=0)

# scale covariates
sc = StandardScaler()
sc.fit(X_train)

X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# fit support vector machine
svm = SVC(kernel='linear', C=1, random_state=0)
svm.fit(X_train_std, y_train)

# plot svm decision regions
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X=X_combined_std,
                      y=y_combined,
                      classifier=svm,
                      test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()
Example #29
File: KNN.py  Project: yuepaang/pythonML
from sklearn.neighbors import KNeighborsClassifier
import iris_sk as iris
import matplotlib.pyplot as plt
import plot_decision_regions as pdr

knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
knn.fit(iris.X_train_std, iris.y_train)
pdr.plot_decision_regions(iris.X_combined_std,
                          iris.y_combined,
                          classifier=knn,
                          test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.show()
Example #30
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression

from std_wine_data import X_train_std, y_train, X_test_std, y_test
from plot_decision_regions import plot_decision_regions

pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_std)
X_test_pca = pca.transform(X_test_std)

lr = LogisticRegression()
lr = lr.fit(X_train_pca, y_train)

plot_decision_regions(X_test_pca, y_test, classifier=lr)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend(loc='lower left')
plt.tight_layout()
# plt.savefig('images/05_05.png', dpi=300)
plt.show()
Example #31
# Standardize the feature columns (transform them to a standard normal distribution)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(x_train)
x_train_std = sc.transform(x_train)
x_test_std = sc.transform(x_test)

# Logistic regression classification and misclassification analysis
from sklearn.linear_model import LogisticRegression
import plot_decision_regions as pdr
import matplotlib.pyplot as plt
x_combined_std = np.vstack((x_train_std, x_test_std))
y_combined_std = np.hstack((y_train, y_test))

lr = LogisticRegression(C=1000.0, random_state=0)
lr_model = lr.fit(x_train_std, y_train)
print(lr.coef_[0], lr.coef_[1], lr.coef_[2])
print(lr_model)
test_idx = range(105, 150)
pdr.plot_decision_regions(x_combined_std,
                          y_combined_std,
                          classifier=lr,
                          test_idx=test_idx)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()

from sklearn.metrics import accuracy_score
print('Accuracy: %.2f' % accuracy_score(y_test, lr.predict(x_test_std)))
print('Misclassified samples: %d' % (y_test != lr.predict(x_test_std)).sum())
Example #32
plt.scatter(x_xor[y_xor == -1, 0],
            x_xor[y_xor == -1, 1],
            c='r',
            marker='s',
            label='-1')

plt.ylim(-3.0, 3.0)
plt.legend()
plt.show()

# Nonlinear fit with an RBF-kernel SVM: construct the separating hyperplane
from sklearn.svm import SVC
import plot_decision_regions as pdr

svm_hook = SVC(kernel='rbf', random_state=0, gamma=0.1, C=10.0)
svm_hook.fit(x_xor, y_xor)
pdr.plot_decision_regions(x_xor, y_xor, classifier=svm_hook)
plt.legend(loc='upper left')
plt.show()

from sklearn.metrics import accuracy_score

print('RBF-kernel SVM classification results:')
print('Accuracy: %.2f' % accuracy_score(y_xor, svm_hook.predict(x_xor)))
print('Misclassified samples: %d' % (y_xor != svm_hook.predict(x_xor)).sum())
print(svm_hook.predict([[1, -1], [5, 5]]))
########################################################################
# Linear fit with a linear-kernel SVM: construct the separating hyperplane
from sklearn.svm import SVC
import plot_decision_regions as pdr

svm_linear = SVC(kernel='linear', random_state=0, C=10.0)  # gamma has no effect with a linear kernel
Example #33
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0,
                                                    stratify=y)

stdsc = StandardScaler()
X_train_std = stdsc.fit_transform(X_train)
X_test_std = stdsc.transform(X_test)

lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)

lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)
plot_decision_regions(X=X_train_lda, y=y_train, classifier=lr)
plt.xlabel("LD 1")
plt.ylabel("LD 2")
plt.legend(loc="best")
plt.pause(5)
plt.close()

# Let us take a look at the results on the test set:
X_test_lda = lda.transform(X_test_std)
plot_decision_regions(X_test_lda, y_test, classifier=lr)
plt.xlabel("LD 1")
plt.ylabel("LD 2")
plt.legend(loc="best")
plt.pause(5)
plt.close()
# As we see, the logistic regression classifier is able to get a perfect result on the test set.