def main():
    """Train a perceptron on two iris features and plot its decision regions."""
    iris = load_iris()
    # First 100 samples (two classes), restricted to the first two features.
    samples = iris.data[:100, :2]
    labels = np.where(iris.target[:100] == 1, 1, -1)  # recode labels as +1 / -1
    train_x, test_x, train_y, test_y = train_test_split(
        samples, labels, test_size=0.3)
    model = perception(eta=0.1, loop=30)
    model.fit(train_x, train_y)  # train the model
    plot_decision_regions(model, samples, labels)
def main():
    """Fit a perceptron to 100 iris samples (features 0 and 2) and plot the result."""
    data = load_iris()
    features = data.data[:100, [0, 2]]  # 100 samples with two selected feature columns
    raw_targets = data.target[:100]     # the matching 100 class labels
    # print(raw_targets)
    targets = np.where(raw_targets == 1, 1, -1)  # map labels onto the +1 / -1 encoding
    # Randomly split the samples: 70% for training, 30% for testing.
    feat_train, feat_test, tgt_train, tgt_test = train_test_split(
        features, targets, test_size=0.3)
    ppn = perception(alpha=0.1, loop=10)
    ppn.train(feat_train, tgt_train)               # fit the model on the training split
    plot_decision_regions(ppn, features, targets)  # visualize the fitted model
# Hold out 30% of the samples as a test set (seeded for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

from sklearn.preprocessing import StandardScaler

# Standardize features using statistics learned from the training split only.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# Large C means weak L2 regularization.
lr = LogisticRegression(C=1000.0, random_state=0)
lr.fit(X_train_std, y_train)
# print(lr.predict_proba(X_test_std[0,:]))

# Re-combine train and test so the plot can highlight the held-out samples
# (indices 105-149 of the combined array).
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X_combined_std, y_combined, classifier=lr, test_idx=range(105, 150))

# Page 68 Regularization
# Sweep the inverse regularization strength C over 10^-5 .. 10^4, refit each
# time, and record one row of weight coefficients per setting.
weights, params = [], []
for c in np.arange(-5, 5):
    lr = LogisticRegression(C=10.0**c, random_state=0)
    lr.fit(X_train_std, y_train)
    # coef_ row index 1 — presumably the second class's coefficients; confirm
    # against the number of classes in y.
    weights.append(lr.coef_[1])
    params.append(10.0**c)
weights = np.array(weights)
# Show how each weight shrinks toward zero as C decreases (stronger regularization).
plt.plot(params, weights[:, 0], label='petal length')
plt.plot(params, weights[:, 1], linestyle='--', label='petal width')
plt.ylabel('weight coefficient')
plt.xlabel('C')
plt.legend(loc='upper left')
plt.xscale('log')
def predict(self, X):
    """Return the class label (+1 or -1) for each sample in X.

    Samples whose activation (as computed by ``self.activation``) is
    non-negative are labelled +1, the rest -1.
    """
    return np.where(self.activation(X) >= 0.0, 1, -1)


if __name__ == '__main__':
    # Load the iris data set straight from the UCI repository (no header row).
    df = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)
    # Column 4 holds the species string; encode setosa as +1, everything else as -1.
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', 1, -1)
    # Take two feature columns (0 and 2) from the first 100 samples.
    X = df.iloc[0:100, [0, 2]].values
    # Standardize each feature to zero mean and unit variance.
    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()
    ada = AdalineSGD(n_iter=15, eta=0.01, random_state=1)
    ada.fit(X_std, y)
    # Plot the per-epoch cost on a log10 scale.
    # (Axis labels translate to "number of iterations" / "sum of squared errors";
    # the title to "adaline -- after standardization".)
    plt.plot(range(1, len(ada.cost_) + 1), np.log10(ada.cost_), marker='o')
    plt.xlabel('迭代次数')
    plt.ylabel('误差平方和')
    plt.title('adaline--标准化后')
    plt.show()
    plot.plot_decision_regions(X_std, y, classifier=ada)
    plt.title('adaline--标准化后')
    plt.show()
    # Translation of the note below: "On the standardized data, training still
    # converges with a learning rate of 0.01."
    """ 可以看出,在标准化后的数据上,使用0.01的学习速率,训练依旧可以收敛。 """
# NOTE(review): this script originally imported train_test_split from
# sklearn.cross_validation, a module removed in scikit-learn 0.20;
# sklearn.model_selection is the supported replacement.
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples as a test set (seeded for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

from sklearn.preprocessing import StandardScaler

# Standardize features using statistics learned from the training split only.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

from sklearn.linear_model import Perceptron

# NOTE(review): Perceptron(n_iter=...) was removed in scikit-learn 0.21;
# max_iter is the replacement, and tol=None restores the original
# "run exactly this many epochs" behaviour of the old n_iter parameter.
ppn = Perceptron(max_iter=40, tol=None, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)

# Evaluate on the held-out split.
y_pred = ppn.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())

from sklearn.metrics import accuracy_score
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

from plot import plot_decision_regions

# Re-combine train and test so the plot can highlight the held-out samples
# (indices 105-149 of the combined array).
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X=X_combined_std, y=y_combined, classifier=ppn, test_idx=range(105, 150))
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from plot import plot_decision_regions

# Page 75, 78
# Build a 2-D point cloud labelled by XOR of the coordinate signs — a data
# set that no linear classifier can separate.
np.random.seed(0)
X_xor = np.random.randn(200, 2)
Y_xor = np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0)
y_xor = np.where(Y_xor, 1, -1)  # boolean XOR result -> +1 / -1 labels

# Scatter the two classes with distinct colors and markers.
positive = y_xor == 1
negative = y_xor == -1
plt.scatter(X_xor[positive, 0], X_xor[positive, 1], c='b', marker='x', label='1')
plt.scatter(X_xor[negative, 0], X_xor[negative, 1], c='r', marker='s', label='-1')

# An RBF-kernel SVM can carve out the non-linear XOR boundary.
svm = SVC(kernel='rbf', random_state=0, gamma=0.10, C=10.0)
svm.fit(X_xor, y_xor)
plot_decision_regions(X_xor, y_xor, classifier=svm)
# Binary labels: 1 for Iris-virginica, 0 for everything else.
Y = [1 if label == "Iris-virginica" else 0 for label in labels]
# (Commented-out alternative kept below: integer-encode every distinct label.)
""" labels = df.iloc[0:, 5].values features = np.unique(labels) Y=[None for x in labels] for i, f in enumerate(features): for x, label in enumerate(labels): if label==f : Y[x] = i """
# Split the labels at n_train_data: the first part trains, the remainder tests.
Y_train, Y_test = Y[:n_train_data], Y[n_train_data:n_data]
# assign the input data to X (only columns 0 and 3 are chosen as features -> just so it's still 2D)
X_train = df.iloc[0:n_train_data, [0, 3]].astype(float).values
X_test = df.iloc[n_train_data:n_data, [0, 3]].astype(float).values
classifier = SVM.Support_vector_machine()
print(Y_train)
# cProfile.run('SVM.Support_vector_machine(visual=False).fit(X_train,Y_train)')
# Train the binary SVM; W is presumably the learned weight vector — confirm
# against the SVM module's fit_binary implementation.
W = classifier.fit_binary(X_train, Y_train, X_test=X_test, Y_test=Y_test)
print("W: ", W)
# Commented-out plotting of the combined train/test decision regions.
""" X_combined= np.vstack((X_train, X_test)) Y_combined = np.hstack((Y_train, Y_test)) plot.plot_decision_regions(X=X_combined, y=Y_combined, classifier=classifier, test_idx=range(n_train_data, n_data)) plt.xlabel('petal length [cm]') plt.ylabel('petal width [cm]') plt.legend(loc='upper left') plt.show()"""