    def plot_data(self):
        """Plot the combined training and test data using matplotlib."""
        X_combined = np.vstack((self.train_X, self.test_X))
        y_combined = np.hstack((self.train_y, self.test_y))
        # test_idx assumes the standard 105/45 train/test split of the 150 Iris samples
        plot_decision_regions(X_combined, y_combined,
                              classifier=self.model,
                              test_idx=range(105, 150))
        plt.xlabel('petal length [cm]')
        plt.ylabel('petal width [cm]')
        plt.legend(loc='upper left')
        plt.show()
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from utils import plot_decision_regions

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
print('Class labels:', np.unique(y))

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1, stratify=y)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

lr = LogisticRegression(C=100, solver='lbfgs', multi_class='ovr', random_state=1)
lr.fit(X_train_std, y_train)

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X_combined_std, y_combined, lr, test_idx=range(105, 150))
plt.xlabel('petal length [std]')
plt.ylabel('petal width [std]')
plt.legend(loc='upper left')
plt.show()

y_pred = lr.predict(X_test_std)
print('Accuracy:', accuracy_score(y_test, y_pred))
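# A hedged follow-up (not in the original script): predict_proba gives the
# per-class membership probabilities for the standardized test samples, a
# common next step after checking accuracy.
print(np.round(lr.predict_proba(X_test_std[:3, :]), 3))    # rows sum to 1
print(lr.predict_proba(X_test_std[:3, :]).argmax(axis=1))  # same as lr.predict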
    y, test_size=0.3, random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

lr = LogisticRegression(C=1000.0, random_state=0)
lr.fit(X_train_std, y_train)
utils.plot_decision_regions(X_combined_std, y_combined,
                            classifier=lr, test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()

# How the weights change with the inverse regularization parameter C
weights, params = [], []
for c in range(-5, 5):
    lr = LogisticRegression(C=10**c, random_state=0)
    lr.fit(X_train_std, y_train)
    weights.append(lr.coef_[1])
    params.append(10**c)
weights = np.array(weights)
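# The snippet stops after collecting the weights; a minimal sketch of the usual
# continuation plots each coefficient of the class-1 weight vector against C on
# a logarithmic axis, showing how stronger regularization (smaller C) shrinks
# the weights toward zero.
plt.plot(params, weights[:, 0], label='petal length')
plt.plot(params, weights[:, 1], linestyle='--', label='petal width')
plt.xlabel('C')
plt.ylabel('weight coefficient')
plt.legend(loc='upper left')
plt.xscale('log')
plt.show()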
        cost = 0.5 * error**2
        return cost

    def net_input(self, X):
        """Calculate the net input (weighted sum plus bias)."""
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, X):
        """Linear (identity) activation."""
        return X

    def predict(self, X):
        """Return the class label after the unit step."""
        return np.where(self.activation(self.net_input(X)) >= 0.0, 1, -1)


if __name__ == '__main__':
    df = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)

    # select setosa and versicolor
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)

    # select sepal length and petal length
    X = df.iloc[0:100, [0, 2]].values

    # Feature standardization
    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

    ada = AdalineSGD(n_iter=15, eta=0.01, random_state=1)
    ada.fit(X_std, y)
    plot_decision_regions(X_std, y, classifier=ada)
    plt.title('Adaline - Stochastic Gradient Descent')
    plt.xlabel('sepal length [standardized]')
    plt.ylabel('petal length [standardized]')
    plt.legend(loc='upper left')
    plt.show()
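    # A minimal sketch, assuming this AdalineSGD (like the SGD variant later in
    # this collection) records the per-epoch average cost in a cost_ list:
    # plotting it shows whether stochastic gradient descent is converging.
    plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Average cost')
    plt.show()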
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.svm import SVC
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import utils

if __name__ == '__main__':
    X_xor, y_xor = utils.generateDataSet()
    # A small gamma gives each training sample a far-reaching influence,
    # so the decision boundary stays smooth.
    svm = SVC(kernel='rbf', gamma=0.10, C=10.0, random_state=0)
    svm.fit(X_xor, y_xor)
    utils.plot_decision_regions(X_xor, y_xor, classifier=svm)
    plt.xlabel('feature 1')
    plt.ylabel('feature 2')
    plt.legend(loc='upper left')
    plt.show()
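    # Hedged illustration (not in the original; svm_tight is a new name):
    # refitting with a much larger gamma makes each sample's influence local,
    # so the boundary tightens around the training points instead of staying smooth.
    svm_tight = SVC(kernel='rbf', gamma=100.0, C=10.0, random_state=0)
    svm_tight.fit(X_xor, y_xor)
    utils.plot_decision_regions(X_xor, y_xor, classifier=svm_tight)
    plt.xlabel('feature 1')
    plt.ylabel('feature 2')
    plt.legend(loc='upper left')
    plt.show()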
print("Class labels:", np.unique(y)) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) sc = StandardScaler() sc.fit(X_train) X_train_std = sc.transform(X_train) X_test_std = sc.transform(X_test) X_combined_std = np.vstack((X_train_std, X_test_std)) y_combined = np.hstack((y_train, y_test)) lda = LinearDiscriminantAnalysis(n_components=2) X_train_lda = lda.fit_transform(X_train_std, y_train) X_combined_lda = lda.transform(X_combined_std) lr = LogisticRegression() le = lr.fit(X_train_lda, y_train) utils.plot_decision_regions(X_combined_lda, y_combined, classifier=lr, test_idx=range(X_train.shape[0], X_train.shape[0] + X_test.shape[0])) plt.xlabel('LD 1') plt.ylabel('LD 2') plt.legend(loc='lower left') plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from utils import plot_decision_regions

df = pd.read_csv('iris.data', header=None)

# select sepal length and sepal width
X = df.iloc[0:100, [0, 1]].values

# Feature standardization
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

# select setosa and versicolor
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)
# y = np.where(y == 'Iris-setosa', 0, 1)

lr = LogisticRegressionSGD(eta=0.05, n_iter=150, random_state=1)
lr.fit(X_std, y)

plt.plot(lr.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Total cost')
plt.show()

plot_decision_regions(X_std, y, lr)
plt.xlabel('sepal length [standardized]')
plt.ylabel('sepal width [standardized]')
plt.legend(loc='upper left')
plt.show()
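# A hedged aside (not in the original script), reusing only the constructor
# parameters and the cost_ attribute the snippet itself relies on: a smaller
# learning rate typically gives a smoother but slower-descending cost curve.
lr_slow = LogisticRegressionSGD(eta=0.005, n_iter=150, random_state=1)
lr_slow.fit(X_std, y)
plt.plot(lr.cost_, marker='o', label='eta=0.05')
plt.plot(lr_slow.cost_, marker='x', label='eta=0.005')
plt.xlabel('Epochs')
plt.ylabel('Total cost')
plt.legend(loc='upper right')
plt.show()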
# L2 regularizer: lambda = 1/C; set C=np.inf to recover the MLE
logreg = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial')
logreg.fit(X_train, y_train)

y_pred = logreg.predict(X_test)
errs = (y_pred != y_test)
nerrs = np.sum(errs)
print("Made {} errors out of {}, on instances {}".format(
    nerrs, len(y_pred), np.where(errs)))
# With ndims=2: Made 10 errors out of 50, on instances
# (array([ 4, 15, 21, 32, 35, 36, 40, 41, 42, 48]),)

from sklearn.metrics import zero_one_loss
err_rate_test = zero_one_loss(y_test, y_pred)
assert np.isclose(err_rate_test, nerrs / len(y_pred))
err_rate_train = zero_one_loss(y_train, logreg.predict(X_train))
print("Error rates on train {:0.3f} and test {:0.3f}".format(
    err_rate_train, err_rate_test))
# Error rates on train 0.180 and test 0.200

if ndims == 2:
    fig, ax = utils.plot_decision_regions(X, y, logreg, iris.target_names)
    ax.set(xlabel='Sepal length')
    ax.set(ylabel='Sepal width')
    utils.save_fig("iris-logistic")
    plt.show()

# Get the predictive distribution for an ambiguous test point
X = [[5.7, 3.5]]  # (1,2) array
y_probs = logreg.predict_proba(X)
print(np.round(y_probs, 2))
# standardize the two features
y = (data[0] - data[0].mean()) / data[0].std()
x = (data[1] - data[1].mean()) / data[1].std()

# fit the linear regression prediction line
params = regression.fit(x, y)

# add the data and the fitted line to the plot
# (Iris has only 150 rows, so the [:180] slices simply take everything)
plot.scatter(x[:180], y[:180], s=15)
prediction = np.matmul(np.array(x[:180]).reshape(-1, 1), params[0]) + params[1]
plot.plot(x[:180], prediction, color='red')

# set plot title and axes
plot.title('Linear Regression: Iris Flower Set')
plot.xlabel('Sepal Length')
plot.ylabel('Sepal Width')
plot.show()

# create and fit perceptron for classification of the data based on the x dimension
xPercep = data.iloc[0:180, [1, 0]].values
yPercep = data.iloc[0:180, 4].values
yPercep = np.where(yPercep == 'Iris-setosa', -1, 1)
perceptronClassifier.fit(xPercep, yPercep)
stump.fit(xPercep, yPercep)

# plot VC-dimension comparison: a stump (low VC dimension) vs. a perceptron
utils.plot_decision_regions(xPercep, yPercep, stump,
                            "Stump Decision Classifier, Low VC")
utils.plot_decision_regions(xPercep, yPercep, perceptronClassifier,
                            "Perceptron Classifier on Same Data")
    def predict(self, X):
        """Predict the class label after the unit step."""
        return np.where(self.net_input(X) >= 0, 1, -1)


if __name__ == '__main__':
    import pandas as pd
    import matplotlib.pyplot as plt
    from utils import plot_decision_regions

    df = pd.read_csv('iris.data', header=None)

    # select sepal length and petal length
    X = df.iloc[0:100, [0, 2]].values

    # select setosa and versicolor
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)

    perceptron = Perceptron(eta=0.01, n_iter=10, random_state=1)
    perceptron.fit(X, y)

    plt.plot(perceptron.errors_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Number of updates')
    plt.show()

    plot_decision_regions(X, y, perceptron)
    plt.xlabel('sepal length [cm]')
    plt.ylabel('petal length [cm]')
    plt.legend(loc='upper left')
    plt.show()
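    # A small hedged check (not in the original): setosa and versicolor are
    # linearly separable in these two features, so the converged perceptron
    # should classify every training sample correctly.
    print('Training accuracy:', (perceptron.predict(X) == y).mean())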
# set axes info from the CSV
x = df.iloc[0:100, [0, 2]].values
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)

# set graph characteristics for the sepal/petal scatter plot
plt.scatter(x[:50, 0], x[:50, 1], color='red', marker='o', label='setosa')
plt.scatter(x[50:100, 0], x[50:100, 1], color='blue', marker='x',
            label='versicolor')
plt.xlabel('sepal length')
plt.ylabel('petal length')
plt.legend(loc='upper left')
plt.show()

# create and set perceptron, fit using x, y
pn = Perceptron(0.1, 10)
pn.fit(x, y)

# set graph characteristics for misclassifications
plt.plot(range(1, len(pn.misclass) + 1), pn.misclass, marker='o')
plt.xlabel('Iteration')
plt.ylabel('# of misclassifications')
plt.show()

# plot decision regions
utils.plot_decision_regions(x, y, pn)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from utils import plot_decision_regions

df = pd.read_csv('iris.data', header=None)

# select sepal length and petal length
X = df.iloc[0:100, [0, 2]].values

# Feature standardization
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

# select setosa and versicolor
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)

adaline = AdalineSGD(eta=0.01, n_iter=15, random_state=1)
adaline.fit(X_std, y)
# demonstrate an online update on a single sample
adaline.partial_fit(X_std[0, :], y[0])

plt.plot(adaline.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Total cost')
plt.show()

plot_decision_regions(X_std, y, adaline)
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.show()