def main2():
    # Logistic regression decision regions on the standardized iris data.
    X, y = get_data_sklearn()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)
    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))

    lr = LogisticRegression(C=1000.0, random_state=0)
    lr.fit(X_train_std, y_train)

    plot_decision_regions(X_combined_std, y_combined, classifier=lr, test_idx=range(105, 150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'logistic_regression.png', dpi=300)
def main3():
    # Logistic regression regularization path: how the weight coefficients shrink
    # as the inverse regularization strength C decreases.
    X, y = get_data_sklearn()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    weights, params = [], []
    for c in np.arange(-5, 5):
        # Use a float base: 10 ** c with a negative numpy integer raises a ValueError.
        lr = LogisticRegression(C=10.0 ** c, random_state=0)
        lr.fit(X_train_std, y_train)
        weights.append(lr.coef_[1])  # coefficients of the second class
        params.append(10.0 ** c)
    weights = np.array(weights)

    plt.plot(params, weights[:, 0], label='petal length')
    plt.plot(params, weights[:, 1], linestyle='--', label='petal width')
    plt.ylabel('weight coefficient')
    plt.xlabel('C')
    plt.legend(loc='upper left')
    plt.xscale('log')
    plt.savefig(PIC_LOC + 'regression_path.png', dpi=300)
def KNN_model():
    # k-nearest neighbors (k=5) on the standardized iris data.
    X, y = get_data_sklearn()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)
    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))

    # p=2 with the Minkowski metric is the Euclidean distance.
    knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
    knn.fit(X_train_std, y_train)

    plot_decision_regions(X_combined_std, y_combined, classifier=knn, test_idx=range(105, 150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'k_nearest_neighbors.png', dpi=300)
def build_dec_tree():
    # Decision tree (entropy criterion, depth 3); trees do not require feature
    # scaling, so the raw feature values are used.
    X, y = get_data_sklearn()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    tree = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
    tree.fit(X_train, y_train)

    X_combined = np.vstack((X_train, X_test))
    y_combined = np.hstack((y_train, y_test))
    plot_decision_regions(X_combined, y_combined, classifier=tree, test_idx=range(105, 150))
    plt.xlabel('petal length [cm]')
    plt.ylabel('petal width [cm]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'decision_tree_decision.png', dpi=300)

    # Export the fitted tree to GraphViz format; render with e.g.
    #   dot -Tpng tree.dot -o tree.png
    export_graphviz(tree, out_file='tree.dot', feature_names=['petal length', 'petal width'])
def main1():
    # Perceptron baseline on the standardized iris data, with test-set accuracy.
    X, y = get_data_sklearn()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    # Note: `n_iter` was renamed to `max_iter` in newer scikit-learn releases.
    ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)
    ppn.fit(X_train_std, y_train)

    y_pred = ppn.predict(X_test_std)
    print('Misclassified samples: %d' % (y_test != y_pred).sum())
    print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))
    plot_decision_regions(X=X_combined_std, y=y_combined, classifier=ppn, test_idx=range(105, 150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'iris_perceptron_scikit.png', dpi=300)
def linear_svm():
    # Despite the name, both models below use an RBF kernel: the small gamma gives a
    # smooth, nearly linear decision boundary, while the large gamma overfits the
    # training points.
    X, y = get_data_sklearn()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)
    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))

    # RBF-kernel SVM with a small gamma.
    svm = SVC(kernel='rbf', random_state=0, gamma=0.10, C=10.0)
    svm.fit(X_train_std, y_train)
    plot_decision_regions(X_combined_std, y_combined, classifier=svm, test_idx=range(105, 150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'support_vector_machine_linear.png', dpi=300)
    # plt.show()
    plt.close()

    # Same model with a much larger gamma: the decision regions hug the training data.
    svm = SVC(kernel='rbf', random_state=0, gamma=100.0, C=10.0)
    svm.fit(X_train_std, y_train)
    plot_decision_regions(X_combined_std, y_combined, classifier=svm, test_idx=range(105, 150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'svm_linear_highgamma.png', dpi=300)
def random_forests():
    # Random forest of 10 entropy-based trees; no feature scaling needed.
    X, y = get_data_sklearn()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    forest = RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=1, n_jobs=2)
    forest.fit(X_train, y_train)

    X_combined = np.vstack((X_train, X_test))
    y_combined = np.hstack((y_train, y_test))
    plot_decision_regions(X_combined, y_combined, classifier=forest, test_idx=range(105, 150))
    plt.xlabel('petal length [cm]')
    plt.ylabel('petal width [cm]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PIC_LOC + 'random_forest.png', dpi=300)
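# A minimal sketch of how these experiments might be driven, assuming the module
# imports numpy/matplotlib/scikit-learn at the top and defines PIC_LOC,
# get_data_sklearn(), and plot_decision_regions() elsewhere. The commented helper
# body is only an assumption inferred from the feature labels and
# test_idx=range(105, 150) used above (the 150-sample iris set, petal length and
# petal width only); the real helper in this repository may differ.
#
# def get_data_sklearn():
#     from sklearn import datasets
#     iris = datasets.load_iris()
#     X = iris.data[:, [2, 3]]   # petal length, petal width
#     y = iris.target
#     return X, y

if __name__ == '__main__':
    # Run each experiment in turn, closing figures in between so the plots
    # saved by successive functions do not draw on top of each other.
    for experiment in (main1, main2, main3, linear_svm,
                       build_dec_tree, random_forests, KNN_model):
        experiment()
        plt.close('all')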