# Keep only the selected attribute column(s) and force a two-dimensional,
# single-column layout for the estimator.
X = X[:, attribute_included].reshape(-1, 1)
attributeNames = attributeNames[attribute_included]
N, M = X.shape
C = len(classNames)

# K-fold crossvalidation
K = 2
CV = StratifiedKFold(K, shuffle=True)

# `k` numbers the figures: each fold uses figure k and figure k + 1.
k = 0
for train_index, test_index in CV.split(X, y):
    print(train_index)

    # extract training and test set for current CV fold
    X_train = X[train_index, :]
    y_train = y[train_index]
    X_test = X[test_index, :]
    y_test = y[test_index]

    # Fit a fresh logistic regression model on this fold's training data.
    logit_classifier = LogisticRegression()
    logit_classifier.fit(X_train, y_train)

    # Predicted labels, and the probability reported in column 1 of
    # predict_proba, for the held-out samples.
    y_test_est = logit_classifier.predict(X_test).T
    p = logit_classifier.predict_proba(X_test)[:, 1].T

    # First figure of the pair: rocplot (presumably a ROC curve -- helper is
    # defined elsewhere).
    figure(k)
    rocplot(p, y_test)

    # Second figure of the pair: confmatplot called as (true, estimated).
    figure(k + 1)
    confmatplot(y_test, y_test_est)

    k += 2

show()
# Keep only the selected attribute column(s).
X = X[:, attribute_included]
attributeNames = attributeNames[attribute_included]
N, M = X.shape
C = len(classNames)

# K-fold crossvalidation
# NOTE(review): `sklearn.cross_validation` was removed in scikit-learn 0.20.
# On a current install this needs
# `sklearn.model_selection.StratifiedKFold(n_splits=K).split(X, labels)`
# instead -- confirm the target environment before migrating.
K = 2
CV = cross_validation.StratifiedKFold(y.A.ravel().tolist(), K)

# `k` numbers the figures: each fold uses figure k and figure k + 1.
k = 0
for train_index, test_index in CV:
    # extract training and test set for current CV fold
    # (y is indexed with two subscripts and exposes `.A`, so it is presumably
    # a numpy matrix column -- verify against the loader)
    X_train, y_train = X[train_index, :], y[train_index, :]
    X_test, y_test = X[test_index, :], y[test_index, :]

    # The estimator is given a flat label array via `.A.ravel()`.
    logit_classifier = LogisticRegression()
    logit_classifier.fit(X_train, y_train.A.ravel())

    # `np.mat` was removed in NumPy 2.0; `np.asmatrix` is the same function
    # under its surviving name. The results are transposed into columns.
    y_test_est = np.asmatrix(logit_classifier.predict(X_test)).T
    p = np.asmatrix(logit_classifier.predict_proba(X_test)[:, 1]).T

    figure(k)
    rocplot(p, y_test)

    # confmatplot called as (true, estimated).
    figure(k + 1)
    confmatplot(y_test, y_test_est)

    k += 2

show()
# NOTE(review): the statements down to `k += 1` read like the tail of an outer
# cross-validation loop whose `for` header is not part of this excerpt;
# re-indent them under that loop when merging.

# Refit with the C value at the position of the smallest inner-loop error.
best_index = np.argmin(err_inner_loop)
logit_classifier = lm.LogisticRegression(C=clf_best_para[best_index])
logit_classifier.fit(X_train, y_train)
y_est = logit_classifier.predict(X_test)

# Keep this fold's test data, fitted model and predictions for later plots.
y_test_outer.append(y_test)
x_test_outer.append(X_test)
clf_list.append(logit_classifier)
best_est.append(y_est)

# Fraction of misclassified test samples; float() keeps this a true division
# even under Python 2 integer semantics.
mis_classified = np.sum(y_est != y_test)
relative_error = mis_classified / float(len(y_test))
gen_err[k] = relative_error
k += 1

# show confusion matrix for model training using K-fold cross validation
best_index = gen_err.argmin()
figure()
# True labels go first, estimates second, matching the other confmatplot
# calls in this code base.
confmatplot(y_test_outer[best_index], best_est[best_index])
print("The mean generalization error is {0}".format(np.mean(gen_err)))

# show confusion matrix for model testing using Leave one out cross validation
# figure(1)
# Y_est2 = clf_list[best_index].predict(X_test)
# confmatplot(Y_test,Y_est2)
# show()


# Decision boundaries for the multinomial regression model
def nevallog(xval):
    """Return, for each row of *xval*, the index of the most probable class.

    The classifier is ``clf_list[best_index]``; both names are module-level
    and are looked up when the function is called, not when it is defined.
    """
    return np.argmax(clf_list[best_index].predict_proba(xval), 1)
# NOTE(review): the statements down to `k += 1` read like the tail of an outer
# cross-validation loop whose `for` header is not part of this excerpt;
# re-indent them under that loop when merging.

# Fit a KNN model with the neighbour count stored for fold k.
best_knn = KNeighborsClassifier(n_neighbors=best_param_knn[k])
best_knn = best_knn.fit(X_train, y_train)
y_est = best_knn.predict(X_test)

X_test_outer.append(X_test)
best_knn_list.append(best_knn)

# Fraction of misclassified test samples; float() keeps this a true division
# even under Python 2 integer semantics.
mis_classified = np.sum(y_est != y_test)
relative_error = mis_classified / float(len(y_test))

y_ESTKNN.append(y_est)
y_test_outer.append(y_test)
err_test_outer.append(relative_error)
k += 1

print("The generalization error is {0} ".format(np.mean(err_test_outer)))

# Confusion matrix (true, estimated) for the fold with the lowest test error.
best_index = np.argmin(err_test_outer)
confmatplot(y_test_outer[best_index], y_ESTKNN[best_index])
show()

# NOTE(review): nothing is drawn on figure 1 before the next figure() call --
# confirm whether this selection is still needed.
figure(1)


def neval(xval):
    """Return, for each row of *xval*, the index of the most probable class.

    Uses the model fitted on fold ``best_index`` so that the decision boundary
    belongs to the same fold as the test data it is plotted with, rather than
    to whichever ``best_knn`` happened to be fitted last.
    """
    return np.argmax(best_knn_list[best_index].predict_proba(xval), 1)


# Decision boundary plot, only when k_pca == 2.
# NOTE(review): the original layout does not show whether the final show()
# belongs inside this `if`; it is placed inside here -- confirm.
if k_pca == 2:
    figure()
    dbplotf(X_test_outer[best_index], y_test_outer[best_index], neval, 'auto')
    show()
# Position of the smallest entry in errors_outer, and the mean of
# errors_outer rounded to three decimals.
best_index_total = errors_outer.argmin()
gen_error = np.mean(errors_outer).round(3)
print("The generalization error is {0}% misclassification".format(gen_error*100))

# Bar chart with one bar per entry of errors_outer at positions 0..K-1.
figure(figsize=(6, 7))
bar(range(0, K), errors_outer)
title('Square Errors for best performing models from the inner loop')
# Build one tick label per bar so the label count always matches K instead of
# being fixed at three.
xticks(np.arange(K), tuple(best_hidden_neurons[i] for i in range(K)))
xlabel('Hidden Neurons')
ylabel('Square Error')

# The best confusion matrix for the ann selection
figure()
confmatplot(y_test_outer[best_index_total], y_est_outer[best_index_total])


# Doing the decision boundaries
def neval(xval):
    """Return, for each row of *xval*, the index of the most probable class.

    The classifier is ``best_clf_outer[best_index_total]``; both names are
    module-level and are looked up when the function is called.
    """
    return np.argmax(best_clf_outer[best_index_total].predict_proba(xval), 1)


# Decision boundary plot, only when k_pca == 2.
if k_pca == 2:
    figure()
    dbplotf(X_test_outer[best_index_total], y_test_outer[best_index_total], neval, 'auto')