def main(data_path_head, data_path_tail, args):
    """Run the three selected RPCA grid configurations on a new dataset.

    The raw data is loaded, its first three columns are PCA-rotated and
    scaled, and one ``RobustPCAGrid`` (``predict_method='voting'``) is
    evaluated per configuration. Each classifier is trained and tested on
    the same samples (columns 0-2 as data, last column as labels), saved
    below ``args.save_path`` and its score and confusion matrix are printed.

    Args:
        data_path_head: Second argument handed to ``load_raw_data``
            (presumably the directory of the data file - confirm).
        data_path_tail: First argument handed to ``load_raw_data``
            (presumably the file name - confirm).
        args: Object providing the attributes ``save_path`` and ``stats``.
    """
    # ---------- Load data ----------
    data = load_raw_data(data_path_tail, data_path_head)

    # ---------- Preprocess the coordinate columns ----------
    data[:, :3] = PcaRotation(data[:, :3])
    data[:, :3] = ScaleData(data[:, :3])

    # ---------- Three best configurations from the report "grid search" ----------
    # One row per run: (window, window type, overlap, confidence).
    configurations = (
        ([225, 225], 'rectangle', 0.5, 1.5),
        ([225, 225], 'ellipse', 0.8, 2),
        ([125, 125], 'rectangle', 0.75, 2.5),
    )

    # Every run is stored in the same output directory.
    output_dir = args.save_path + "/RPCA_generalization_voting"

    # ---------- Perform RPCA with each configuration ----------
    for window, window_type, overlap, confidence in configurations:
        # Build the classifier name; the overlap is written without the dot.
        overlap_tag = "{:.2f}".format(overlap).replace(".", "")
        classifier_name = "RPCA_{}_{}x{}_ol{}".format(
            window_type, window[0], window[1], overlap_tag)

        # Fit, evaluate and save the classifier.
        grid = RobustPCAGrid(
            window,
            max_iter=2000,
            overlap=overlap,
            window_type=window_type,
            predict_method='voting',
            confidence=confidence,
            name=classifier_name,
        )
        _, _, score, _, confusion = classify(
            grid,
            train_data=data[:, :3],
            train_lbls=data[:, -1],
            test_data=data[:, :3],
            test_lbls=data[:, -1],
            stats=args.stats,
            save=True,
            save_dir=output_dir,
        )

        # Report the result on stdout.
        conf_mat = matrix_string(confusion)
        print("*** {} ***".format(classifier_name))
        print("\trpca Score: %.6f \n %s" % (score, conf_mat))
def main(head, tail, classification_path):
    """Reconstruct and display a surface from samples not predicted as 1.

    Loads the raw data, PCA-rotates and scales its first three columns,
    reads a prediction vector from ``classification_path`` and removes every
    row whose prediction equals 1. The remaining points are passed to
    ``surface_reconstruction`` and shown in a 3D scatter plot. Unless Python
    runs in interactive mode, the vispy event loop is started afterwards.

    Args:
        head: Second argument handed to ``load_raw_data``.
        tail: First argument handed to ``load_raw_data``.
        classification_path: Text file readable by ``np.loadtxt`` holding
            the predictions (presumably one value per sample - confirm).
    """
    # ---------- Load data ----------
    data = load_raw_data(tail, head)

    # ---------- Preprocess: rotate first, then scale ----------
    for transform in (PcaRotation, ScaleData):
        data[:, :3] = transform(data[:, :3])

    # Read the stored predictions and find the rows flagged with 1.
    predicted = np.loadtxt(classification_path)
    flagged_rows = np.argwhere(predicted == 1)[:, 0]

    # Keep only the coordinates of the unflagged samples.
    kept_points = np.delete(data[:, :3], flagged_rows, axis=0)

    # Estimate the surface and plot it with a single (zero) label.
    surface = surface_reconstruction(kept_points, resolution=[500, 200])
    ScatterPlot3D(surface,
                  labels=np.zeros(len(surface)),
                  title="RPCA: Surface Estimation")

    # In an interactive interpreter the event loop must not be started here.
    if sys.flags.interactive == 1:
        return
    vispy.app.run()
def main(data_path_head, data_path_tail, args):
    """Compare kNN, Naive Bayes, SVM and Robust PCA classifiers on one dataset.

    The raw data is loaded, the coordinate columns (0-2) are PCA-rotated and
    scaled, and columns ``3:-2`` are passed through ``ScaleFeatures``. A 75/25
    train/test split of the chosen feature columns is then evaluated with
    every classifier that is enabled on ``args``. Robust PCA is the exception:
    it is trained and tested on the full coordinate set.

    Args:
        data_path_head: Second argument handed to ``load_raw_data``
            (presumably the directory of the data file - confirm).
        data_path_tail: First argument handed to ``load_raw_data``
            (presumably the file name - confirm).
        args: Object providing the switches ``knn``, ``nb``, ``svm`` and
            ``rpca`` (each must be exactly ``True`` to enable its section),
            plus ``stats`` and ``figsshow``.
    """
    ### ********** LOAD DATA ********** ###
    print("-- Loading Data --\n")
    data = load_raw_data(data_path_tail, data_path_head)
    # The labels are stored in the last COLUMN (see the RPCA section, which
    # uses data[:, -1] as labels). The ratio was previously computed from
    # data[-1], i.e. the last ROW, which is a single sample's values.
    print("Noise Ratio in data %f" %
          (np.count_nonzero(data[:, -1]) / data.shape[0]))

    ### ********** PREPROCESS DATA ********** ###
    print("\n-- Preprocessing Data --\n")
    # preprocess true coordinates
    data[:, :3] = PcaRotation(data[:, :3])
    data[:, :3] = ScaleData(data[:, :3])
    # preprocess features
    # NOTE(review): the slice stops at -2 here, so the last two columns are
    # left unscaled - confirm this matches the layout of the dataset.
    data[:, 3:-2] = ScaleFeatures(data[:, 3:-2])

    ### ********** CREATE FEATURES ********** ###
    # Disabled feature-engineering experiment, kept for reference:
    # dist = 1
    # dtp_feature = dist_to_plane(data[:,:3],dist)
    # k=[3,6,9,12,15]
    # kmean_dist_feature = knn_mean_dist(data[:,:3],[3,6,9,12,15])
    # kmean_z_dist_feature = knn_mean_z_dist(data[:,:3],[3,6,9,12,15])
    # kmaxd_feature= knn_max_dist(data[:,:3],k)
    # radius = [0.1 0.2 0.4 0.8 1]
    # #n_in_sphere_feature = samples_within_sphere(data[:,:3],radius)
    # #cent_z_sum_sphere_feature = centered_z_summation_within_sphere(data[:,0:3],radius)
    # # Write code to incorporate features in the dataset
    # data = data ### !!!

    ### ********** TRAIN TEST SPLIT ********** ###
    # Columns used as features; the label column (-1) is appended so that
    # split_data receives features and labels in one array.
    chosen_features = np.array([1, 2, 3])
    features_incl_lbls = np.append(chosen_features, -1)
    train_data, train_lbls, test_data, test_lbls = split_data(
        data[:, features_incl_lbls], test_size=0.25)

    ### ********** Nearest Neighbors Classifier ********** ###
    if args.knn is True:
        print("-- Performing kNN --")
        k_neighbors = 1
        knn = KNearestNeighbors(k=k_neighbors)
        knn, knn_prediction, knn_score, knn_err, knn_confusion_matrix = classify(
            knn, train_data, train_lbls, test_data, test_lbls,
            stats=args.stats)
        conf_mat = matrix_string(knn_confusion_matrix)
        print("\tkNN k=%d Score: %.6f \n %s" % (k_neighbors, knn_score, conf_mat))

    ### ********** Naive Bayes Classifier********** ###
    if args.nb is True:
        print("-- Performing Naive Bayes --")
        nb = NaiveBayes()
        nb, nb_prediction, nb_score, nb_err, nb_confusion_matrix = classify(
            nb, train_data, train_lbls, test_data, test_lbls,
            save=True, stats=args.stats)
        conf_mat = matrix_string(nb_confusion_matrix)
        print("\tnb Score: %.6f \n %s" % (nb_score, conf_mat))

    ### ********** Linear/Non-linear SVM Classifier********** ###
    if args.svm is True:
        print("-- Performing Linear SVM --")
        lin_svm = SupportVectorMachine(max_iter=200)
        lin_svm, lin_svm_pred, lin_svm_score, lin_svm_err, lin_confusion_matrix = classify(
            lin_svm, train_data, train_lbls, test_data, test_lbls,
            stats=args.stats)
        conf_mat = matrix_string(lin_confusion_matrix)
        print("\tLinear SVM Score: %.6f \n %s" % (lin_svm_score, conf_mat))

        print("-- Performing Kernel SVM --")
        kern_svm = SupportVectorMachine(kernel='rbf', max_iter=1000,
                                        gamma='auto', C=0.1, tol=1e-3)
        kern_svm, kern_svm_pred, kern_svm_score, kern_svm_err, kern_confusion_matrix = classify(
            kern_svm, train_data, train_lbls, test_data, test_lbls,
            stats=args.stats)
        conf_mat = matrix_string(kern_confusion_matrix)
        print("\tKernel SVM Score: %.6f \n %s" % (kern_svm_score, conf_mat))

    ### ********** Robust PCA ********** ###
    if args.rpca is True:
        print("-- Performing Robust PCA --")
        rpca = RobustPCAGrid([101, 101], max_iter=2000, overlap=0,
                             window_type='rectangle', predict_method='voting')
        # RPCA is trained and tested on the same, complete coordinate set
        # rather than on the train/test split used above.
        rpca, rpca_prediction, rpca_score, rpca_err, rpca_confusion_matrix = classify(
            rpca,
            train_data=data[:, :3], train_lbls=data[:, -1],
            test_data=data[:, :3], test_lbls=data[:, -1],
            stats=args.stats, save=False)
        # One bracketed, tab-separated line per confusion-matrix row.
        conf_mat = '\t[' + ']\n\t['.join(
            '\t'.join('%0.3f' % x for x in y)
            for y in rpca_confusion_matrix) + ']'
        print("\trpca Score: %.6f \n %s" % (rpca_score, conf_mat))

        if args.figsshow:
            ScatterPlot3D(data, labels=rpca_prediction,
                          x_feat=0, y_feat=1, z_feat=2,
                          title="RPCA: Predictions")
            # Drop every sample predicted as 1 before plotting again.
            filt_data = np.delete(
                data, np.argwhere(rpca_prediction == 1)[:, 0], axis=0)
            ScatterPlot3D(filt_data, labels=np.zeros(len(filt_data)),
                          title="RPCA: Noise Filtered")
            #reconstr = surface_reconstruction(filt_data, resolution=[500,500])
            #ScatterPlot3D(reconstr,labels=np.zeros(len(reconstr)), title="RPCA: Surface Estimation")

    ### ********** PLOTTING DATA ********** ####
    # Disabled plot (was a bare string statement, now a real comment):
    # ScatterPlot3D(data, labels=training_labels, x_feat=0, y_feat=1,
    #               z_feat=2, label_feat=-1, title="Scatterplot")
def main(data_path_head, data_path_tail, args):
    """Grid-search Robust PCA over sigma, window size, window type and overlap.

    The raw data is loaded and its first three columns are PCA-rotated and
    scaled. For every sigma, window size and overlap a ``RobustPCAGrid`` with
    ``predict_method='voting'`` is trained and tested on the complete
    coordinate set (columns 0-2, labels in the last column). Rectangular and
    elliptical windows each use their own overlap list. Every result is
    saved below ``args.save_path + "_std<sigma>"`` and printed to stdout.

    Args:
        data_path_head: Second argument handed to ``load_raw_data``
            (presumably the directory of the data file - confirm).
        data_path_tail: First argument handed to ``load_raw_data``
            (presumably the file name - confirm).
        args: Object providing the attributes ``save_path`` and ``stats``.
    """
    ### ********** LOAD DATA ********** ###
    print("-- Loading Data --\n")
    data = load_raw_data(data_path_tail, data_path_head)

    ### ********** PREPROCESS DATA ********** ###
    print("\n-- Preprocessing Data --\n")
    data[:, :3] = PcaRotation(data[:, :3])
    data[:, :3] = ScaleData(data[:, :3])

    rpca_testing_windows = [[25, 25], [75, 75], [125, 125], [175, 175], [225, 225]]
    rpca_testing_rect_overlaps = [0, 0.25, 0.5, 0.75]
    rpca_testing_ellip_overlaps = [0.5, 0.6, 0.7, 0.8]
    rpca_testing_sigmas = [1.5, 2, 2.5]

    # (tag used in names and paths, window_type argument, overlaps to test).
    # Rectangles are processed before ellipses for every window size.
    window_families = [
        ("Rect", 'rectangle', rpca_testing_rect_overlaps),
        ("Ellipse", 'ellipse', rpca_testing_ellip_overlaps),
    ]

    ### ********** Perform grid search of defined parameters with Robust PCA ********** ###
    for sigma in rpca_testing_sigmas:
        # Only the "_std<sigma>" suffix has its dot removed, not the base path.
        save_path = args.save_path + "_std{}".format(sigma).replace(".", "")

        for window in rpca_testing_windows:
            for tag, window_type, overlaps in window_families:
                save_dir = save_path + "/RPCA_{}_voting".format(tag)

                for overlap in overlaps:
                    string_overlap = "{:.2f}".format(overlap).replace(".", "")
                    classifier_name = "RPCA_{}_{}x{}_ol{}".format(
                        tag, window[0], window[1], string_overlap)

                    # sigma is passed as the confidence threshold. Before,
                    # it only changed the output directory, so every sigma
                    # run trained an identical classifier.
                    rpca = RobustPCAGrid(window,
                                         max_iter=2000,
                                         overlap=overlap,
                                         window_type=window_type,
                                         predict_method='voting',
                                         confidence=sigma,
                                         name=classifier_name)
                    rpca, rpca_prediction, rpca_score, rpca_err, rpca_confusion_matrix = classify(
                        rpca,
                        train_data=data[:, :3], train_lbls=data[:, -1],
                        test_data=data[:, :3], test_lbls=data[:, -1],
                        stats=args.stats, save=True, save_dir=save_dir)

                    conf_mat = matrix_string(rpca_confusion_matrix)
                    print("*** {} ***".format(classifier_name))
                    print("\trpca Score: %.6f \n %s" % (rpca_score, conf_mat))
def main(data_path_head, data_path_tail, args):
    """Evaluate each precomputed feature on its own with kNN, NB and SVMs.

    After loading, the coordinate columns (0-2) are PCA-rotated and scaled
    and columns ``3:-1`` are passed through ``ScaleFeatures``. For every
    feature column from index 3 onwards, a 75/25 train/test split of that
    single column plus the label column is evaluated with each classifier
    family enabled on ``args``, sweeping that family's hyper-parameters.
    All results are saved below ``args.save_path`` and printed.

    Args:
        data_path_head: Second argument handed to ``load_raw_data``
            (presumably the directory of the data file - confirm).
        data_path_tail: First argument handed to ``load_raw_data``
            (presumably the file name - confirm).
        args: Object providing the switches ``knn``, ``nb`` and ``svm``
            (each must be exactly ``True`` to enable its section), plus
            ``stats`` and ``save_path``.
    """
    # ---------- Load data ----------
    print("-- Loading Data --\n")
    data = load_raw_data(data_path_tail, data_path_head)

    # ---------- Preprocess data ----------
    print("\n-- Preprocessing Data --\n")
    # true coordinates
    data[:, :3] = PcaRotation(data[:, :3])
    data[:, :3] = ScaleData(data[:, :3])
    # features
    data[:, 3:-1] = ScaleFeatures(data[:, 3:-1])

    eiva_feature_names = [
        "x", "y", "z",
        "Dist_to_neighbour",
        "Dist_to_avg_surf_r80cm",
        "Neighbours_in_sphere_r80cm",
        "Z_sum_in_circ_r80cm",
        "kNN_mean_Z_dist_n8",
        "kNN_mean_dist_k8",
    ]

    # Start counting at 3 so the index is the data column (x, y, z skipped).
    for feature, feature_name in enumerate(eiva_feature_names[3:], start=3):
        # ---------- Train/test split on this feature plus the labels ----------
        selected_columns = np.array([feature, -1])
        train_data, train_lbls, test_data, test_lbls = split_data(
            data[:, selected_columns], test_size=0.25)

        # Suffix shared by the NB and SVM classifier names.
        feature_suffix = "Feature{}={}_".format(feature, feature_name)

        # ---------- Nearest Neighbors ----------
        if args.knn is True:
            print("-- Performing kNN --")
            knn_dir = args.save_path + "/kNN"
            for k_neighbors in 2**np.arange(9):
                classifier_name = ("kNN_k{}_".format(k_neighbors)
                                   + "Feature{}={}".format(feature, feature_name))
                knn_model = KNearestNeighbors(k=k_neighbors, name=classifier_name)
                _, _, knn_score, _, knn_confusion = classify(
                    knn_model, train_data, train_lbls, test_data, test_lbls,
                    stats=args.stats, save=True, save_dir=knn_dir)
                conf_mat = matrix_string(knn_confusion)
                print("\tkNN k=%d Score: %.6f \n %s" % (k_neighbors, knn_score, conf_mat))

        # ---------- Naive Bayes ----------
        if args.nb is True:
            print("-- Performing Naive Bayes --")
            nb_dir = args.save_path + "/NB"
            nb_model = NaiveBayes(name="NB_" + feature_suffix)
            _, _, nb_score, _, nb_confusion = classify(
                nb_model, train_data, train_lbls, test_data, test_lbls,
                stats=args.stats, save=True, save_dir=nb_dir)
            conf_mat = matrix_string(nb_confusion)
            print("\tnb Score: %.6f \n %s" % (nb_score, conf_mat))

        # ---------- Linear / RBF SVM ----------
        if args.svm is True:
            # Hyper-parameter grids; each is the same on every pass.
            c_values = (2.**np.arange(-5, 16, 10)).tolist()
            tol_values = (10.**np.arange(-5, 0, 2)).tolist()
            gamma_values = (2.**np.arange(-10, 0, 3)).tolist()
            lin_dir = args.save_path + "/linSVM"
            rbf_dir = args.save_path + "/RbfSVm"

            for C in c_values:
                print(C)
                for tol in tol_values:
                    # One linear SVM per (C, tol) pair ...
                    print("-- Performing Linear SVM --")
                    classifier_name = "LinSVM_C{}_tol{}_".format(C, tol) + feature_suffix
                    lin_model = SupportVectorMachine(
                        max_iter=200, C=C, tol=tol, name=classifier_name)
                    _, _, lin_score, _, lin_confusion = classify(
                        lin_model, train_data, train_lbls, test_data, test_lbls,
                        stats=args.stats, save=True, save_dir=lin_dir)
                    conf_mat = matrix_string(lin_confusion)
                    print("\tLinear SVM Score: %.6f \n %s" % (lin_score, conf_mat))

                    # ... followed by one RBF SVM per gamma for that pair.
                    for gamma in gamma_values:
                        print("-- Performing Kernel SVM --")
                        classifier_name = ("RbfSVM_C{}_tol{}_gamme{}".format(C, tol, gamma)
                                           + feature_suffix)
                        rbf_model = SupportVectorMachine(
                            kernel='rbf', max_iter=200, gamma=gamma,
                            C=C, tol=tol, name=classifier_name)
                        _, _, rbf_score, _, rbf_confusion = classify(
                            rbf_model, train_data, train_lbls, test_data, test_lbls,
                            stats=args.stats, save=True, save_dir=rbf_dir)
                        conf_mat = matrix_string(rbf_confusion)
                        print("\tKernel SVM Score: %.6f \n %s" % (rbf_score, conf_mat))
# Parse Arguments args = parser.parse_args() # Path is a data file if os.path.exists(args.path): print("Test") # Get file name and path from argument head, tail = os.path.split(args.path) # Read data from file data = load_raw_data(tail, head)[:, :] ### ********** PREPROCESS DATA ********** ### print("\n-- Preprocessing Data --\n") # preprocess true coordinates data[:, :3] = PcaRotation(data[:, :3]) data[:, :3] = ScaleData(data[:, :3]) # Grid RPCA rpca = RobustPCAGrid([101, 101], max_iter=1000, overlap=0.5, window_type='rectangle', predict_method='voting') #50,50 rpca.fit(data[:, :3]) labels = rpca.predict(None) ScatterPlot3D(data[:, :3], labels=labels, title="Predicted labels") ScatterPlot3D(rpca.S, labels=data[:, -1], title="S With True labels") print(len(rpca.S_list)) #ScatterPlot3D(rpca.S_list[20],labels=data[rpca.sample_list[20],-1], title="S1 With True labels")