def data_plot(data_file, class_column=0, delimiter=' '):
    """Plot the attribute series of one sample per class label.

    For every class label present in the data file, take the first row
    belonging to that label, strip trailing zero padding from each
    attribute series, plot all series into one figure, and save it as
    "asl_class_<label>.pdf" in the working directory.

    Args:
        data_file: path of the data file passed to file_reading.
        class_column: column index holding the class label.
        delimiter: field delimiter used in the data file.
    """
    x_matrix, attr_num = file_reading(data_file, delimiter, True)
    x_matrix, y_vector = x_y_spliting(x_matrix, class_column)
    y_min = min(y_vector)
    y_max = max(y_vector)
    x_row, x_col = x_matrix.shape
    # Floor division so the reshape length is an int on both Py2 and Py3.
    attr_len = x_col // attr_num
    x_matrix = x_matrix.reshape(x_row, attr_num, attr_len)
    # BUG FIX: the original used range(y_min, y_max), silently skipping the
    # last class label; labels are inclusive (siblings use max+1).
    for label in range(y_min, y_max + 1):
        label_index = np.where(y_vector == label)[0]
        if len(label_index) == 0:
            # No sample for this label; original would IndexError here.
            continue
        out_pdf = "asl_class_" + str(label) + ".pdf"
        fig = plt.figure()
        label_row = x_matrix[label_index[0], :, :]
        for attr in range(attr_num):
            # Drop trailing zero padding before plotting (same effect as
            # the original manual backwards scan).
            plot_series = np.trim_zeros(label_row[attr, :], 'b')
            plt.plot(plot_series)
        fig.savefig(out_pdf, dpi=fig.dpi)
        # BUG FIX: close the figure so matplotlib does not accumulate one
        # open figure per class label.
        plt.close(fig)
def data_checking(data_file, class_column=0, delimiter=' '):
    """Summarize a data file: matrix shape, label range, per-class counts.

    Returns the summary as a single newline-separated string.
    """
    x_matrix, attr_num = file_reading(data_file, delimiter, True)
    x_matrix, y_vector = x_y_spliting(x_matrix, class_column)
    summary = 'x_matrix shape: ' + str(x_matrix.shape)
    y_min, y_max = min(y_vector), max(y_vector)
    summary += "\nclass labels from " + str(y_min) + " to " + str(y_max)
    # Per-class sample counts, reported as a {label: count} dict.
    labels, label_counts = np.unique(y_vector, return_counts=True)
    summary += '\n' + str(dict(zip(labels, label_counts)))
    return summary
def norm_checking(data_file):
    """Verify that every per-row attribute series is z-normalized.

    Returns True only when each series has a mean within +/-0.0001 of
    zero and a standard deviation within [0.99999, 1.00001].
    """
    data_matrix, attr_num = file_reading(data_file)
    data_x_matrix, data_y_vector = x_y_spliting(data_matrix, 0)
    data_row, data_col = data_x_matrix.shape
    attr_len = data_col / attr_num
    data_x_matrix = data_x_matrix.reshape(data_row, attr_num, attr_len)
    # Walk every (row, attribute) series and reject on the first one that
    # strays from mean 0 / std 1.
    for sample in data_x_matrix:
        for series in sample:
            series_mean = np.mean(series)
            if series_mean > 0.0001 or series_mean < -0.0001:
                return False
            series_std = np.std(series)
            if series_std > 1.00001 or series_std < 0.99999:
                return False
    return True
def run_cnn_projected_feature_analysis(feature_folder,
                                       class_id,
                                       data_folder,
                                       data_file_keyword,
                                       method="rf_lda",
                                       log_folder='./'):
    """Run per-class attribute-importance analysis on saved CNN features.

    For each data file matching data_file_keyword, and for each class in
    the requested range (all classes when class_id < 0, otherwise only
    class_id), loads the matching projected-feature object, runs the
    analysis selected by `method`, and saves the collected per-class
    attribute lists as one object file under `<feature_folder>_<method>/`.

    Args:
        feature_folder: folder holding the saved feature .obj files.
        class_id: class to analyze, or a negative value for all classes.
        data_folder: folder holding the raw data files.
        data_file_keyword: substring a data file name must contain.
        method: one of "rf_lda"/"rf_lda_sum", "rf", "lda", "cpca".
        log_folder: folder for the per-file log files.

    Raises:
        ValueError: if `method` is not a supported keyword.
    """
    data_file_list = list_files(data_folder)
    feature_file_list = list_files(feature_folder)
    out_obj_folder = feature_folder[:-1] + "_" + method
    out_obj_folder = init_folder(out_obj_folder)
    class_column = 0
    for train_file in data_file_list:
        if data_file_keyword not in train_file:
            continue
        data_key = train_file.replace('.txt', '')
        data_matrix, attr_num = file_reading(data_folder + train_file)
        train_x_matrix, train_y_vector = x_y_spliting(data_matrix, class_column)
        if class_id < 0:
            min_class = min(train_y_vector)
            max_class = max(train_y_vector) + 1
        else:
            min_class = class_id
            max_class = min_class + 1
        log_file = data_key + "_" + method + "_min" + str(
            min_class) + "_max" + str(max_class) + ".log"
        logger = setup_logger(log_folder + log_file)
        logger.info('data file: ' + train_file)
        out_obj_file = data_key + "_" + method + "_min" + str(
            min_class) + "_max" + str(max_class) + ".obj"
        out_obj_matrix = []
        for label in range(min_class, max_class):
            logger.info("class: " + str(label))
            feature_key = "_class" + str(label) + "_"
            for feature_file in feature_file_list:
                if data_key not in feature_file or feature_key not in feature_file:
                    continue
                logger.info("feature file: " + feature_file)
                feature_obj = load_obj(feature_folder + feature_file)
                train_feature = obj_processing(feature_obj[0])
                logger.info("train feature shape: " + str(train_feature.shape))
                # Binary target: 1 for the current class, 0 for the rest.
                class_train_y = np.where(train_y_vector == label, 1, 0)
                logger.info("feature method: " + str(method))
                # BUG FIX: the original chain never matched the default
                # method "rf_lda" (it only checked "rf_lda_sum"), leaving
                # class_attr_imp_matrix unbound and raising NameError.
                # Accept "rf_lda" as an alias and fail fast on unknowns.
                if method in ("rf_lda", "rf_lda_sum"):
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_lda_analysis(
                        train_feature, class_train_y, logger)
                elif method == "rf":
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_analysis(
                        train_feature, class_train_y, logger)
                elif method == "lda":
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_lda_analysis(
                        train_feature, class_train_y, logger)
                elif method == "cpca":
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_cpca_analysis(
                        train_feature, class_train_y, logger)
                else:
                    raise ValueError("unsupported method: " + str(method))
                if method == "cpca":
                    # cpca already yields the attribute list directly.
                    class_attr_list = class_attr_imp_matrix
                else:
                    logger.info("class attr imp matrix shape: " +
                                str(class_attr_imp_matrix.shape))
                    class_attr_list = map_attr_imp_analysis(
                        class_attr_imp_matrix, logger)
                logger.info(class_attr_list)
                out_obj_matrix.append(class_attr_list)
        out_obj_matrix = np.array(out_obj_matrix)
        logger.info("out obj to: " + out_obj_folder + out_obj_file)
        logger.info(out_obj_matrix.shape)
        save_obj([out_obj_matrix], out_obj_folder + out_obj_file)
def run_pure_pv_evaluation( file_keyword, parameter_file='../../parameters/pv_baseline_evaluation.txt', function_keyword="pure_pv_evaluation"): data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, method, log_folder, out_obj_folder = read_pure_feature_generation( parameter_file, function_keyword) print data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, method, log_folder, out_obj_folder file_list = list_files(data_folder) file_count = 0 for train_file in file_list: if file_keyword not in train_file: continue train_key = train_file.replace('.txt', '') file_count = file_count + 1 data_matrix, attr_num = file_reading(data_folder + train_file) train_x_matrix, train_y_vector = x_y_spliting(data_matrix, class_column) train_row, train_col = train_x_matrix.shape train_x_matrix = train_x_matrix.reshape(train_row, attr_num, attr_len) if class_id < 0: min_class = min(train_y_vector) max_class = max(train_y_vector) + 1 else: min_class = class_id max_class = min_class + 1 log_file = train_key + "_" + method + "_min" + str( min_class) + "_max" + str(max_class) + "_pure_projected.log" #logger = setup_logger('') logger = setup_logger(log_folder + log_file) print "log file: " + log_folder + log_file logger.info(train_file) out_obj_file = train_key + "_" + method + "_min" + str( min_class) + "_max" + str(max_class) + "_pure_projected.obj" out_obj_matrix = [] logger.info("min class: " + str(min_class)) logger.info("max class: " + str(max_class)) for label in range(min_class, max_class): class_train_y = np.where(train_y_vector == label, 1, 0) logger.info("label: " + str(label)) if method == 'rf_lda': class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_lda_analysis( train_x_matrix, class_train_y, logger) elif method == "rf": class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_analysis( train_x_matrix, class_train_y, logger) elif method == "lda": 
class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_lda_analysis( train_x_matrix, class_train_y, logger) logger.info("class attr imp matrix shape: " + str(class_attr_imp_matrix.shape)) class_attr_list = map_attr_imp_analysis(class_attr_imp_matrix, logger) logger.info(class_attr_list) logger.info(class_attr_list.shape) out_obj_matrix.append(class_attr_list) out_obj_matrix = np.array(out_obj_matrix) logger.info("out obj to: " + out_obj_folder + out_obj_file) logger.info(out_obj_matrix.shape) save_obj([out_obj_matrix], out_obj_folder + out_obj_file)