def cnn_load_main(parameter_file, file_keyword, function_keyword="cnn_classification"):
    """Load a previously trained CNN checkpoint for each matching train file and
    analyse its last convolutional layer.

    For every file in ``data_folder`` whose name contains ``file_keyword`` this:
    reads the train/test split, loads the matching ``.ckpt`` model from
    ``out_model_folder``, extracts last-conv feature maps for train and test,
    ranks kernels per class via ``last_conv_analysis``, trains a small
    one-vs-rest NN per class on the selected kernels, and compares the
    resulting accuracy against the original CNN predictions.

    NOTE(review): the bare name ``sdfds`` partway through the loop body is
    undefined and raises NameError at runtime — apparently a deliberate debug
    stop.  Everything after it inside the loop is unreachable dead code, which
    itself contains further stops (``sdfs``, ``sdfsd``, ``sdfd``, ``sdf``) and
    references to names never defined anywhere visible here
    (``fir_weight_variable``, ``fir_bias_variable``, ``kernel_dist``,
    ``keep_index``, ``fir_out_mean``).
    """
    # Unpack the experiment configuration from the parameter file.
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, method, log_folder, out_obj_folder, out_model_folder, cnn_setting_file = read_all_feature_classification(parameter_file, function_keyword)
    print(data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, method, log_folder, out_obj_folder, out_model_folder, cnn_setting_file)
    log_folder = init_folder(log_folder)
    out_obj_folder = init_folder(out_obj_folder)
    out_model_folder = init_folder(out_model_folder)
    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len, class_column)
    file_list = list_files(data_folder)
    file_count = 0
    # Overrides the value read from the parameter file: labels are in column 0.
    class_column = 0
    header = True
    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
    cnn_setting.out_obj_folder = out_obj_folder
    cnn_setting.out_model_folder = out_model_folder
    cnn_setting.full_feature_num = 400
    # Folders were already initialised above; these repeat calls are redundant.
    init_folder(out_obj_folder)
    init_folder(out_model_folder)
    print(out_model_folder)
    model_file_list = list_files(out_model_folder)
    # NOTE(review): result_obj_folder is initialised but never used afterwards.
    result_obj_folder = obj_folder + method + "_result_folder"
    result_obj_folder = init_folder(result_obj_folder)
    logger = setup_logger('')
    delimiter = ' '
    loop_count = -1
    saver_file_profix = ""
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        saver_file_profix = file_key
        test_file = train_file.replace('train', 'test')
        #train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, attr_num = train_test_file_reading(data_folder + train_file, data_folder + test_file, '', class_column, delimiter, header)
        data_group, attr_num = train_test_file_reading(data_folder + train_file, data_folder + test_file, '', class_column, delimiter, header)
        train_x_matrix = data_group.train_x_matrix
        train_y_vector = data_group.train_y_vector
        test_x_matrix = data_group.test_x_matrix
        test_y_vector = data_group.test_y_vector
        train_x_matrix = train_test_transpose(train_x_matrix, attr_num, attr_len, False)
        test_x_matrix = train_test_transpose(test_x_matrix, attr_num, attr_len, False)
        train_y_matrix = y_vector_to_matrix(train_y_vector, num_classes)
        test_y_matrix = y_vector_to_matrix(test_y_vector, num_classes)
        # Find the checkpoint saved for this data file (prefix match on file_key).
        found_model_file = ""
        for model_file in model_file_list:
            if model_file.startswith(file_key):
                model_file = model_file.split('.')[0]
                found_model_file = out_model_folder + model_file + ".ckpt"
                break
        if found_model_file == "":
            raise Exception("No model object file found!!!")
        print(found_model_file)
        cnn_session, logits_out, train_x_placeholder, keep_prob_placeholder, keeped_feature_list = load_model(found_model_file, data_stru, cnn_setting, logger)
        # Last convolutional layer output, evaluated on train and test data.
        last_conv_tensor = keeped_feature_list[0]
        train_last_conv = cnn_session.run(last_conv_tensor, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        test_last_conv = cnn_session.run(last_conv_tensor, feed_dict={train_x_placeholder: test_x_matrix, keep_prob_placeholder: 1.0})
        drop_num = 10
        print(np.squeeze(test_last_conv[1, :, :, :]))
        test_last_conv = top_attr_x_matrix(test_last_conv, drop_num)
        print(np.squeeze(test_last_conv[1, :, :, :]))
        train_last_conv = top_attr_x_matrix(train_last_conv, drop_num)
        # Accuracy graph for the original CNN predictions on the test set.
        output_y_placeholder = tf.placeholder(tf.float32, [None, num_classes])
        actual = tf.argmax(output_y_placeholder, axis=1)
        prediction = tf.argmax(logits_out, axis=1)
        correct_prediction = tf.equal(actual, prediction)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        ori_pred_y_vector = cnn_session.run(prediction, feed_dict={train_x_placeholder: test_x_matrix, keep_prob_placeholder: 1.0})
        test_accuracy = cnn_session.run(accuracy, feed_dict={train_x_placeholder: test_x_matrix, keep_prob_placeholder: 1.0, output_y_placeholder: test_y_matrix})
        cnn_session.close()
        # Per-class kernel rankings computed from the train-set feature maps.
        kernel_eval_matrix, ref_kernel_eval_matrix = last_conv_analysis(train_last_conv, train_y_vector)
        print(kernel_eval_matrix.shape)
        print(kernel_eval_matrix)
        train_ins_len = len(train_y_vector)
        test_ins_len = len(test_y_vector)
        # Hyper-parameters of the small per-class NN trained below.
        batch_size = 100
        layer_list = np.array([400])
        max_epoch = 10
        stop_threshold = 0.99
        activation_fun = 3
        std_value = 0.02
        eval_method = "acc"
        saver_file = './test_1.save'
        nn_setting = nn_parameters(layer_list, batch_size, max_epoch, stop_threshold, activation_fun, std_value, eval_method, saver_file)
        all_pred_prob = []
        # One-vs-rest NN per class on the top-ranked kernels of that class.
        for c in range(num_classes):
            train_y_vector_class = np.zeros((train_ins_len))
            index_class = np.where(train_y_vector==c)[0]
            train_y_vector_class[index_class] = 1
            train_y_m_class = y_vector_to_matrix(train_y_vector_class, 2)
            test_y_vector_class = np.zeros((test_ins_len))
            index_class = np.where(test_y_vector==c)[0]
            test_y_vector_class[index_class] = 1
            test_y_m_class = y_vector_to_matrix(test_y_vector_class, 2)
            keep_num = 5
            kernel_index = kernel_eval_matrix[c, 0:keep_num]
            ref_kernel_index = ref_kernel_eval_matrix[c, 0:keep_num]
            print("kernel index " + str(kernel_index))
            print("ref kernel index " + str(ref_kernel_index))
            kernel_index = np.concatenate((kernel_index, ref_kernel_index), axis=0)
            print("union index " + str(kernel_index))
            kernel_index = np.unique(kernel_index)
            print("unique index " + str(kernel_index))
            # NOTE(review): this overwrite discards the union/unique computed
            # just above — only the ref ranking is actually used.
            kernel_index = ref_kernel_eval_matrix[c, 0:keep_num]
            train_x_class = train_last_conv[:, :, :, kernel_index]
            test_x_class = test_last_conv[:, :, :, kernel_index]
            print(train_x_class.shape)
            # Assumes each kernel's spatial map flattens to 45 values — TODO confirm.
            reshape_col = 45 * len(kernel_index)
            train_x_class = train_x_class.reshape((train_ins_len, reshape_col))
            test_x_class = test_x_class.reshape((test_ins_len, reshape_col))
            c_eval_value, c_train_time, c_test_time, c_predict_proba = run_nn(train_x_class, train_y_m_class, test_x_class, test_y_m_class, nn_setting)
            # Per-class score: positive-class minus negative-class probability.
            all_pred_prob.append(c_predict_proba[:, 1]-c_predict_proba[:, 0])
        all_pred_prob = np.array(all_pred_prob)
        print(all_pred_prob.shape)
        # Final label = class whose one-vs-rest score is highest.
        pred_vector = np.argmax(all_pred_prob, axis=0)
        print(pred_vector)
        print(all_pred_prob[:, 0])
        print(all_pred_prob[:, 1])
        print(all_pred_prob[:, 2])
        final_accuracy = accuracy_score(pred_vector, test_y_vector)
        avg_acc, ret_str = averaged_class_based_accuracy(ori_pred_y_vector, test_y_vector)
        print("original avg acc" + str(avg_acc))
        print("original accuracy: " + str(test_accuracy))
        print(ret_str)
        avg_acc, ret_str = averaged_class_based_accuracy(pred_vector, test_y_vector)
        print("avg acc" + str(avg_acc))
        print("new accuracy: " + str(final_accuracy))
        print(ret_str)
        load_result_analysis(all_pred_prob, test_y_vector)
        # --- Debug stop: undefined name, raises NameError. Everything below in
        # this loop body is unreachable dead code kept for reference. ---
        sdfds
        output_y_placeholder = tf.placeholder(tf.float32, [None, num_classes])
        actual = tf.argmax(output_y_placeholder, axis=1)
        prediction = tf.argmax(logits_out, axis=1)
        correct_prediction = tf.equal(actual, prediction)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        test_eval_value = accuracy.eval(feed_dict={train_x_placeholder: test_x_matrix, output_y_placeholder: test_y_matrix, keep_prob_placeholder: 1.0})
        print("fisrt")
        print(test_eval_value)
        conv_count = 1
        drop_ratio = 0.1
        #conv_variable_up_main(cnn_session, conv_count, drop_ratio)
        weight_name = "conv_w_" + str(0) + ":0"
        bias_name = "conv_b_" + str(0) + ":0"
        ori_weight_variable = tf.get_default_graph().get_tensor_by_name(weight_name)
        ori_bias_variable = tf.get_default_graph().get_tensor_by_name(bias_name)
        weight_variable = tf.get_default_graph().get_tensor_by_name(weight_name)
        bias_variable = tf.get_default_graph().get_tensor_by_name(bias_name)
        ori_weight_variable = cnn_session.run(weight_variable)
        ori_bias_variable = cnn_session.run(bias_variable)
        train_drop_acc = []
        test_drop_acc = []
        # Drop one first-layer kernel at a time and measure the accuracy impact.
        for drop_i in range(50):
            drop_weight_variable = np.copy(ori_weight_variable)
            drop_bias_variable = np.copy(ori_bias_variable)
            drop_index = []
            drop_index.append(drop_i)
            up_fir_weight, up_fir_bias = conv_variable_up(drop_weight_variable, drop_bias_variable, drop_index)
            weight_assign = tf.assign(weight_variable, up_fir_weight)
            bias_assign = tf.assign(bias_variable, up_fir_bias)
            cnn_session.run(weight_assign)
            cnn_session.run(bias_assign)
            up_bias_variable = tf.get_default_graph().get_tensor_by_name(bias_name)
            up_bias_variable_val = cnn_session.run(bias_variable)
            train_eval_value = accuracy.eval(feed_dict={train_x_placeholder: train_x_matrix, output_y_placeholder: train_y_matrix, keep_prob_placeholder: 1.0})
            train_drop_acc.append(train_eval_value)
            test_eval_value = accuracy.eval(feed_dict={train_x_placeholder: test_x_matrix, output_y_placeholder: test_y_matrix, keep_prob_placeholder: 1.0})
            test_drop_acc.append(test_eval_value)
            print("Drop " + str(drop_i))
            print(train_eval_value)
            print(test_eval_value)
        print(train_drop_acc)
        # NOTE(review): train_drop_acc/test_drop_acc are Python lists — .argsort()
        # would raise AttributeError even if this code were reachable.
        print(train_drop_acc.argsort())
        print(test_drop_acc)
        print(test_drop_acc.argsort())
        sdfs
        print("HERE")
        # NOTE(review): fir_weight_variable_val, kernel_dist and keep_index are
        # used here before any visible definition.
        fir_weight_variable_val = np.squeeze(fir_weight_variable_val)
        kernel_dist_val = cnn_session.run(kernel_dist)
        keep_index_val = cnn_session.run(keep_index)
        print(fir_weight_variable_val.shape)
        print(np.amax(fir_weight_variable_val, axis=1))
        print(np.amin(fir_weight_variable_val, axis=1))
        print(np.mean(fir_weight_variable_val, axis=1))
        mean_row = np.mean(fir_weight_variable_val, axis=-1)
        print(mean_row.shape)
        dist_list = []
        for r in range(40):
            row = fir_weight_variable_val[:, r]
            dist_list.append(np.linalg.norm(row-mean_row))
        print(dist_list)
        print(kernel_dist_val)
        print(keep_index_val)
        print(sorted(dist_list))
        print("!!!")
        #conv_variable_up(fir_weight_variable_val, fir_bias_variable_val)
        sdfsd
        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, attr_num = train_test_file_reading(data_folder + train_file, data_folder + test_file, class_column, delimiter, header)
        train_x_matrix = train_test_transpose(train_x_matrix, attr_num, attr_len, False)
        test_x_matrix = train_test_transpose(test_x_matrix, attr_num, attr_len, False)
        train_x_matrix = test_x_matrix[0:1, :, :, :]
        #plot_2dmatrix(np.squeeze(train_x_matrix)[:, 0:5])
        # First-layer convolution responses and their top-k kernels by mean max.
        fir_out_tensor = tf.nn.conv2d(train_x_placeholder, fir_weight_variable, strides=[1, 1, 1, 1], padding='VALID') + fir_bias_variable
        fir_out_tensor = tf.nn.relu(fir_out_tensor)
        print(fir_out_tensor.get_shape())
        fir_analysis_tensor = tf.reduce_max(fir_out_tensor, [1])
        print(fir_analysis_tensor.get_shape())
        fir_analysis_tensor = tf.reduce_max(fir_analysis_tensor, [1])
        fir_analysis_tensor = tf.reduce_mean(fir_analysis_tensor, [0])
        top_k_indices = tf.nn.top_k(fir_analysis_tensor, 10).indices
        top_k_values = tf.nn.top_k(fir_analysis_tensor, 10).values
        top_fir_out_tensor = tf.gather(fir_out_tensor, top_k_indices, axis=3)
        sec_weight_variable = tf.get_default_graph().get_tensor_by_name("conv_w_1:0")
        sec_bias_variable = tf.get_default_graph().get_tensor_by_name("conv_b_1:0")
        sec_out_tensor = tf.nn.conv2d(fir_out_tensor, sec_weight_variable, strides=[1, 1, 1, 1], padding='VALID') + sec_bias_variable
        sec_out_tensor = tf.nn.relu(sec_out_tensor)
        sec_weight_var_val = cnn_session.run(sec_weight_variable)
        #print(np.squeeze(sec_weight_var_val))
        #sdfds
        #plot_2dmatrix(fir_weight_var_val[:, 4])
        #sdf
        #print(fir_weight_var_val.T)
        fir_out_tensor_val = cnn_session.run(fir_out_tensor, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        print(fir_out_tensor_val.shape)
        top_fir_out_tensor = cnn_session.run(top_fir_out_tensor, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        print(top_fir_out_tensor.shape)
        fir_analysis_tensor_val = cnn_session.run(fir_analysis_tensor, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        print(fir_analysis_tensor.shape)
        top_k_indices_val = cnn_session.run(top_k_indices, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        top_k_values_val = cnn_session.run(top_k_values, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        fir_weight_variable_val = cnn_session.run(fir_weight_variable)
        fir_weight_variable_val = np.squeeze(fir_weight_variable_val)
        print(fir_weight_variable_val.shape)
        print(fir_analysis_tensor_val)
        fir_sort_in = np.argsort(fir_analysis_tensor_val)
        print(fir_sort_in)
        print(top_k_indices_val)
        print(top_k_values_val)
        plot_2dmatrix(fir_weight_variable_val[:, fir_sort_in[-10:]])
        sdfd
        # Per-instance, per-kernel report of which attribute maximises the response.
        for n in range(len(fir_out_tensor_val)):
            for k in range(50):
                ret_str = "k" + str(k) + ": "
                kernel_max = -1
                max_attr = -1
                max_attr_list = []
                for a in range(attr_num):
                    attr_max = max(fir_out_tensor_val[n, :, a, k])
                    max_attr_list.append(attr_max)
                    if attr_max > kernel_max:
                        kernel_max = attr_max
                        max_attr = a
                    if attr_max == 0:
                        ret_str = ret_str + str(a) + " "
                print(ret_str)
                print("max attr " + str(max_attr))
                # NOTE(review): the lambda parameter k shadows the loop variable k.
                print(sorted(range(len(max_attr_list)), key=lambda k: max_attr_list[k]))
            print("======")
        print("label " + str(train_y_vector[0]))
        fir_out_tensor_val = cnn_session.run(sec_out_tensor, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        print(fir_out_tensor_val.shape)
        sdf
        # Same report for the second conv layer (40 kernels).
        for n in range(len(fir_out_tensor_val)):
            for k in range(40):
                ret_str = "k" + str(k) + ": "
                kernel_max = -1
                max_attr = -1
                max_attr_list = []
                for a in range(attr_num):
                    attr_max = max(fir_out_tensor_val[n, :, a, k])
                    max_attr_list.append(attr_max)
                    if attr_max > kernel_max:
                        kernel_max = attr_max
                        max_attr = a
                    if attr_max == 0:
                        ret_str = ret_str + str(a) + " "
                print(ret_str)
                print("max attr " + str(max_attr))
                print(sorted(range(len(max_attr_list)), key=lambda k: max_attr_list[k]))
            print("======")
        sdf
        fir_out_mean_val = cnn_session.run(fir_out_mean, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        #fir_out_mean_val = np.squeeze(fir_out_mean_val)
        print(fir_out_mean_val.shape)
        plot_2dmatrix(np.squeeze(fir_out_mean_val[:, :, 0:5]))
        sdfd
        plot_2dmatrix(fir_weight_var_val)
        min_class = min(train_y_vector)
        max_class = max(train_y_vector)
        num_classes = max_class - min_class + 1
        if cnn_setting.eval_method == "accuracy":
            cnn_eval_key = "acc"
        elif num_classes > 2:
            cnn_eval_key = "acc_batch"
        else:
            cnn_eval_key = "f1"
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(min_class)+"_" + str(max_class) + "_act" + str(cnn_setting.activation_fun) + "_" + cnn_eval_key + '.log'
        print("log file: " + log_file)
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('cnn setting:\n ' + cnn_setting.to_string())
        logger.info('method: ' + method)
        logger.info('============')
        train_x_matrix = train_test_transpose(train_x_matrix, attr_num, attr_len, False)
        test_x_matrix = train_test_transpose(test_x_matrix, attr_num, attr_len, False)
        # NOTE(review): file_count is never incremented, so this always fires.
        if file_count == 0:
            logger.info('train matrix shape: ' + str(train_x_matrix.shape))
            logger.info('train label shape: ' + str(train_y_vector.shape))
            logger.info('test matrix shape: ' + str(test_x_matrix.shape))
            logger.info('test label shape: ' + str(test_y_vector.shape))
            logger.info(train_x_matrix[0, 0:3, 0:2, 0])
            logger.info(test_x_matrix[0, 0:3, 0:2, 0])
        train_y_matrix = y_vector_to_matrix(train_y_vector, num_classes)
        test_y_matrix = y_vector_to_matrix(test_y_vector, num_classes)
        cnn_eval_value, train_run_time, test_run_time, cnn_predict_proba, saver_file, feature_list_obj_file = run_cnn(train_x_matrix, train_y_matrix, test_x_matrix, test_y_matrix, data_stru, cnn_setting, saver_file_profix, logger)
        logger.info("Fold eval value: " + str(cnn_eval_value))
        logger.info(method + ' fold training time (sec):' + str(train_run_time))
        logger.info(method + ' fold testing time (sec):' + str(test_run_time))
        logger.info("save obj to " + saver_file)
def backward_multitime_main(parameter_file="../../parameters/", file_keyword="train_", n_selected_features=15):
    """Run the backward feature-selection wrapper for every matching data file.

    For each file in ``data_folder`` whose name contains ``file_keyword``:
    reads the train/test split, reshapes both matrices to
    ``(samples, attr_num, attr_len)``, then runs ``backward_multitime`` once
    per class (or only for ``class_id`` when it is not -1) on a one-vs-rest
    label vector, logging the selected top features for each class.

    Args:
        parameter_file: path to the parameter file parsed by
            ``read_all_feature_classification``.
        file_keyword: substring a data file name must contain to be processed.
        n_selected_features: how many features the backward wrapper keeps.
    """
    function_keyword = "backward_wrapper"
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, method, log_folder, out_obj_folder, out_model_folder, cnn_setting_file = read_all_feature_classification(
        parameter_file, function_keyword)
    # Normalized from a Python-2 print statement to a print() call so the file
    # is valid Python 3 and consistent with the other functions in this module.
    print(data_keyword, data_folder, attr_num, attr_len, num_classes,
          start_class, class_column, class_id, obj_folder, method, log_folder,
          out_obj_folder, out_model_folder, cnn_setting_file)
    log_folder = init_folder(log_folder)
    out_obj_folder = init_folder(out_obj_folder)
    out_model_folder = init_folder(out_model_folder)
    # NOTE(review): data_stru is built but never used in this function.
    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len,
                                 class_column)
    file_list = list_files(data_folder)
    file_count = 0
    # Overrides the configured value: labels live in column 0 of the data files.
    class_column = 0
    header = True
    delimiter = ' '
    loop_count = -1
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(
            class_id) + '_' + method + '.log'
        print("log file: " + log_file)
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('method: ' + method)
        logger.info('============')
        test_file = train_file.replace('train', 'test')
        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector = train_test_file_reading(
            data_folder + train_file, data_folder + test_file, class_column,
            delimiter, header)
        # Reshape flat rows into (samples, attributes, time steps).
        n_samples, n_col = train_x_matrix.shape
        train_x_matrix = train_x_matrix.reshape(n_samples, attr_num, attr_len)
        n_samples, n_col = test_x_matrix.shape
        test_x_matrix = test_x_matrix.reshape(n_samples, attr_num, attr_len)
        # NOTE(review): file_count is never incremented, so this always fires.
        if file_count == 0:
            logger.info('train matrix shape: ' + str(train_x_matrix.shape))
            logger.info('train label shape: ' + str(train_y_vector.shape))
            logger.info('test matrix shape: ' + str(test_x_matrix.shape))
            logger.info('test label shape: ' + str(test_y_vector.shape))
        # class_id == -1 means "all classes"; otherwise only the requested one.
        if class_id == -1:
            min_class = min(train_y_vector)
            max_class = max(train_y_vector) + 1
        else:
            min_class = class_id
            max_class = class_id + 1
        for c in range(min_class, max_class):
            logger.info("Class: " + str(c))
            # One-vs-rest labels for class c.
            temp_train_y_vector = np.where(train_y_vector == c, 1, 0)
            temp_test_y_vector = np.where(test_y_vector == c, 1, 0)
            top_features = backward_multitime(
                train_x_matrix, temp_train_y_vector, test_x_matrix,
                temp_test_y_vector, n_selected_features, data_keyword, method,
                cnn_setting_file, logger)
            logger.info("Top Features For Class " + str(c) + ": " + str(top_features))
            logger.info("End Of Class: " + str(c))
def forward_multitime_main(parameter_file="../../parameters/", file_keyword="train_"):
    """Run the forward feature-selection wrapper for every matching data file.

    For each file in ``data_folder`` whose name contains ``file_keyword``:
    reads the train/test split, reshapes both matrices to
    ``(samples, attr_num, attr_len)``, then runs ``forward_multitime`` once per
    class on a one-vs-rest label vector, logging the selected top features.

    Args:
        parameter_file: path to the parameter file parsed by
            ``read_feature_classification``.
        file_keyword: substring a data file name must contain to be processed.
    """
    function_keyword = "forward_wrapper"
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, top_k, method, log_folder, out_obj_folder, out_model_folder, cnn_setting_file = read_feature_classification(
        parameter_file, function_keyword)
    # Normalized from a Python-2 print statement to a print() call so the file
    # is valid Python 3 and consistent with the other functions in this module.
    print(data_keyword, data_folder, attr_num, attr_len, num_classes,
          start_class, class_column, class_id, obj_folder, top_k, method,
          log_folder, out_obj_folder, out_model_folder, cnn_setting_file)
    # Per-dataset (n_selected_features, num_classes); same values as the former
    # if/elif chain, expressed as a lookup table.
    data_config = {
        "dsa": (15, 19),
        "toy": (15, 19),
        "rar": (30, 33),
        "arc": (30, 18),
        "fixed_arc": (30, 18),
        "asl": (6, 95),
    }
    if data_keyword not in data_config:
        raise Exception("Please fullfill the data basic information first!")
    n_selected_features, num_classes = data_config[data_keyword]
    log_folder = init_folder(log_folder)
    # NOTE(review): data_stru is built but never used in this function.
    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len,
                                 class_column)
    file_list = list_files(data_folder)
    file_count = 0
    # Overrides the configured value: labels live in column 0 of the data files.
    class_column = 0
    header = True
    delimiter = ' '
    loop_count = -1
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        # 'already' was a disabled warm-start cache lookup (dead code removed);
        # it is kept only because the log file name embeds its value.
        already = False
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(
            class_id) + '_' + method + "_top" + str(
            n_selected_features) + '_already' + str(already) + '.log'
        print("log file: " + log_file)
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('method: ' + method)
        logger.info('============')
        test_file = train_file.replace('train', 'test')
        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector = train_test_file_reading(
            data_folder + train_file, data_folder + test_file, class_column,
            delimiter, header)
        # Reshape flat rows into (samples, attributes, time steps).
        n_samples, n_col = train_x_matrix.shape
        train_x_matrix = train_x_matrix.reshape(n_samples, attr_num, attr_len)
        n_samples, n_col = test_x_matrix.shape
        test_x_matrix = test_x_matrix.reshape(n_samples, attr_num, attr_len)
        # NOTE(review): file_count is never incremented, so this always fires.
        if file_count == 0:
            logger.info('train matrix shape: ' + str(train_x_matrix.shape))
            logger.info('train label shape: ' + str(train_y_vector.shape))
            logger.info('test matrix shape: ' + str(test_x_matrix.shape))
            logger.info('test label shape: ' + str(test_y_vector.shape))
        min_class = min(train_y_vector)
        max_class = max(train_y_vector) + 1
        for c in range(min_class, max_class):
            logger.info("Class: " + str(c))
            # Warm-start feature list; always empty now that the cache path is gone.
            already_feature = []
            # One-vs-rest labels for class c.
            temp_train_y_vector = np.where(train_y_vector == c, 1, 0)
            temp_test_y_vector = np.where(test_y_vector == c, 1, 0)
            top_features = forward_multitime(
                train_x_matrix, temp_train_y_vector, test_x_matrix,
                temp_test_y_vector, n_selected_features, data_keyword, file_key,
                method, cnn_setting_file, logger, already_feature)
            logger.info("Top Features For Class " + str(c) + ": " + str(top_features))
            logger.info("End Of Class: " + str(c))
def cnn_classification_main(parameter_file, file_keyword, function_keyword="cnn_classification"):
    """Train and evaluate a CNN on every matching train/valid/test file set.

    For each file in ``data_folder`` whose name contains ``file_keyword``:
    reads the train file (plus optional valid/test siblings obtained by name
    substitution), preprocesses the data group, runs ``run_cnn``, and logs the
    averaged class-based accuracy, runtimes, and saved model path.

    Args:
        parameter_file: path to the parameter file parsed by
            ``read_all_feature_classification``.
        file_keyword: substring a data file name must contain to be processed.
        function_keyword: section keyword used when parsing the parameter file
            and when composing log-file names.
    """
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, method, log_folder, out_obj_folder, out_model_folder, cnn_setting_file = read_all_feature_classification(
        parameter_file, function_keyword)
    print(data_keyword, data_folder, attr_num, attr_len, num_classes,
          start_class, class_column, class_id, obj_folder, method, log_folder,
          out_obj_folder, out_model_folder, cnn_setting_file)
    log_folder = init_folder(log_folder)
    out_obj_folder = init_folder(out_obj_folder)
    out_model_folder = init_folder(out_model_folder)
    file_list = list_files(data_folder)
    file_count = 0
    # Overrides the configured value: labels live in column 0 of the data files.
    class_column = 0
    header = True
    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
    cnn_setting.out_obj_folder = out_obj_folder
    cnn_setting.out_model_folder = out_model_folder
    # Folders were already initialised above; these repeat calls are redundant
    # but harmless.
    init_folder(out_obj_folder)
    init_folder(out_model_folder)
    result_obj_folder = obj_folder + method + "_result_folder"
    result_obj_folder = init_folder(result_obj_folder)
    delimiter = ' '
    loop_count = -1
    saver_file_profix = ""
    # -1 disables attention (removed a dead `attention_type = 0` assignment
    # that was immediately overwritten).
    attention_type = -1
    cnn_setting.attention_type = attention_type
    trans_bool = False
    # True: means ins * attr_len * 1 * attr_num
    # False: means ins * attr_len * attr_num * 1
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        saver_file_profix = file_key + "_atten" + str(attention_type)
        # Optional valid/test siblings, located by filename substitution; an
        # empty string tells train_test_file_reading the split is absent.
        valid_file = data_folder + train_file.replace('train', 'valid')
        if os.path.isfile(valid_file) is False:
            valid_file = ''
        test_file = data_folder + train_file.replace('train', 'test')
        if os.path.isfile(test_file) is False:
            test_file = ''
        data_group, attr_num = train_test_file_reading(
            data_folder + train_file, test_file, valid_file, class_column,
            delimiter, header)
        data_group_processing(data_group, attr_num, trans_bool)
        data_stru = data_group.gene_data_stru()
        data_group.data_check(data_stru.num_classes, data_stru.min_class)
        # Evaluation key baked into the log file name.
        if cnn_setting.eval_method == "accuracy":
            cnn_eval_key = "acc"
        elif num_classes > 2:
            cnn_eval_key = "acc_batch"
        else:
            cnn_eval_key = "f1"
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(
            data_stru.min_class
        ) + "_" + str(data_stru.num_classes) + "_act" + str(
            cnn_setting.activation_fun
        ) + "_" + cnn_eval_key + "_attention" + str(attention_type) + '.log'
        print("log file: " + log_file)
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('cnn setting:\n ' + cnn_setting.to_string())
        logger.info('method: ' + method)
        logger.info('============')
        # NOTE(review): file_count is never incremented, so this always fires.
        if file_count == 0:
            logger.info('train matrix shape: ' + str(data_group.train_x_matrix.shape))
            logger.info('train label shape: ' + str(data_group.train_y_vector.shape))
            logger.info(data_group.train_x_matrix[0, 0:3, 0:2, 0])
        pred_y_prob, train_run_time, test_run_time, cnn_model = run_cnn(
            cnn_setting, data_group, saver_file_profix, logger)
        pred_y_vector = np.argmax(pred_y_prob, axis=1)
        avg_acc, ret_str = averaged_class_based_accuracy(
            pred_y_vector, data_group.test_y_vector)
        acc_value = accuracy_score(data_group.test_y_vector, pred_y_vector, True)
        logger.info("Averaged acc: " + str(acc_value))
        logger.info(ret_str)
        logger.info("Fold eval value: " + str(acc_value))
        logger.info(method + ' fold training time (sec):' + str(train_run_time))
        logger.info(method + ' fold testing time (sec):' + str(test_run_time))
        logger.info("save obj to " + cnn_model.saver_file)
def best_forward_multitime_main(parameter_file="../../parameters/", file_keyword="train_", function_keyword="best_forward_multitime"):
    """Run the fixed-width forward feature-selection wrapper per class.

    For each file in ``data_folder`` whose name contains ``file_keyword``:
    reads the train/test split, reshapes both matrices to
    ``(samples, attr_num, attr_len)``, then runs
    ``fixed_width_forward_multitime`` (beam width ``keep_k``) once per class on
    a one-vs-rest label vector, logging the selected top features.

    Args:
        parameter_file: path to the parameter file parsed by
            ``read_feature_classification``.
        file_keyword: substring a data file name must contain to be processed.
        function_keyword: base keyword for log-file names; the method name is
            appended to it below.
    """
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, top_k, method, log_folder, out_obj_folder, out_model_folder, cnn_setting_file = read_feature_classification(
        parameter_file, function_keyword)
    # Normalized from a Python-2 print statement to a print() call so the file
    # is valid Python 3 and consistent with the other functions in this module.
    print(data_keyword, data_folder, attr_num, attr_len, num_classes,
          start_class, class_column, class_id, obj_folder, method, log_folder,
          out_obj_folder, out_model_folder, cnn_setting_file)
    function_keyword = function_keyword + "_" + method
    # Per-dataset (n_selected_features, num_classes); same values as the former
    # if/elif chain, expressed as a lookup table.
    data_config = {
        "dsa": (15, 19),
        "toy": (15, 19),
        "rar": (30, 33),
        "arc": (30, 18),
        "fixed_arc": (30, 18),
        "asl": (6, 95),
    }
    if data_keyword not in data_config:
        raise Exception("Please fullfill the data basic information first!")
    n_selected_features, num_classes = data_config[data_keyword]
    # Beam width for the fixed-width forward search.
    keep_k = 5
    log_folder = init_folder(log_folder)
    # NOTE(review): data_stru is built but never used in this function.
    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len,
                                 class_column)
    file_list = list_files(data_folder)
    file_count = 0
    # Overrides the configured value: labels live in column 0 of the data files.
    class_column = 0
    header = True
    delimiter = ' '
    loop_count = -1
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(
            class_id) + '_' + method + "_top" + str(n_selected_features) + '.log'
        print("log file: " + log_file)
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('method: ' + method)
        logger.info('============')
        test_file = train_file.replace('train', 'test')
        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector = train_test_file_reading(
            data_folder + train_file, data_folder + test_file, class_column,
            delimiter, header)
        # Reshape flat rows into (samples, attributes, time steps).
        n_samples, n_col = train_x_matrix.shape
        train_x_matrix = train_x_matrix.reshape(n_samples, attr_num, attr_len)
        n_samples, n_col = test_x_matrix.shape
        test_x_matrix = test_x_matrix.reshape(n_samples, attr_num, attr_len)
        # NOTE(review): file_count is never incremented, so this always fires.
        if file_count == 0:
            logger.info('train matrix shape: ' + str(train_x_matrix.shape))
            logger.info('train label shape: ' + str(train_y_vector.shape))
            logger.info('test matrix shape: ' + str(test_x_matrix.shape))
            logger.info('test label shape: ' + str(test_y_vector.shape))
        min_class = min(train_y_vector)
        max_class = max(train_y_vector) + 1
        for c in range(min_class, max_class):
            logger.info("Class: " + str(c))
            # One-vs-rest labels for class c.
            temp_train_y_vector = np.where(train_y_vector == c, 1, 0)
            temp_test_y_vector = np.where(test_y_vector == c, 1, 0)
            top_features = fixed_width_forward_multitime(
                train_x_matrix, temp_train_y_vector, test_x_matrix,
                temp_test_y_vector, n_selected_features, keep_k, data_keyword,
                file_key, method, cnn_setting_file, logger)
            logger.info("Top Features For Class " + str(c) + ": " + str(top_features))
            logger.info("End Of Class: " + str(c))