def run_feature_projected_classification(train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, feature_array, top_k, method, class_id=-1, logger=None):
    """Train one binary (one-vs-rest) classifier per class on that class's top attributes.

    For every class ``i`` in the selected range the labels are binarised
    (``1`` where the label equals ``i``, else ``0``), both data matrices are
    projected onto the first ``top_k`` attribute indices listed in
    ``feature_array[i]``, flattened to 2-D, and handed to the classifier
    selected by ``method``.

    Args:
        train_x_matrix: 4-D array unpacked as
            ``(rows, attr_len, attr_num, input_map)``.
        train_y_vector: Training labels; compared element-wise against the
            class index.
        test_x_matrix: 4-D array with the same layout as ``train_x_matrix``.
        test_y_vector: Test labels.
        feature_array: 2-D array; row ``i`` lists attribute indices for
            class ``i`` (presumably ranked best-first -- confirm with the
            producer of this array).
        top_k: Number of leading attribute indices to keep per class.
        method: One of ``'knn'``, ``'rf'`` or ``'libsvm'``.
        class_id: ``-1`` to iterate over every class from
            ``min(train_y_vector)`` to ``max(train_y_vector)``; otherwise
            only that single class is processed.
        logger: Logger to use; ``init_logging('')`` is called when ``None``.

    Returns:
        Tuple ``(all_accuracy, all_f1_value, all_predict_y, all_train_time,
        all_test_time, all_predict_matrix)``. The three lists hold one entry
        per processed class. ``all_predict_matrix`` has shape
        ``(test_rows, feature_array.shape[0])``; column ``i`` holds column 1
        of the probabilities returned for class ``i`` and stays ``0`` for
        classes that were not processed. ``all_accuracy`` and
        ``all_predict_y`` come from
        ``predict_matrix_with_prob_to_predict_accuracy``.

    Raises:
        ValueError: If ``method`` is not a supported classifier name.
    """
    # Fail fast: previously an unknown method slipped through the if/elif
    # chain and only surfaced as a NameError after the projection work.
    supported_methods = ('knn', 'rf', 'libsvm')
    if method not in supported_methods:
        raise ValueError(
            "unsupported method %r; expected one of %s"
            % (method, ', '.join(supported_methods)))

    if logger is None:
        logger = init_logging('')

    train_row, _, _, _ = train_x_matrix.shape
    test_row, attr_len, _, input_map = test_x_matrix.shape
    real_num_classes, _ = feature_array.shape
    all_predict_matrix = np.zeros((test_row, real_num_classes))

    if class_id == -1:
        min_class = min(train_y_vector)
        max_class = max(train_y_vector) + 1
    else:
        min_class = class_id
        max_class = class_id + 1

    # Fixed hyper-parameters forwarded to the underlying classifiers.
    n_neighbors = 1
    samples_leaf = 20
    prob = True

    all_f1_value = []
    all_train_time = []
    all_test_time = []
    for i in range(min_class, max_class):
        logger.info('class: ' + str(i))
        # One-vs-rest labels for the current class.
        temp_train_y_vector = np.where(train_y_vector == i, 1, 0)
        temp_test_y_vector = np.where(test_y_vector == i, 1, 0)
        fold_positive_len = np.count_nonzero(temp_train_y_vector)
        fold_negative_len = len(temp_train_y_vector) - fold_positive_len

        logger.info("=====")
        logger.info("positive class labels length: " + str(fold_positive_len))
        logger.info("negative class labels length: " + str(fold_negative_len))

        class_feature = feature_array[i][0:top_k]
        logger.info("feature list: " + str(class_feature))

        # Width follows the number of attributes actually selected, so a
        # top_k larger than the available attribute list no longer breaks
        # the reshape.
        feature_col = attr_len * len(class_feature) * input_map
        temp_train_x_matrix = train_x_matrix[:, :, class_feature, :].reshape(
            train_row, feature_col)
        temp_test_x_matrix = test_x_matrix[:, :, class_feature, :].reshape(
            test_row, feature_col)

        # The accuracy returned by the runners is discarded; it is
        # recomputed together with the F1 score below.
        if method == 'knn':
            _, class_predict_y, class_predict_prob, class_train_time, class_test_time = run_knn(
                temp_train_x_matrix, temp_train_y_vector,
                temp_test_x_matrix, temp_test_y_vector, n_neighbors, prob)
        elif method == 'rf':
            _, class_predict_y, class_predict_prob, class_train_time, class_test_time = run_rf(
                temp_train_x_matrix, temp_train_y_vector,
                temp_test_x_matrix, temp_test_y_vector, samples_leaf, prob)
        else:  # 'libsvm' -- guaranteed by the validation above
            _, class_predict_y, class_predict_prob, class_train_time, class_test_time = run_libsvm(
                temp_train_x_matrix, temp_train_y_vector,
                temp_test_x_matrix, temp_test_y_vector, logger, prob, '', True)

        class_accuracy, precision, recall, class_f1, tp, fp, tn, fn = f1_value_precision_recall_accuracy(
            class_predict_y, temp_test_y_vector, 1)
        logger.info(method + " f1 for class " + str(i) + ": " + str(class_f1))
        logger.info(method + " accuracy for class " + str(i) + ": " + str(class_accuracy))

        all_f1_value.append(class_f1)
        all_train_time.append(class_train_time)
        all_test_time.append(class_test_time)
        # Column 1 is presumably the probability of the positive label.
        all_predict_matrix[:, i] = class_predict_prob[:, 1]

    all_accuracy, all_predict_y = predict_matrix_with_prob_to_predict_accuracy(
        all_predict_matrix, test_y_vector)
    return all_accuracy, all_f1_value, all_predict_y, all_train_time, all_test_time, all_predict_matrix
def run_feature_projected_cnn(train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, data_stru, cnn_setting, feature_dict, top_k, saver_file_profix='', class_id=-1, logger=None):
    """Train one binary (one-vs-rest) CNN per class on that class's top attributes.

    For every class ``i`` in the selected range the labels are binarised,
    both data matrices are projected onto the first ``top_k`` attribute
    indices in ``feature_dict[i]``, and ``cnn_train`` is run as a two-class
    problem. The flow graph is built once (on the first class) via
    ``cnn_set_flow_graph`` and reused for the remaining classes.

    Args:
        train_x_matrix: 4-D array unpacked as
            ``(rows, attr_len, attr_num, input_map)``.
        train_y_vector: Training labels; compared element-wise against the
            class index.
        test_x_matrix: 4-D array with the same layout as ``train_x_matrix``.
        test_y_vector: Test labels.
        data_stru: Object whose ``num_classes`` is temporarily set to 2
            while this function runs and restored before returning.
        cnn_setting: Object whose ``num_classes`` is set to 2 and
            ``feature_method`` to ``'none'``; ``temp_obj_folder`` is read to
            build the saver path.
        feature_dict: Mapping/sequence indexed by class id that yields the
            attribute indices for that class (presumably ranked best-first
            -- confirm with the producer).
        top_k: Number of leading attribute indices to keep per class.
        saver_file_profix: Prefix for the per-class saver file name.
        class_id: ``-1`` to iterate over every class from
            ``min(train_y_vector)`` to ``max(train_y_vector)``; otherwise
            only that single class is processed.
        logger: Logger to use; ``init_logging('')`` is called when ``None``.

    Returns:
        Tuple ``(all_accuracy, all_f1_value, all_predict_y, all_train_time,
        all_test_time, all_predict_matrix)``. The three lists hold one entry
        per processed class. ``all_predict_matrix`` has shape
        ``(test_rows, original data_stru.num_classes)``; column ``i`` holds
        column 1 of the probabilities returned for class ``i``.
        ``all_accuracy`` and ``all_predict_y`` come from
        ``predict_matrix_with_prob_to_predict_accuracy``.
    """
    if logger is None:
        logger = init_logging('')
    method = 'cnn'
    num_classes = 2

    real_num_classes = data_stru.num_classes
    data_stru.num_classes = num_classes
    # NOTE(review): cnn_setting is mutated and never restored, exactly as
    # before -- confirm whether callers expect these values to persist.
    cnn_setting.num_classes = num_classes
    cnn_setting.feature_method = 'none'

    try:
        train_row, attr_len, attr_num, input_map = train_x_matrix.shape
        test_row, attr_len, attr_num, input_map = test_x_matrix.shape
        all_predict_matrix = np.zeros((test_row, real_num_classes))

        if class_id == -1:
            min_class = min(train_y_vector)
            max_class = max(train_y_vector) + 1
        else:
            min_class = class_id
            max_class = class_id + 1

        saver_file_profix = saver_file_profix + '_class'
        all_f1_value = []
        all_train_time = []
        all_test_time = []

        for i in range(min_class, max_class):
            logger.info('class: ' + str(i))
            # One-vs-rest labels for the current class.
            temp_train_y_vector = np.where(train_y_vector == i, 1, 0)
            temp_test_y_vector = np.where(test_y_vector == i, 1, 0)
            class_saver_profix = saver_file_profix + str(i)
            fold_positive_len = np.count_nonzero(temp_train_y_vector)
            fold_negative_len = len(temp_train_y_vector) - fold_positive_len

            logger.info("=====")
            logger.info("positive class labels length: " + str(fold_positive_len))
            logger.info("negative class labels length: " + str(fold_negative_len))

            class_feature = feature_dict[i][0:top_k]
            print("class: " + str(i))
            print("number of features: " + str(top_k))
            print("Top features list: " + str(class_feature))
            logger.info("Top feature list: " + str(class_feature))

            temp_train_x_matrix = train_x_matrix[:, :, class_feature, :]
            temp_test_x_matrix = test_x_matrix[:, :, class_feature, :]
            temp_train_y_matrix = y_vector_to_matrix(temp_train_y_vector, num_classes)
            temp_test_y_matrix = y_vector_to_matrix(temp_test_y_vector, num_classes)

            if i == min_class:
                # Build the graph only once; the placeholders and the base
                # saver name are reused for every subsequent class.
                train_x_placeholder, output_y_placeholder, predict_y_prob, keep_prob_placeholder, keeped_feature_list, keep_saver_file = cnn_set_flow_graph(
                    data_stru, cnn_setting, input_map, False, logger)

            saver_file = cnn_setting.temp_obj_folder + class_saver_profix + keep_saver_file + "_top" + str(top_k)
            # Function-call form works under both Python 2 and Python 3.
            print(saver_file)

            class_eval_value, class_train_time, class_test_time, class_predict_prob, fold_saver_file, _ = cnn_train(
                temp_train_x_matrix, temp_train_y_matrix,
                temp_test_x_matrix, temp_test_y_matrix,
                num_classes, cnn_setting,
                train_x_placeholder, output_y_placeholder, predict_y_prob,
                keep_prob_placeholder, keeped_feature_list, saver_file, logger)

            class_predict_y = np.argmax(class_predict_prob, axis=1)
            class_accuracy, precision, recall, class_f1, tp, fp, tn, fn = f1_value_precision_recall_accuracy(
                class_predict_y, temp_test_y_vector, 1)
            # A NaN evaluation value from training zeroes the reported F1.
            if str(class_eval_value) == 'nan':
                class_f1 = 0

            logger.info(method + " f1 for class " + str(i) + ": " + str(class_f1))
            logger.info(method + " accuracy for class " + str(i) + ": " + str(class_accuracy))
            logger.info(method + ' model saved: ' + fold_saver_file)

            all_f1_value.append(class_f1)
            all_train_time.append(class_train_time)
            all_test_time.append(class_test_time)
            # Column 1 is presumably the probability of the positive label.
            all_predict_matrix[:, i] = class_predict_prob[:, 1]

        all_accuracy, all_predict_y = predict_matrix_with_prob_to_predict_accuracy(
            all_predict_matrix, test_y_vector)
        return all_accuracy, all_f1_value, all_predict_y, all_train_time, all_test_time, all_predict_matrix
    finally:
        # Restore the caller's class count even when training raises, so a
        # failed run does not leave data_stru stuck in two-class mode.
        data_stru.num_classes = real_num_classes