Exemplo n.º 1
0
def results_from_folder(folder_name,
                        file_keyword,
                        acc_keyword,
                        train_time_keyword,
                        test_time_keyword,
                        fold_count=10):
    """Print the average accuracy over the per-fold result files of a folder.

    For every fold id in ``range(fold_count)`` the names returned by
    ``list_files(folder_name)`` are scanned.  A file is used when its name
    does not start with ``'.'`` and contains both ``"train_<fold_id>_"`` and
    ``file_keyword``.  Each selected file name is printed and parsed with
    ``results_from_file``.

    Args:
        folder_name: Folder prefix.  It is concatenated directly with the
            file name, so it has to end with a path separator.
        file_keyword: Substring a file name must contain to be considered.
        acc_keyword: Forwarded to ``results_from_file``.
        train_time_keyword: Forwarded to ``results_from_file``.
        test_time_keyword: Forwarded to ``results_from_file``.
        fold_count: Number of fold ids to look for (default 10).

    Returns:
        None.  The average of the collected accuracies is printed.
    """
    file_list = list_files(folder_name)
    acc_list = []
    train_list = []
    test_list = []
    for fold_id in range(fold_count):
        fold_key = "train_" + str(fold_id) + "_"
        for file_name in file_list:
            if file_name.startswith('.'):
                continue
            if fold_key not in file_name:
                continue
            if file_keyword not in file_name:
                continue
            # Call form of print works on both Python 2 and Python 3.
            print(file_name)
            acc_value, train_time, test_time = results_from_file(
                folder_name + file_name, acc_keyword, train_time_keyword,
                test_time_keyword)
            if len(acc_list) > fold_id:
                # A slot for this fold already exists: the later file wins.
                acc_list[fold_id] = acc_value
                train_list[fold_id] = train_time
                test_list[fold_id] = test_time
            else:
                acc_list.append(acc_value)
                train_list.append(train_time)
                test_list.append(test_time)
    print(np.average(acc_list))
Exemplo n.º 2
0
def results_from_folder(folder_name, file_keyword, num_classes, line_keyword, bias=0):
    """Collect per-file value vectors, accuracies and timings from a folder.

    Every file listed by ``list_files(folder_name)`` whose name does not
    start with ``'.'`` and contains ``file_keyword`` is printed and parsed
    with ``results_from_file``.  Value vectors shorter than ``num_classes``
    are padded in place with ``-1``.

    Args:
        folder_name: Folder prefix, concatenated directly with the file name.
        file_keyword: Substring a file name must contain to be processed.
        num_classes: Minimum length every value vector is padded to.
        line_keyword: Forwarded to ``results_from_file``.
        bias: Forwarded to ``results_from_file`` (default 0).

    Returns:
        A tuple ``(value_matrix, file_count, acc_time_matrix)`` where
        ``value_matrix`` is ``np.array`` of the padded value vectors,
        ``file_count`` is the number of processed files minus one
        (``-1`` when nothing matched) and ``acc_time_matrix`` is
        ``np.array([train_times, test_times, accuracies])``.
    """
    all_files = list_files(folder_name)
    rows = []
    accuracies = []
    train_times = []
    test_times = []
    # Counter starts at -1, so the returned value is "files processed - 1".
    last_index = -1
    # Third '_'-separated token of every processed name; seeded with the
    # initial counter value.  Collected but not part of the return value.
    name_tokens = [last_index]
    for file_name in all_files:
        if file_name.startswith('.') or file_keyword not in file_name:
            continue
        print(file_name)
        last_index += 1
        name_tokens.append(file_name.split('_')[2])
        parsed = results_from_file(folder_name + file_name, line_keyword, bias)
        value_vector, accuracy, train_time, test_time = parsed
        print(np.array(value_vector).shape)
        # Pad short vectors with -1 up to num_classes entries.
        while len(value_vector) < num_classes:
            value_vector.append(-1)
        rows.append(value_vector)
        accuracies.append(accuracy)
        train_times.append(train_time)
        test_times.append(test_time)
    timing_and_accuracy = np.array([train_times, test_times, accuracies])
    return np.array(rows), last_index, timing_and_accuracy
Exemplo n.º 3
0
def results_from_folder(folder_name, out_obj_folder, file_keyword, num_classes,
                        line_keyword):
    """Parse every matching result file and save its feature matrix.

    Each file listed by ``list_files(folder_name)`` whose name does not
    start with ``'.'`` and contains ``file_keyword`` is parsed with
    ``results_from_file``.  The result is stored through ``save_obj`` as a
    one-element list under ``out_obj_folder + <name before first '.'> +
    "_top15.out"``.

    Args:
        folder_name: Input folder prefix, concatenated directly with the
            file name.
        out_obj_folder: Output folder prefix, concatenated directly with the
            output file name.
        file_keyword: Substring a file name must contain to be processed.
        num_classes: Not used by this function; kept for the callers.
        line_keyword: Forwarded to ``results_from_file``.

    Returns:
        None.
    """
    for file_name in list_files(folder_name):
        if file_name.startswith('.'):
            continue
        if file_keyword not in file_name:
            continue
        # Call form of print works on both Python 2 and Python 3.
        print(file_name)
        feature_matrix = results_from_file(folder_name + file_name,
                                           line_keyword)
        print(feature_matrix.shape)
        out_obj_file = file_name.split('.')[0] + "_top15.out"
        save_obj([feature_matrix], out_obj_folder + out_obj_file)
Exemplo n.º 4
0
def iwb_processing_main(data_folder):
    """Load the TRAIN/TEST files of a folder and flatten them to 2-D.

    The folder ``data_folder + "raw/"`` is initialised, the file names
    containing ``"TRAIN"`` and ``"TEST"`` are located (the last match of
    each wins), both are read with ``read_iwb_data``, their shapes are
    printed, the first two attributes of the first training instance are
    plotted, and both matrices are reshaped to
    ``(rows, attr_num * attr_len)``.  String labels are mapped to numbers
    using the unique labels of the training set.

    Args:
        data_folder: Folder prefix, concatenated directly with file names.

    Raises:
        Exception: ``"file missing"`` when no TRAIN or no TEST file exists.
    """
    attr_num = 22
    output_folder = data_folder + "raw/"
    init_folder(output_folder)

    train_file = ""
    test_file = ""
    for candidate in list_files(data_folder):
        if "TRAIN" in candidate:
            train_file = candidate
        if "TEST" in candidate:
            test_file = candidate
    if not (train_file != "" and test_file != ""):
        raise Exception("file missing")

    train_path = data_folder + train_file
    test_path = data_folder + test_file
    train_x_matrix, train_y_str_vector = read_iwb_data(train_path, attr_num)
    test_x_matrix, test_y_str_vector = read_iwb_data(test_path, attr_num)

    # Both matrices must be 3-D.  The sizes taken from the test matrix
    # overwrite attr_num / attr_len and drive BOTH reshapes below.
    train_row, train_attr, attr_len = train_x_matrix.shape
    test_row, attr_num, attr_len = test_x_matrix.shape
    print(train_x_matrix.shape)
    print(test_x_matrix.shape)

    plot_2dmatrix(train_x_matrix[0, 0:2, :].T)

    flat_width = attr_num * attr_len
    train_x_matrix = train_x_matrix.reshape(train_row, flat_width)
    test_x_matrix = test_x_matrix.reshape(test_row, flat_width)

    label_list = np.unique(train_y_str_vector)
    train_y_vector = str_v_to_num_vector(train_y_str_vector, label_list)
    test_y_vector = str_v_to_num_vector(test_y_str_vector, label_list)

    train_out_file = "train_0.txt"
    test_out_file = "test_0.txt"
Exemplo n.º 5
0
def run_dcpc_main(data_folder,
                  class_column,
                  num_classes,
                  obj_folder,
                  threshold,
                  logger=None):
    """Compute the per-class DCPC of every training file and save it.

    Every file listed by ``list_files(data_folder)`` whose name contains
    ``"train_"`` is read with ``file_read_split``.  For each integer label
    between ``min(y_vector)`` and ``max(y_vector)`` (inclusive) the rows of
    that class are passed to ``computeDCPC``.  The resulting
    ``{label: dcpc}`` dict is saved, wrapped in a one-element list, to
    ``obj_folder + <train file name with '.txt' -> '_dcpc.obj'>``.

    Args:
        data_folder: Input folder prefix, concatenated directly with names.
        class_column: Not used by this function; kept for the callers.
        num_classes: Not used by this function; kept for the callers.
        obj_folder: Output folder prefix, concatenated directly with names.
        threshold: Forwarded to ``computeDCPC``.
        logger: Logger to report to; ``init_logging('')`` is used when None.

    Returns:
        None.
    """
    if logger is None:
        logger = init_logging('')

    for train_file in list_files(data_folder):
        if "train_" not in train_file:
            continue
        logger.info(train_file)
        out_obj_file = train_file.replace('.txt', '_dcpc.obj')

        x_matrix, y_vector = file_read_split(data_folder + train_file)
        min_class = min(y_vector)
        max_class = max(y_vector) + 1
        logger.info("x matrix tran after shape: " + str(x_matrix.shape))

        # Fresh dict per file: with a dict shared across files, a class that
        # is absent from this file would silently keep the DCPC computed
        # from a previously processed file.
        out_obj_dict = {}
        for label in range(min_class, max_class):
            label_index = np.where(y_vector == label)[0]
            label_x_matrix = x_matrix[label_index, :, :]
            logger.info("class: " + str(label))
            # Call form of print works on both Python 2 and Python 3.
            print("class: " + str(label))
            logger.info("x matrix tran before shape: " +
                        str(label_x_matrix.shape))
            label_dcpc = computeDCPC(label_x_matrix, threshold)
            logger.info("class: " + str(label) + " dcpc shape: " +
                        str(label_dcpc.shape))
            out_obj_dict[label] = label_dcpc
        logger.info("dcpc out obj: " + str(obj_folder + out_obj_file))
        save_obj([out_obj_dict], obj_folder + out_obj_file)
Exemplo n.º 6
0
def run_dcpc_processing(dcpc_folder, num_classes, method=0, logger=None):
    """Rank attributes per class from saved DCPC objects and save the scores.

    Every object in ``dcpc_folder`` is loaded with ``load_obj`` (its first
    element is used as a ``label -> dcpc`` mapping).  With ``method == 0``
    each class in ``range(num_classes)`` is scored by ``clever_rank``; the
    attributes sorted by descending score and the raw score dict are saved
    as ``[ranked_attrs_per_class, scores_per_class]`` under the same file
    name in the score folder.  For any other ``method`` nothing is saved.

    Args:
        dcpc_folder: Folder prefix holding the DCPC objects.  Its last
            character is dropped to build the ``"<folder>_score/"`` name, so
            it is expected to end with a path separator.
        num_classes: Number of class labels (``0 .. num_classes - 1``).
        method: Ranking method selector; only ``0`` is implemented.
        logger: Logger to report to.  A standard-library logger is used when
            None, so the declared default no longer raises AttributeError.

    Returns:
        The score folder as returned by ``init_folder``.
    """
    if logger is None:
        import logging
        logger = logging.getLogger(__name__)

    logger.info('obj folder:' + dcpc_folder)
    dcpc_list = list_files(dcpc_folder)
    logger.info(dcpc_list)
    score_folder = dcpc_folder[:-1] + "_score/"
    score_folder = init_folder(score_folder)
    for dcpc_obj in dcpc_list:
        dcpc = load_obj(dcpc_folder + dcpc_obj)[0]
        if method == 0:
            out_label_array = []
            out_label_dict = {}
            for label in range(0, num_classes):
                logger.info('class: ' + str(label))
                label_dcpc = dcpc[label]
                logger.info("dcpc shape: " + str(label_dcpc.shape))
                attr_score = clever_rank(label_dcpc, logger)
                logger.info(attr_score)
                # (attribute, score) pairs, highest score first.
                ranked_items = sorted(attr_score.items(),
                                      key=operator.itemgetter(1),
                                      reverse=True)
                sorted_attr = [item[0] for item in ranked_items]
                out_label_array.append(sorted_attr)
                out_label_dict[label] = attr_score
                logger.info(sorted_attr)
                logger.info(attr_score)
            save_obj([out_label_array, out_label_dict],
                     score_folder + dcpc_obj)

            logger.info("score obj: " + score_folder + dcpc_obj)

    return score_folder
Exemplo n.º 7
0
def pv_cnn_generation_main(parameter_file,
                           file_keyword,
                           function_keyword="pv_cnn_generation"):
    """Train and evaluate a CNN/FCN on every matching training file.

    Settings are read with ``read_pv_cnn_generation``.  For each file in the
    data folder whose name contains ``file_keyword`` a dedicated log file is
    set up, the train/test pair is loaded (the test name is the train name
    with ``'train'`` replaced by ``'test'``), both matrices are passed
    through ``train_test_transpose`` and the model selected by ``method``
    (``'fcn'`` or anything else for the CNN) is run with
    ``feature_dict = None`` and ``top_k = -1``.  The six fold results are
    saved with ``save_obj``.

    Args:
        parameter_file: Parameter file forwarded to
            ``read_pv_cnn_generation``.
        file_keyword: Substring a file name must contain to be processed.
        function_keyword: Section keyword forwarded to
            ``read_pv_cnn_generation``; also part of the log file name.

    Returns:
        None.
    """
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, method, log_folder, out_obj_folder, out_model_folder, cnn_setting_file = read_pv_cnn_generation(
        parameter_file, function_keyword)

    # Call form of print, matching cnn_classification_main in this file;
    # the former print statement is a SyntaxError on Python 3.
    print(data_keyword, data_folder, attr_num, attr_len, num_classes,
          start_class, class_column, class_id, obj_folder, method, log_folder,
          out_obj_folder, out_model_folder, cnn_setting_file)

    log_folder = init_folder(log_folder)
    out_obj_folder = init_folder(out_obj_folder)
    out_model_folder = init_folder(out_model_folder)

    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len,
                                 class_column)

    file_list = list_files(data_folder)
    obj_list = list_files(obj_folder)
    # NOTE: file_count is never incremented, so the shapes below are logged
    # for every file and every model file name carries "_count0".
    file_count = 0

    # The class column and header flag read from the parameter file are
    # overridden here for the data files.
    class_column = 0
    header = True

    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
    cnn_setting.out_obj_folder = out_obj_folder
    cnn_setting.out_model_folder = out_model_folder
    cnn_setting.feature_method = 'save'
    cnn_setting.eval_method = 'f1'
    init_folder(out_obj_folder)
    init_folder(out_model_folder)

    result_obj_folder = obj_folder + method + "_result_folder"
    result_obj_folder = init_folder(result_obj_folder)

    delimiter = ' '
    loop_count = -1
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(
            class_id) + '_' + method + '.log'

        print("log file: " + log_file)

        # One logger (and log file) per processed training file.
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('method: ' + method)
        logger.info('============')

        test_file = train_file.replace('train', 'test')

        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, attr_num = train_test_file_reading_with_attrnum(
            data_folder + train_file, data_folder + test_file, class_column,
            delimiter, header)
        if file_count == 0:
            logger.info('train matrix shape: ' + str(train_x_matrix.shape))
            logger.info('train label shape: ' + str(train_y_vector.shape))
            logger.info('test matrix shape: ' + str(test_x_matrix.shape))
            logger.info('test label shape: ' + str(test_y_vector.shape))

        train_x_matrix = train_test_transpose(train_x_matrix, attr_num,
                                              attr_len, False)
        test_x_matrix = train_test_transpose(test_x_matrix, attr_num, attr_len,
                                             False)
        # Call the projected feature function here, just need to set feature_dict = None
        feature_dict = None
        top_k = -1
        model_save_file = file_key + '_count' + str(file_count) + '_' + method

        if method == 'fcn':
            fold_accuracy, fold_f1_value, fold_predict_y, fold_train_time, fold_test_time, fold_predict_matrix = run_feature_projected_ijcnn_fcn(
                train_x_matrix, train_y_vector, test_x_matrix, test_y_vector,
                data_stru, cnn_setting, feature_dict, top_k, model_save_file,
                class_id, logger)
        else:
            fold_accuracy, fold_f1_value, fold_predict_y, fold_train_time, fold_test_time, fold_predict_matrix = run_feature_projected_cnn(
                train_x_matrix, train_y_vector, test_x_matrix, test_y_vector,
                data_stru, cnn_setting, feature_dict, top_k, model_save_file,
                class_id, logger)

        logger.info("Fold F1: " + str(fold_f1_value))
        logger.info(method + ' fold training time (sec):' +
                    str(fold_train_time))
        logger.info(method + ' fold testing time (sec):' + str(fold_test_time))
        logger.info(method + ' fold accuracy: ' + str(fold_accuracy))
        logger.info("save obj to " + result_obj_folder + file_key +
                    "_all_feature_" + method + "_result.ckpt")
        save_obj([
            fold_accuracy, fold_f1_value, fold_predict_y, fold_train_time,
            fold_test_time, fold_predict_matrix
        ], result_obj_folder + file_key + "_all_feature_" + method +
                 "_result.ckpt")
def cnn_classification_main(parameter_file,
                            file_keyword,
                            function_keyword="cnn_classification"):
    """Run CNN classification on every matching training file.

    Settings are read with ``read_all_feature_classification``.  For each
    file in the data folder whose name contains ``file_keyword`` the
    companion ``valid``/``test`` files are looked up (an empty string is
    passed when one does not exist), the data group is loaded and
    pre-processed, a dedicated log file is set up and ``run_cnn`` is
    executed.  The class-averaged accuracy and the overall accuracy of the
    predictions are logged together with the run times.

    Args:
        parameter_file: Parameter file forwarded to
            ``read_all_feature_classification``.
        file_keyword: Substring a file name must contain to be processed.
        function_keyword: Section keyword forwarded to
            ``read_all_feature_classification``; also part of the log name.

    Returns:
        None.
    """
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, method, log_folder, out_obj_folder, out_model_folder, cnn_setting_file = read_all_feature_classification(
        parameter_file, function_keyword)

    print(data_keyword, data_folder, attr_num, attr_len, num_classes,
          start_class, class_column, class_id, obj_folder, method, log_folder,
          out_obj_folder, out_model_folder, cnn_setting_file)

    log_folder = init_folder(log_folder)
    out_obj_folder = init_folder(out_obj_folder)
    out_model_folder = init_folder(out_model_folder)

    file_list = list_files(data_folder)
    # NOTE: file_count is never incremented, so the training shapes are
    # logged for every processed file.
    file_count = 0

    # The class column and header flag read from the parameter file are
    # overridden here for the data files.
    class_column = 0
    header = True

    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
    cnn_setting.out_obj_folder = out_obj_folder
    cnn_setting.out_model_folder = out_model_folder
    init_folder(out_obj_folder)
    init_folder(out_model_folder)

    result_obj_folder = obj_folder + method + "_result_folder"
    result_obj_folder = init_folder(result_obj_folder)

    delimiter = ' '
    loop_count = -1
    saver_file_profix = ""
    attention_type = -1
    cnn_setting.attention_type = attention_type
    trans_bool = False  # True: means ins * attr_len * 1 * attr_num
    # False: means ins * attr_len * attr_num * 1
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        saver_file_profix = file_key + "_atten" + str(attention_type)

        # Missing validation / test files are signalled with ''.
        valid_file = data_folder + train_file.replace('train', 'valid')
        if not os.path.isfile(valid_file):
            valid_file = ''

        test_file = data_folder + train_file.replace('train', 'test')
        if not os.path.isfile(test_file):
            test_file = ''

        data_group, attr_num = train_test_file_reading(
            data_folder + train_file, test_file, valid_file, class_column,
            delimiter, header)
        data_group_processing(data_group, attr_num, trans_bool)
        data_stru = data_group.gene_data_stru()
        data_group.data_check(data_stru.num_classes, data_stru.min_class)

        # Evaluation tag used only in the log file name.
        if cnn_setting.eval_method == "accuracy":
            cnn_eval_key = "acc"
        elif num_classes > 2:
            cnn_eval_key = "acc_batch"
        else:
            cnn_eval_key = "f1"
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(
            data_stru.min_class
        ) + "_" + str(data_stru.num_classes) + "_act" + str(
            cnn_setting.activation_fun
        ) + "_" + cnn_eval_key + "_attention" + str(attention_type) + '.log'

        print("log file: " + log_file)

        # One logger (and log file) per processed training file.
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('cnn setting:\n ' + cnn_setting.to_string())
        logger.info('method: ' + method)
        logger.info('============')

        if file_count == 0:
            logger.info('train matrix shape: ' +
                        str(data_group.train_x_matrix.shape))
            logger.info('train label shape: ' +
                        str(data_group.train_y_vector.shape))

        logger.info(data_group.train_x_matrix[0, 0:3, 0:2, 0])
        pred_y_prob, train_run_time, test_run_time, cnn_model = run_cnn(
            cnn_setting, data_group, saver_file_profix, logger)
        pred_y_vector = np.argmax(pred_y_prob, axis=1)
        avg_acc, ret_str = averaged_class_based_accuracy(
            pred_y_vector, data_group.test_y_vector)
        acc_value = accuracy_score(data_group.test_y_vector, pred_y_vector,
                                   True)
        # Report the class-averaged accuracy here; previously this line
        # logged acc_value and avg_acc was computed but never reported.
        logger.info("Averaged acc: " + str(avg_acc))
        logger.info(ret_str)
        logger.info("Fold eval value: " + str(acc_value))
        logger.info(method + ' fold training time (sec):' +
                    str(train_run_time))
        logger.info(method + ' fold testing time (sec):' + str(test_run_time))
        logger.info("save obj to " + cnn_model.saver_file)
Exemplo n.º 9
0
# Our libraries
    # Fragment of a larger routine whose header is not shown here.
    # Extend the module search path (`path` is presumably sys.path — confirm)
    # so that the modules imported below can be found.
    path.append(program_dir)
    path.append(submission_dir)
    # path.append (root_dir + "baselines")
    import data_io  # general purpose input/output functions
    from data_io import vprint  # print only in verbose mode
    from data_manager import DataManager  # load/save data and get info about them
    from internal_rep.complexity import complexity  # complexity measure
    # from best.complexity import complexity

    # True when `complexity` declares a parameter named 'program_dir'.
    # NOTE(review): the flag is named after submission_dir but the argument
    # tested is 'program_dir' — confirm which one is intended.
    should_pass_submission_dir = 'program_dir' in inspect.getfullargspec(
        complexity).args

    # Debug output at verbosity >= 4: list the files of the parent directory.
    if debug_mode >= 4:
        print('File structure')
        data_io.list_files('..')

    # Same verbosity threshold as above, kept as a separate check.
    if debug_mode >= 4:  # Show library version and directory structure
        data_io.show_dir(".")

    # Move old results and create a new output directory (useful if you run locally)
    if save_previous_results:
        data_io.mvdir(output_dir, output_dir + '_' + the_date)
    data_io.mkdir(output_dir)

    #### INVENTORY DATA (and sort dataset names alphabetically)
    # NOTE(review): no sorting happens in the lines shown here; os.listdir
    # returns entries in arbitrary order.
    datanames = os.listdir(input_dir)
    # change input dir to compensate for the single file unzipping
    if 'input_data' in datanames:
        input_dir = os.path.join(input_dir, 'input_data')
        datanames = os.listdir(input_dir)
Exemplo n.º 10
0
def run_channel_mask_main(data_folder,
                          log_folder,
                          obj_folder,
                          shap_k=10,
                          shap_min=2,
                          shap_max=3,
                          file_key="train_",
                          fun_key="_mask_gene"):
    """Learn a one-vs-rest channel mask for every class of every train file.

    Every file in ``data_folder`` whose name contains ``file_key`` is loaded
    together with its test companion (``'train_'`` replaced by ``'test_'``).
    Both matrices are reshaped to ``(rows, attr_num, attr_len)``.  For each
    class label, visited from the largest to the smallest, binary labels are
    built and ``run_channel_mask`` is executed; the resulting mask is saved
    as ``[mask_value]`` to
    ``obj_folder + <out name> + "_class<label>.obj"``.

    Args:
        data_folder: Input folder prefix, concatenated directly with names.
        log_folder: Log folder prefix, concatenated directly with names.
        obj_folder: Output folder prefix, concatenated directly with names.
        shap_k: Forwarded to ``run_channel_mask``; part of the file names.
        shap_min: Forwarded to ``run_channel_mask``; part of the file names.
        shap_max: Forwarded to ``run_channel_mask``; part of the file names.
        file_key: Substring a file name must contain to be processed.
        fun_key: Tag inserted into the log and output file names.

    Returns:
        None.
    """
    file_list = list_files(data_folder)
    for train_file in file_list:
        if file_key not in train_file:
            continue
        this_keyword = train_file.replace('.txt', '')
        log_file = this_keyword + fun_key + "_shapNum" + str(
            shap_k) + "_shapMin" + str(shap_min) + "_shapMax" + str(
                shap_max) + "_all_class.log"
        out_obj_file = this_keyword + fun_key + "_shapNum" + str(
            shap_k) + "_shapMin" + str(shap_min) + "_shapMax" + str(shap_max)
        logger = setup_logger(log_folder + log_file)
        # Call form of print works on both Python 2 and Python 3.
        print("log file: " + log_folder + log_file)
        print("obj file: " + obj_folder + out_obj_file)
        logger.info(log_folder + log_file)
        test_file = train_file.replace('train_', 'test_')
        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, attr_num = train_test_file_reading_with_attrnum(
            data_folder + train_file, data_folder + test_file)

        train_row, train_col = train_x_matrix.shape
        test_row, test_col = test_x_matrix.shape
        # Floor division keeps attr_len an integer on Python 3 as well;
        # reshape rejects a float dimension.
        attr_len = train_col // attr_num
        train_x_matrix = train_x_matrix.reshape(train_row, attr_num, attr_len)
        test_x_matrix = test_x_matrix.reshape(test_row, attr_num, attr_len)
        logger.info("train x matrix: " + str(train_x_matrix.shape))
        logger.info("test x matrix: " + str(test_x_matrix.shape))

        train_keep_len = matrix_keep_len_gene(train_x_matrix)
        test_keep_len = matrix_keep_len_gene(test_x_matrix)

        min_class = min(train_y_vector)
        max_class = max(train_y_vector) + 1
        logger.info("x matrix tran after shape: " + str(train_x_matrix.shape))
        # Visit the labels from the largest down to min_class.  The former
        # "max_class - label - 1" remapping produced the same order only
        # when min_class == 0 and pointed at the wrong labels otherwise.
        for label in range(max_class - 1, min_class - 1, -1):
            # One-vs-rest targets: 1 for the current class, 0 for the rest.
            label_train_y_vector = np.where(train_y_vector == label, 1, 0)
            label_test_y_vector = np.where(test_y_vector == label, 1, 0)
            label_train_y_matrix = y_vector_to_matrix(label_train_y_vector, 2)
            label_test_y_matrix = y_vector_to_matrix(label_test_y_vector, 2)
            logger.info("class: " + str(label))
            test_eval_value, mask_value = run_channel_mask(
                train_x_matrix, label_train_y_matrix, train_keep_len,
                test_x_matrix, label_test_y_matrix, test_keep_len, shap_k,
                shap_min, shap_max, logger)
            logger.info("final for class " + str(label))
            logger.info("final acc: " + str(test_eval_value))
            logger.info("final mask: " + str(mask_value.shape))
            logger.info("out obj saved to " + obj_folder + out_obj_file +
                        "_class" + str(label) + ".obj")
            save_obj([mask_value], obj_folder + out_obj_file + "_class" +
                     str(label) + ".obj")
Exemplo n.º 11
0
def multi_proj_feature_classification(
        parameter_file,
        file_keyword,
        function_keyword="multi_proj_feature_classification"):
    """Dispatch projected-feature classification according to ``method``.

    Settings are read with ``read_feature_classification`` and the log
    folder is initialised.  When the configured method is ``'cnn'`` the work
    is delegated to ``projected_cnn_classification_main``; for every other
    method the function returns ``False``.

    Args:
        parameter_file: Parameter file forwarded to
            ``read_feature_classification``.
        file_keyword: Substring selecting the training files.
        function_keyword: Section keyword forwarded to
            ``read_feature_classification``.

    Returns:
        The result of ``projected_cnn_classification_main`` for the
        ``'cnn'`` method, otherwise ``False``.
    """
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, top_k, method, log_folder, cnn_obj_folder, cnn_temp_folder, cnn_setting_file = read_feature_classification(
        parameter_file, function_keyword)
    log_folder = init_folder(log_folder)
    if method == 'cnn':
        return projected_cnn_classification_main(parameter_file, file_keyword)

    else:
        # Need to check the rest
        return False

    # NOTE: everything below is unreachable because both branches above
    # return.  It is kept as the pending implementation for the non-CNN
    # methods; only its print statements were converted to the call form so
    # that the file still parses on Python 3.
    print(data_keyword, data_folder, attr_num, attr_len, num_classes,
          start_class, class_column, class_id, obj_folder, top_k, method,
          log_folder, cnn_obj_folder, cnn_temp_folder, cnn_setting_file)
    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len,
                                 class_column)
    print(obj_folder)
    file_list = list_files(data_folder)
    obj_list = list_files(obj_folder)

    class_column = 0
    header = True

    save_obj_folder = obj_folder[:-1] + "_" + method + "_out"
    save_obj_folder = init_folder(save_obj_folder)

    delimiter = ' '
    loop_count = -1
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(
            class_id) + '_top' + str(top_k) + '_' + method + '.log'

        print("log file: " + log_file)
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('method: ' + method)
        logger.info('============')

        # First object file whose name contains the training file key.
        found_obj_file = ''
        for obj_file in obj_list:
            if file_key in obj_file:
                found_obj_file = obj_file
                break
        if found_obj_file == '':
            raise Exception('No obj file found')

        print(found_obj_file)
        found_obj_file = obj_folder + found_obj_file

        feature_array = load_obj(found_obj_file)[0]
        feature_array = np.array(feature_array)
        logger.info("feature array shape: " + str(feature_array.shape))

        test_file = train_file.replace('train', 'test')

        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, attr_num = train_test_file_reading_with_attrnum(
            data_folder + train_file, data_folder + test_file, class_column,
            delimiter, header)

        if loop_count == 0:
            logger.info('train matrix shape: ' + str(train_x_matrix.shape))
            logger.info('train label shape: ' + str(train_y_vector.shape))
            logger.info('test matrix shape: ' + str(test_x_matrix.shape))
            logger.info('test label shape: ' + str(test_y_vector.shape))

        train_x_matrix = train_test_transpose(train_x_matrix, attr_num,
                                              attr_len, False)
        test_x_matrix = train_test_transpose(test_x_matrix, attr_num, attr_len,
                                             False)

        data_stru.attr_num = top_k
        fold_accuracy, fold_f1_value, fold_predict_y, fold_train_time, fold_test_time, fold_predict_matrix = run_feature_projected_classification(
            train_x_matrix, train_y_vector, test_x_matrix, test_y_vector,
            feature_array, top_k, method, class_id, logger)

        logger.info("Fold F1: " + str(fold_f1_value))
        logger.info(method + ' fold training time (sec):' +
                    str(fold_train_time))
        logger.info(method + ' fold testing time (sec):' + str(fold_test_time))
        logger.info(method + ' fold accuracy: ' + str(fold_accuracy))
        logger.info("save obj to " + save_obj_folder + file_key + "_" +
                    method + "_project_" + method + "_result.ckpt")
        save_obj([
            fold_accuracy, fold_f1_value, fold_predict_y, fold_train_time,
            fold_test_time, fold_predict_matrix
        ], save_obj_folder + file_key + "_" + method + "_project_" + method +
                 "_result.ckpt")
Exemplo n.º 12
0
def cnn_load_main(parameter_file, file_keyword, function_keyword="cnn_classification"):
    """Load a saved CNN per training fold and analyse its kept feature tensor.

    Settings are read with ``read_all_feature_classification``.  For every
    file in ``data_folder`` whose name contains *file_keyword* the function:

    1. reads the train file and the matching test file (same name with
       ``train`` replaced by ``test``) and transposes both matrices;
    2. looks in ``out_model_folder`` for a file whose name starts with the
       fold key and loads it with ``load_model``;
    3. evaluates ``keeped_feature_list[0]`` on the train and test data,
       passes both through ``top_attr_x_matrix`` and closes the session;
    4. trains one two-class network per class with ``run_nn`` on a subset of
       kernels, combines the per-class scores with an argmax and prints the
       original and the new accuracies;
    5. calls ``load_result_analysis``.

    NOTE(review): right after step 5 the body evaluates the bare name
    ``sdfds``, which is not assigned anywhere in this function.  Unless a
    module-level global of that name exists, this raises ``NameError`` on the
    first matching fold, so the remainder of the loop body looks like
    unreachable scratch code.  It also uses several names that are never
    assigned in this function and runs ops on a session that was already
    closed.

    Args:
        parameter_file: Passed through to ``read_all_feature_classification``.
        file_keyword: Substring a file name in the data folder must contain
            to be processed.
        function_keyword: Passed through to
            ``read_all_feature_classification`` and used in the log file name.

    Raises:
        Exception: If no file in ``out_model_folder`` starts with the fold key.
        NameError: Presumably, at the first debug stop (see note above).
    """
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, method, log_folder, out_obj_folder, out_model_folder, cnn_setting_file = read_all_feature_classification(parameter_file, function_keyword)

    print(data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, method, log_folder, out_obj_folder, out_model_folder, cnn_setting_file)

    log_folder = init_folder(log_folder)
    out_obj_folder = init_folder(out_obj_folder)
    out_model_folder = init_folder(out_model_folder)
    
    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len, class_column)

    file_list = list_files(data_folder)
    file_count = 0

    # The class column read from the parameter file is overridden here, after
    # data_stru has already been built with the original value.
    class_column = 0
    header = True

    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
    cnn_setting.out_obj_folder = out_obj_folder
    cnn_setting.out_model_folder = out_model_folder
    cnn_setting.full_feature_num = 400
    # NOTE(review): both folders were already passed through init_folder
    # above; these two calls discard the return value.
    init_folder(out_obj_folder)
    init_folder(out_model_folder)
    
    print (out_model_folder)
    model_file_list = list_files(out_model_folder)

    result_obj_folder = obj_folder + method +"_result_folder"
    result_obj_folder = init_folder(result_obj_folder)

    logger = setup_logger('')

    delimiter = ' '
    loop_count = -1
    saver_file_profix = ""
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        saver_file_profix = file_key
        test_file = train_file.replace('train', 'test')

        #train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, attr_num = train_test_file_reading(data_folder + train_file, data_folder + test_file, '', class_column, delimiter, header)
        # attr_num from the parameter file is replaced by the value returned here.
        data_group, attr_num = train_test_file_reading(data_folder + train_file, data_folder + test_file, '', class_column, delimiter, header)
        train_x_matrix = data_group.train_x_matrix
        train_y_vector = data_group.train_y_vector
        test_x_matrix = data_group.test_x_matrix
        test_y_vector = data_group.test_y_vector

        train_x_matrix = train_test_transpose(train_x_matrix, attr_num, attr_len, False)
        test_x_matrix = train_test_transpose(test_x_matrix, attr_num, attr_len, False)
        train_y_matrix = y_vector_to_matrix(train_y_vector, num_classes)
        test_y_matrix = y_vector_to_matrix(test_y_vector, num_classes)

        # Take the first model file whose name starts with the fold key; keep
        # only the part before the first '.' and re-append ".ckpt".
        found_model_file = ""
        for model_file in model_file_list:
            if model_file.startswith(file_key):
                model_file = model_file.split('.')[0]
                found_model_file = out_model_folder + model_file + ".ckpt"
                break
        if found_model_file == "":
            raise Exception("No model object file found!!!")
        print(found_model_file)
        cnn_session, logits_out, train_x_placeholder, keep_prob_placeholder, keeped_feature_list = load_model(found_model_file, data_stru, cnn_setting, logger)

        # Evaluate the first kept feature tensor on both splits with dropout
        # disabled (keep probability 1.0).
        # presumably this is the output of the last convolution layer — TODO confirm against load_model
        last_conv_tensor = keeped_feature_list[0]
        train_last_conv = cnn_session.run(last_conv_tensor, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        test_last_conv = cnn_session.run(last_conv_tensor, feed_dict={train_x_placeholder: test_x_matrix, keep_prob_placeholder: 1.0})
        drop_num = 10
        # Debug prints of the second test instance before and after
        # top_attr_x_matrix (helper defined elsewhere; its effect is not
        # visible here).
        print(np.squeeze(test_last_conv[1, :, :, :]))
        test_last_conv = top_attr_x_matrix(test_last_conv, drop_num)
        print(np.squeeze(test_last_conv[1, :, :, :]))
        train_last_conv = top_attr_x_matrix(train_last_conv, drop_num)

        # Accuracy op: fraction of rows where the argmax of the logits equals
        # the argmax of the fed label matrix.
        output_y_placeholder = tf.placeholder(tf.float32, [None, num_classes])
        actual = tf.argmax(output_y_placeholder, axis=1)
        prediction = tf.argmax(logits_out, axis=1)
        correct_prediction = tf.equal(actual, prediction)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        ori_pred_y_vector = cnn_session.run(prediction, feed_dict={train_x_placeholder: test_x_matrix, keep_prob_placeholder: 1.0})
        test_accuracy = cnn_session.run(accuracy, feed_dict={train_x_placeholder: test_x_matrix, keep_prob_placeholder: 1.0, output_y_placeholder: test_y_matrix})
        # The session is closed here; every later use of cnn_session in this
        # function happens after this point.
        cnn_session.close()
        
        kernel_eval_matrix, ref_kernel_eval_matrix = last_conv_analysis(train_last_conv, train_y_vector)
        print(kernel_eval_matrix.shape)
        print(kernel_eval_matrix)
        train_ins_len = len(train_y_vector)
        test_ins_len = len(test_y_vector)
        # Hard-coded settings for the per-class networks trained below.
        batch_size = 100
        layer_list = np.array([400])
        max_epoch = 10
        stop_threshold = 0.99
        activation_fun = 3
        std_value = 0.02
        eval_method = "acc"
        saver_file = './test_1.save'
        nn_setting = nn_parameters(layer_list, batch_size, max_epoch, stop_threshold, activation_fun, std_value, eval_method, saver_file)
        all_pred_prob = []
        for c in range(num_classes):
            # One-vs-rest labels: 1 where the label equals c, 0 elsewhere.
            train_y_vector_class = np.zeros((train_ins_len))
            index_class = np.where(train_y_vector==c)[0]
            train_y_vector_class[index_class] = 1
            train_y_m_class = y_vector_to_matrix(train_y_vector_class, 2)

            test_y_vector_class = np.zeros((test_ins_len))
            index_class = np.where(test_y_vector==c)[0]
            test_y_vector_class[index_class] = 1
            test_y_m_class = y_vector_to_matrix(test_y_vector_class, 2)
            keep_num = 5
            # The union of the two index lists is only printed; kernel_index
            # is overwritten further down before it is used for slicing.
            kernel_index = kernel_eval_matrix[c, 0:keep_num]
            ref_kernel_index = ref_kernel_eval_matrix[c, 0:keep_num]
            print("kernel index " + str(kernel_index))
            print("ref kernel index " + str(ref_kernel_index))
            kernel_index = np.concatenate((kernel_index, ref_kernel_index), axis=0)
            print("union index " + str(kernel_index))
            kernel_index = np.unique(kernel_index)
            print("unique index " + str(kernel_index))

            # Only the first keep_num entries of the reference matrix row are
            # actually used to select channels on the last axis.
            kernel_index = ref_kernel_eval_matrix[c, 0:keep_num]
            train_x_class = train_last_conv[:, :, :, kernel_index]
            test_x_class = test_last_conv[:, :, :, kernel_index]
            print(train_x_class.shape)
            # NOTE(review): 45 is hard-coded; assumes each selected kernel
            # flattens to 45 values per instance — confirm against the model.
            reshape_col = 45 * len(kernel_index)
            train_x_class = train_x_class.reshape((train_ins_len, reshape_col))
            test_x_class = test_x_class.reshape((test_ins_len, reshape_col))
            
            c_eval_value, c_train_time, c_test_time, c_predict_proba = run_nn(train_x_class, train_y_m_class, test_x_class, test_y_m_class, nn_setting)
            # Per-class score: column 1 minus column 0 of the returned matrix.
            all_pred_prob.append(c_predict_proba[:, 1]-c_predict_proba[:, 0])
        all_pred_prob = np.array(all_pred_prob)
        print(all_pred_prob.shape)
        # Rows of all_pred_prob are classes, so the argmax over axis 0 picks
        # the class with the largest score for each column.
        pred_vector = np.argmax(all_pred_prob, axis=0)
        print(pred_vector)
        # Debug prints of the first three columns; these index errors out if
        # all_pred_prob has fewer than three columns.
        print(all_pred_prob[:, 0])
        print(all_pred_prob[:, 1])
        print(all_pred_prob[:, 2])

        final_accuracy = accuracy_score(pred_vector, test_y_vector)

        avg_acc, ret_str = averaged_class_based_accuracy(ori_pred_y_vector, test_y_vector)
        print("original avg acc" + str(avg_acc))
        print("original accuracy: " + str(test_accuracy))
        print(ret_str)
        avg_acc, ret_str = averaged_class_based_accuracy(pred_vector, test_y_vector)
        print("avg acc" + str(avg_acc))
        print("new accuracy: " + str(final_accuracy))
        print(ret_str)

        load_result_analysis(all_pred_prob, test_y_vector)

        # NOTE(review): bare name used as a debug stop.  It is not assigned in
        # this function, so unless a global `sdfds` exists this raises
        # NameError and nothing below runs.
        sdfds
        output_y_placeholder = tf.placeholder(tf.float32, [None, num_classes])
        actual = tf.argmax(output_y_placeholder, axis=1)
        prediction = tf.argmax(logits_out, axis=1)
        correct_prediction = tf.equal(actual, prediction)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        test_eval_value = accuracy.eval(feed_dict={train_x_placeholder: test_x_matrix, output_y_placeholder: test_y_matrix, keep_prob_placeholder: 1.0})
        print("fisrt")
        print(test_eval_value)

        


        conv_count = 1
        drop_ratio = 0.1

        #conv_variable_up_main(cnn_session, conv_count, drop_ratio)


        # Kernel-drop experiment: for each of 50 kernel indices, pass copies
        # of the original "conv_w_0:0" / "conv_b_0:0" values through
        # conv_variable_up, assign the result back and re-evaluate accuracy.
        weight_name = "conv_w_" + str(0) + ":0"
        bias_name = "conv_b_" + str(0) + ":0"
        ori_weight_variable = tf.get_default_graph().get_tensor_by_name(weight_name)
        ori_bias_variable = tf.get_default_graph().get_tensor_by_name(bias_name)
        weight_variable = tf.get_default_graph().get_tensor_by_name(weight_name)
        bias_variable = tf.get_default_graph().get_tensor_by_name(bias_name)
        ori_weight_variable = cnn_session.run(weight_variable)
        ori_bias_variable = cnn_session.run(bias_variable)
        train_drop_acc = []
        test_drop_acc = []
        for drop_i in range(50):
            drop_weight_variable = np.copy(ori_weight_variable)
            drop_bias_variable = np.copy(ori_bias_variable)
            drop_index = []
            drop_index.append(drop_i)
            
            up_fir_weight, up_fir_bias = conv_variable_up(drop_weight_variable, drop_bias_variable, drop_index)
            weight_assign = tf.assign(weight_variable, up_fir_weight)
            bias_assign = tf.assign(bias_variable, up_fir_bias)
            cnn_session.run(weight_assign)
            cnn_session.run(bias_assign)
            up_bias_variable = tf.get_default_graph().get_tensor_by_name(bias_name)
            up_bias_variable_val = cnn_session.run(bias_variable)
            train_eval_value = accuracy.eval(feed_dict={train_x_placeholder: train_x_matrix, output_y_placeholder: train_y_matrix, keep_prob_placeholder: 1.0})
            train_drop_acc.append(train_eval_value)
            test_eval_value = accuracy.eval(feed_dict={train_x_placeholder: test_x_matrix, output_y_placeholder: test_y_matrix, keep_prob_placeholder: 1.0})
            test_drop_acc.append(test_eval_value)
            print ("Drop " + str(drop_i))
            print(train_eval_value)
            print(test_eval_value)
        
        # NOTE(review): train_drop_acc and test_drop_acc are Python lists,
        # which have no argsort() method.
        print(train_drop_acc)
        print(train_drop_acc.argsort())
        print(test_drop_acc)
        print(test_drop_acc.argsort())

        # Second bare-name debug stop.
        sdfs
        print("HERE")



        # NOTE(review): kernel_dist and keep_index are never assigned in this
        # function, and fir_weight_variable_val is first assigned further down.
        fir_weight_variable_val = np.squeeze(fir_weight_variable_val)
        kernel_dist_val = cnn_session.run(kernel_dist)
        keep_index_val = cnn_session.run(keep_index)
        print(fir_weight_variable_val.shape)
        print(np.amax(fir_weight_variable_val, axis=1))
        print(np.amin(fir_weight_variable_val, axis=1))
        print(np.mean(fir_weight_variable_val, axis=1))
        mean_row = np.mean(fir_weight_variable_val, axis=-1)
        print(mean_row.shape)
        dist_list = []
        for r in range(40):
            row = fir_weight_variable_val[:, r]
            dist_list.append(np.linalg.norm(row-mean_row))
        print (dist_list)
        print(kernel_dist_val)
        print(keep_index_val)
        print(sorted(dist_list))
        print("!!!")
        #conv_variable_up(fir_weight_variable_val, fir_bias_variable_val)
        
        # Third bare-name debug stop.
        sdfsd

        # NOTE(review): this call passes five arguments and unpacks five
        # values, unlike the call near the top of the loop (six arguments,
        # two return values).
        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, attr_num = train_test_file_reading(data_folder + train_file, data_folder + test_file, class_column, delimiter, header)
        train_x_matrix = train_test_transpose(train_x_matrix, attr_num, attr_len, False)
        test_x_matrix = train_test_transpose(test_x_matrix, attr_num, attr_len, False)

        # Only the first test instance is fed from here on.
        train_x_matrix = test_x_matrix[0:1, :, :, :]
        #plot_2dmatrix(np.squeeze(train_x_matrix)[:, 0:5])
        
        # NOTE(review): fir_weight_variable and fir_bias_variable are never
        # assigned in this function.
        fir_out_tensor = tf.nn.conv2d(train_x_placeholder, fir_weight_variable, strides=[1, 1, 1, 1], padding='VALID') + fir_bias_variable
        fir_out_tensor = tf.nn.relu(fir_out_tensor)

        print(fir_out_tensor.get_shape())
        # Max over axis 1 twice (the second call reduces what was originally
        # axis 2), then mean over axis 0, leaving one value per channel.
        fir_analysis_tensor = tf.reduce_max(fir_out_tensor, [1])
        print(fir_analysis_tensor.get_shape())
        fir_analysis_tensor = tf.reduce_max(fir_analysis_tensor, [1])
        fir_analysis_tensor = tf.reduce_mean(fir_analysis_tensor, [0])

        top_k_indices = tf.nn.top_k(fir_analysis_tensor, 10).indices
        top_k_values = tf.nn.top_k(fir_analysis_tensor, 10).values
        top_fir_out_tensor = tf.gather(fir_out_tensor, top_k_indices, axis=3)

        sec_weight_variable = tf.get_default_graph().get_tensor_by_name("conv_w_1:0")
        sec_bias_variable = tf.get_default_graph().get_tensor_by_name("conv_b_1:0")
        sec_out_tensor = tf.nn.conv2d(fir_out_tensor, sec_weight_variable, strides=[1, 1, 1, 1], padding='VALID') + sec_bias_variable
        sec_out_tensor = tf.nn.relu(sec_out_tensor)
        sec_weight_var_val = cnn_session.run(sec_weight_variable)
        #print(np.squeeze(sec_weight_var_val))
        #sdfds

        #plot_2dmatrix(fir_weight_var_val[:, 4])
        #sdf
        #print(fir_weight_var_val.T)
        fir_out_tensor_val = cnn_session.run(fir_out_tensor, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        print(fir_out_tensor_val.shape)

        top_fir_out_tensor = cnn_session.run(top_fir_out_tensor, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        print(top_fir_out_tensor.shape)

        fir_analysis_tensor_val = cnn_session.run(fir_analysis_tensor, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        print(fir_analysis_tensor.shape)

        top_k_indices_val = cnn_session.run(top_k_indices, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        top_k_values_val = cnn_session.run(top_k_values, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        fir_weight_variable_val = cnn_session.run(fir_weight_variable)
        fir_weight_variable_val = np.squeeze(fir_weight_variable_val)
        print(fir_weight_variable_val.shape)
        print(fir_analysis_tensor_val)
        fir_sort_in = np.argsort(fir_analysis_tensor_val)
        print(fir_sort_in)
        print(top_k_indices_val)
        print(top_k_values_val)
        plot_2dmatrix(fir_weight_variable_val[:, fir_sort_in[-10:]])
        # Fourth bare-name debug stop.
        sdfd





        # For every instance and each of 50 kernels: print the attributes
        # whose maximum activation is exactly 0, the attribute with the
        # largest maximum, and the attributes sorted by their maximum.
        for n in range(len(fir_out_tensor_val)):
            for k in range(50):
                ret_str = "k" + str(k) + ": "
                kernel_max = -1
                max_attr = -1
                max_attr_list = []
                for a in range(attr_num):
                    attr_max = max(fir_out_tensor_val[n, :, a, k])
                    max_attr_list.append(attr_max)
                    if attr_max > kernel_max:
                        kernel_max = attr_max
                        max_attr = a
                    if attr_max == 0:
                        ret_str = ret_str + str(a) + " "
                print(ret_str)
                print("max attr " + str(max_attr))
                print(sorted(range(len(max_attr_list)), key=lambda k: max_attr_list[k]))
                print("======")
        print("label " + str(train_y_vector[0]))
        # The variable is reused for the second-layer output from here on.
        fir_out_tensor_val = cnn_session.run(sec_out_tensor, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        print(fir_out_tensor_val.shape)

        # Fifth bare-name debug stop.
        sdf

        # Same per-kernel report as above, over 40 kernels of the array
        # obtained from sec_out_tensor.
        for n in range(len(fir_out_tensor_val)):
            for k in range(40):
                ret_str = "k" + str(k) + ": "
                kernel_max = -1
                max_attr = -1
                max_attr_list = []
                for a in range(attr_num):
                    attr_max = max(fir_out_tensor_val[n, :, a, k])
                    max_attr_list.append(attr_max)
                    if attr_max > kernel_max:
                        kernel_max = attr_max
                        max_attr = a
                    if attr_max == 0:
                        ret_str = ret_str + str(a) + " "
                print(ret_str)
                print("max attr " + str(max_attr))
                print(sorted(range(len(max_attr_list)), key=lambda k: max_attr_list[k]))
                print("======")
        # Sixth bare-name debug stop.
        sdf

        # NOTE(review): fir_out_mean is never assigned in this function.
        fir_out_mean_val = cnn_session.run(fir_out_mean, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        #fir_out_mean_val = np.squeeze(fir_out_mean_val)
        print(fir_out_mean_val.shape)

        plot_2dmatrix(np.squeeze(fir_out_mean_val[:, :, 0:5]))

        # Seventh bare-name debug stop.
        sdfd
        # NOTE(review): fir_weight_var_val is never assigned in this function.
        plot_2dmatrix(fir_weight_var_val)
        



        
        # From here the body sets up a per-fold log file and trains a CNN
        # with run_cnn.
        min_class = min(train_y_vector)
        max_class = max(train_y_vector)
        num_classes = max_class - min_class + 1
        if cnn_setting.eval_method == "accuracy":
            cnn_eval_key = "acc"
        elif num_classes > 2:
            cnn_eval_key = "acc_batch"
        else:
            cnn_eval_key = "f1"
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(min_class)+"_" + str(max_class) + "_act" + str(cnn_setting.activation_fun) + "_" + cnn_eval_key + '.log'
    
        print("log file: " + log_file)
    
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('cnn setting:\n ' + cnn_setting.to_string())
        logger.info('method: ' + method)
        logger.info('============')

        train_x_matrix = train_test_transpose(train_x_matrix, attr_num, attr_len, False)
        test_x_matrix = train_test_transpose(test_x_matrix, attr_num, attr_len, False)
        # NOTE(review): file_count is never incremented, so this guard is
        # always true.
        if file_count == 0:
            logger.info('train matrix shape: ' + str(train_x_matrix.shape))
            logger.info('train label shape: ' + str(train_y_vector.shape))
            logger.info('test matrix shape: ' + str(test_x_matrix.shape))
            logger.info('test label shape: ' + str(test_y_vector.shape))

        logger.info(train_x_matrix[0, 0:3, 0:2, 0])
        logger.info(test_x_matrix[0, 0:3, 0:2, 0])

        train_y_matrix = y_vector_to_matrix(train_y_vector, num_classes)
        test_y_matrix = y_vector_to_matrix(test_y_vector, num_classes)










        cnn_eval_value, train_run_time, test_run_time, cnn_predict_proba, saver_file, feature_list_obj_file = run_cnn(train_x_matrix, train_y_matrix, test_x_matrix, test_y_matrix, data_stru, cnn_setting, saver_file_profix, logger)

        logger.info("Fold eval value: " + str(cnn_eval_value))
        logger.info(method + ' fold training time (sec):' + str(train_run_time))
        logger.info(method + ' fold testing time (sec):' + str(test_run_time))
        logger.info("save obj to " + saver_file)
Exemplo n.º 13
0
def backward_multitime_main(parameter_file="../../parameters/",
                            file_keyword="train_",
                            n_selected_features=15):
    """Run backward-wrapper feature selection per class on every training fold.

    Settings are read with ``read_all_feature_classification`` under the
    ``"backward_wrapper"`` keyword.  For each file in the data folder whose
    name contains *file_keyword*, the matching test file (same name with
    ``train`` replaced by ``test``) is loaded, both matrices are reshaped to
    ``(n_samples, attr_num, attr_len)`` and ``backward_multitime`` is called
    once per class on one-vs-rest labels.  The selected features are only
    written to the fold's log file; nothing is returned.

    Args:
        parameter_file: Passed through to ``read_all_feature_classification``.
        file_keyword: Substring a file name must contain to be processed.
        n_selected_features: Passed through to ``backward_multitime``.
    """
    function_keyword = "backward_wrapper"
    (data_keyword, data_folder, attr_num, attr_len, num_classes, start_class,
     class_column, class_id, obj_folder, method, log_folder, out_obj_folder,
     out_model_folder, cnn_setting_file) = read_all_feature_classification(
         parameter_file, function_keyword)
    # A single pre-joined string prints the same space-separated line under
    # both the Python 2 print statement and the Python 3 print function.
    print(" ".join(str(value) for value in (
        data_keyword, data_folder, attr_num, attr_len, num_classes,
        start_class, class_column, class_id, obj_folder, method, log_folder,
        out_obj_folder, out_model_folder, cnn_setting_file)))

    log_folder = init_folder(log_folder)
    out_obj_folder = init_folder(out_obj_folder)
    out_model_folder = init_folder(out_model_folder)

    # NOTE(review): data_stru is not used below; the call is kept in case
    # return_data_stru has side effects.
    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len,
                                 class_column)

    file_list = list_files(data_folder)

    file_count = 0

    # The class column read from the parameter file is overridden: the value
    # passed to the file reader is always 0.
    class_column = 0
    header = True

    delimiter = ' '
    loop_count = -1
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        log_file = (log_folder + data_keyword + '_' + file_key + '_' +
                    function_keyword + '_class' + str(class_id) + '_' +
                    method + '.log')

        print("log file: " + log_file)

        # One logger (and log file) per fold.
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('method: ' + method)
        logger.info('============')

        test_file = train_file.replace('train', 'test')

        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector = train_test_file_reading(
            data_folder + train_file, data_folder + test_file, class_column,
            delimiter, header)
        # The two-value unpacking also enforces that the matrices are 2-D.
        n_samples, _ = train_x_matrix.shape
        train_x_matrix = train_x_matrix.reshape(n_samples, attr_num, attr_len)
        n_samples, _ = test_x_matrix.shape
        test_x_matrix = test_x_matrix.reshape(n_samples, attr_num, attr_len)
        # NOTE(review): file_count is never incremented, so the shapes are
        # logged for every fold.
        if file_count == 0:
            logger.info('train matrix shape: ' + str(train_x_matrix.shape))
            logger.info('train label shape: ' + str(train_y_vector.shape))
            logger.info('test matrix shape: ' + str(test_x_matrix.shape))
            logger.info('test label shape: ' + str(test_y_vector.shape))

        if class_id == -1:
            # class_id == -1 means "all classes present in the training
            # labels".  Cast to int because range() rejects float bounds,
            # which is what a label vector loaded as floats would produce.
            min_class = int(min(train_y_vector))
            max_class = int(max(train_y_vector)) + 1
        else:
            min_class = class_id
            max_class = class_id + 1
        for c in range(min_class, max_class):
            logger.info("Class: " + str(c))
            # One-vs-rest labels: 1 for class c, 0 for everything else.
            temp_train_y_vector = np.where(train_y_vector == c, 1, 0)
            temp_test_y_vector = np.where(test_y_vector == c, 1, 0)
            top_features = backward_multitime(
                train_x_matrix, temp_train_y_vector, test_x_matrix,
                temp_test_y_vector, n_selected_features, data_keyword, method,
                cnn_setting_file, logger)
            logger.info("Top Features For Class " + str(c) + ": " +
                        str(top_features))
            logger.info("End Of Class: " + str(c))
Exemplo n.º 14
0
def mask_evaluation_main(log_folder,
                         obj_folder,
                         out_obj_folder,
                         obj_keyword,
                         shap_k=-1,
                         shap_min=-1,
                         shap_max=-1,
                         func_key="arxiv_mask_gene"):
    """Turn per-class mask object files into one array of ranked indices.

    Every file in *obj_folder* whose name contains *obj_keyword*, the text
    ``"_class"`` and a secondary key is loaded with ``load_obj``.  The first
    loaded element is squeezed, its absolute values are summed over axis 0,
    and the indices are ordered from the largest to the smallest sum.  One
    such index row is collected per non-empty file, and the stacked rows are
    saved with ``save_obj`` under a name that records the smallest and
    largest class id parsed from the file names.

    NOTE(review): rows are appended in the order ``list_files`` returns the
    files, and files with an empty mask are skipped after their class id has
    already been counted towards the min/max.  Confirm that consumers do not
    assume row ``i`` belongs to class ``i``.

    Args:
        log_folder: Prefix of the log folder; *func_key* is appended to it.
        obj_folder: Folder prefix holding the per-class object files.
        out_obj_folder: Folder prefix the combined object is written to.
        obj_keyword: Substring an object file name must contain.
        shap_k: When not -1, file names must also contain
            ``shapNum<shap_k>_shapMin<shap_min>_shapMax<shap_max>``;
            otherwise they must contain ``.obj``.
        shap_min: See *shap_k*.
        shap_max: See *shap_k*.
        func_key: Suffix used for the log folder and the log file name.
    """
    log_folder = init_folder(log_folder + func_key)
    #logger = setup_logger('')
    logger = setup_logger(
        log_folder + obj_keyword + "_allclass_" + func_key + ".log")
    logger.info("log folder: " + log_folder)
    logger.info("obj folder: " + obj_folder)
    candidate_files = list_files(obj_folder)

    if shap_k == -1:
        obj_sec_key = ".obj"
    else:
        obj_sec_key = ("shapNum" + str(shap_k) + "_shapMin" + str(shap_min) +
                       "_shapMax" + str(shap_max))

    # Every one of these substrings has to occur in a file name.
    required_tokens = (obj_keyword, "_class", obj_sec_key)
    min_class = 100
    max_class = -1
    ranked_rows = []

    for obj_file in candidate_files:
        if not all(token in obj_file for token in required_tokens):
            continue

        # The class id is what remains of the last '_'-separated piece once
        # 'class' and '.obj' are removed.
        class_token = obj_file.split('_')[-1]
        class_token = class_token.replace('class', '').replace('.obj', '')
        logger.info("obj file:" + obj_file)
        logger.info("class key: " + class_token)
        class_number = int(class_token)
        min_class = min(min_class, class_number)
        max_class = max(max_class, class_number)

        shap_mask = load_obj(obj_folder + obj_file)[0]
        if len(shap_mask) == 0:
            continue
        shap_mask = numpy.squeeze(numpy.array(shap_mask))
        logger.info("shap_mask shape: " + str(shap_mask.shape))
        #shap_num, attr_num = shap_mask.shape

        shap_mask = numpy.sum(numpy.absolute(shap_mask), axis=0)
        logger.info(shap_mask)

        # Give every index its rank in ascending order of the summed
        # magnitude, then list the indices from the highest rank down.
        ascending_index = numpy.argsort(shap_mask)
        rank_of_index = numpy.zeros(len(shap_mask))
        for rank, index in enumerate(ascending_index):
            rank_of_index[index] = rank
        descending_index = numpy.argsort(rank_of_index)[::-1]
        logger.info(descending_index)
        logger.info("====")
        ranked_rows.append(descending_index)
        logger.info("shap_mask final shape: " + str(shap_mask.shape))

    output_array = numpy.array(ranked_rows)
    out_file_name = (obj_keyword + "_min" + str(min_class) + "_max" +
                     str(max_class) + "out.obj")
    logger.info("final output obj shape: " + str(output_array.shape))
    logger.info(output_array)
    save_obj([output_array], out_obj_folder + out_file_name)
Exemplo n.º 15
0
def forward_multitime_main(parameter_file="../../parameters/",
                           file_keyword="train_"):
    """Run forward-wrapper feature selection per class on every training fold.

    Settings are read with ``read_feature_classification`` under the
    ``"forward_wrapper"`` keyword.  The number of features to select and the
    number of classes are then taken from a hard-coded per-dataset table,
    replacing the class count from the parameter file.  For each file in the
    data folder whose name contains *file_keyword*, the matching test file
    (same name with ``train`` replaced by ``test``) is loaded, both matrices
    are reshaped to ``(n_samples, attr_num, attr_len)`` and
    ``forward_multitime`` is called once per class on one-vs-rest labels.
    The selected features are only written to the fold's log file; nothing
    is returned.

    Args:
        parameter_file: Passed through to ``read_feature_classification``.
        file_keyword: Substring a file name must contain to be processed.

    Raises:
        Exception: If the data keyword has no entry in the per-dataset table.
    """
    function_keyword = "forward_wrapper"
    (data_keyword, data_folder, attr_num, attr_len, num_classes, start_class,
     class_column, class_id, obj_folder, top_k, method, log_folder,
     out_obj_folder, out_model_folder,
     cnn_setting_file) = read_feature_classification(parameter_file,
                                                     function_keyword)
    # A single pre-joined string prints the same space-separated line under
    # both the Python 2 print statement and the Python 3 print function.
    print(" ".join(str(value) for value in (
        data_keyword, data_folder, attr_num, attr_len, num_classes,
        start_class, class_column, class_id, obj_folder, top_k, method,
        log_folder, out_obj_folder, out_model_folder, cnn_setting_file)))

    # data keyword -> (n_selected_features, num_classes)
    dataset_defaults = {
        "dsa": (15, 19),
        "toy": (15, 19),
        "rar": (30, 33),
        "arc": (30, 18),
        "fixed_arc": (30, 18),
        "asl": (6, 95),
    }
    if data_keyword not in dataset_defaults:
        raise Exception("Please fullfill the data basic information first!")
    n_selected_features, num_classes = dataset_defaults[data_keyword]

    log_folder = init_folder(log_folder)

    # NOTE(review): data_stru is not used below; the call is kept in case
    # return_data_stru has side effects.
    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len,
                                 class_column)

    file_list = list_files(data_folder)

    file_count = 0

    # The class column read from the parameter file is overridden: the value
    # passed to the file reader is always 0.
    class_column = 0
    header = True

    delimiter = ' '
    loop_count = -1

    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        # No previously selected features are loaded; the flag is constant
        # and only shows up in the log file name.
        already = False

        log_file = (log_folder + data_keyword + '_' + file_key + '_' +
                    function_keyword + '_class' + str(class_id) + '_' +
                    method + "_top" + str(n_selected_features) + '_already' +
                    str(already) + '.log')
        print("log file: " + log_file)

        # One logger (and log file) per fold.
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('method: ' + method)
        logger.info('============')

        test_file = train_file.replace('train', 'test')

        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector = train_test_file_reading(
            data_folder + train_file, data_folder + test_file, class_column,
            delimiter, header)
        # The two-value unpacking also enforces that the matrices are 2-D.
        n_samples, _ = train_x_matrix.shape
        train_x_matrix = train_x_matrix.reshape(n_samples, attr_num, attr_len)
        n_samples, _ = test_x_matrix.shape
        test_x_matrix = test_x_matrix.reshape(n_samples, attr_num, attr_len)
        # NOTE(review): file_count is never incremented, so the shapes are
        # logged for every fold.
        if file_count == 0:
            logger.info('train matrix shape: ' + str(train_x_matrix.shape))
            logger.info('train label shape: ' + str(train_y_vector.shape))
            logger.info('test matrix shape: ' + str(test_x_matrix.shape))
            logger.info('test label shape: ' + str(test_y_vector.shape))

        # Cast to int because range() rejects float bounds, which is what a
        # label vector loaded as floats would produce.
        min_class = int(min(train_y_vector))
        max_class = int(max(train_y_vector)) + 1
        for c in range(min_class, max_class):
            logger.info("Class: " + str(c))
            # A fresh, empty list of already-selected features per class.
            already_feature = []
            # One-vs-rest labels: 1 for class c, 0 for everything else.
            temp_train_y_vector = np.where(train_y_vector == c, 1, 0)
            temp_test_y_vector = np.where(test_y_vector == c, 1, 0)
            top_features = forward_multitime(
                train_x_matrix, temp_train_y_vector, test_x_matrix,
                temp_test_y_vector, n_selected_features, data_keyword,
                file_key, method, cnn_setting_file, logger, already_feature)
            logger.info("Top Features For Class " + str(c) + ": " +
                        str(top_features))
            logger.info("End Of Class: " + str(c))
Exemplo n.º 16
0
def best_forward_multitime_main(parameter_file="../../parameters/", file_keyword="train_", function_keyword="best_forward_multitime"):
    """Run fixed-width forward feature selection per class on every training fold.

    Settings are read with ``read_feature_classification``.  The number of
    features to select and the number of classes are then taken from a
    hard-coded per-dataset table, replacing the class count from the
    parameter file.  For each file in the data folder whose name contains
    *file_keyword*, the matching test file (same name with ``train`` replaced
    by ``test``) is loaded, both matrices are reshaped to
    ``(n_samples, attr_num, attr_len)`` and ``fixed_width_forward_multitime``
    is called once per class on one-vs-rest labels with ``keep_k`` fixed
    to 5.  The selected features are only written to the fold's log file;
    nothing is returned.

    Args:
        parameter_file: Passed through to ``read_feature_classification``.
        file_keyword: Substring a file name must contain to be processed.
        function_keyword: Passed through to ``read_feature_classification``;
            with the method name appended it also forms part of the log
            file name.

    Raises:
        Exception: If the data keyword has no entry in the per-dataset table.
    """
    (data_keyword, data_folder, attr_num, attr_len, num_classes, start_class,
     class_column, class_id, obj_folder, top_k, method, log_folder,
     out_obj_folder, out_model_folder,
     cnn_setting_file) = read_feature_classification(parameter_file,
                                                     function_keyword)

    # A single pre-joined string prints the same space-separated line under
    # both the Python 2 print statement and the Python 3 print function.
    # top_k is not part of the printed line, as in the original statement.
    print(" ".join(str(value) for value in (
        data_keyword, data_folder, attr_num, attr_len, num_classes,
        start_class, class_column, class_id, obj_folder, method, log_folder,
        out_obj_folder, out_model_folder, cnn_setting_file)))
    function_keyword = function_keyword + "_" + method

    # data keyword -> (n_selected_features, num_classes)
    dataset_defaults = {
        "dsa": (15, 19),
        "toy": (15, 19),
        "rar": (30, 33),
        "arc": (30, 18),
        "fixed_arc": (30, 18),
        "asl": (6, 95),
    }
    if data_keyword not in dataset_defaults:
        raise Exception("Please fullfill the data basic information first!")
    n_selected_features, num_classes = dataset_defaults[data_keyword]

    keep_k = 5

    log_folder = init_folder(log_folder)

    # NOTE(review): data_stru is not used below; the call is kept in case
    # return_data_stru has side effects.
    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len, class_column)

    file_list = list_files(data_folder)

    file_count = 0

    # The class column read from the parameter file is overridden: the value
    # passed to the file reader is always 0.
    class_column = 0
    header = True

    delimiter = ' '
    loop_count = -1

    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        # function_keyword already ends with the method name, so the method
        # appears twice in the log file name.
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(class_id) + '_' + method + "_top" + str(n_selected_features) + '.log'
        print("log file: " + log_file)

        # One logger (and log file) per fold.
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('method: ' + method)
        logger.info('============')

        test_file = train_file.replace('train', 'test')

        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector = train_test_file_reading(
            data_folder + train_file, data_folder + test_file, class_column, delimiter, header)
        # The two-value unpacking also enforces that the matrices are 2-D.
        n_samples, _ = train_x_matrix.shape
        train_x_matrix = train_x_matrix.reshape(n_samples, attr_num, attr_len)
        n_samples, _ = test_x_matrix.shape
        test_x_matrix = test_x_matrix.reshape(n_samples, attr_num, attr_len)
        # NOTE(review): file_count is never incremented, so the shapes are
        # logged for every fold.
        if file_count == 0:
            logger.info('train matrix shape: ' + str(train_x_matrix.shape))
            logger.info('train label shape: ' + str(train_y_vector.shape))
            logger.info('test matrix shape: ' + str(test_x_matrix.shape))
            logger.info('test label shape: ' + str(test_y_vector.shape))

        # Cast to int because range() rejects float bounds, which is what a
        # label vector loaded as floats would produce.
        min_class = int(min(train_y_vector))
        max_class = int(max(train_y_vector)) + 1
        for c in range(min_class, max_class):
            logger.info("Class: " + str(c))
            # One-vs-rest labels: 1 for class c, 0 for everything else.
            temp_train_y_vector = np.where(train_y_vector == c, 1, 0)
            temp_test_y_vector = np.where(test_y_vector == c, 1, 0)
            top_features = fixed_width_forward_multitime(train_x_matrix, temp_train_y_vector, test_x_matrix, temp_test_y_vector, n_selected_features, keep_k, data_keyword, file_key, method, cnn_setting_file, logger)
            logger.info("Top Features For Class " + str(c) + ": " + str(top_features))
            logger.info("End Of Class: " + str(c))
Exemplo n.º 17
0
def run_load_predict_cnn(fold_keyword, model_saved_folder, feature_array, top_k, test_x_matrix, test_y_vector, data_stru, cnn_setting, group_all=True, save_obj_folder="./", logger=None):
    """Predict with one saved CNN model per class and combine the results.

    For every class index ``c`` in ``range(data_stru.num_classes)`` the
    function searches ``model_saved_folder`` for a ``.index`` file whose name
    contains both ``fold_keyword`` and ``"class<c>_"``, loads that model, runs
    it on the test data restricted to the first ``top_k`` entries of
    ``feature_array[c]`` and keeps column 1 of the predicted probabilities.
    The predicted label of each sample is the argmax over those per-class
    columns.  The stacked probability matrix is saved to
    ``save_obj_folder + fold_keyword + "_" + str(top_k) + ".out"``.

    Args:
        fold_keyword: substring identifying the fold in model file names;
            also the prefix of the saved output file.
        model_saved_folder: folder holding the saved models.  It is
            concatenated directly with file names.
        feature_array: per-class feature index lists, indexed by class.
        top_k: number of leading features kept for each class.
        test_x_matrix: test data, indexed as ``[:, :, features, :]``.
        test_y_vector: true test labels.
        data_stru: data-structure object.  ``num_classes`` is set to 2 while
            the per-class models are used and is always restored.
        cnn_setting: CNN settings passed through to ``load_model``.
        group_all: passed through to ``load_model``.
        save_obj_folder: folder prefix of the saved probability matrix.
        logger: logger to use; created with ``init_logging('')`` if omitted.

    Returns:
        Tuple ``(acc, f1_list, load_time, test_time)``; the two times are the
        accumulated seconds spent loading models and predicting.

    Raises:
        Exception: if no model file matches a class, or if the two accuracy
            computations disagree.
    """
    if logger is None:
        logger = init_logging('')

    real_num_classes = data_stru.num_classes
    model_list = list_files(model_saved_folder)

    load_time = 0
    test_time = 0
    multi_predict = []
    # The per-class models are loaded with a two-class structure.  Restore
    # the caller's value even when a model is missing or loading/prediction
    # fails, so data_stru is never left in the temporary state.
    data_stru.num_classes = 2
    try:
        for c in range(real_num_classes):
            logger.info("Class: " + str(c))
            class_keyword = "class" + str(c) + "_"
            found_model_file = ""
            for model_file in model_list:
                if ".index" not in model_file:
                    continue
                if fold_keyword not in model_file:
                    continue
                if class_keyword not in model_file:
                    continue
                found_model_file = model_file.replace(".index", "")
                print(found_model_file)
                break

            if found_model_file == "":
                raise Exception("Model for " + class_keyword + " and " + fold_keyword + " Not Found!!!")
            found_model_file = model_saved_folder + found_model_file
            class_feature = feature_array[c][0:top_k]
            # found_model_file already carries the folder prefix here.
            logger.info("model file: " + str(found_model_file))
            logger.info("feature list: " + str(class_feature))

            temp_test_x_matrix = test_x_matrix[:, :, class_feature, :]
            logger.info("In run_load_predict_cnn: " + str(temp_test_x_matrix.shape))
            start_time = time.time()
            cnn_session, predict_y_proba, train_x_placeholder, keep_prob_placeholder = load_model(found_model_file, data_stru, cnn_setting, group_all, logger)
            try:
                load_time = load_time + time.time() - start_time
                start_time = time.time()
                cnn_predict_proba = load_model_predict(cnn_session, temp_test_x_matrix, predict_y_proba, train_x_placeholder, keep_prob_placeholder)
                test_time = test_time + time.time() - start_time
                multi_predict.append(cnn_predict_proba[:, 1])
            finally:
                # Release the session even if prediction raised.
                cnn_session.close()
    finally:
        data_stru.num_classes = real_num_classes

    multi_predict = np.array(multi_predict)
    # Rows are classes, columns are samples: pick the best class per sample.
    multi_predict_vector = np.argmax(multi_predict, axis=0)
    save_obj_file = save_obj_folder + fold_keyword + "_" + str(top_k) + ".out"
    save_obj([multi_predict], save_obj_file)
    logger.info("output obj saved to: " + save_obj_file)
    logger.info("multi predict matrix shape: " + str(multi_predict.shape))
    logger.info("multi predict vector shape: " + str(multi_predict_vector.shape))
    logger.info("test y vector: " + str(test_y_vector.shape))
    acc = accuracy_score(test_y_vector, multi_predict_vector)
    acc1, f1_list = multiple_f1_value_precision_recall_accuracy(multi_predict_vector, test_y_vector, logger)
    # Sanity check: both accuracy computations must agree.
    if acc != acc1:
        raise Exception("check accuracy")
    return acc, f1_list, load_time, test_time
Exemplo n.º 18
0
def multi_projected_cnn_classification_main(parameter_file, file_keyword, function_keyword="multi_proj_classification"):
    """Evaluate saved per-class projected CNN models on every matching fold.

    Settings are read with ``read_feature_classification``.  For each file in
    the data folder whose name contains ``file_keyword`` the function opens a
    dedicated log file, loads the first feature object in ``obj_folder``
    whose name contains the fold key, reads the train/test pair and calls
    ``run_load_predict_cnn`` with the saved models, logging the resulting
    accuracy, F1 list and timings.

    Args:
        parameter_file: path of the parameter file to read.
        file_keyword: substring a training file name must contain.
        function_keyword: section keyword passed to the parameter reader;
            also used in log file names and the output folder path.

    Raises:
        Exception: if no feature object file matches a fold.
    """
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, top_k, method, log_folder, cnn_obj_folder, cnn_temp_folder, cnn_setting_file = read_feature_classification(parameter_file, function_keyword)

    # Presumably obj_folder ends with '/', making the folder name the
    # second-to-last path component -- TODO confirm against the parameter file.
    obj_keyword = obj_folder.split('/')[-2]

    model_saved_folder = "../../object/" + data_keyword + "/projected_classification/" + obj_keyword + "_top" + str(top_k) + "_cnn_model_folder/"
    # Single-argument print() calls behave the same on Python 2 and 3.
    print(obj_keyword)
    print(cnn_obj_folder)
    print(model_saved_folder)
    group_all = False

    log_folder = init_folder(log_folder)

    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len, class_column)

    file_list = list_files(data_folder)
    obj_list = list_files(obj_folder)

    # The data files are read with the label in column 0 and a header line,
    # regardless of the class_column value from the parameter file.
    class_column = 0
    header = True

    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
    cnn_setting.save_obj_folder = cnn_obj_folder
    cnn_setting.temp_obj_folder = cnn_temp_folder
    cnn_setting.eval_method = 'f1'

    save_obj_folder = "../../object/" + data_keyword + "/" + function_keyword + "/" + obj_keyword + "/"
    save_obj_folder = init_folder(save_obj_folder)

    delimiter = ' '
    loop_count = -1
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(class_id) + '_top' + str(top_k) + '_' + method + '.log'

        print("log file: " + log_file)

        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('cnn setting:\n ' + cnn_setting.to_string())
        logger.info('method: ' + method)
        logger.info('============')

        # Use the first feature object whose name contains the fold key.
        found_obj_file = ''
        for obj_file in obj_list:
            if file_key in obj_file:
                found_obj_file = obj_file
                break
        if found_obj_file == '':
            raise Exception('No obj file found')
        found_obj_file = obj_folder + found_obj_file

        feature_dict = load_obj(found_obj_file)[0]
        feature_dict = np.array(feature_dict)
        logger.info("feature array shape: " + str(feature_dict.shape))

        test_file = train_file.replace('train', 'test')

        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, attr_num = train_test_file_reading_with_attrnum(
            data_folder + train_file, data_folder + test_file, class_column, delimiter, header)

        train_x_matrix = train_test_transpose(train_x_matrix, attr_num, attr_len, False)
        test_x_matrix = train_test_transpose(test_x_matrix, attr_num, attr_len, False)

        # Every fold writes to its own log file, so the shapes are recorded
        # for each of them (the original guard on a never-incremented
        # counter was always true).
        logger.info('train matrix shape: ' + str(train_x_matrix.shape))
        logger.info('train label shape: ' + str(train_y_vector.shape))
        logger.info('test matrix shape: ' + str(test_x_matrix.shape))
        logger.info('test label shape: ' + str(test_y_vector.shape))
        logger.info("topk: " + str(top_k))

        data_stru.attr_num = top_k
        fold_accuracy, fold_f1_list, fold_load_time, fold_test_time = run_load_predict_cnn(file_key, model_saved_folder, feature_dict, top_k, test_x_matrix, test_y_vector, data_stru, cnn_setting, group_all, save_obj_folder, logger)

        logger.info("Fold ACC: " + str(fold_accuracy))
        logger.info("Fold F1 list: " + str(fold_f1_list))
        # NOTE(review): the value logged as "training time" is the model
        # loading time; the label is kept unchanged because log parsers may
        # search for this exact text.
        logger.info(method + ' fold training time (sec):' + str(fold_load_time))
        logger.info(method + ' fold testing time (sec):' + str(fold_test_time))
Exemplo n.º 19
0
def run_pure_pv_evaluation(
        file_keyword,
        parameter_file='../../parameters/pv_baseline_evaluation.txt',
        function_keyword="pure_pv_evaluation"):
    """Rank attributes per class directly on the raw training data.

    Settings are read with ``read_pure_feature_generation``.  For every file
    in the data folder whose name contains ``file_keyword`` the training
    matrix is reshaped to ``(rows, attr_num, attr_len)`` and, for each class
    label in the selected range, a one-vs-rest label vector is analysed with
    the configured method.  The per-class attribute lists are stacked and
    saved with ``save_obj`` into the configured output folder.

    Args:
        file_keyword: substring a training file name must contain.
        parameter_file: path of the parameter file to read.
        function_keyword: section keyword passed to the parameter reader.

    Raises:
        ValueError: if the configured method is not ``'rf_lda'``, ``'rf'``
            or ``'lda'``.
    """
    (data_keyword, data_folder, attr_num, attr_len, num_classes, start_class,
     class_column, class_id, method, log_folder,
     out_obj_folder) = read_pure_feature_generation(parameter_file,
                                                    function_keyword)

    # One pre-joined string prints identically on Python 2 and Python 3.
    print(" ".join(str(value) for value in (
        data_keyword, data_folder, attr_num, attr_len, num_classes,
        start_class, class_column, class_id, method, log_folder,
        out_obj_folder)))

    file_list = list_files(data_folder)

    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        train_key = train_file.replace('.txt', '')

        data_matrix, attr_num = file_reading(data_folder + train_file)
        train_x_matrix, train_y_vector = x_y_spliting(data_matrix,
                                                      class_column)
        train_row, _ = train_x_matrix.shape
        train_x_matrix = train_x_matrix.reshape(train_row, attr_num, attr_len)

        # A negative class_id selects every label found in the training
        # data; otherwise only that single class is processed.
        if class_id < 0:
            min_class = min(train_y_vector)
            max_class = max(train_y_vector) + 1
        else:
            min_class = class_id
            max_class = min_class + 1

        file_prefix = (train_key + "_" + method + "_min" + str(min_class) +
                       "_max" + str(max_class) + "_pure_projected")
        log_file = file_prefix + ".log"
        out_obj_file = file_prefix + ".obj"

        logger = setup_logger(log_folder + log_file)
        print("log file: " + log_folder + log_file)
        logger.info(train_file)
        out_obj_matrix = []
        logger.info("min class: " + str(min_class))
        logger.info("max class: " + str(max_class))
        for label in range(min_class, max_class):
            # One-vs-rest labels: 1 for the current class, 0 otherwise.
            class_train_y = np.where(train_y_vector == label, 1, 0)
            logger.info("label: " + str(label))
            if method == 'rf_lda':
                class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_lda_analysis(
                    train_x_matrix, class_train_y, logger)
            elif method == "rf":
                class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_analysis(
                    train_x_matrix, class_train_y, logger)
            elif method == "lda":
                class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_lda_analysis(
                    train_x_matrix, class_train_y, logger)
            else:
                # Fail with a clear message instead of an unbound-variable
                # error on the line below.
                raise ValueError("Unsupported method: " + str(method))
            logger.info("class attr imp matrix shape: " +
                        str(class_attr_imp_matrix.shape))
            class_attr_list = map_attr_imp_analysis(class_attr_imp_matrix,
                                                    logger)
            logger.info(class_attr_list)
            logger.info(class_attr_list.shape)
            out_obj_matrix.append(class_attr_list)

        out_obj_matrix = np.array(out_obj_matrix)
        logger.info("out obj to: " + out_obj_folder + out_obj_file)
        logger.info(out_obj_matrix.shape)
        save_obj([out_obj_matrix], out_obj_folder + out_obj_file)
Exemplo n.º 20
0
def nn_classification_main(parameter_file,
                           file_keyword,
                           function_keyword="nn_classification"):
    """Train and evaluate the NN model on every matching train/test pair.

    Settings are read with ``read_all_feature_classification``; the log,
    output-object, output-model and result folders are initialised, and for
    each file in the data folder whose name contains ``file_keyword`` the
    matching test file is read, labels are converted with
    ``y_vector_to_matrix`` and ``run_nn`` is called.  The evaluation value
    and the train/test times are written to a per-file log.

    NOTE(review): the ``nn_setting_file`` value returned by the parameter
    reader is printed but then discarded; the NN settings are always read
    from ``../../parameters/nn_model_parameter.txt``.

    Args:
        parameter_file: path of the parameter file to read.
        file_keyword: substring a training file name must contain.
        function_keyword: section keyword passed to the parameter reader;
            also used in log file names.
    """
    (data_keyword, data_folder, attr_num, attr_len, num_classes, start_class,
     class_column, class_id, obj_folder, method, log_folder, out_obj_folder,
     out_model_folder, nn_setting_file) = read_all_feature_classification(
         parameter_file, function_keyword)

    print(data_keyword, data_folder, attr_num, attr_len, num_classes,
          start_class, class_column, class_id, obj_folder, method, log_folder,
          out_obj_folder, out_model_folder, nn_setting_file)

    log_folder = init_folder(log_folder)
    init_folder(out_obj_folder)
    init_folder(out_model_folder)

    # The returned structure is not used below; the call is kept as is.
    return_data_stru(num_classes, start_class, attr_num, attr_len,
                     class_column)

    data_files = list_files(data_folder)

    # Fixed reading options for the data files.
    label_column = 0
    has_header = True
    field_separator = ' '

    nn_setting, _ = return_nn_setting_from_file(
        "../../parameters/nn_model_parameter.txt")

    init_folder(obj_folder + method + "_result_folder")

    selected_files = (name for name in data_files if file_keyword in name)
    for fold_index, train_file in enumerate(selected_files):
        file_key = train_file.replace('.txt', '')
        test_file = train_file.replace('train', 'test')

        train_x, train_y, test_x, test_y, attr_num = train_test_file_reading_with_attrnum(
            data_folder + train_file, data_folder + test_file, label_column,
            field_separator, has_header)

        lowest_label = min(train_y)
        highest_label = max(train_y)
        label_total = highest_label - lowest_label + 1

        # Short tag describing the evaluation mode, used in the log name.
        if nn_setting.eval_method == "accuracy":
            eval_tag = "acc"
        else:
            eval_tag = "acc_batch" if label_total > 2 else "f1"

        log_file = "".join([
            log_folder, data_keyword, '_', file_key, '_', function_keyword,
            '_class', str(lowest_label), "_", str(highest_label), "_act",
            str(nn_setting.activation_fun), "_", eval_tag, '.log'
        ])

        print("log file: " + log_file)

        logger = setup_logger(log_file, 'logger_' + str(fold_index))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('nn setting:\n ' + nn_setting.to_string())
        logger.info('method: ' + method)
        logger.info('============')

        # Shapes are recorded in every per-file log.
        shape_report = (
            ('train matrix shape: ', train_x),
            ('train label shape: ', train_y),
            ('test matrix shape: ', test_x),
            ('test label shape: ', test_y),
        )
        for caption, array in shape_report:
            logger.info(caption + str(array.shape))

        # First three values of the first sample of each matrix.
        logger.info(train_x[0, 0:3])
        logger.info(test_x[0, 0:3])

        train_y_matrix = y_vector_to_matrix(train_y, label_total)
        test_y_matrix = y_vector_to_matrix(test_y, label_total)

        eval_value, train_seconds, test_seconds, _ = run_nn(
            train_x, train_y_matrix, test_x, test_y_matrix, nn_setting,
            logger)

        logger.info("Fold eval value: " + str(eval_value))
        logger.info(method + ' fold training time (sec):' + str(train_seconds))
        logger.info(method + ' fold testing time (sec):' + str(test_seconds))
Exemplo n.º 21
0
def run_cnn_projected_feature_analysis(feature_folder,
                                       class_id,
                                       data_folder,
                                       data_file_keyword,
                                       method="rf_lda",
                                       log_folder='./'):
    """Rank attributes per class from saved CNN feature objects.

    For every file in ``data_folder`` whose name contains
    ``data_file_keyword`` the labels are read, and for each class label in
    the selected range every feature file whose name contains both the data
    key and ``"_class<label>_"`` is loaded and analysed with ``method``.
    The resulting attribute lists are stacked and saved with ``save_obj``
    into a sibling folder named ``<feature_folder>_<method>``.

    Args:
        feature_folder: folder with the saved feature objects.  Its last
            character is dropped to build the output folder name and it is
            concatenated directly with file names, so it should end with
            ``'/'``.
        class_id: class to process; a negative value processes every label
            found in the training data.
        data_folder: folder with the training data files.
        data_file_keyword: substring a data file name must contain.
        method: one of ``"rf_lda"`` (alias ``"rf_lda_sum"``), ``"rf"``,
            ``"lda"`` or ``"cpca"``.
        log_folder: folder prefix for the log files.

    Raises:
        ValueError: if ``method`` is not one of the supported values.
    """
    data_file_list = list_files(data_folder)
    feature_file_list = list_files(feature_folder)
    out_obj_folder = feature_folder[:-1] + "_" + method
    out_obj_folder = init_folder(out_obj_folder)
    class_column = 0

    for train_file in data_file_list:
        if data_file_keyword not in train_file:
            continue
        data_key = train_file.replace('.txt', '')
        data_matrix, attr_num = file_reading(data_folder + train_file)
        train_x_matrix, train_y_vector = x_y_spliting(data_matrix,
                                                      class_column)
        if class_id < 0:
            min_class = min(train_y_vector)
            max_class = max(train_y_vector) + 1
        else:
            min_class = class_id
            max_class = min_class + 1
        log_file = data_key + "_" + method + "_min" + str(
            min_class) + "_max" + str(max_class) + ".log"
        logger = setup_logger(log_folder + log_file)
        logger.info('data file: ' + train_file)
        out_obj_file = data_key + "_" + method + "_min" + str(
            min_class) + "_max" + str(max_class) + ".obj"
        out_obj_matrix = []
        for label in range(min_class, max_class):
            logger.info("class: " + str(label))
            feature_key = "_class" + str(label) + "_"
            for feature_file in feature_file_list:
                if data_key not in feature_file or feature_key not in feature_file:
                    continue
                logger.info("feature file: " + feature_file)
                feature_obj = load_obj(feature_folder + feature_file)
                train_feature = obj_processing(feature_obj[0])
                logger.info("train feature shape: " + str(train_feature.shape))
                # One-vs-rest labels: 1 for the current class, 0 otherwise.
                class_train_y = np.where(train_y_vector == label, 1, 0)
                logger.info("feature method: " + str(method))
                # "rf_lda" is the documented default; "rf_lda_sum" is kept
                # as an alias so existing callers keep working.
                if method in ("rf_lda", "rf_lda_sum"):
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_lda_analysis(
                        train_feature, class_train_y, logger)
                elif method == "rf":
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_analysis(
                        train_feature, class_train_y, logger)
                elif method == "lda":
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_lda_analysis(
                        train_feature, class_train_y, logger)
                elif method == "cpca":
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_cpca_analysis(
                        train_feature, class_train_y, logger)
                else:
                    # Fail with a clear message instead of an
                    # unbound-variable error further down.
                    raise ValueError("Unsupported method: " + str(method))
                if method == "cpca":
                    # The cpca result is used as the attribute list as is.
                    class_attr_list = class_attr_imp_matrix
                else:
                    logger.info("class attr imp matrix shape: " +
                                str(class_attr_imp_matrix.shape))
                    class_attr_list = map_attr_imp_analysis(
                        class_attr_imp_matrix, logger)
                logger.info(class_attr_list)
                out_obj_matrix.append(class_attr_list)
        out_obj_matrix = np.array(out_obj_matrix)
        logger.info("out obj to: " + out_obj_folder + out_obj_file)
        logger.info(out_obj_matrix.shape)
        save_obj([out_obj_matrix], out_obj_folder + out_obj_file)
Exemplo n.º 22
0
    #obj_folder = '../../object/dsa/all_feature_classification/fcn_obj_folder/'
    #obj_file = 'train_0_count0_fcn_class0_c8_1_c5_1_c3_1global_p112_1.ckpt'
    #obj_vector = load_obj(obj_folder + obj_file)
    obj_vector = load_obj(obj_file)[0]
    print obj_vector.shape
    print obj_vector
    sdfs
    print np.array(obj_vector[0]).shape
    print np.array(obj_vector[0]).shape
    print len(obj_vector)
    print np.array(obj_vector[0]).shape
    #print np.array(obj_vector[0][1]).shape
    #print np.array(obj_vector[0][8]).shape
    #print np.array(obj_vector[1]).shape
    sdfds
    obj_list = list_files(obj_folder)
    acc_vector = []
    train_vector = []
    test_vector = []
    obj_count = 0
    for obj_file in obj_list:
        print obj_file
        obj_vector = load_obj(obj_folder + obj_file)
        #print obj_vector[0]
        #print obj_vector[3]
        acc_vector.append(float(obj_vector[0]))
        train_vector.append((obj_vector[3]))
        test_vector.append((obj_vector[4]))
        obj_count = obj_count + 1

    acc_vector = np.array(acc_vector)
def run_z_norm_main(data_folder,
                    file_keyword="train_",
                    logger=None,
                    class_column=0,
                    delimiter=' ',
                    header=True):
    """Z-normalize every train/test file pair found in ``data_folder``.

    Each file whose name contains ``file_keyword`` is read together with its
    test counterpart (same name with ``'train'`` replaced by ``'test'``).
    Both matrices are reshaped to ``(rows, attr_num, attr_len)``, normalized
    with ``run_z_normalization``, flattened again, prefixed with the label
    column and written under the same file names into a sibling folder
    named ``<data_folder>_z_norm/``.  Every written file is verified with
    ``norm_checking``.

    Args:
        data_folder: folder containing the data files.
        file_keyword: substring a training file name must contain.
        logger: logger to use; created with ``setup_logger('')`` if omitted.
        class_column: label column passed to the file reader.
        delimiter: field delimiter passed to the file reader.
        header: header flag passed to the file reader.

    Raises:
        Exception: if ``norm_checking`` returns ``False`` for a written file.
    """
    if logger is None:
        logger = setup_logger('')

    if data_folder.endswith('/'):
        out_folder = data_folder[:-1] + "_z_norm/"
    else:
        out_folder = data_folder + "_z_norm/"
    out_folder = init_folder(out_folder)
    file_list = list_files(data_folder)
    file_count = 0
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        logger.info(train_file)
        test_file = train_file.replace('train', 'test')
        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, attr_num = train_test_file_reading_with_attrnum(
            data_folder + train_file, data_folder + test_file, class_column,
            delimiter, header)

        train_row, train_col = train_x_matrix.shape
        test_row, test_col = test_x_matrix.shape
        # Floor division keeps attr_len an integer on Python 3 as well;
        # reshape rejects float dimensions.
        attr_len = train_col // attr_num
        train_x_matrix = train_x_matrix.reshape(train_row, attr_num, attr_len)
        test_x_matrix = test_x_matrix.reshape(test_row, attr_num, attr_len)

        norm_train_matrix = run_z_normalization(train_x_matrix)
        norm_test_matrix = run_z_normalization(test_x_matrix)
        # Shapes are only logged for the first processed file.
        if file_count == 0:
            logger.info("Before norm")
            logger.info('train matrix shape: ' + str(train_x_matrix.shape))
            logger.info('test matrix shape: ' + str(test_x_matrix.shape))
            logger.info("After norm")
            logger.info('train matrix shape: ' + str(norm_train_matrix.shape))
            logger.info('test matrix shape: ' + str(norm_test_matrix.shape))

        # Flatten back to 2-D and put the label column in front.
        norm_train_matrix = norm_train_matrix.reshape(train_row, train_col)
        norm_test_matrix = norm_test_matrix.reshape(test_row, test_col)
        train_y_vector = train_y_vector.reshape(len(train_y_vector), 1)
        test_y_vector = test_y_vector.reshape(len(test_y_vector), 1)
        norm_train_matrix = np.hstack((train_y_vector, norm_train_matrix))
        norm_test_matrix = np.hstack((test_y_vector, norm_test_matrix))
        if file_count == 0:
            logger.info("before write to file")
            logger.info('train matrix shape: ' + str(norm_train_matrix.shape))
            logger.info('test matrix shape: ' + str(norm_test_matrix.shape))
        file_writing(norm_train_matrix, out_folder + train_file, attr_num)
        file_writing(norm_test_matrix, out_folder + test_file, attr_num)
        if norm_checking(out_folder + train_file) is False or norm_checking(
                out_folder + test_file) is False:
            logger.info("ERROR!!!")
            raise Exception("ERROR!!!")
        file_count = file_count + 1
Exemplo n.º 24
0
def global_classification_main(parameter_file, file_keyword):
    # Entry point for the "global_classification" run: reads the settings
    # with read_feature_classification, prepares the CNN settings and
    # folders, then loops over the files in the data folder whose name
    # contains file_keyword.
    #
    # parameter_file: path of the parameter file to read.
    # file_keyword:   substring a training file name must contain.
    #
    # NOTE(review): as written, the unconditional `continue` inside the loop
    # means each matching file only gets its logger and header lines; the
    # feature loading, training and result saving below it never run.
    function_keyword = "global_classification"
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, top_k, method, log_folder, cnn_obj_folder, cnn_temp_folder, cnn_setting_file = read_feature_classification(
        parameter_file, function_keyword)

    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len,
                                 class_column)

    file_list = list_files(data_folder)
    obj_list = list_files(obj_folder)
    file_count = 0

    # Reading options for the data files; class_column from the parameter
    # file is overridden here after data_stru has been built.
    class_column = 0
    header = True

    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
    cnn_setting.save_obj_folder = cnn_obj_folder
    cnn_setting.temp_obj_folder = cnn_temp_folder
    cnn_setting.eval_method = 'f1'
    init_folder(cnn_obj_folder)
    init_folder(cnn_temp_folder)

    # NOTE(review): the row count 10 is hard-coded (presumably the number of
    # folds -- confirm); this matrix is not used in the visible code.
    all_result_matrix = np.zeros((10, num_classes))

    train_file_vector = []
    prediction_matrix = []
    f1_value_matrix = []
    accuracy_vector = []
    delimiter = ' '
    all_accuracy = 0
    all_train_time = 0
    all_test_time = 0
    loop_count = -1
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(
            class_id) + '_top' + str(top_k) + '_' + method + '.log'

        print "log file: " + log_file

        # One logger (and log file) per matching training file.
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('cnn setting:\n ' + cnn_setting.to_string())
        logger.info('method: ' + method)
        logger.info('============')
        # NOTE(review): this unconditional `continue` makes the rest of the
        # loop body unreachable -- looks like a debugging leftover; confirm
        # before removing.
        continue
        # Use the first feature object whose name contains the fold key.
        found_obj_file = ''
        for obj_file in obj_list:
            if file_key in obj_file:
                found_obj_file = obj_file
                break
        if found_obj_file == '':
            raise Exception('No obj file found')

        print found_obj_file
        print cnn_setting.save_obj_folder + file_key + "_" + method + "_projected_result.ckpt"
        #
        found_obj_file = obj_folder + found_obj_file

        feature_dict = load_obj(found_obj_file)[0]
        feature_dict = np.array(feature_dict)
        logger.info("feature array shape: " + str(feature_dict.shape))

        test_file = train_file.replace('train', 'test')

        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, attr_num = train_test_file_reading_with_attrnum(
            data_folder + train_file, data_folder + test_file, class_column,
            delimiter, header)

        # file_count is never incremented in the visible code, so this guard
        # would always be true.
        if file_count == 0:
            logger.info('train matrix shape: ' + str(train_x_matrix.shape))
            logger.info('train label shape: ' + str(train_y_vector.shape))
            logger.info('test matrix shape: ' + str(test_x_matrix.shape))
            logger.info('test label shape: ' + str(test_y_vector.shape))

        train_x_matrix = train_test_transpose(train_x_matrix, attr_num,
                                              attr_len, False)
        test_x_matrix = train_test_transpose(test_x_matrix, attr_num, attr_len,
                                             False)
        data_stru.attr_num = top_k
        fold_accuracy, fold_avg_eval, fold_predict_y, fold_train_time, fold_test_time, fold_predict_matrix = run_feature_projected_cnn(
            train_x_matrix, train_y_vector, test_x_matrix, test_y_vector,
            data_stru, cnn_setting, feature_dict, top_k,
            file_key + '_count' + str(file_count), class_id, logger)

        prediction_matrix.append(fold_predict_y)
        # NOTE(review): fold_f1_value_list is not assigned anywhere in this
        # function; this line would raise NameError if it were reached.
        logger.info("Fold F1: " + str(fold_f1_value_list))
        accuracy_vector.append(fold_accuracy)
        all_accuracy = all_accuracy + fold_accuracy
        all_train_time = all_train_time + fold_train_time
        all_test_time = all_test_time + fold_test_time
        logger.info(method + ' fold accuracy: ' + str(fold_accuracy))
        logger.info(method + ' fold training time (sec):' +
                    str(fold_train_time))
        logger.info(method + ' fold testing time (sec):' + str(fold_test_time))
        # NOTE(review): save_obj_folder is not assigned in this function;
        # unless it exists at module level this call would raise NameError.
        save_obj([
            fold_accuracy, fold_avg_eval, fold_predict_y, fold_train_time,
            fold_test_time, fold_predict_matrix
        ], save_obj_folder + file_key + "_" + method +
                 "_global_cnn_result.ckpt")