def pd_read_csv_data_from_dir(self, input_dir, extension_str='.csv'):
        '''
        Read every matching file under a directory into one DataFrame.

        Input:
            input_dir: directory handed to FileList.find_file,
                e.g. '/input/dir'
            extension_str: file extension handed to FileList.find_file
        Output:
            result_df = pd.DataFrame holding the frames returned by
                self.pd_read_csv_data for each file, stacked row-wise
                and re-indexed 0..n-1
        Raises:
            ValueError: when no file is found, so there is nothing to stack
        '''
        file_obj = FileList()
        file_ary = file_obj.find_file(input_dir, extension_str)

        df_list = [self.pd_read_csv_data(temp_file) for temp_file in file_ary]

        if not df_list:
            # pd.concat raises ValueError on an empty list anyway; keep the
            # exception type but say which directory produced no input.
            raise ValueError(
                "No '{}' files found under '{}'".format(extension_str,
                                                        input_dir))

        # ignore_index=True discards the per-file indexes and numbers the
        # stacked rows 0..n-1 in a single step.
        return pd.concat(df_list, axis=0, ignore_index=True)
    def get_csv_data_from_dir(self,
                              input_dir,
                              outcome_index=26,
                              extension_str='.csv'):
        '''
        Read every matching file under a directory and stack the results.

        Input:
            input_dir: directory handed to FileList.find_file,
                e.g. '/input/dir'
            outcome_index: passed through unchanged to self.read_csv_data
                (presumably the outcome column position -- confirm against
                read_csv_data)
            extension_str: file extension handed to FileList.find_file
        Output:
            features_ary = first value returned by self.read_csv_data for
                the last file read; an empty np.array when no file is found
            data_x = second values of all files joined along axis 0
            data_y = third values of all files joined along axis 0
        '''
        file_obj = FileList()
        file_ary = file_obj.find_file(input_dir, extension_str)

        # Bound up front so that an empty directory returns empty results
        # instead of failing on an unassigned name at the return statement.
        features_ary = np.array([])
        x_parts = []
        y_parts = []
        total_rows = 0

        for temp_file in file_ary:
            features_ary, temp_data_x, temp_data_y = self.read_csv_data(
                temp_file, outcome_index)

            x_parts.append(temp_data_x)
            y_parts.append(temp_data_y)
            total_rows += len(temp_data_x)

            # Progress output: file name, rows in this file, rows so far.
            print(temp_file)
            print(len(temp_data_x))
            print(total_rows)

        if not x_parts:
            return features_ary, np.array([]), np.array([])

        if len(x_parts) == 1:
            # A single file needs no joining; hand its data back untouched.
            return features_ary, x_parts[0], y_parts[0]

        # Join once at the end; concatenating inside the loop would copy all
        # previously accumulated rows again for every additional file.
        data_x = np.concatenate(x_parts, axis=0)
        data_y = np.concatenate(y_parts, axis=0)

        return features_ary, data_x, data_y
def gpr_file_test():
    '''
    Cross-check the GPR files found on disk against the ids listed in the
    Excel report summary.

    Output (side effects only, nothing is returned):
        gpr_file_list.txt: one discovered GPR file path per line
        match_file_list.txt: one line per Excel id that was also extracted
            from a GPR file, as id, a tab, then the GPR file path
        missing_file_list.txt: one line per Excel id with no GPR file
        stdout: the missing ids and the hit / miss / total counts
    '''
    file_path_ary = [
        '/home/ryan/smb_data/CytoOneArray/RD/完成報告',
        '/home/ryan/smb_data/CytoOneArray/RD/審查中報告',
        '/home/ryan/smb_data/brank_data/For Brank/GPR'
    ]
    file_ext = 'gpr'
    gpr_file_list = '/home/ryan/src_dir/CytoOA_AI/data/gpr_file_list.txt'
    missing_file_list = '/home/ryan/src_dir/CytoOA_AI/data/missing_file_list.txt'
    match_file_list = '/home/ryan/src_dir/CytoOA_AI/data/match_file_list.txt'
    input_file = '/home/ryan/src_dir/CytoOA_AI/data/Cyto_Report_summary2.xlsx'

    file_obj = FileList()
    file_ary = []
    for file_path in file_path_ary:
        file_ary += file_obj.find_file(file_path, file_ext)

    all_id = []
    array_id_2_path_dict = {}

    # The paths contain non-ASCII characters, so the output encoding is set
    # explicitly instead of relying on the locale default.
    with open(gpr_file_list, 'w', encoding='utf-8') as fh_writer:
        for temp_file in file_ary:
            fh_writer.write(temp_file + "\n")

            id_list = file_obj.get_gpr_code(temp_file)
            if not id_list:
                # No id could be extracted; the path is still listed above
                # but there is nothing to index.
                continue

            # The first id is authoritative for this file (a later file with
            # the same first id overrides it). Any further ids only fill
            # gaps, so that every id added to all_id can be resolved to a
            # path when the match list is written.
            array_id_2_path_dict[id_list[0]] = temp_file
            for extra_id in id_list[1:]:
                array_id_2_path_dict.setdefault(extra_id, temp_file)
            all_id += id_list

    gpr_id_dict = list_2_dict(all_id)

    excel_reader = ExcelReader()
    excel_df = excel_reader.read_excel(input_file)
    excel_gpr_id = get_gpr_id_from_excel_df(excel_df)
    excel_grp_dict = list_2_dict(excel_gpr_id)

    hit_count = 0
    miss_count = 0
    total_count = 0
    miss_id = []

    # Context managers guarantee both reports are flushed and closed even if
    # an error interrupts the comparison.
    with open(missing_file_list, 'w', encoding='utf-8') as fh_missing:
        with open(match_file_list, 'w', encoding='utf-8') as fh_match:
            for key in excel_grp_dict:
                if key in gpr_id_dict:
                    hit_count += 1
                    fh_match.write(
                        str(key) + "\t" + array_id_2_path_dict[key] + "\n")
                else:
                    miss_count += 1
                    miss_id.append(key)
                    fh_missing.write(str(key) + "\n")
                total_count += 1

    print("Missing id = ")
    print(miss_id)

    print("Hit count = {}".format(hit_count))
    print("Miss_count = {}".format(miss_count))
    print("Total_count = {}".format(total_count))