def GetAllFeature(short_table, long_table, QRS_table, long_pid_list, short_pid_list, center_waves):
    '''
    Assemble the combined feature matrix together with its column names.
    The center waves must already be computed and are passed in by the caller.

    input:
        data: short_table, long_table, QRS_table, center_waves
        pid: long_pid_list, short_pid_list

    output:
        feature_list: names of every group, each truncated to the width of
            that group's first feature row
        out_feature: the groups chained through CombineFeatures in this order
            (8528 rows according to the original notes):
            1. centerwave_feature
            2. long_feature
            3. qrs_feature
            4. short_stat_wave_feature
    '''
    cw_names, cw_feat = get_centerwave_feature(center_waves)
    lg_names, lg_feat = get_long_feature(long_table)
    q_names, q_feat = get_qrs_feature(QRS_table)
    ss_names, ss_feat = get_short_stat_wave_feature(
        short_table, short_pid_list, long_pid_list)

    # (log label, names, features) for each group, in output column order.
    groups = (
        ('centerwave_feature shape: ', cw_names, cw_feat),
        ('long_feature shape: ', lg_names, lg_feat),
        ('qrs_feature shape: ', q_names, q_feat),
        ('short_stat_wave_feature shape: ', ss_names, ss_feat),
    )

    # Keep only as many names as the first row of the group has columns.
    feature_list = []
    widths = []
    for _, names, feat in groups:
        width = len(feat[0])
        feature_list.extend(names[:width])
        widths.append(width)

    for (label, _, _), width in zip(groups, widths):
        print(label, width)

    # CombineFeatures is applied innermost-first: qrs+short, then long, then centerwave.
    combined = CombineFeatures(q_feat, ss_feat)
    combined = CombineFeatures(lg_feat, combined)
    out_feature = CombineFeatures(cw_feat, combined)
    print('out_feature shape: ', len(out_feature[0]))

    return feature_list, out_feature
def GetAllFeature_test(short_table, long_table, QRS_table, long_pid_list, short_pid_list):
    '''
    Assemble the combined feature matrix for test data.
    No feature names are returned, and the center waves are computed here
    from short_table instead of being passed in.

    input:
        data: short_table, long_table, QRS_table
        pid: long_pid_list, short_pid_list

    output:
        out_feature: the groups chained through CombineFeatures in this order
            (8528 rows according to the original notes):
            1. centerwave_feature
            2. long_feature
            3. qrs_feature
            4. short_stat_wave_feature

    NOTE(review): this name is defined again later in the file with a
    different (per-lead) signature - confirm which definition callers expect.
    '''
    waves = get_short_centerwave(short_table, short_pid_list, long_pid_list)

    _, cw_feat = get_centerwave_feature(waves)
    _, lg_feat = get_long_feature(long_table)
    _, q_feat = get_qrs_feature(QRS_table)
    _, ss_feat = get_short_stat_wave_feature(
        short_table, short_pid_list, long_pid_list)

    # CombineFeatures is applied innermost-first: qrs+short, then long, then centerwave.
    qrs_and_short = CombineFeatures(q_feat, ss_feat)
    long_and_rest = CombineFeatures(lg_feat, qrs_and_short)
    out_feature = CombineFeatures(cw_feat, long_and_rest)

    # TODO: potential bug, if the last column is all 0 it may cause a bug in
    # xgboost; the original workaround (left disabled) replaced a trailing 0.0
    # in each row with 0.00000001.

    return out_feature
from features_qrs import get_qrs_feature
from preprocess_do_not_save_version import get_preprocessed_data
import csv

if __name__ == '__main__':
    # Smoke run: preprocess the first record listed in REFERENCE.csv and
    # report how many QRS feature names / values come back for it.
    reference_path = '/home/hanhaochen/physionet_challenge2020_pytorch/features/Preprocess_Data/REFERENCE.csv'
    with open(reference_path, 'r') as f_r:
        result = list(csv.reader(f_r))

    for i in range(1):
        print('#######################' + result[i][0] + '######################')

        # A one-row slice, so the preprocessor still receives a list of rows.
        ecg_data = result[i:i + 1]

        # The preprocessor returns 39 values: patient info, 12 short-data
        # tables, 12 long-data tables, QRS info, the long pid list and
        # 12 short pid lists.
        (patient_info,
         short_data0, short_data1, short_data2, short_data3,
         short_data4, short_data5, short_data6, short_data7,
         short_data8, short_data9, short_data10, short_data11,
         long_data0, long_data1, long_data2, long_data3,
         long_data4, long_data5, long_data6, long_data7,
         long_data8, long_data9, long_data10, long_data11,
         qrs_info, long_pid,
         short_pid0, short_pid1, short_pid2, short_pid3,
         short_pid4, short_pid5, short_pid6, short_pid7,
         short_pid8, short_pid9, short_pid10, short_pid11) = get_preprocessed_data(ecg_data)

        # The full pipeline call, GetAllFeature_test(patient_info,
        # short_data0..11, long_data0..11, qrs_info, long_pid, short_pid0..11),
        # is disabled here; only the QRS features are exercised.
        feature_name, feature_data = get_qrs_feature(qrs_info)
        print('name', len(feature_name))
        print('data', len(feature_data[0]))
def GetAllFeature_test(patient_info, short_table0, short_table1, short_table2, short_table3,
                       short_table4, short_table5, short_table6, short_table7, short_table8,
                       short_table9, short_table10, short_table11, long_table0, long_table1,
                       long_table2, long_table3, long_table4, long_table5, long_table6,
                       long_table7, long_table8, long_table9, long_table10, long_table11,
                       QRS_table, long_pid_list, short_pid_0, short_pid_1, short_pid_2,
                       short_pid_3, short_pid_4, short_pid_5, short_pid_6, short_pid_7,
                       short_pid_8, short_pid_9, short_pid_10, short_pid_11):
    '''
    Build one flat feature row from per-lead tables (test mode, no feature names).

    input:
        patient_info: converted with np.array(); its first row opens the output
        short_table0..11 / short_pid_0..11: per-lead inputs for the short
            statistical wave features
        long_table0..11: per-lead inputs for the long features
        QRS_table: input for the QRS features
        long_pid_list: passed to every short-stat extraction

    output:
        [final_list]: a list holding a single row, made of the FIRST row of
        each feature group concatenated in this order:
            1. patient_info
            2. qrs_feature
            3. long_feature_0 .. long_feature_11
            4. short_stat_wave_feature_0 .. short_stat_wave_feature_11

    side effects:
        prints each group's name and width and the total length; when the
        total is not 6233, creates an empty 'bad_length<...>.csv' marker file
        in the current working directory.

    Center-wave features are not computed in this revision (the per-lead
    get_short_centerwave / get_centerwave_feature calls were disabled).
    '''
    expected_length = 6233

    short_tables = (short_table0, short_table1, short_table2, short_table3,
                    short_table4, short_table5, short_table6, short_table7,
                    short_table8, short_table9, short_table10, short_table11)
    short_pids = (short_pid_0, short_pid_1, short_pid_2, short_pid_3,
                  short_pid_4, short_pid_5, short_pid_6, short_pid_7,
                  short_pid_8, short_pid_9, short_pid_10, short_pid_11)
    long_tables = (long_table0, long_table1, long_table2, long_table3,
                   long_table4, long_table5, long_table6, long_table7,
                   long_table8, long_table9, long_table10, long_table11)

    # Lead index 3 is routed through the *_from_avR extractor; every other
    # lead uses the generic one (presumably index 3 is the aVR lead - confirm).
    short_stat_wave_features = []
    for lead, (table, pids) in enumerate(zip(short_tables, short_pids)):
        if lead == 3:
            _, feature = get_short_stat_wave_feature_from_avR(table, pids, long_pid_list)
        else:
            _, feature = get_short_stat_wave_feature(table, pids, long_pid_list)
        short_stat_wave_features.append(feature)

    long_features = []
    for table in long_tables:
        _, feature = get_long_feature(table)
        long_features.append(feature)

    _, qrs_feature = get_qrs_feature(QRS_table)
    patient_info = np.array(patient_info)

    # Explicit (name, value) pairs replace the former eval() lookup of local
    # variable names; the names are still needed for the progress print.
    named_features = [('patient_info', patient_info), ('qrs_feature', qrs_feature)]
    named_features.extend(
        ('long_feature_{}'.format(lead), feature)
        for lead, feature in enumerate(long_features))
    named_features.extend(
        ('short_stat_wave_feature_{}'.format(lead), feature)
        for lead, feature in enumerate(short_stat_wave_features))

    final_list = []
    for name, feature in named_features:
        first_row = feature[0]
        print(name, len(first_row))
        final_list.extend(first_row)

    print('feature length', len(final_list))
    if len(final_list) != expected_length:
        # Drop an empty marker file so records with an unexpected feature
        # count can be found afterwards; the handle is closed immediately.
        # NOTE(review): patient_info[0] is also consumed as a row above, so
        # concatenating it into the file name may need a scalar id - confirm.
        with open('bad_length' + patient_info[0] + '.csv', 'w',
                  encoding='utf-8', newline=''):
            pass

    return [final_list]
def GetAllFeature_test(patient_info, short_table0, short_table1, short_table2, short_table3,
                       short_table4, short_table5, short_table6, short_table7, short_table8,
                       short_table9, short_table10, short_table11, long_table0, long_table1,
                       long_table2, long_table3, long_table4, long_table5, long_table6,
                       long_table7, long_table8, long_table9, long_table10, long_table11,
                       QRS_table, long_pid_list, short_pid_0, short_pid_1, short_pid_2,
                       short_pid_3, short_pid_4, short_pid_5, short_pid_6, short_pid_7,
                       short_pid_8, short_pid_9, short_pid_10, short_pid_11):
    '''
    Build the full feature matrix from per-lead tables (test mode, no feature names).

    input:
        patient_info: converted with np.array(); occupies the leading columns
        short_table0..11 / short_pid_0..11: per-lead inputs for the short
            statistical wave features
        long_table0..11: per-lead inputs for the long features
        QRS_table: input for the QRS features
        long_pid_list: passed to every short-stat extraction

    output:
        nested list (object array converted with tolist()), one row per row of
        qrs_feature, with the groups placed side by side in this order:
            1. patient_info
            2. qrs_feature
            3. long_feature_0 .. long_feature_11
            4. short_stat_wave_feature_0 .. short_stat_wave_feature_11

    Every feature group must expose a 2-D .shape. Center-wave features are
    not computed in this revision (the per-lead get_short_centerwave /
    get_centerwave_feature calls were disabled).
    '''
    short_tables = (short_table0, short_table1, short_table2, short_table3,
                    short_table4, short_table5, short_table6, short_table7,
                    short_table8, short_table9, short_table10, short_table11)
    short_pids = (short_pid_0, short_pid_1, short_pid_2, short_pid_3,
                  short_pid_4, short_pid_5, short_pid_6, short_pid_7,
                  short_pid_8, short_pid_9, short_pid_10, short_pid_11)
    long_tables = (long_table0, long_table1, long_table2, long_table3,
                   long_table4, long_table5, long_table6, long_table7,
                   long_table8, long_table9, long_table10, long_table11)

    # Lead index 3 is routed through the *_from_avR extractor; every other
    # lead uses the generic one (presumably index 3 is the aVR lead - confirm).
    short_stat_wave_features = []
    for lead, (table, pids) in enumerate(zip(short_tables, short_pids)):
        if lead == 3:
            _, feature = get_short_stat_wave_feature_from_avR(table, pids, long_pid_list)
        else:
            _, feature = get_short_stat_wave_feature(table, pids, long_pid_list)
        short_stat_wave_features.append(feature)

    long_features = []
    for table in long_tables:
        _, feature = get_long_feature(table)
        long_features.append(feature)

    _, qrs_feature = get_qrs_feature(QRS_table)
    patient_info = np.array(patient_info)

    # The row count follows qrs_feature. The column count takes lead 0's width
    # for all 12 long and all 12 short-stat groups.
    # NOTE(review): lead 3 uses a different extractor - confirm it yields the
    # same width, otherwise the slice assignments below will not line up.
    final_array_row = qrs_feature.shape[0]
    final_array_column = (patient_info.shape[1]
                          + long_features[0].shape[1] * 12
                          + qrs_feature.shape[1]
                          + short_stat_wave_features[0].shape[1] * 12)
    print('row,column' + str(final_array_row) + '.' + str(final_array_column))

    # dtype=object lets patient info and numeric features share one array.
    final_array = np.zeros((final_array_row, final_array_column), dtype=object)

    # Direct references replace the former eval() lookup of local variable names.
    ordered_features = [patient_info, qrs_feature]
    ordered_features.extend(long_features)
    ordered_features.extend(short_stat_wave_features)

    column_sum = 0
    for feature in ordered_features:
        width = feature.shape[1]
        final_array[:, column_sum:column_sum + width] = feature
        column_sum += width

    return final_array.tolist()