def process_episode(output_dir, root_patient_folder, episode_ind, label_type, pheno_map=None):
    """Export one episode of a patient through the ``write_*`` helpers.

    Reads ``episode<N>.csv`` (``N = episode_ind + 1``) from
    ``root_patient_folder``, looks up the subject and admission ids of the
    stay whose ``ICUSTAY_ID`` equals the first ``Icustay`` value of the
    episode, and then hands the demographic values, the diagnoses and every
    file found in the ``episode<N>`` sub-folder to the matching writer.

    The function is best-effort: when the episode csv cannot be read, the
    episode folder does not exist, or no stay matches, a message is printed
    and the function returns without writing anything.

    Args:
        output_dir: Passed through unchanged to the ``write_*`` helpers.
        root_patient_folder: Folder holding ``episode<N>.csv`` and the
            ``episode<N>`` sub-folder for one patient.
        episode_ind: Zero-based episode index.
        label_type: Forwarded to ``write_diagnoses`` as its ``type`` keyword.
        pheno_map: Forwarded to ``write_diagnoses``.

    Returns:
        None.
    """
    episode = None
    try:
        episode_name = "episode" + str(episode_ind + 1)
        episode_folder = os.path.join(root_patient_folder, episode_name)
        episode = pd.read_csv(
            os.path.join(root_patient_folder, episode_name + ".csv"))
    except Exception:
        # Best-effort skip; ``Exception`` (rather than a bare ``except``)
        # lets KeyboardInterrupt / SystemExit propagate.
        pass
    # The folder is checked up front so that a missing folder is reported
    # here instead of crashing in os.listdir() after partial output has
    # already been written.
    if episode is None or not os.path.isdir(episode_folder):
        print("episode csv file or folder missing for subject :" + str(root_patient_folder))
        return

    stays = read_stays(root_patient_folder)
    try:
        # Select the matching stay rows once and read both ids from them.
        matching_stays = stays.loc[stays["ICUSTAY_ID"] == episode.Icustay.iloc[0]]
        subject_id = matching_stays["SUBJECT_ID"].iloc[0]
        hadm_id = matching_stays["HADM_ID"].iloc[0]
    except Exception:
        print("cannot match subject or admission for subject :" + str(root_patient_folder))
        return

    pati_abrv = "pati." + str(subject_id) + "+" + str(hadm_id)
    write_bio_edges(episode.Age[0], episode.Gender[0], episode.Ethnicity[0],
                    pati_abrv, output_dir)
    write_diagnoses(root_patient_folder, hadm_id, output_dir,
                    type=label_type, pheno_map=pheno_map)

    for table in os.listdir(episode_folder):
        # NOTE(review): the literal has a trailing space, so in practice this
        # never excludes a file name — confirm what it was meant to skip.
        if ".csv " in table:
            continue
        # The map key is the part of the file name before the first "." and
        # the first "_".
        table_abrv_name = table_name_map[table.split(".")[0].split("_")[0]]
        table_path = os.path.join(episode_folder, table)
        if "timeseries" in table:
            with open(table_path) as tsfile:
                ts_lines = tsfile.readlines()
            write_seq_event(ts_lines, output_dir, pati_abrv, table_abrv_name)
        else:
            table_events = dataframe_from_csv(table_path, index_col=None)
            write_static_event(table_events, output_dir, table_abrv_name)
var_map = read_itemid_to_variable_map(args.variable_map_file) variables = var_map.VARIABLE.unique() for subject_dir in tqdm(os.listdir(args.subjects_root_path), desc='Iterating over subjects'): dn = os.path.join(args.subjects_root_path, subject_dir) try: subject_id = int(subject_dir) if not os.path.isdir(dn): raise Exception except: continue try: # reading tables of this subject stays = read_stays(os.path.join(args.subjects_root_path, subject_dir)) diagnoses = read_diagnoses( os.path.join(args.subjects_root_path, subject_dir)) events = read_events(os.path.join(args.subjects_root_path, subject_dir)) except: sys.stderr.write( 'Error reading from disk for subject: {}\n'.format(subject_id)) continue episodic_data = assemble_episodic_data(stays, diagnoses) # cleaning and converting to time series events = map_itemids_to_variables(events, var_map) events = clean_events(events) if events.shape[0] == 0:
# Walk every sub-directory of args.subjects_root_path whose name is an integer
# subject id, load that subject's tables and prepare its events, reporting
# progress on stdout as it goes.
for subject_dir in os.listdir(args.subjects_root_path):
    dn = os.path.join(args.subjects_root_path, subject_dir)

    # Skip entries that are not named after an integer id or are not folders.
    try:
        subject_id = int(subject_dir)
    except ValueError:
        continue
    if not os.path.isdir(dn):
        continue

    sys.stdout.write('Subject {}: '.format(subject_id))
    sys.stdout.flush()

    try:
        sys.stdout.write('reading...')
        sys.stdout.flush()
        stays = read_stays(dn)
        diagnoses = read_diagnoses(dn)
        events = read_events(dn)
    except Exception:
        # Best-effort: a subject that cannot be read is reported and skipped.
        # ``Exception`` (not a bare ``except``) keeps Ctrl-C / SystemExit
        # working while the tables are being read.
        sys.stdout.write('error reading from disk!\n')
        continue
    else:
        sys.stdout.write('got {0} stays, {1} diagnoses, {2} events...'.format(
            stays.shape[0], diagnoses.shape[0], events.shape[0]))
        sys.stdout.flush()

    episodic_data = assemble_episodic_data(stays, diagnoses)

    sys.stdout.write('cleaning and converting to time series...')
    sys.stdout.flush()
    events = map_itemids_to_variables(events, var_map)
    events = clean_events(events)