def read_icd_diagnoses_table(mimic3_path):
    """Load the diagnoses table joined with the ICD-9 code titles.

    Reads ``D_ICD_DIAGNOSES.csv`` and ``DIAGNOSES_ICD.csv`` from
    ``mimic3_path`` and inner-joins them on ``ICD9_CODE``; diagnosis rows
    whose code is absent from the dictionary file are therefore dropped.

    Args:
        mimic3_path: Directory that contains both CSV files.

    Returns:
        The merged DataFrame, with ``SUBJECT_ID``, ``HADM_ID`` and
        ``SEQ_NUM`` cast to ``int``.
    """
    dictionary_csv = os.path.join(mimic3_path, 'D_ICD_DIAGNOSES.csv')
    title_columns = ['ICD9_CODE', 'SHORT_TITLE', 'LONG_TITLE']
    code_titles = dataframe_from_csv(dictionary_csv)[title_columns]

    assigned_csv = os.path.join(mimic3_path, 'DIAGNOSES_ICD.csv')
    assigned = dataframe_from_csv(assigned_csv)

    # Both frames name the key column identically, so a single `on` suffices.
    merged = assigned.merge(code_titles, how='inner', on='ICD9_CODE')
    return merged.astype({'SUBJECT_ID': int, 'HADM_ID': int, 'SEQ_NUM': int})
def read_icd_diagnoses_table(mimic3_path):
    """Merge the two ICD diagnosis CSV files into one DataFrame.

    ``D_ICD_DIAGNOSES.csv`` contributes ``ICD9_CODE``, ``SHORT_TITLE`` and
    ``LONG_TITLE``; ``DIAGNOSES_ICD.csv`` contributes the diagnosis rows
    (including ``SUBJECT_ID``, ``HADM_ID`` and ``SEQ_NUM``). The two are
    inner-joined on ``ICD9_CODE``.

    Args:
        mimic3_path: Directory that contains both CSV files.

    Returns:
        The joined DataFrame with the three identifier columns as ``int``.
    """
    id_columns = ['SUBJECT_ID', 'HADM_ID', 'SEQ_NUM']

    # Code dictionary, reduced to the code and its two titles.
    dictionary = dataframe_from_csv(
        os.path.join(mimic3_path, 'D_ICD_DIAGNOSES.csv'))
    dictionary = dictionary.loc[:, ['ICD9_CODE', 'SHORT_TITLE', 'LONG_TITLE']]

    # Diagnosis rows, enriched with the titles of their code.
    result = dataframe_from_csv(
        os.path.join(mimic3_path, 'DIAGNOSES_ICD.csv'))
    result = result.merge(dictionary,
                          how='inner',
                          left_on='ICD9_CODE',
                          right_on='ICD9_CODE')

    # Identifier columns are stored as plain integers.
    for column in id_columns:
        result[column] = result[column].astype(int)
    return result
def read_admissions_table(mimic3_path):
    """Load a seven-column view of ``ADMISSIONS.csv`` with parsed timestamps.

    Args:
        mimic3_path: Directory that contains ``ADMISSIONS.csv``.

    Returns:
        DataFrame with ``SUBJECT_ID``, ``HADM_ID``, ``ADMITTIME``,
        ``DISCHTIME``, ``DEATHTIME``, ``ETHNICITY`` and ``DIAGNOSIS``; the
        three ``*TIME`` columns are converted with ``pd.to_datetime``.
    """
    kept_columns = [
        'SUBJECT_ID', 'HADM_ID', 'ADMITTIME', 'DISCHTIME', 'DEATHTIME',
        'ETHNICITY', 'DIAGNOSIS',
    ]
    source_csv = os.path.join(mimic3_path, 'ADMISSIONS.csv')
    admissions = dataframe_from_csv(source_csv)[kept_columns]

    for time_column in ('ADMITTIME', 'DISCHTIME', 'DEATHTIME'):
        admissions[time_column] = pd.to_datetime(admissions[time_column])
    return admissions
def read_stays(subject_path):
    """Load a subject's ``stays.csv`` ordered by ``INTIME`` then ``OUTTIME``.

    Args:
        subject_path: Directory that contains ``stays.csv``.

    Returns:
        DataFrame whose ``INTIME``, ``OUTTIME``, ``DOB``, ``DOD`` and
        ``DEATHTIME`` columns are converted with ``pd.to_datetime``, sorted
        by ``INTIME`` and ``OUTTIME``.
    """
    stays_csv = os.path.join(subject_path, 'stays.csv')
    frame = dataframe_from_csv(stays_csv, index_col=None)

    for stamp in ('INTIME', 'OUTTIME', 'DOB', 'DOD', 'DEATHTIME'):
        frame[stamp] = pd.to_datetime(frame[stamp])

    return frame.sort_values(by=['INTIME', 'OUTTIME'])
def read_events(subject_path, remove_null=True):
    """Load a subject's ``events.csv`` and normalise its column types.

    Args:
        subject_path: Directory that contains ``events.csv``.
        remove_null: When true, rows whose ``VALUE`` is null are discarded
            before any conversion.

    Returns:
        DataFrame in which ``CHARTTIME`` is converted with
        ``pd.to_datetime``, missing ``HADM_ID`` / ``ICUSTAY_ID`` become
        ``-1`` (both columns cast to ``int``) and missing ``VALUEUOM``
        becomes the empty string.
    """
    events_csv = os.path.join(subject_path, 'events.csv')
    table = dataframe_from_csv(events_csv, index_col=None)

    if remove_null:
        has_value = table['VALUE'].notnull()
        table = table[has_value]

    table['CHARTTIME'] = pd.to_datetime(table['CHARTTIME'])

    # -1 stands in for a missing identifier so the columns can be integers.
    for id_column in ('HADM_ID', 'ICUSTAY_ID'):
        table[id_column] = table[id_column].fillna(value=-1).astype(int)

    table['VALUEUOM'] = table['VALUEUOM'].fillna('').astype(str)

    # Sorting is currently disabled:
    # events.sort_values(by=['CHARTTIME', 'ITEMID', 'ICUSTAY_ID'], inplace=True)
    return table
def read_icustays_table(mimic3_path):
    """Load ``ICUSTAYS.csv`` from ``mimic3_path`` with parsed stay times.

    No column selection is applied here; the frame produced by
    ``dataframe_from_csv`` is returned as-is apart from ``INTIME`` and
    ``OUTTIME``, which are converted with ``pd.to_datetime``.

    Args:
        mimic3_path: Directory that contains ``ICUSTAYS.csv``.

    Returns:
        The ICU stays DataFrame.
    """
    icu_stays = dataframe_from_csv(os.path.join(mimic3_path, 'ICUSTAYS.csv'))
    for stamp in ('INTIME', 'OUTTIME'):
        icu_stays[stamp] = pd.to_datetime(icu_stays[stamp])
    return icu_stays
def read_itemid_to_variable_map(fn, variable_column='LEVEL2'):
    """Build an ``ITEMID``-indexed lookup of variable names from a CSV map.

    Rows are kept only when ``variable_column`` is non-empty, ``COUNT`` is
    positive and ``STATUS`` equals ``'ready'``.

    Args:
        fn: Path of the item-id mapping CSV.
        variable_column: Name of the column that holds the variable name.

    Returns:
        DataFrame indexed by integer ``ITEMID`` with the columns
        ``VARIABLE`` (taken from ``variable_column``) and ``MIMIC_LABEL``
        (taken from ``MIMIC LABEL``).
    """
    # Everything is read as text first; missing cells become ''.
    mapping = dataframe_from_csv(fn, index_col=None).fillna('').astype(str)

    # Lower-casing of the variable names is currently disabled:
    # var_map[variable_column] = var_map[variable_column].apply(lambda s: s.lower())
    mapping['COUNT'] = mapping['COUNT'].astype(int)

    has_variable = mapping[variable_column] != ''
    is_observed = mapping['COUNT'] > 0
    is_ready = mapping['STATUS'] == 'ready'
    mapping = mapping[has_variable & is_observed & is_ready]

    # ITEMID is converted only for the surviving rows.
    mapping['ITEMID'] = mapping['ITEMID'].astype(int)

    selected = mapping[[variable_column, 'ITEMID', 'MIMIC LABEL']]
    indexed = selected.set_index('ITEMID')
    new_names = {variable_column: 'VARIABLE', 'MIMIC LABEL': 'MIMIC_LABEL'}
    return indexed.rename(columns=new_names)
def read_patients_table(mimic3_path):
    """Load four columns of ``PATIENTS.csv`` with parsed dates.

    Args:
        mimic3_path: Directory that contains ``PATIENTS.csv``.

    Returns:
        DataFrame with ``SUBJECT_ID``, ``GENDER``, ``DOB`` and ``DOD``;
        ``DOB`` and ``DOD`` are converted with ``pd.to_datetime``.
    """
    patients_csv = os.path.join(mimic3_path, 'PATIENTS.csv')
    patients = dataframe_from_csv(patients_csv)
    patients = patients[['SUBJECT_ID', 'GENDER', 'DOB', 'DOD']]

    for date_column in ('DOB', 'DOD'):
        patients[date_column] = pd.to_datetime(patients[date_column])
    return patients
def read_admissions_table(mimic3_path):
    """Read ``ADMISSIONS.csv`` from ``mimic3_path`` as a DataFrame.

    Only these seven columns are returned: ``SUBJECT_ID``, ``HADM_ID``,
    ``ADMITTIME``, ``DISCHTIME``, ``DEATHTIME``, ``ETHNICITY`` and
    ``DIAGNOSIS``. The three ``*TIME`` columns are converted with
    ``pd.to_datetime``.

    Args:
        mimic3_path: Directory that contains ``ADMISSIONS.csv``.

    Returns:
        The reduced admissions DataFrame.
    """
    admissions_csv = os.path.join(mimic3_path, 'ADMISSIONS.csv')
    raw = dataframe_from_csv(admissions_csv)
    subset = raw[[
        'SUBJECT_ID', 'HADM_ID', 'ADMITTIME', 'DISCHTIME', 'DEATHTIME',
        'ETHNICITY', 'DIAGNOSIS'
    ]]
    # The columns already exist, so assign() replaces them in place
    # position-wise and the column order is unchanged.
    return subset.assign(
        ADMITTIME=pd.to_datetime(subset.ADMITTIME),
        DISCHTIME=pd.to_datetime(subset.DISCHTIME),
        DEATHTIME=pd.to_datetime(subset.DEATHTIME),
    )
def read_icustays_table(mimic3_path):
    """Load ``ICUSTAYS.csv`` and convert its stay boundaries to datetimes.

    Args:
        mimic3_path: Directory that contains ``ICUSTAYS.csv``.

    Returns:
        The DataFrame produced by ``dataframe_from_csv`` with ``INTIME``
        and ``OUTTIME`` converted with ``pd.to_datetime``.
    """
    icustays_csv = os.path.join(mimic3_path, 'ICUSTAYS.csv')
    raw = dataframe_from_csv(icustays_csv)
    return raw.assign(
        INTIME=pd.to_datetime(raw.INTIME),
        OUTTIME=pd.to_datetime(raw.OUTTIME),
    )
def read_patients_table(mimic3_path):
    """Read ``PATIENTS.csv`` and keep ``SUBJECT_ID``, ``GENDER``, ``DOB``, ``DOD``.

    Args:
        mimic3_path: Directory that contains ``PATIENTS.csv``.

    Returns:
        The four-column DataFrame with ``DOB`` and ``DOD`` converted with
        ``pd.to_datetime``.
    """
    wanted = ['SUBJECT_ID', 'GENDER', 'DOB', 'DOD']
    subset = dataframe_from_csv(os.path.join(mimic3_path, 'PATIENTS.csv'))[wanted]
    return subset.assign(
        DOB=pd.to_datetime(subset.DOB),
        DOD=pd.to_datetime(subset.DOD),
    )
def read_diagnoses(subject_path):
    """Load ``diagnoses.csv`` from a subject directory.

    Args:
        subject_path: Directory that contains ``diagnoses.csv``.

    Returns:
        Whatever ``dataframe_from_csv`` yields for that file when called
        with ``index_col=None``.
    """
    diagnoses_csv = os.path.join(subject_path, 'diagnoses.csv')
    return dataframe_from_csv(diagnoses_csv, index_col=None)
# Attach the HCUP CCS 2015 groups to the diagnoses and write the label matrix.
# The definitions file is opened in a `with` block so the handle is closed,
# and parsed with yaml.safe_load: a bare yaml.load(stream) is deprecated since
# PyYAML 5.1 and raises TypeError on PyYAML 6 (Loader is mandatory there).
# NOTE(review): safe_load rejects Python-specific YAML tags — confirm the
# definitions file contains only plain mappings/sequences/scalars.
with open(args.phenotype_definitions, 'r') as definitions_file:
    phenotype_definitions = yaml.safe_load(definitions_file)
phenotypes = add_hcup_ccs_2015_groups(diagnoses, phenotype_definitions)
make_phenotype_label_matrix(phenotypes, stays).to_csv(
    os.path.join(args.output_path, 'phenotype_labels.csv'),
    index=False,
    quoting=csv.QUOTE_NONNUMERIC)

if args.test:
    # Test mode: restrict the run to a random sample of patients and to the
    # first event table only.
    # NOTE(review): np.random.choice samples WITH replacement by default, so
    # the same patient can be drawn more than once and its stays would then be
    # duplicated by the merge below — confirm whether replace=False is wanted.
    pat_idx = np.random.choice(patients.shape[0], size=1000)
    patients = patients.iloc[pat_idx]
    stays = stays.merge(patients[['SUBJECT_ID']],
                        left_on='SUBJECT_ID',
                        right_on='SUBJECT_ID')
    args.event_tables = [args.event_tables[0]]
    print('Using only', stays.shape[0], 'stays and only',
          args.event_tables[0], 'table')

# Split stays and diagnoses into per-subject outputs under args.output_path.
subjects = stays.SUBJECT_ID.unique()
break_up_stays_by_subject(stays, args.output_path, subjects=subjects)
break_up_diagnoses_by_subject(phenotypes, args.output_path, subjects=subjects)

# Optional whitelist of ITEMIDs; None means no item filter is passed on.
items_to_keep = {
    int(itemid)
    for itemid in dataframe_from_csv(args.itemids_file)['ITEMID'].unique()
} if args.itemids_file else None

for table in args.event_tables:
    read_events_table_and_break_up_by_subject(args.mimic3_path,
                                              table,
                                              args.output_path,
                                              items_to_keep=items_to_keep,
                                              subjects_to_keep=subjects)