Пример #1
0
def read_icd_diagnoses_table(mimic3_path):
    """Load ICD-9 diagnosis rows joined with the ICD-9 code titles.

    Reads ``D_ICD_DIAGNOSES.csv`` (reduced to ``ICD9_CODE``, ``SHORT_TITLE``
    and ``LONG_TITLE``) and ``DIAGNOSES_ICD.csv`` from *mimic3_path* and
    inner-joins them on ``ICD9_CODE``.

    Returns the merged DataFrame with ``SUBJECT_ID``, ``HADM_ID`` and
    ``SEQ_NUM`` cast to ``int``.
    """
    dictionary_file = os.path.join(mimic3_path, 'D_ICD_DIAGNOSES.csv')
    code_titles = dataframe_from_csv(dictionary_file)[['ICD9_CODE', 'SHORT_TITLE', 'LONG_TITLE']]

    assignments_file = os.path.join(mimic3_path, 'DIAGNOSES_ICD.csv')
    merged = dataframe_from_csv(assignments_file).merge(code_titles, how='inner', on='ICD9_CODE')

    id_columns = ['SUBJECT_ID', 'HADM_ID', 'SEQ_NUM']
    merged[id_columns] = merged[id_columns].astype(int)
    return merged
Пример #2
0
def read_icd_diagnoses_table(mimic3_path):
    """
    Inner-join two CSV files from `mimic3_path` into one DataFrame.

        `DIAGNOSES_ICD.csv`   : the diagnosis rows (left side of the join)
                &
        `D_ICD_DIAGNOSES.csv` : contributes 'ICD9_CODE', 'SHORT_TITLE', 'LONG_TITLE'

    The join key is 'ICD9_CODE'. 'SUBJECT_ID', 'HADM_ID' and 'SEQ_NUM' are
    cast to int in the returned frame.
    """
    join_key = 'ICD9_CODE'
    integer_columns = ['SUBJECT_ID', 'HADM_ID', 'SEQ_NUM']

    # Code dictionary, reduced to the key plus its two title columns.
    code_table = dataframe_from_csv(os.path.join(mimic3_path, 'D_ICD_DIAGNOSES.csv'))
    code_table = code_table[[join_key, 'SHORT_TITLE', 'LONG_TITLE']]

    diagnosis_table = dataframe_from_csv(os.path.join(mimic3_path, 'DIAGNOSES_ICD.csv'))

    # Inner join: rows whose code has no match in the dictionary are dropped.
    joined = diagnosis_table.merge(code_table, how='inner', left_on=join_key, right_on=join_key)

    joined[integer_columns] = joined[integer_columns].astype(int)
    return joined
Пример #3
0
def read_admissions_table(mimic3_path):
    """Load a fixed subset of columns from ``ADMISSIONS.csv``.

    Args:
        mimic3_path: directory containing ``ADMISSIONS.csv``.

    Returns:
        DataFrame with ``SUBJECT_ID``, ``HADM_ID``, ``ADMITTIME``,
        ``DISCHTIME``, ``DEATHTIME``, ``ETHNICITY`` and ``DIAGNOSIS``; the
        three ``*TIME`` columns are converted with ``pd.to_datetime``.
    """
    admits = dataframe_from_csv(os.path.join(mimic3_path, 'ADMISSIONS.csv'))
    # Explicit copy: assigning columns on a bare column selection makes pandas
    # emit SettingWithCopyWarning because the selection is tracked as derived
    # from the full table.
    admits = admits[['SUBJECT_ID', 'HADM_ID', 'ADMITTIME', 'DISCHTIME', 'DEATHTIME', 'ETHNICITY', 'DIAGNOSIS']].copy()
    for column in ('ADMITTIME', 'DISCHTIME', 'DEATHTIME'):
        admits[column] = pd.to_datetime(admits[column])
    return admits
Пример #4
0
def read_stays(subject_path):
    """Load ``stays.csv`` from *subject_path* with parsed timestamps.

    ``INTIME``, ``OUTTIME``, ``DOB``, ``DOD`` and ``DEATHTIME`` are converted
    with ``pd.to_datetime``; the rows are then sorted by ``INTIME`` and
    ``OUTTIME`` before the frame is returned.
    """
    stays_file = os.path.join(subject_path, 'stays.csv')
    frame = dataframe_from_csv(stays_file, index_col=None)

    for timestamp_column in ('INTIME', 'OUTTIME', 'DOB', 'DOD', 'DEATHTIME'):
        frame[timestamp_column] = pd.to_datetime(frame[timestamp_column])

    frame.sort_values(by=['INTIME', 'OUTTIME'], inplace=True)
    return frame
Пример #5
0
def read_events(subject_path, remove_null=True):
    """Load ``events.csv`` from *subject_path* and normalise its columns.

    Args:
        subject_path: directory containing ``events.csv``.
        remove_null: when true, rows whose ``VALUE`` is null are dropped.

    Returns:
        DataFrame where ``CHARTTIME`` is converted with ``pd.to_datetime``,
        missing ``HADM_ID`` / ``ICUSTAY_ID`` are replaced by ``-1`` and cast
        to int, and missing ``VALUEUOM`` becomes the empty string.
    """
    events = dataframe_from_csv(os.path.join(subject_path, 'events.csv'), index_col=None)
    if remove_null:
        # Copy the filtered rows so the column assignments below write to an
        # independent frame instead of a selection of the loaded table
        # (which pandas reports as SettingWithCopyWarning).
        events = events[events['VALUE'].notnull()].copy()
    events['CHARTTIME'] = pd.to_datetime(events['CHARTTIME'])
    events['HADM_ID'] = events['HADM_ID'].fillna(value=-1).astype(int)
    events['ICUSTAY_ID'] = events['ICUSTAY_ID'].fillna(value=-1).astype(int)
    events['VALUEUOM'] = events['VALUEUOM'].fillna('').astype(str)
    # Sorting is currently disabled:
    # events.sort_values(by=['CHARTTIME', 'ITEMID', 'ICUSTAY_ID'], inplace=True)
    return events
Пример #6
0
def read_icustays_table(mimic3_path):
    """
    Read `ICUSTAYS.csv` from `mimic3_path` as a DataFrame.

    No column filtering is applied here: the whole table is returned.
    Columns expected by the original note (not enforced by this function):

        ['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID', 'DBSOURCE', 'FIRST_CAREUNIT',
         'LAST_CAREUNIT', 'FIRST_WARDID', 'LAST_WARDID', 'INTIME', 'OUTTIME', 'LOS']

    `INTIME` and `OUTTIME` are converted to dtype `datetime`.
    """
    icu_stays = dataframe_from_csv(os.path.join(mimic3_path, 'ICUSTAYS.csv'))
    for boundary in ('INTIME', 'OUTTIME'):
        icu_stays[boundary] = pd.to_datetime(icu_stays[boundary])
    return icu_stays
def read_itemid_to_variable_map(fn, variable_column='LEVEL2'):
    """Build an ``ITEMID`` -> variable-name lookup table from a mapping CSV.

    Args:
        fn: path of the mapping CSV; it must provide ``COUNT``, ``STATUS``,
            ``ITEMID``, ``MIMIC LABEL`` and *variable_column*.
        variable_column: column holding the variable name for each item.

    Returns:
        DataFrame indexed by integer ``ITEMID`` with the columns ``VARIABLE``
        (renamed from *variable_column*) and ``MIMIC_LABEL`` (renamed from
        ``MIMIC LABEL``). Only rows with a non-empty variable name, a
        positive ``COUNT`` and ``STATUS == 'ready'`` are kept. Variable
        names keep their original casing.
    """
    # Everything is read as text first (missing cells become ''), then the
    # numeric columns are converted explicitly.
    var_map = dataframe_from_csv(fn, index_col=None).fillna('').astype(str)
    var_map['COUNT'] = var_map['COUNT'].astype(int)

    usable = (
        (var_map[variable_column] != '')
        & (var_map['COUNT'] > 0)
        & (var_map['STATUS'] == 'ready')
    )
    # Copy the filtered rows so the ITEMID conversion below writes to an
    # independent frame rather than a selection of the loaded table
    # (which pandas reports as SettingWithCopyWarning).
    var_map = var_map[usable].copy()
    var_map['ITEMID'] = var_map['ITEMID'].astype(int)

    var_map = var_map[[variable_column, 'ITEMID', 'MIMIC LABEL']].set_index('ITEMID')
    return var_map.rename(columns={
        variable_column: 'VARIABLE',
        'MIMIC LABEL': 'MIMIC_LABEL',
    })
Пример #8
0
def read_patients_table(mimic3_path):
    """
    Read `PATIENTS.csv` from `mimic3_path` as a DataFrame and return only

        'SUBJECT_ID', 'GENDER', 'DOB', 'DOD'

    these 4 columns.

    `DOB` and `DOD` are converted to dtype `datetime`.
    """
    pats = dataframe_from_csv(os.path.join(mimic3_path, 'PATIENTS.csv'))
    # Explicit copy so the date conversions below assign into an independent
    # frame; assigning into a bare column selection triggers pandas'
    # SettingWithCopyWarning.
    pats = pats[['SUBJECT_ID', 'GENDER', 'DOB', 'DOD']].copy()
    pats['DOB'] = pd.to_datetime(pats['DOB'])
    pats['DOD'] = pd.to_datetime(pats['DOD'])
    return pats
Пример #9
0
def read_admissions_table(mimic3_path):
    """
    Read `ADMISSIONS.csv` from `mimic3_path` as a DataFrame and return only

        'SUBJECT_ID', 'HADM_ID', 'ADMITTIME', 'DISCHTIME', 'DEATHTIME', 'ETHNICITY', 'DIAGNOSIS'

    these 7 columns.

    `ADMITTIME`, `DISCHTIME` and `DEATHTIME` are converted to dtype `datetime`.
    """
    kept_columns = [
        'SUBJECT_ID', 'HADM_ID', 'ADMITTIME', 'DISCHTIME', 'DEATHTIME',
        'ETHNICITY', 'DIAGNOSIS'
    ]
    admits = dataframe_from_csv(os.path.join(mimic3_path, 'ADMISSIONS.csv'))
    # Explicit copy so the conversions below assign into an independent frame;
    # assigning into a bare column selection triggers pandas'
    # SettingWithCopyWarning.
    admits = admits[kept_columns].copy()
    admits['ADMITTIME'] = pd.to_datetime(admits['ADMITTIME'])
    admits['DISCHTIME'] = pd.to_datetime(admits['DISCHTIME'])
    admits['DEATHTIME'] = pd.to_datetime(admits['DEATHTIME'])
    return admits
Пример #10
0
def read_icustays_table(mimic3_path):
    """Load ``ICUSTAYS.csv`` from *mimic3_path* with parsed stay boundaries.

    The full table is returned; ``INTIME`` and ``OUTTIME`` are converted with
    ``pd.to_datetime``.
    """
    csv_path = os.path.join(mimic3_path, 'ICUSTAYS.csv')
    table = dataframe_from_csv(csv_path)

    parsed_intime = pd.to_datetime(table['INTIME'])
    table['INTIME'] = parsed_intime
    parsed_outtime = pd.to_datetime(table['OUTTIME'])
    table['OUTTIME'] = parsed_outtime
    return table
Пример #11
0
def read_patients_table(mimic3_path):
    """Load the patient columns ``SUBJECT_ID``, ``GENDER``, ``DOB``, ``DOD``.

    Args:
        mimic3_path: directory containing ``PATIENTS.csv``.

    Returns:
        DataFrame restricted to those four columns, with ``DOB`` and ``DOD``
        converted with ``pd.to_datetime``.
    """
    pats = dataframe_from_csv(os.path.join(mimic3_path, 'PATIENTS.csv'))
    # Copy the column selection before mutating it: assigning into a bare
    # selection makes pandas emit SettingWithCopyWarning.
    pats = pats[['SUBJECT_ID', 'GENDER', 'DOB', 'DOD']].copy()
    for date_column in ('DOB', 'DOD'):
        pats[date_column] = pd.to_datetime(pats[date_column])
    return pats
Пример #12
0
def read_diagnoses(subject_path):
    """Load ``diagnoses.csv`` from *subject_path* via ``dataframe_from_csv``.

    ``index_col=None`` is passed through to the loader.
    """
    diagnoses_file = os.path.join(subject_path, 'diagnoses.csv')
    return dataframe_from_csv(diagnoses_file, index_col=None)
Пример #13
0
# Load the phenotype definitions. The context manager closes the file handle,
# and safe_load is used because yaml.load without an explicit Loader can
# construct arbitrary Python objects and is rejected by PyYAML >= 6.
# NOTE(review): assumes the definitions file contains plain YAML data only.
with open(args.phenotype_definitions, 'r') as definitions_file:
    phenotype_definitions = yaml.safe_load(definitions_file)
phenotypes = add_hcup_ccs_2015_groups(diagnoses, phenotype_definitions)

# Write the phenotype label matrix next to the other outputs.
make_phenotype_label_matrix(phenotypes, stays).to_csv(
    os.path.join(args.output_path, 'phenotype_labels.csv'),
    index=False,
    quoting=csv.QUOTE_NONNUMERIC)

if args.test:
    # Test mode: keep at most 1000 patients and only the first event table.
    # Sample WITHOUT replacement; sampling with replacement can pick the same
    # patient several times, which would duplicate that patient's stays in
    # the merge below.
    sample_size = min(1000, patients.shape[0])
    pat_idx = np.random.choice(patients.shape[0],
                               size=sample_size,
                               replace=False)
    patients = patients.iloc[pat_idx]
    stays = stays.merge(patients[['SUBJECT_ID']],
                        left_on='SUBJECT_ID',
                        right_on='SUBJECT_ID')
    args.event_tables = [args.event_tables[0]]
    print('Using only', stays.shape[0], 'stays and only', args.event_tables[0],
          'table')

# Split stays, diagnoses and events into per-subject outputs.
subjects = stays.SUBJECT_ID.unique()
break_up_stays_by_subject(stays, args.output_path, subjects=subjects)
break_up_diagnoses_by_subject(phenotypes, args.output_path, subjects=subjects)

# Optional ITEMID whitelist; None means no item filter is passed on.
items_to_keep = {
    int(itemid)
    for itemid in dataframe_from_csv(args.itemids_file)['ITEMID'].unique()
} if args.itemids_file else None

for table in args.event_tables:
    read_events_table_and_break_up_by_subject(args.mimic3_path,
                                              table,
                                              args.output_path,
                                              items_to_keep=items_to_keep,
                                              subjects_to_keep=subjects)