# --- Build the master ICU-stays table, derived label files, and per-subject splits ---
stays = add_age_to_icustays(stays)
stays = add_inunit_mortality_to_icustays(stays)
stays = add_inhospital_mortality_to_icustays(stays)
stays = filter_icustays_on_age(stays)
if args.verbose:
    print('REMOVE PATIENTS AGE < 18:', stays.ICUSTAY_ID.unique().shape[0], stays.HADM_ID.unique().shape[0],
          stays.SUBJECT_ID.unique().shape[0])

stays.to_csv(os.path.join(args.output_path, 'all_stays.csv'), index=False)
diagnoses = read_icd_diagnoses_table(args.mimic3_path)
diagnoses = filter_diagnoses_on_stays(diagnoses, stays)
diagnoses.to_csv(os.path.join(args.output_path, 'all_diagnoses.csv'), index=False)
count_icd_codes(diagnoses, output_path=os.path.join(args.output_path, 'diagnosis_counts.csv'))

# SafeLoader: yaml.load() without an explicit Loader is unsafe on untrusted
# input and raises TypeError on PyYAML >= 6.0; `with` closes the file handle.
with open(args.phenotype_definitions, 'r') as definitions_file:
    phenotype_definitions = yaml.load(definitions_file, Loader=yaml.SafeLoader)
phenotypes = add_hcup_ccs_2015_groups(diagnoses, phenotype_definitions)
make_phenotype_label_matrix(phenotypes, stays).to_csv(os.path.join(args.output_path, 'phenotype_labels.csv'),
                                                      index=False, quoting=csv.QUOTE_NONNUMERIC)

if args.test:
    # NOTE(review): np.random.choice defaults to replace=True, so the 1000
    # sampled row indices may contain duplicate patients — confirm intended.
    pat_idx = np.random.choice(patients.shape[0], size=1000)
    patients = patients.iloc[pat_idx]
    stays = stays.merge(patients[['SUBJECT_ID']], left_on='SUBJECT_ID', right_on='SUBJECT_ID')
    args.event_tables = [args.event_tables[0]]
    print('Using only', stays.shape[0], 'stays and only', args.event_tables[0], 'table')

subjects = stays.SUBJECT_ID.unique()
break_up_stays_by_subject(stays, args.output_path, subjects=subjects, verbose=args.verbose)
break_up_diagnoses_by_subject(phenotypes, args.output_path, subjects=subjects, verbose=args.verbose)
# Optional ITEMID whitelist for the event tables; None means keep everything.
items_to_keep = ({int(itemid) for itemid in dataframe_from_csv(args.itemids_file)['ITEMID'].unique()}
                 if args.itemids_file else None)
for table in args.event_tables:
    read_events_table_and_break_up_by_subject(args.mimic3_path, table, args.output_path, items_to_keep=items_to_keep,
# --- 示例#2 (Example #2) — scraped-snippet separator; the snippet above was
# truncated at this boundary, and the fragment below belonged to a truncated
# print(...) call from the next snippet; kept here as a comment:
#           stays.SUBJECT_ID.unique().shape[0])

# --- Duplicate of the extraction pipeline (alternate formatting of the same steps) ---
stays.to_csv(os.path.join(args.output_path, 'all_stays.csv'), index=False)
diagnoses = read_icd_diagnoses_table(args.mimic3_path)
diagnoses = filter_diagnoses_on_stays(diagnoses, stays)
diagnoses.to_csv(os.path.join(args.output_path, 'all_diagnoses.csv'),
                 index=False)
count_icd_codes(diagnoses,
                output_path=os.path.join(args.output_path,
                                         'diagnosis_counts.csv'))

# SafeLoader: yaml.load() without an explicit Loader is unsafe on untrusted
# input and raises TypeError on PyYAML >= 6.0; `with` closes the file handle.
with open(args.phenotype_definitions, 'r') as definitions_file:
    phenotype_definitions = yaml.load(definitions_file, Loader=yaml.SafeLoader)
phenotypes = add_hcup_ccs_2015_groups(diagnoses, phenotype_definitions)
make_phenotype_label_matrix(phenotypes,
                            stays).to_csv(os.path.join(args.output_path,
                                                       'phenotype_labels.csv'),
                                          index=False,
                                          quoting=csv.QUOTE_NONNUMERIC)

if args.test:
    # NOTE(review): np.random.choice defaults to replace=True, so the 1000
    # sampled row indices may contain duplicate patients — confirm intended.
    pat_idx = np.random.choice(patients.shape[0], size=1000)
    patients = patients.iloc[pat_idx]
    stays = stays.merge(patients[['SUBJECT_ID']],
                        left_on='SUBJECT_ID',
                        right_on='SUBJECT_ID')
    args.event_tables = [args.event_tables[0]]
    print('Using only', stays.shape[0], 'stays and only', args.event_tables[0],
          'table')

subjects = stays.SUBJECT_ID.unique()
# break_up_stays_by_subject(stays,   <- call truncated at the example boundary;
# kept as a comment so the file parses.
# --- 示例#3 (Example #3) — scraped-snippet separator ---
# --- Spark variant: build the diagnosis/phenotype tables as pandas frames and
# mirror them into Spark DataFrames ---
diagnoses_df = read_icd_diagnoses_table_df(args.mimic3_path, sqlContext)
# NOTE(review): `diagnoses` (pandas) is not assigned anywhere visible in this
# chunk before this call — confirm it is produced earlier in the file.
diagnoses = filter_diagnoses_on_stays(diagnoses, stays)
diagnoses_df = filter_diagnoses_on_stays_df(diagnoses_df, stays_df)
codes_df = count_icd_codes(diagnoses,
                           output_path=os.path.join(args.output_path,
                                                    'diagnosis_counts.csv'))

# SafeLoader: yaml.load() without an explicit Loader is unsafe on untrusted
# input and raises TypeError on PyYAML >= 6.0; `with` closes the file handle.
with open(args.phenotype_definitions, 'r') as definitions_file:
    phenotype_definitions = yaml.load(definitions_file, Loader=yaml.SafeLoader)
phenotypes = add_hcup_ccs_2015_groups(diagnoses, phenotype_definitions)
phenotypes_label_df = make_phenotype_label_matrix(phenotypes, stays)
# Mirror the pandas results into Spark DataFrames for downstream Spark stages.
phenotypes_df = spark.createDataFrame(phenotypes)
diagnoses_df = spark.createDataFrame(diagnoses)
codes_df = spark.createDataFrame(codes_df)

subjects = stays.SUBJECT_ID.unique()

# Timestamp for measuring the duration of the event-processing stage below.
start = time.time()