def main():
    """Smoke-test the mortality pipeline: extract, split by patient, normalize,
    then print the shapes of one training batch and the held-out test arrays.
    """
    from sklearn.model_selection import KFold
    from data_extraction.utils import normalize_data_mort as normalize_data
    from models import data_reader

    parser = argparse.ArgumentParser()
    args = parser.parse_args()
    config = Config(args)

    df_data = data_extraction_mortality(config)
    print(f"Data shape: {df_data.shape}")
    print(f"Unique PIDs: {len(df_data.patientunitstayid.unique())}")

    # Split on patient IDs (not rows) so no stay leaks into both sets.
    all_idx = np.array(list(df_data['patientunitstayid'].unique()))
    skf = KFold(n_splits=config.k_fold)
    train_idx, test_idx = next(skf.split(all_idx))
    train_idx = all_idx[train_idx]
    test_idx = all_idx[test_idx]

    train, test = normalize_data(config, df_data, train_idx, test_idx)
    train_gen, train_steps, (X_test, Y_test), max_time_step_test \
        = data_reader.read_data(config, train, test, val=False)

    batch = next(train_gen)
    print("Printing training data shape:")
    for b in batch:
        print("\t len", len(b))
        for b_i in b[:10]:
            print("\t\t", b_i.shape)
        if len(b) > 10:
            print("\t\t...")
    print("Test data shape:")
    print(X_test.shape, Y_test.shape)
def train_phen(config):
    """Train and evaluate the phenotyping model with k-fold CV over patients.

    Parameters
    ----------
    config : Config
        Run configuration (k_fold, epochs, num/cat/ohe flags, n_cat_class,
        col_phe label names).

    Returns
    -------
    dict
        Per-label AUROC mean and std across folds.
    """
    from data_extraction.utils import normalize_data_phe as normalize_data
    from data_extraction.data_extraction_phenotyping import data_extraction_phenotyping

    df_data, df_label = data_extraction_phenotyping(config)
    # Attach the 25 phenotype labels to the time-series rows by stay ID.
    df = df_data.merge(df_label.drop(columns=['itemoffset']),
                       on='patientunitstayid')
    all_idx = np.array(list(df['patientunitstayid'].unique()))

    phen_aucs = []
    skf = KFold(n_splits=config.k_fold)
    for fold_id, (train_idx, test_idx) in enumerate(skf.split(all_idx)):
        print('Running Fold {}...'.format(fold_id + 1))
        train_idx = all_idx[train_idx]
        test_idx = all_idx[test_idx]
        train, test = normalize_data(config, df, train_idx, test_idx)
        train_gen, train_steps, (X_test, Y_test), max_time_step_test = \
            data_reader.read_data(config, train, test, val=False)

        model = network(config, 200, output_dim=25, activation='sigmoid')
        model.fit_generator(train_gen, steps_per_epoch=25,
                            epochs=config.epochs, verbose=1, shuffle=True)

        if config.num and config.cat:
            if config.ohe:
                # First 7 channels hold categorical codes, the rest numerical.
                x_cat = X_test[:, :, :7].astype(int)
                x_nc = X_test[:, :, 7:]
                print("Please wait, One-hot encoding ...")
                # Multi-hot encode categorical codes per time step.
                # Note: the original also allocated an unused `one_hot` buffer
                # with the removed NumPy alias `np.int` (AttributeError on
                # NumPy >= 1.24); the dead allocation is dropped here.
                # TODO: replace np.eye with a faster encoder.
                x_cat = (np.eye(config.n_cat_class)[x_cat].sum(2) > 0).astype(int)
                probas_phen = model.predict([x_nc, x_cat])
            else:
                probas_phen = model.predict([X_test[:, :, 7:], X_test[:, :, :7]])
        elif config.cat:
            if config.ohe:
                x_cat = X_test[:, :, :].astype(int)
                print("Please wait, One-hot encoding ...")
                x_cat = (np.eye(config.n_cat_class)[x_cat].sum(2) > 0).astype(int)
                probas_phen = model.predict([x_cat])
            else:
                probas_phen = model.predict([X_test])
        else:
            probas_phen = model.predict([X_test])

        phen_auc = evaluation.multi_label_metrics(Y_test, probas_phen)
        phen_aucs.append(phen_auc)

    aucs_mean = np.mean(np.array(phen_aucs), axis=0)
    aucs_std = np.std(np.array(phen_aucs), axis=0)
    for i in range(len(config.col_phe)):
        print("{0} : {1:0.3f} +- {2:0.3f}".format(config.col_phe[i],
                                                  aucs_mean[i], aucs_std[i]))
    return {'AUROC mean': aucs_mean, 'AUROC std': aucs_std}
def rlos(config):
    """Train and evaluate the baseline RLOS (remaining length of stay)
    regressor with 2-fold CV over patients; prints R2/MSE/MAE summaries.

    Only the ``config.num and config.cat`` combination is implemented
    (the other branches existed only as commented-out dead code); any
    other combination now raises ValueError instead of crashing with a
    NameError on unbound locals.
    """
    from data_extraction.utils import normalize_data_rlos as normalize_data
    from data_extraction.data_extraction_rlos import data_extraction_rlos

    df_data = data_extraction_rlos(config)
    all_idx = np.array(list(df_data['patientunitstayid'].unique()))

    r2s = []
    mses = []
    maes = []
    # NOTE(review): hard-coded 2 folds, unlike the config.k_fold used by the
    # other tasks — confirm this is intentional.
    skf = KFold(n_splits=2)
    for train_idx, test_idx in skf.split(all_idx):
        train_idx = all_idx[train_idx]
        test_idx = all_idx[test_idx]

        if not (config.num and config.cat):
            raise ValueError(
                "rlos() only supports config.num and config.cat both enabled")

        train, test = normalize_data(config, df_data, train_idx, test_idx,
                                     cat=config.cat, num=config.num)
        train_gen, train_steps, (X_test, Y_test), max_time_step_test = \
            data_reader.data_reader_for_model_rlos(
                config, train, test, numerical=config.num,
                categorical=config.cat, batch_size=1024, val=False)

        model = base_dec(input_size=200, catg_len=429, embedding_dim=5,
                         numerical=config.num, categorical=config.cat,
                         ann=config.ann)
        model.fit_generator(train_gen, steps_per_epoch=25,
                            epochs=config.epochs, verbose=1, shuffle=True)

        # First 7 channels are categorical codes, the rest numerical.
        probas_rlos = model.predict([X_test[:, :, 7:], X_test[:, :, :7]])
        r2, mse, mae = evaluation.regression_metrics(Y_test, probas_rlos,
                                                     max_time_step_test)
        r2s.append(r2)
        mses.append(mse)
        maes.append(mae)

    meanr2s = np.mean(r2s)
    meanmses = np.mean(mses)
    meanmaes = np.mean(maes)
    stdr2s = np.std(r2s)
    stdmses = np.std(mses)
    stdmaes = np.std(maes)
    print("===========================RLOS=============================")
    print("R2 total: {0:0.3f} +- {1:0.3f} ".format(meanr2s, stdr2s))
    print("MSE total: {0:0.3f} +- {1:0.3f}".format(meanmses, stdmses))
    print("MAE total:{0:0.3f} +- {1:0.3f}".format(meanmaes, stdmaes))
def _get_bench_data(cfg):
    """Extract and normalize mortality data, returning the held-out test
    split ``(X_test, Y_test)`` from the first k-fold split.

    Bug fix: the original ignored its ``cfg`` parameter and read a global
    ``bm_config`` instead; all lookups now go through ``cfg``.
    """
    df_data = data_extraction_mortality(cfg)
    all_idx = np.array(list(df_data['patientunitstayid'].unique()))
    skf = KFold(n_splits=cfg.k_fold)
    folds = skf.split(all_idx)
    # Only the first fold is needed for benchmark data.
    train_idx, test_idx = next(folds)
    train_idx = all_idx[train_idx]
    test_idx = all_idx[test_idx]
    train, test = normalize_data(cfg, df_data, train_idx, test_idx)
    _, _, (X_test, Y_test), _ = data_reader.read_data(cfg, train, test,
                                                      val=False)
    return X_test, Y_test
def train_rlos(config):
    """Train and evaluate the RLOS regressor with k-fold CV over patients.

    Parameters
    ----------
    config : Config
        Run configuration (k_fold, epochs, num/cat/ohe flags, n_cat_class).

    Returns
    -------
    dict
        Mean and std of R2, MSE and MAE across folds.
    """
    from data_extraction.utils import normalize_data_rlos as normalize_data
    from data_extraction.data_extraction_rlos import data_extraction_rlos

    df_data = data_extraction_rlos(config)
    all_idx = np.array(list(df_data['patientunitstayid'].unique()))

    r2s = []
    mses = []
    maes = []
    skf = KFold(n_splits=config.k_fold)
    for fold_id, (train_idx, test_idx) in enumerate(skf.split(all_idx)):
        print('Running Fold {}...'.format(fold_id + 1))
        train_idx = all_idx[train_idx]
        test_idx = all_idx[test_idx]
        train, test = normalize_data(config, df_data, train_idx, test_idx)
        train_gen, train_steps, (X_test, Y_test), max_time_step_test = \
            data_reader.read_data(config, train, test, val=False)

        model = network(config, 200, output_dim=1, activation='relu')
        model.fit_generator(train_gen, steps_per_epoch=25,
                            epochs=config.epochs, verbose=1, shuffle=True)

        if config.num and config.cat:
            if config.ohe:
                # First 7 channels hold categorical codes, the rest numerical.
                x_cat = X_test[:, :, :7].astype(int)
                x_nc = X_test[:, :, 7:]
                print("Please wait, One-hot encoding ...")
                # Multi-hot encode categorical codes per time step.  The
                # original's unused `one_hot` buffer (allocated with the
                # removed NumPy alias `np.int`) is dropped.
                # TODO: replace np.eye with a faster encoder.
                x_cat = (np.eye(config.n_cat_class)[x_cat].sum(2) > 0).astype(int)
                probas_rlos = model.predict([x_nc, x_cat])
            else:
                probas_rlos = model.predict(
                    [X_test[:, :, 7:], X_test[:, :, :7]])
        elif config.cat:
            if config.ohe:
                x_cat = X_test[:, :, :].astype(int)
                print("Please wait, One-hot encoding ...")
                x_cat = (np.eye(config.n_cat_class)[x_cat].sum(2) > 0).astype(int)
                probas_rlos = model.predict([x_cat])
            else:
                probas_rlos = model.predict([X_test])
        else:
            probas_rlos = model.predict([X_test])

        r2, mse, mae = evaluation.regression_metrics(Y_test, probas_rlos,
                                                     max_time_step_test)
        r2s.append(r2)
        mses.append(mse)
        maes.append(mae)

    meanr2s = np.mean(r2s)
    meanmses = np.mean(mses)
    meanmaes = np.mean(maes)
    stdr2s = np.std(r2s)
    stdmses = np.std(mses)
    stdmaes = np.std(maes)
    print("===========================RLOS=============================")
    print("R2 total: {0:0.3f} +- {1:0.3f} ".format(meanr2s, stdr2s))
    print("MSE total: {0:0.3f} +- {1:0.3f}".format(meanmses, stdmses))
    print("MAE total:{0:0.3f} +- {1:0.3f}".format(meanmaes, stdmaes))
    return {
        'R2 mean': meanr2s,
        'R2 std': stdr2s,
        'MSE mean': meanmses,
        'MSE std': stdmses,
        'MAE mean': meanmaes,
        'MAE std': stdmaes
    }
def train_dec(config):
    """Train and evaluate the decompensation classifier with k-fold CV.

    Parameters
    ----------
    config : Config
        Run configuration (k_fold, epochs, num/cat/ohe flags, n_cat_class).

    Returns
    -------
    dict
        Mean AUC, STD, PPV, NPV, AUCPR, MCC and Spec@90 across folds.
    """
    from data_extraction.utils import normalize_data_dec as normalize_data
    from data_extraction.data_extraction_decompensation import \
        data_extraction_decompensation as extract_data

    df_data = extract_data(config)

    tprs_dec = []
    aucs_dec = []
    mean_fpr_dec = np.linspace(0, 1, 100)
    ppvs_dec = []
    npvs_dec = []
    aucprs_dec = []
    mccs_dec = []
    specat90_dec = []

    all_idx = np.array(list(df_data['patientunitstayid'].unique()))
    skf = KFold(n_splits=config.k_fold)
    for fold_id, (train_idx, test_idx) in enumerate(skf.split(all_idx)):
        print('Running Fold {}...'.format(fold_id + 1))
        train_idx = all_idx[train_idx]
        test_idx = all_idx[test_idx]
        train, test = normalize_data(config, df_data, train_idx, test_idx)
        train_gen, train_steps, (X_test, Y_test), max_time_step_test = \
            data_reader.read_data(config, train, test, val=False)

        model = network(config, 200, output_dim=1, activation='sigmoid')
        model.fit_generator(train_gen, steps_per_epoch=25,
                            epochs=config.epochs, verbose=1, shuffle=True)

        if config.num and config.cat:
            if config.ohe:
                # First 7 channels hold categorical codes, the rest numerical.
                x_cat = X_test[:, :, :7].astype(int)
                x_nc = X_test[:, :, 7:]
                print("Please wait, One-hot encoding ...")
                # Multi-hot encode categorical codes per time step.  The
                # original's unused `one_hot` buffer (allocated with the
                # removed NumPy alias `np.int`) is dropped.
                # TODO: replace np.eye with a faster encoder.
                x_cat = (np.eye(config.n_cat_class)[x_cat].sum(2) > 0).astype(int)
                probas_dec = model.predict([x_nc, x_cat])
            else:
                probas_dec = model.predict(
                    [X_test[:, :, 7:], X_test[:, :, :7]])
        elif config.cat:
            if config.ohe:
                x_cat = X_test[:, :, :].astype(int)
                print("Please wait, One-hot encoding ...")
                x_cat = (np.eye(config.n_cat_class)[x_cat].sum(2) > 0).astype(int)
                probas_dec = model.predict([x_cat])
            else:
                probas_dec = model.predict([X_test])
        else:
            probas_dec = model.predict([X_test])

        # evaluation.decompensation_metrics flattens/aligns labels and
        # predictions by the per-stay time-step counts before scoring.
        Y_test, probas_dec = evaluation.decompensation_metrics(
            Y_test, probas_dec, max_time_step_test)

        fpr_dec, tpr_dec, thresholds = roc_curve(Y_test, probas_dec)
        # Specificity at the first operating point with >= 90% sensitivity.
        specat90_dec.append(1 - fpr_dec[tpr_dec >= 0.90][0])
        tprs_dec.append(interp(mean_fpr_dec, fpr_dec, tpr_dec))
        tprs_dec[-1][0] = 0.0
        aucs_dec.append(auc(fpr_dec, tpr_dec))

        TN, FP, FN, TP = confusion_matrix(Y_test, probas_dec.round()).ravel()
        ppvs_dec.append(TP / (TP + FP))
        npvs_dec.append(TN / (TN + FN))
        aucprs_dec.append(average_precision_score(Y_test, probas_dec))
        mccs_dec.append(matthews_corrcoef(Y_test, probas_dec.round()))

    # Macro-average the ROC curve over folds for the headline AUC.
    mean_tpr_dec = np.mean(tprs_dec, axis=0)
    mean_tpr_dec[-1] = 1.0
    mean_auc_dec = auc(mean_fpr_dec, mean_tpr_dec)
    std_auc_dec = np.std(aucs_dec)

    print("====================Decompensation================")
    print("Mean AUC{0:0.3f} +- STD{1:0.3f}".format(mean_auc_dec, std_auc_dec))
    print("PPV: {0:0.3f}".format(np.mean(ppvs_dec)))
    print("NPV: {0:0.3f}".format(np.mean(npvs_dec)))
    print("AUCPR:{0:0.3f}".format(np.mean(aucprs_dec)))
    print("MCC: {0:0.3f}".format(np.mean(mccs_dec)))
    print("Spec@90: {0:0.3f}".format(np.mean(specat90_dec)))
    return {
        'Mean AUC': mean_auc_dec,
        'STD': std_auc_dec,
        'PPV': np.mean(ppvs_dec),
        'NPV': np.mean(npvs_dec),
        'AUCPR': np.mean(aucprs_dec),
        'MCC': np.mean(mccs_dec),
        'Spec@90': np.mean(specat90_dec)
    }
def mort(config):
    """Train and evaluate the baseline mortality classifier with 2-fold CV
    over patients; prints AUC, PPV, NPV, AUCPR, MCC and Spec@90 summaries.

    Parameters
    ----------
    config : Config
        Run configuration (epochs, num/cat/ohe/ann flags, n_cat_class).
    """
    tprs_mort = []
    aucs_mort = []
    mean_fpr_mort = np.linspace(0, 1, 100)
    ppvs_mort = []
    npvs_mort = []
    aucprs_mort = []
    mccs_mort = []
    specat90_mort = []

    from data_extraction.data_extraction_mortality import data_extraction_mortality
    from data_extraction.utils import normalize_data_mort as normalize_data

    df_data = data_extraction_mortality(config)
    all_idx = np.array(list(df_data['patientunitstayid'].unique()))

    # NOTE(review): hard-coded 2 folds, unlike the config.k_fold used by the
    # train_* tasks — confirm this is intentional.
    skf = KFold(n_splits=2)
    for train_idx, test_idx in skf.split(all_idx):
        train_idx = all_idx[train_idx]
        test_idx = all_idx[test_idx]

        if not (config.num or config.cat):
            # Original fell through to an unbound `probas_mort` here.
            raise ValueError(
                "mort() requires at least one of config.num or config.cat")

        # All three supported flag combinations use the same normalize/read
        # calls; the original repeated them verbatim per branch.
        train, test = normalize_data(config, df_data, train_idx, test_idx,
                                     cat=config.cat, num=config.num)
        train_gen, train_steps, (X_test, Y_test), max_time_step_test = \
            data_reader.data_reader_for_model_mort(
                config, train, test, numerical=config.num,
                categorical=config.cat, batch_size=1024, val=False)

        model = base_mort(input_size=200, numerical=config.num,
                          categorical=config.cat, ann=config.ann,
                          ohe=config.ohe)
        model.fit_generator(train_gen, steps_per_epoch=25,
                            epochs=config.epochs, verbose=1, shuffle=True)

        if config.num and config.cat:
            if config.ohe:
                # First 7 channels hold categorical codes, the rest numerical.
                x_cat = X_test[:, :, :7].astype(int)
                x_nc = X_test[:, :, 7:]
                print("Please wait, One-hot encoding ...")
                # Multi-hot encode categorical codes per time step.  Uses
                # config.n_cat_class for consistency with the train_* tasks
                # (original hard-coded 429 — confirm they agree).  The unused
                # `one_hot` buffer allocated with the removed NumPy alias
                # `np.int` is dropped.
                # TODO: replace np.eye with a faster encoder.
                x_cat = (np.eye(config.n_cat_class)[x_cat].sum(2) > 0).astype(int)
                probas_mort = model.predict([x_nc, x_cat])
            else:
                probas_mort = model.predict(
                    [X_test[:, :, 7:], X_test[:, :, :7]])
        else:
            # num-only or cat-only: the reader already produced a single
            # input tensor.
            probas_mort = model.predict([X_test])

        fpr_mort, tpr_mort, thresholds = roc_curve(Y_test, probas_mort)
        tprs_mort.append(interp(mean_fpr_mort, fpr_mort, tpr_mort))
        tprs_mort[-1][0] = 0.0
        aucs_mort.append(auc(fpr_mort, tpr_mort))

        TN, FP, FN, TP = confusion_matrix(Y_test, probas_mort.round()).ravel()
        ppvs_mort.append(TP / (TP + FP))
        npvs_mort.append(TN / (TN + FN))
        aucprs_mort.append(average_precision_score(Y_test, probas_mort))
        mccs_mort.append(matthews_corrcoef(Y_test, probas_mort.round()))
        # Specificity at the first operating point with >= 90% sensitivity.
        specat90_mort.append(1 - fpr_mort[tpr_mort >= 0.90][0])

    mean_tpr_mort = np.mean(tprs_mort, axis=0)
    mean_tpr_mort[-1] = 1.0
    mean_auc_mort = auc(mean_fpr_mort, mean_tpr_mort)
    std_auc_mort = np.std(aucs_mort)

    print("===========================Mortality=============================")
    print("Mean AUC {0:0.3f} +- STD{1:0.3f}".format(mean_auc_mort,
                                                    std_auc_mort))
    print("PPV: {0:0.3f}".format(np.mean(ppvs_mort)))
    print("NPV: {0:0.3f}".format(np.mean(npvs_mort)))
    print("AUCPR:{0:0.3f}".format(np.mean(aucprs_mort)))
    print("MCC: {0:0.3f}".format(np.mean(mccs_mort)))
    print("Spec@90: {0:0.3f}".format(np.mean(specat90_mort)))