def test_time(input_folder):
    """Run subgraph mining on each of the five folds at threshold '011'.

    A ``run.Experiment`` is created with ``<input_folder>/z`` and
    ``<input_folder>/dataset`` as its first two arguments.  For every fold
    the ``mimic_train_fold<k>.nel`` and ``mimic_fold<k>.nel`` paths under the
    experiment's ``cdn`` attribute are handed to ``subgraph_mining``.

    Despite its name, this function does not measure or report any timing
    itself.

    Args:
        input_folder: Base directory from which the two experiment paths
            are derived.
    """
    seed = 2222
    standardize_method = 'z'
    is_cz = False
    freq_list = [
        '001', '002', '003', '004', '005', '006', '008', '009', '01', '011',
    ]
    freq_to_trainFreq_map = {
        '001': '001',
        '002': '002',
        '003': '004',
        '004': '005',
        '005': '007',
        '006': '008',
        '008': '01',
        '009': '011',
        '01': '013',
        '011': '014',
    }
    nel_graph_length = 13

    e = run.Experiment(
        '%s/%s' % (input_folder, standardize_method),
        '%s/dataset' % (input_folder),
        seed, is_cz, standardize_method, freq_list,
        freq_to_trainFreq_map, nel_graph_length)

    for foldi in range(5):
        # Only the .nel files are consumed by subgraph_mining; the matching
        # .node paths were previously built here but never used.
        ftrnel = "%s/mimic_train_fold%d.nel" % (e.cdn, foldi)
        fnel = "%s/mimic_fold%d.nel" % (e.cdn, foldi)
        e.subgraph_mining(tr_nel=ftrnel, tr_te_nel=fnel,
                          freq_t='011', foldi=foldi)
def check_interpolation_and_subgraphs():
    """Mine subgraphs for fold 2 of the 'cz'-standardized MICE experiment.

    Output goes to a dedicated folder under
    ``../observer/check_interpolation_and_subgraphs``.  The train/test CSV
    names of the fold are printed for inspection.  The interpolation step
    itself is currently disabled (see the commented call below), so only
    ``subgraph_mining`` is executed.
    """
    # Fixed experiment settings.  An alternative that was tried earlier:
    # standardize_method = "z", is_cz = False.
    ft = 'raw'
    minp = 0.5
    minc = 0.6
    seed = 2222
    standardize_method = "cz"
    is_cz = True
    freq_list = [
        '001', '002', '003', '004', '005', '006', '008', '009', '01', '011',
    ]
    freq_to_trainFreq_map = {
        '001': '001',
        '002': '002',
        '003': '004',
        '004': '005',
        '005': '007',
        '006': '008',
        '008': '01',
        '009': '011',
        '01': '013',
        '011': '014',
    }
    nel_graph_length = 13

    # Folder that receives the files produced by this check.
    fout = '../observer/check_interpolation_and_subgraphs/seed%s_%s_mice_mp%s_mc%s_%s' % (
        seed, ft, minp, minc, standardize_method)
    cu.checkAndCreate(fout)

    # Folder passed to the experiment as its first argument.
    experiment_dir = '../data/seed%s/%s/mice/mp%s_mc%s/%s' % (
        seed, ft, minp, minc, standardize_method)
    cu.checkAndCreate(experiment_dir)
    dataset_dir = '../data/seed%s/%s/mice/mp%s_mc%s/dataset' % (
        seed, ft, minp, minc)

    e = run.Experiment(
        experiment_dir, dataset_dir, seed, is_cz, standardize_method,
        freq_list, freq_to_trainFreq_map, nel_graph_length)

    foldi = 2
    train = e.ftrain % (e.dataset_folder, foldi, e.standardize_method)
    test = e.ftest % (e.dataset_folder, foldi, e.standardize_method)
    print(train)
    print(test)

    ftrnel = "%s/mimic_train_fold%d.nel" % (fout, foldi)
    ftrnode = "%s/mimic_train_fold%d.node" % (fout, foldi)
    fnel = "%s/mimic_fold%d.nel" % (fout, foldi)
    fnode = "%s/mimic_fold%d.node" % (fout, foldi)

    # Disabled step; ftrnode / fnode above are only needed when it is enabled:
    # e.interpolation(trcsv=train, tecsv=test, ftrnel=ftrnel, ftrnode=ftrnode,
    #                 fnel=fnel, fnode=fnode)
    e.subgraph_mining(tr_nel=ftrnel, tr_te_nel=fnel, freq_t='011',
                      foldi=foldi, cfolder=fout)
def check_nmfClassify(input_folder, output_folder, isg, freq_t, nc, c, pl, cw,
                      ntestth):
    """Run ``nmfClassify`` on all five folds and print the mean AUC values.

    Prediction matrices are read from
    ``<e.cdn>/isg<isg>/same_freq_t/pt_sg_w``; the pickle path handed to
    ``nmfClassify`` lives under ``<output_folder>/isg<isg>/nmf_piks``.

    Args:
        input_folder: Base directory from which the two experiment paths
            (``<input_folder>/z`` and ``<input_folder>/dataset``) are derived.
        output_folder: Base directory used for the per-fold ``.pik`` path.
        isg: Integer used in the ``isg%d`` path components and forwarded to
            ``read_prediction_matrics``.
        freq_t: Frequency-threshold label forwarded to
            ``read_prediction_matrics`` and embedded in the ``.pik`` name.
        nc, c, pl, cw, ntestth: Forwarded unchanged to ``nmfClassify``.
    """
    seed = 2222
    standardize_method = 'z'
    is_cz = False
    freq_list = [
        '001', '002', '003', '004', '005', '006', '008', '009', '01', '011',
    ]
    freq_to_trainFreq_map = {
        '001': '001',
        '002': '002',
        '003': '004',
        '004': '005',
        '005': '007',
        '006': '008',
        '008': '01',
        '009': '011',
        '01': '013',
        '011': '014',
    }
    nel_graph_length = 13

    e = run.Experiment(
        '%s/%s' % (input_folder, standardize_method),
        '%s/dataset' % (input_folder),
        seed, is_cz, standardize_method, freq_list,
        freq_to_trainFreq_map, nel_graph_length)

    res_list = []
    for foldi in range(5):
        matrices_folder = '%s/isg%d/same_freq_t/pt_sg_w' % (e.cdn, isg)
        prediction_matrics = e.read_prediction_matrics(
            isg, freq_t, cfolder=matrices_folder)

        # Pick this fold's slice of every matrix before building the call.
        ptsg = prediction_matrics['ptsg'][foldi]
        ptwd = prediction_matrics['ptwd'][foldi]
        sgs = prediction_matrics['sgs'][foldi]
        pt = prediction_matrics['pt'][foldi]
        gt = prediction_matrics['gt'][foldi]
        pik_path = '%s/isg%d/nmf_piks/nmf_%s_fold%d_%d_ramdon0-again.pik' % (
            output_folder, isg, freq_t, foldi, nc)

        res = e.nmfClassify(ptsg, ptwd, sgs, pt, gt, pik_path,
                            ntestth, foldi, nc, c, pl, cw)
        res_list.append(res)

    (auc, tr_auc) = e.get_mean_auc(res_list)
    # Same text as the two-argument print statement: values joined by a space.
    print('%s %s' % (auc, tr_auc))
def run_best_model(cdn):
    """Run the whole pipeline under ``cdn`` with the fixed best settings.

    Stages, in order:

    1. Preprocessing: split ``<cdn>/alldata_readmit.csv`` into folds, split
       by feature type, then per fold impute (interpolation on the last 12h,
       then mean) and standardize into
       ``<cdn>/seed2222/raw/interp/mean/dataset``.
    2. Temporal model: per fold call ``interpolation``,
       ``get_freq_to_trainFreq_map``, and for every entry of
       ``moss_freq_threshold_list`` ``subgraph_mining`` + ``gen_pt_sg_files``.
    3. Baseline: extract last measurements, select features with ``rfe`` and
       print the AUC returned by ``lr``.
    4. Classification: per fold call ``nmfClassify_ob`` with the selected
       baseline features, print the mean AUCs, and pickle every fold's
       result (and its ``'c_pre_te'`` entry) into ``<e.cdn>/isg0/res``.

    All paths of stage 4 are derived from ``cdn`` / ``seed`` / ``e.cdn``.
    They used to be hard-coded to ``../data/seed2222/...``; for
    ``cdn == '../data'`` the resulting paths are unchanged.

    Args:
        cdn: Root data directory containing ``alldata_readmit.csv``.
    """
    ft = 'raw'
    seed = 2222
    standardize_method = 'z'
    is_cz = False

    # ------------------------------------------------------------------
    # 1. Preprocessing
    # ------------------------------------------------------------------
    seed_dir = '%s/seed%d' % (cdn, seed)
    split_prefix = '%s/alldata_readmit' % seed_dir
    raw_dir = '%s/raw' % seed_dir
    interp_dir = '%s/interp' % raw_dir
    dataset_dir = '%s/mean/dataset' % interp_dir
    last_measures_dir = '%s/mean/last_measures' % interp_dir
    last_measures_dataset_dir = '%s/dataset' % last_measures_dir

    cu.checkAndCreate(seed_dir)
    pp.split_nfolds('%s/alldata_readmit.csv' % cdn, split_prefix,
                    shuffle=True, seed=seed)
    pp.split_by_feature_type(cdn=seed_dir, fn_prefix=split_prefix)

    cu.checkAndCreate(interp_dir)
    cu.checkAndCreate(dataset_dir)
    for i in range(5):
        # Test file first, then train file, as in the original call order.
        for part in ('test', 'train'):
            pp.impute_by_interpolation_on_last12h(
                '%s/%s_fold%d.csv' % (raw_dir, part, i),
                '%s/%s_fold%d.csv' % (interp_dir, part, i),
                '%s/extrapolation_log_%s_fold%d.txt' % (interp_dir, part, i))
        pp.impute_by_mean(
            '%s/train_fold%d.csv' % (interp_dir, i),
            '%s/test_fold%d.csv' % (interp_dir, i),
            '%s/train_fold%d.csv' % (dataset_dir, i),
            '%s/test_fold%d.csv' % (dataset_dir, i))
        pp.standardize_data(
            '%s/train_fold%d.csv' % (dataset_dir, i),
            '%s/test_fold%d.csv' % (dataset_dir, i),
            '%s/train_fold%d_%s.csv' % (dataset_dir, i, standardize_method),
            '%s/test_fold%d_%s.csv' % (dataset_dir, i, standardize_method))

    # ------------------------------------------------------------------
    # 2. Temporal model
    # ------------------------------------------------------------------
    freq_list = ['011']
    freq_to_trainFreq_map = {'011': '014'}
    nel_graph_length = 13

    experiment_dir = '%s/seed%d/%s/interp/mean/%s' % (
        cdn, seed, ft, standardize_method)
    cu.checkAndCreate(experiment_dir)
    # NOTE(review): every other function in this file builds the experiment
    # via `run.Experiment`; only this one uses the `rn` name.  Confirm that
    # `rn` is imported and refers to the same module.
    e = rn.Experiment(
        experiment_dir,
        '%s/seed%d/%s/interp/mean/dataset' % (cdn, seed, ft),
        seed, is_cz, standardize_method, freq_list,
        freq_to_trainFreq_map, nel_graph_length)

    # Settings of the best model.
    isg = 0
    freq_t = '011'
    nc = 110
    c = 2
    pl = 'l1'
    cw = 'balanced'
    ntestth = 2

    isg_dir = '%s/isg%d' % (e.cdn, isg)
    res_dir = '%s/res' % isg_dir
    cu.checkAndCreate(isg_dir)
    cu.checkAndCreate('%s/pt_sg_w' % isg_dir)
    cu.checkAndCreate(res_dir)
    cu.checkAndCreate('%s/nmf_piks' % isg_dir)

    for foldi in range(5):
        train = e.ftrain % (e.dataset_folder, foldi, e.standardize_method)
        test = e.ftest % (e.dataset_folder, foldi, e.standardize_method)
        print(train)
        print(test)

        ftrnel = "%s/mimic_train_fold%d.nel" % (e.cdn, foldi)
        ftrnode = "%s/mimic_train_fold%d.node" % (e.cdn, foldi)
        fnel = "%s/mimic_fold%d.nel" % (e.cdn, foldi)
        fnode = "%s/mimic_fold%d.node" % (e.cdn, foldi)

        e.interpolation(trcsv=train, tecsv=test, ftrnel=ftrnel,
                        ftrnode=ftrnode, fnel=fnel, fnode=fnode)
        e.get_freq_to_trainFreq_map(foldi)

        # A separate loop name keeps the best-model `freq_t` above intact;
        # it is needed again in the classification stage.
        for mined_freq_t in e.moss_freq_threshold_list:
            e.subgraph_mining(tr_nel=ftrnel, tr_te_nel=fnel,
                              freq_t=mined_freq_t, foldi=foldi)
            e.gen_pt_sg_files(isg, mined_freq_t, foldi)

    # ------------------------------------------------------------------
    # 3. Baseline model
    # ------------------------------------------------------------------
    cu.checkAndCreate(last_measures_dataset_dir)
    for i in range(5):
        for part in ('train', 'test'):
            fold_name = '%s_fold%d_%s.csv' % (part, i, standardize_method)
            pp.get_last_measurements(
                '%s/%s' % (dataset_dir, fold_name),
                '%s/%s' % (last_measures_dataset_dir, fold_name))

    best_features = rfe(last_measures_dir, 50, standardize_method, 5,
                        'l1', 'balanced')
    print(best_features)
    # A previously recorded selection, kept for reference:
    # best_features = ['urineByHrByWeight', 'HCT', 'INR', 'Platelets', 'RBC',
    #     'DeliveredTidalVolume', 'PlateauPres', 'RAW', 'RSBI', 'mDBP', 'CV_HR',
    #     'Art_BE', 'Art_CO2', 'Art_PaCO2', 'Art_pH', 'Cl', 'Mg', 'Anticoagulant',
    #     'beta.Blocking_agent', 'Somatostatin_preparation', 'Vasodilating_agent',
    #     'AIDS', 'MetCarcinoma']

    baseline_auc = lr(last_measures_dir, standardize_method, 5,
                      'l1', 'balanced', 50)
    print('baseline AUC: %s' % baseline_auc)

    # ------------------------------------------------------------------
    # 4. NMF classification with the added baseline features
    # ------------------------------------------------------------------
    res_list = []
    for foldi in range(5):
        # Read the last-measurement files written in stage 3 (same `cdn`).
        fnaddtr = '%s/train_fold%d_%s.csv' % (
            last_measures_dataset_dir, foldi, standardize_method)
        fnaddte = '%s/test_fold%d_%s.csv' % (
            last_measures_dataset_dir, foldi, standardize_method)

        prediction_matrics = e.read_prediction_matrics(isg, freq_t)
        # Only the first element of the returned 4-tuple is used here.
        (res, _gt_te, _pt_te, _res_baseline) = e.nmfClassify_ob(
            prediction_matrics['ptsg'][foldi],
            prediction_matrics['ptwd'][foldi],
            prediction_matrics['sgs'][foldi],
            prediction_matrics['pt'][foldi],
            prediction_matrics['gt'][foldi],
            '%s/isg%d/nmf_piks/nmf_%s_fold%d_%d.pik' % (
                e.cdn, isg, freq_t, foldi, nc),
            ntestth, foldi, nc, c, pl, cw, fnaddtr, fnaddte, best_features)
        res_list.append(res)

    (auc, tr_auc) = e.get_mean_auc(res_list)
    # Same text as the two-argument print statement: values joined by a space.
    print('%s %s' % (auc, tr_auc))

    # Write into the `res` directory created above rather than a fixed path.
    for i, fold_res in enumerate(res_list):
        with open('%s/c_pre_te_fold%d' % (res_dir, i), 'wb') as f:
            pickle.dump(fold_res['c_pre_te'], f)
        with open('%s/res_fold%d' % (res_dir, i), 'wb') as f:
            pickle.dump(fold_res, f)
def get_res_and_oc(input_folder, output_folder):
    """Run ``nmfClassify`` on all five folds and print the mean AUC values.

    The per-fold ``'n_pre_te'`` entries are gathered in a local list, but
    nothing is returned or written by the visible code; ``output_folder`` is
    accepted and not used.

    Args:
        input_folder: Base directory from which the two experiment paths
            (``<input_folder>/z`` and ``<input_folder>/dataset``) are derived.
        output_folder: Currently unused.
    """
    seed = 2222
    standardize_method = 'z'
    is_cz = False
    freq_list = [
        '001', '002', '003', '004', '005', '006', '008', '009', '01', '011',
    ]
    freq_to_trainFreq_map = {
        '001': '001',
        '002': '002',
        '003': '004',
        '004': '005',
        '005': '007',
        '006': '008',
        '008': '01',
        '009': '011',
        '01': '013',
        '011': '014',
    }
    nel_graph_length = 13

    e = run.Experiment(
        '%s/%s' % (input_folder, standardize_method),
        '%s/dataset' % (input_folder),
        seed, is_cz, standardize_method, freq_list,
        freq_to_trainFreq_map, nel_graph_length)

    # Classifier settings.
    isg = 0
    freq_t = '011'
    nc = 110
    c = 2
    pl = 'l1'
    cw = 'balanced'
    ntestth = 2

    # Only needed by the disabled nmfClassify_ob variant described below.
    best_features = [
        'urineByHrByWeight', 'HCT', 'INR', 'Platelets', 'RBC',
        'DeliveredTidalVolume', 'PlateauPres', 'RAW', 'RSBI', 'mDBP', 'CV_HR',
        'Art_BE', 'Art_CO2', 'Art_PaCO2', 'Art_pH', 'Cl', 'Mg',
        'Anticoagulant', 'beta.Blocking_agent', 'Somatostatin_preparation',
        'Vasodilating_agent', 'AIDS', 'MetCarcinoma',
    ]

    # A disabled earlier variant ran e.nmfClassify_ob per fold with
    # train/test_fold%d_%s.csv files from
    # ../../readmission_risk_baseline/data/seed2222/raw/interp/mean/last_measures/dataset
    # plus best_features, and collected res, gt_te and res['n_pre_te'].

    res_list = []
    ob = []  # per-fold res['n_pre_te']; not read again in this function
    for foldi in range(5):
        prediction_matrics = e.read_prediction_matrics(isg, freq_t)

        ptsg = prediction_matrics['ptsg'][foldi]
        ptwd = prediction_matrics['ptwd'][foldi]
        sgs = prediction_matrics['sgs'][foldi]
        pt = prediction_matrics['pt'][foldi]
        gt = prediction_matrics['gt'][foldi]
        pik_path = '%s/isg%d/nmf_piks/nmf_%s_fold%d_%d.pik' % (
            e.cdn, isg, freq_t, foldi, nc)

        res = e.nmfClassify(ptsg, ptwd, sgs, pt, gt, pik_path,
                            ntestth, foldi, nc, c, pl, cw)
        res_list.append(res)
        ob.append(res['n_pre_te'])

    (auc, tr_auc) = e.get_mean_auc(res_list)
    # Same text as the two-argument print statement: values joined by a space.
    print('%s %s' % (auc, tr_auc))
def error_analysis(input_folder, output_folder):
    """Run ``nmfClassify_ob`` on five folds and pickle each ``'c_pre_te'``.

    Prints the mean AUC values over the folds, then writes one file
    ``<output_folder>/c_pre_te_fold<k>`` per fold result.  ``output_folder``
    is not created here (the ``checkAndCreate`` call for it is disabled).

    Args:
        input_folder: Base directory from which the two experiment paths
            (``<input_folder>/z`` and ``<input_folder>/dataset``) are derived.
        output_folder: Directory receiving the pickled per-fold entries.
    """
    # ft / minp / minc only appear in the disabled MICE experiment paths
    # ('../data/seed%s/%s/mice/mp%s_mc%s/...').
    ft = 'raw'
    minp = 0.5
    minc = 0.6
    seed = 2222
    standardize_method = 'z'
    is_cz = False
    freq_list = [
        '001', '002', '003', '004', '005', '006', '008', '009', '01', '011',
    ]
    freq_to_trainFreq_map = {
        '001': '001',
        '002': '002',
        '003': '004',
        '004': '005',
        '005': '007',
        '006': '008',
        '008': '01',
        '009': '011',
        '01': '013',
        '011': '014',
    }
    nel_graph_length = 13

    e = run.Experiment(
        '%s/%s' % (input_folder, standardize_method),
        '%s/dataset' % (input_folder),
        seed, is_cz, standardize_method, freq_list,
        freq_to_trainFreq_map, nel_graph_length)

    # NMF settings.  Disabled DirClassify alternative: isg = 0,
    # freq_t = '004', c = 1, pl = 'l1', cw = 'balanced', ntestth = 2.
    isg = 0
    freq_t = '011'
    nc = 110
    c = 2
    pl = 'l1'
    cw = 'balanced'
    ntestth = 2

    best_features = [
        'urineByHrByWeight', 'HCT', 'INR', 'Platelets', 'RBC',
        'DeliveredTidalVolume', 'PlateauPres', 'RAW', 'RSBI', 'mDBP', 'CV_HR',
        'Art_BE', 'Art_CO2', 'Art_PaCO2', 'Art_pH', 'Cl', 'Mg',
        'Anticoagulant', 'beta.Blocking_agent', 'Somatostatin_preparation',
        'Vasodilating_agent', 'AIDS', 'MetCarcinoma',
    ]

    res_list = []
    for foldi in range(5):
        fnaddtr = '../../readmission_risk_baseline/data/seed2222/raw/interp/mean/last_measures/dataset/train_fold%d_%s_t.csv' % (
            foldi, standardize_method)
        fnaddte = '../../readmission_risk_baseline/data/seed2222/raw/interp/mean/last_measures/dataset/test_fold%d_%s_t.csv' % (
            foldi, standardize_method)

        prediction_matrics = e.read_prediction_matrics(isg, freq_t)
        ptsg = prediction_matrics['ptsg'][foldi]
        ptwd = prediction_matrics['ptwd'][foldi]
        sgs = prediction_matrics['sgs'][foldi]
        pt = prediction_matrics['pt'][foldi]
        gt = prediction_matrics['gt'][foldi]
        pik_path = '%s/isg%d/nmf_piks/nmf_%s_fold%d_%d.pik' % (
            e.cdn, isg, freq_t, foldi, nc)

        # Disabled alternatives for this call: e.nmfClassify(...) without the
        # three trailing arguments, and e.dirClassify(ptsg, ptwd, sgs, pt, gt,
        # ntestth, foldi, c, pl, cw).
        (res, gt_te, pt_te, res_baseline) = e.nmfClassify_ob(
            ptsg, ptwd, sgs, pt, gt, pik_path,
            ntestth, foldi, nc, c, pl, cw, fnaddtr, fnaddte, best_features)
        res_list.append(res)

    (auc, tr_auc) = e.get_mean_auc(res_list)
    # Same text as the two-argument print statement: values joined by a space.
    print('%s %s' % (auc, tr_auc))

    # Disabled extras: collecting res_baseline per fold and pickling the list
    # to '%s/res_baseline_1170_list', and dumping gt_te / pt_te /
    # res['n_pre_te'] per fold next to the file written below.
    for i, fold_res in enumerate(res_list):
        with open('%s/c_pre_te_fold%d' % (output_folder, i), 'wb') as f:
            pickle.dump(fold_res['c_pre_te'], f)