def main():

    model_id = sys.argv[1]
    run_type = sys.argv[2]     # ALL or SUBSET
    mcmc_id = sys.argv[3]      # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[4]
    var_to_plot = sys.argv[5]

    assim_type = '_longadapted'

    cur_dir = os.getcwd() + '/'
    cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'

    files = glob.glob(cur_dir + cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter + '_*.cbr')
    pixels = list(set([file[-10:-6] for file in files]))

    txt_file_dir = cur_dir + '../'
    txt_filename = 'global_map_parallel_' + model_id + '_MCMC' + mcmc_id + '_' + n_iter + '_' + var_to_plot + '.txt'
    txt_file = open(txt_file_dir + txt_filename, 'w')
    for pixel in pixels:
        txt_file.write('python3 scripts/global_map_parallel.py %s %s %s %s %s %s\n' % (model_id, run_type, mcmc_id, n_iter, var_to_plot, pixel))
    txt_file.close()

    sh_file = open(txt_file_dir + txt_filename[:-3] + 'sh', 'w')
    autil.fill_in_sh(sh_file, array_size=len(pixels), n_hours=1, txt_file=txt_filename)

    return
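# ---------------------------------------------------------------------------
# NOTE (editor): a minimal sketch of the module-level imports the main()
# functions in this section appear to assume, inferred from the calls they make
# (sys.argv, os.getcwd, glob.glob, csv.writer, np.*, read_csv/read_pickle/
# DataFrame, sample, itertools.combinations, LinearSegmentedColormap).
# 'anlyz_utils' and 'readwritebinary' are hypothetical file names for the
# project's local helper modules referenced as 'autil' and 'rwb'.
import sys
import os
import glob
import csv
import itertools
from random import sample                              # assumed source of sample()

import numpy as np
from pandas import read_csv, read_pickle, DataFrame
from matplotlib.colors import LinearSegmentedColormap

import anlyz_utils as autil        # assumed local module: fill_in_sh, get_parnames, plot_map, ...
import readwritebinary as rwb      # assumed local module: read_cbf_file, read_cbr_file, readbinarymat, ...
# ---------------------------------------------------------------------------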
def main():

    # set run information to read
    model_id = sys.argv[1]
    mcmc_id = sys.argv[2]            # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[3]
    nbe_optimization = sys.argv[4]   # 'OFF' or 'ON'
    ens_size = 250
    assim_type = sys.argv[5]
    n_chains_agg = 4

    # set directories
    cur_dir = os.getcwd() + '/'
    misc_dir = cur_dir + '../../misc/'
    cbf_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    plot_dir = cur_dir + '../../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    # decide which tasks to perform
    find_rep_pixels = True
    agg_parameters = True
    submit_ic_opt = True
    submit_forward = False

    ############################## Identify and save representative pixels #################################################

    n_reps = 5
    if find_rep_pixels:
        # load globcover data
        gl = read_pickle(misc_dir + 'globcover_to_card.pkl')

        # load labels
        gl_lbls = list(read_csv(misc_dir + 'Globcover2009_Legend.csv')['Value'].values)
        n_classes = len(gl_lbls)
        print(gl_lbls)

        # load list of land pixels
        pixels = list(set([file[-8:-4] for file in glob.glob(cbf_dir + '*.cbf')]))
        pixels.sort()

        # open csv for save out
        f = open(misc_dir + 'globcover_fracs.csv', 'w')
        writer = csv.writer(f)
        writer.writerow([item for sublist in [['pixel'], gl_lbls] for item in sublist])

        # get list of average pft fractions by pixel
        av_fracs = np.ones((len(pixels), n_classes)) * np.nan
        types_present = []
        for pixel in pixels:
            ind = pixels.index(pixel)
            if np.mod(ind, 100) == 0:
                print(ind)

            # get lc information
            types_at_geos_pixel, counts_at_geos_pixel = gl.loc[gl['pixel'] == pixel]['types'].values[0][0], gl.loc[gl['pixel'] == pixel]['counts'].values[0][0]
            types_at_geos_pixel, counts_at_geos_pixel = remove_nodata_pixels(types_at_geos_pixel, counts_at_geos_pixel)
            types_at_geos_pixel, counts_at_geos_pixel = append_all_types(types_at_geos_pixel, counts_at_geos_pixel, gl_lbls)
            types_at_geos_pixel, counts_at_geos_pixel = merge_types(types_at_geos_pixel, counts_at_geos_pixel, 170, 160)
            types_at_geos_pixel, counts_at_geos_pixel = merge_types(types_at_geos_pixel, counts_at_geos_pixel, 180, 160)

            types_present.append(types_at_geos_pixel[counts_at_geos_pixel > 0])
            if np.sum(counts_at_geos_pixel) > 0:
                av_fracs[ind, :] = counts_at_geos_pixel / np.sum(counts_at_geos_pixel)  # average biome fraction across mstmip pixels within coarse pixel

            writer.writerow([item for sublist in [[pixel], av_fracs[ind, :]] for item in sublist])
            #plot_pie(av_fracs[ind], pixel, gl_lbls, autil.rowcol_to_latlon([pixel]), plot_dir+'pie/', 'gl')

        reps, mxs, mxdoms = find_rep(av_fracs, pixels, n_reps)
        plot_reps(mxs, mxdoms, gl_lbls, plot_dir + 'pie/', 'rep_pix_gl_merge170+180to160')

        rep_df = fill_df(gl_lbls, reps, mxs, mxdoms)
        #rep_df.to_pickle(misc_dir + 'rep_pixels_globcover.pkl')
        print(rep_df)
        f.close()

    ############################## Generate aggregated parameter sets ######################################################

    ic_inds = autil.get_inds_ic(model_id)
    conv_chains = read_pickle(cbr_dir + model_id + assim_type + '_ALL' + '_MCMC' + mcmc_id + '_' + n_iter + '_best_subset.pkl')
    conv_chains.columns = ['pixel', 'bestchains', 'conv']  # rename columns for easier access

    if agg_parameters:
        #f_pft = open(misc_dir + 'pft/par_preds/par_set_agg_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '.csv', 'w')
        #w_pft = csv.writer(f_pft)

        # load list of cbrs
        files = glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter + '_*.cbr')
        files.sort()

        # get aggregated parameter sets from representative pixels
        par_set_agg = []
        for pft in gl_lbls:
            print(pft)
            print('PFT: ' + str(pft))

            # isolate row in dataframe corresponding to given pft
            rep_df_pft = rep_df.loc[rep_df['pft'] == int(pft)]

            # get list of pixels that are dominant
            rep_pixels_pft = [rep_df_pft['reppix' + str(i)].values[0] for i in range(1, n_reps + 1)]
            doms = [rep_df_pft['reppix' + str(i) + 'fracdom'].values[0] for i in range(1, n_reps + 1)]
            pixels_dom = [pixel for pixel in rep_pixels_pft if doms[rep_pixels_pft.index(pixel)] == 1]

            if len(pixels_dom) > 0:
                par_set_agg.append(aggregate_parameter_sets(pixels_dom, files, parnames, ens_size, n_chains_agg, conv_chains))
            else:
                par_set_agg.append(np.ones((ens_size * n_chains_agg, len(parnames))) * np.nan)

            #w_pft.writerow(np.nanmedian(par_set_agg[gl_lbls.index(pft)], axis=0))
            #if np.sum(~np.isnan(par_set_agg[gl_lbls.index(pft)]))>0: autil.plot_par_histograms(par_set_agg[gl_lbls.index(pft)], parnames, savepath=plot_dir+'dists/', title='globcover_agg_PFT'+str(pft)+'_'+model_id+assim_type+'_'+mcmc_id+'_'+n_iter+'.pdf')

        #f_pft.close()

    ############################################################################################################################################
    ################################### copy cbfs and substitute pars for IC optimization ######################################################

    # set up cbfs for IC assimilation
    os.chdir(cbf_dir)
    cbf_files = glob.glob('*.cbf')
    cbf_files.sort()
    os.chdir(cur_dir + '/../')

    # set additional directories
    mdf_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_MDF/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_MDF/'
    runmodel_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_GENERAL/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_GENERAL/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbf_pft_ic_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '_pft_ic/' + model_id + '/'
    cbr_pft_dir = '../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_pft/' + model_id + '/'
    output_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
    output_pft_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '_pft/' + model_id + '/'

    if mcmc_id == '119':
        frac_save_out = str(int(int(n_iter) / 500))
    elif mcmc_id == '3':
        frac_save_out = str(int(int(n_iter) / 500 * 100))  # n_iterations / frac_save_out * 100 will be ensemble size

    par_set_csv = read_csv(misc_dir + 'pft/par_preds/par_set_agg_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '.csv', header=None).values

    if submit_ic_opt:
        txt_filename = 'pft_ic_assim_list_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '.txt'
        txt_file = open(txt_filename, 'w')
        for cbf_file in cbf_files:
            print(cbf_file)
            cbf_data = rwb.read_cbf_file(cbf_dir + cbf_file)
            cbf_pixel = cbf_file[-8:-4]

            if cbf_pixel in conv_chains['pixel'].values:
                for pft in gl_lbls:
                    if (int(pft) in types_present[pixels.index(cbf_pixel)]) & (~np.isnan(par_set_csv[gl_lbls.index(pft), :]).all()):
                        par_set_agg_cbf = np.copy(par_set_csv[gl_lbls.index(pft), :])

                        # re-transform bday, fday to proper range
                        par_set_agg_cbf[11] += 365.25
                        par_set_agg_cbf[14] += 365.25

                        parpriors = np.concatenate((par_set_agg_cbf, np.ones(50 - len(parnames)) * -9999.))
                        parpriorunc = np.concatenate((np.ones(len(parnames)) * 1.001, np.ones(50 - len(parnames)) * -9999.))
                        for ic_ind in ic_inds:
                            parpriors[ic_ind] = -9999.
                            parpriorunc[ic_ind] = -9999.
                        if nbe_optimization == 'ON':
                            parpriors[len(parnames) - 1] = -9999
                            parpriorunc[len(parnames) - 1] = -9999

                        cbf_data['PARPRIORS'] = parpriors.reshape(-1, 1)
                        cbf_data['PARPRIORUNC'] = parpriorunc.reshape(-1, 1)

                        f = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + '_PFT' + str(pft) + '_assim_' + cbf_pixel
                        #rwb.CARDAMOM_WRITE_BINARY_FILEFORMAT(cbf_data, cbf_pft_ic_dir + f + '.cbf')

                        txt_file.write('%sCARDAMOM_MDF.exe %s%s %s%s %s 0 %s 0.001 %s 1000' % (mdf_dir, cbf_pft_ic_dir[3:], f + '.cbf', cbr_pft_dir, f + '.cbr', n_iter, frac_save_out, mcmc_id))
                        txt_file.write('\n') if types_present[pixels.index(cbf_pixel)][-1] == int(pft) else txt_file.write(' && ')
        txt_file.close()

        sh_file = open(txt_filename[:-3] + 'sh', 'w')
        autil.fill_in_sh(sh_file, array_size=len(conv_chains['pixel'].values), n_hours=48, txt_file=txt_filename, combined=True)

    if submit_forward:
        txt_filename = 'pft_ic_forward_list_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '.txt'
        txt_file = open(txt_filename, 'w')
        for cbf_file in cbf_files:
            print(cbf_file)
            cbf_data = rwb.read_cbf_file(cbf_dir + cbf_file)
            cbf_pixel = cbf_file[-8:-4]

            if cbf_pixel in conv_chains['pixel'].values:
                for pft in gl_lbls:
                    if (int(pft) in types_present[pixels.index(cbf_pixel)]) & (~np.isnan(par_set_csv[gl_lbls.index(pft), :]).all()):
                        f = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + '_PFT' + str(pft) + '_assim_' + cbf_pixel

                        if len(glob.glob(cbr_pft_dir + f + '.cbr')) > 0:
                            cbr_assim = rwb.read_cbr_file(glob.glob(cbr_pft_dir + f + '.cbr')[0], {'nopars': len(parnames)})

                            ff = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + '_PFT' + str(pft) + '_forward_' + cbf_pixel
                            cbr_forward = par_set_csv[gl_lbls.index(pft), :]
                            for ic_ind in ic_inds:
                                cbr_forward[ic_ind] = np.nanmedian(cbr_assim[:, ic_ind])
                            cbr_forward = cbr_forward.reshape(1, len(parnames))
                            rwb.write_cbr_file(cbr_forward, cbr_pft_dir + ff + '.cbr')

                            txt_file.write('%sCARDAMOM_RUN_MODEL.exe %s%s %s%s %s%s %s%s %s%s %s%s' % (runmodel_dir, cbf_dir[3:], cbf_file, cbr_pft_dir, ff + '.cbr', output_pft_dir, 'fluxfile_' + ff + '.bin', output_pft_dir, 'poolfile_' + ff + '.bin', output_pft_dir, 'edcdfile_' + ff + '.bin', output_pft_dir, 'probfile_' + ff + '.bin'))
                            txt_file.write('\n') if types_present[pixels.index(cbf_pixel)][-1] == int(pft) else txt_file.write(' && ')
        txt_file.close()

        sh_file = open(txt_filename[:-3] + 'sh', 'w')
        autil.fill_in_sh(sh_file, array_size=len(conv_chains['pixel'].values), n_hours=1, txt_file=txt_filename, combined=True)

    return
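# NOTE (editor): 'merge_types' is called above (e.g. to fold Globcover classes
# 170 and 180 into 160) but is not defined in this section. A minimal sketch of
# the implied behaviour, assuming 'types' and 'counts' are parallel 1-D numpy
# arrays of class codes and pixel counts; this is an illustration, not the
# project's implementation.
def merge_types_sketch(types, counts, source_type, target_type):
    counts = counts.copy()
    src = np.where(types == source_type)[0]
    tgt = np.where(types == target_type)[0]
    if (len(src) > 0) and (len(tgt) > 0):
        counts[tgt[0]] += counts[src[0]]   # move the source class's count into the target class
        counts[src[0]] = 0                 # source class no longer contributes on its own
    return types, counts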
def main():

    # get specifications from the user
    model_id = sys.argv[1]
    run_type = sys.argv[2]            # ALL or SUBSET
    mcmc_id = sys.argv[3]             # 119 for normal, 3 for DEMCMC
    assim_type = '_p25adapted'
    nbe_optimization = sys.argv[4]    # 'OFF' or 'ON'
    n_iterations = sys.argv[5]
    runtime_assim = int(sys.argv[6])
    n_chains = int(sys.argv[7])
    separate_chains = sys.argv[8]     # 0 or 1
    chain_num = '_' + sys.argv[9] if int(separate_chains)==True else ''

    # set all directories
    cur_dir = os.getcwd()
    mdf_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_MDF/' if nbe_optimization=='OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_MDF/'
    runmodel_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_GENERAL/' if nbe_optimization=='OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_GENERAL/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf'+assim_type+'/' + model_id + '/'
    cbr_dir = '../../../../../scratch/users/cfamigli/cardamom/files/cbr'+assim_type+'/' + model_id + '/'
    output_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output'+assim_type+'/' + model_id + '/'

    # set number of ensembles to save out
    if mcmc_id=='119':
        frac_save_out = str(int(int(n_iterations)/500))
    elif mcmc_id=='3':
        frac_save_out = str(int(int(n_iterations)/500*100))  # n_iterations / frac_save_out * 100 will be ensemble size

    # select which pixels to submit
    os.chdir(cbf_dir)
    if run_type=='ALL':
        cbf_files = glob.glob('*.cbf')
    elif run_type=='SUBSET_RANDOM':
        cbf_files = sample(glob.glob('*.cbf'), 10)
    elif run_type=='SUBSET_INPUT':
        cbf_files = select_cbf_files(glob.glob('*.cbf'), ['3809','3524','2224','4170','1945','3813','4054','3264','1271','3457'])
    os.chdir(cur_dir + '/../')

    # create separate assimilation and forward submission files for each separate chain
    if int(separate_chains)==True:
        assim_txt_filename = 'assimilation_list_' + model_id + '_' + run_type + assim_type + '_MCMC' + mcmc_id + '_' + n_iterations + chain_num + '.txt'
        assim_txt_file = open(assim_txt_filename, 'w')

        forward_txt_filename = 'forward_list_' + model_id + '_' + run_type + assim_type + '_MCMC' + mcmc_id + '_' + n_iterations + chain_num + '.txt'
        forward_txt_file = open(forward_txt_filename, 'w')

        for cbf_file in cbf_files:
            for chain in range(1, n_chains+1):
                c = chain_num if n_chains==1 else '_'+str(chain)
                assim_txt_file.write('%sCARDAMOM_MDF.exe %s%s %s%s %s 0 %s 0.001 %s 1000\n' % (mdf_dir, cbf_dir[3:], cbf_file, cbr_dir, cbf_file[:-8]+'MCMC'+mcmc_id+'_'+n_iterations+'_'+cbf_file[-8:-4]+ c +'.cbr', n_iterations, frac_save_out, mcmc_id))
                forward_txt_file.write('%sCARDAMOM_RUN_MODEL.exe %s%s %s%s %s%s %s%s %s%s %s%s\n' % (runmodel_dir, cbf_dir[3:], cbf_file, cbr_dir, cbf_file[:-8]+'MCMC'+mcmc_id+'_'+n_iterations+'_'+cbf_file[-8:-4]+ c +'.cbr', output_dir, 'fluxfile_'+cbf_file[:-8]+'MCMC'+mcmc_id+'_'+n_iterations+'_'+cbf_file[-8:-4]+ c +'.bin', output_dir, 'poolfile_'+cbf_file[:-8]+'MCMC'+mcmc_id+'_'+n_iterations+'_'+cbf_file[-8:-4]+ c +'.bin', output_dir, 'edcdfile_'+cbf_file[:-8]+'MCMC'+mcmc_id+'_'+n_iterations+'_'+cbf_file[-8:-4]+ c +'.bin', output_dir, 'probfile_'+cbf_file[:-8]+'MCMC'+mcmc_id+'_'+n_iterations+'_'+cbf_file[-8:-4]+ c +'.bin'))

        assim_txt_file.close()
        forward_txt_file.close()

        assim_sh_file = open(assim_txt_filename[:-3] + 'sh', 'w')
        autil.fill_in_sh(assim_sh_file, array_size=len(cbf_files)*n_chains, n_hours=runtime_assim, txt_file=assim_txt_filename)

        forward_sh_file = open(forward_txt_filename[:-3] + 'sh', 'w')
        autil.fill_in_sh(forward_sh_file, array_size=len(cbf_files)*n_chains, n_hours=1, txt_file=forward_txt_filename)

    # create one combined submission file with all assimilation and forward commands for each pixel's chain on one line
    else:
        txt_filename = 'combined_assim_forward_list_' + model_id + '_' + run_type + assim_type + '_MCMC' + mcmc_id + '_' + n_iterations + '.txt'
        txt_file = open(txt_filename, 'w')

        for cbf_file in cbf_files:
            for chain in range(1, n_chains+1):
                c = chain_num if n_chains==1 else '_'+str(chain)
                txt_file.write('%sCARDAMOM_MDF.exe %s%s %s%s %s 0 %s 0.001 %s 1000' % (mdf_dir, cbf_dir[3:], cbf_file, cbr_dir, cbf_file[:-8]+'MCMC'+mcmc_id+'_'+n_iterations+'_'+cbf_file[-8:-4]+ c +'.cbr', n_iterations, frac_save_out, mcmc_id))
                txt_file.write(' && %sCARDAMOM_RUN_MODEL.exe %s%s %s%s %s%s %s%s %s%s %s%s' % (runmodel_dir, cbf_dir[3:], cbf_file, cbr_dir, cbf_file[:-8]+'MCMC'+mcmc_id+'_'+n_iterations+'_'+cbf_file[-8:-4]+ c +'.cbr', output_dir, 'fluxfile_'+cbf_file[:-8]+'MCMC'+mcmc_id+'_'+n_iterations+'_'+cbf_file[-8:-4]+ c +'.bin', output_dir, 'poolfile_'+cbf_file[:-8]+'MCMC'+mcmc_id+'_'+n_iterations+'_'+cbf_file[-8:-4]+ c +'.bin', output_dir, 'edcdfile_'+cbf_file[:-8]+'MCMC'+mcmc_id+'_'+n_iterations+'_'+cbf_file[-8:-4]+ c +'.bin', output_dir, 'probfile_'+cbf_file[:-8]+'MCMC'+mcmc_id+'_'+n_iterations+'_'+cbf_file[-8:-4]+ c +'.bin'))
                txt_file.write(' && ') if chain<n_chains else txt_file.write('\n')

        txt_file.close()
        sh_file = open(txt_filename[:-3] + 'sh', 'w')
        autil.fill_in_sh(sh_file, array_size=len(cbf_files), n_hours=runtime_assim, txt_file=txt_filename, combined=True)

    return
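# NOTE (editor): 'autil.fill_in_sh' is used throughout this section but not
# shown. Based on its arguments (an open .sh file handle, array_size, n_hours,
# txt_file, optional combined flag), it presumably writes a SLURM job-array
# wrapper that runs one line of the generated command list per array task. A
# hypothetical sketch, not the project's code:
def fill_in_sh_sketch(sh_file, array_size, n_hours, txt_file, combined=False):
    sh_file.write('#!/bin/bash\n')
    sh_file.write('#SBATCH --array=1-%i\n' % array_size)    # one array task per line of txt_file
    sh_file.write('#SBATCH --time=%i:00:00\n' % n_hours)    # wall-clock limit in hours
    # run the line of txt_file matching this task's array index; 'combined'
    # lines already chain assimilation and forward commands with '&&'
    sh_file.write('eval "$(sed -n ${SLURM_ARRAY_TASK_ID}p %s)"\n' % txt_file)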
def main():

    # set run information to read
    model_id = sys.argv[1]
    mcmc_id = sys.argv[2]            # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[3]
    nbe_optimization = sys.argv[4]   # 'OFF' or 'ON'
    ens_size = 500
    assim_type = sys.argv[5]

    # set directories
    cur_dir = os.getcwd() + '/'
    misc_dir = cur_dir + '../../misc/'
    cbf_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_opt_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    cbr_ef_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_ef/' + model_id + '/'
    cbr_pft_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_pft/' + model_id + '/'
    output_opt_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
    output_ef_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '_ef/' + model_id + '/'
    output_pft_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '_pft/' + model_id + '/'
    plot_dir = cur_dir + '../../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    # get list of cbfs
    os.chdir(cbf_dir)
    cbf_files = glob.glob('*.cbf')
    cbf_files.sort()
    pixel_lst = []
    os.chdir(cur_dir + '/../')

    # initialize lists for error maps
    card_unc, opt_obs_err, pft_obs_err, ef_obs_err, obs_std = np.zeros(len(cbf_files))*np.nan, np.zeros(len(cbf_files))*np.nan, np.zeros(len(cbf_files))*np.nan, np.zeros(len(cbf_files))*np.nan, np.zeros(len(cbf_files))*np.nan
    opt_pft_trend, opt_ef_trend, opt_pft_seas, opt_ef_seas, opt_mean, pft_mean, ef_mean = np.zeros(len(cbf_files))*np.nan, np.zeros(len(cbf_files))*np.nan, np.zeros(len(cbf_files))*np.nan, np.zeros(len(cbf_files))*np.nan, np.zeros(len(cbf_files))*np.nan, np.zeros(len(cbf_files))*np.nan, np.zeros(len(cbf_files))*np.nan
    pft_mean_within_opt_unc, ef_mean_within_opt_unc = np.zeros(len(cbf_files))*np.nan, np.zeros(len(cbf_files))*np.nan

    ################################################## iterate through pixels ##################################################
    ############################################################################################################################

    include_ef = True
    include_pft = True
    include_opt = True
    write_txt_sh_pft_rerun = True

    # initialize
    n_fluxes = autil.get_nofluxes_nopools_lma(model_id)[0]
    n_pools = autil.get_nofluxes_nopools_lma(model_id)[1]

    # load list of globcover labels
    gl_lbls = list(read_csv(misc_dir + 'Globcover2009_Legend.csv')['Value'].values)
    n_classes = len(gl_lbls)

    # load globcover csv for av_fracs determination
    gl_fracs = read_csv(misc_dir + 'globcover_fracs.csv', header=0)

    # load bestchains for cbr_files
    conv_chains = read_pickle(cbr_opt_dir + model_id + assim_type + '_ALL' + '_MCMC' + mcmc_id + '_' + n_iter + '_best_subset.pkl')
    conv_chains.columns = ['pixel', 'bestchains', 'conv']  # rename columns for easier access

    # create csv to track pft reruns
    pft_rerun_filename = 'pft_rerun_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '.csv'
    pft_rerun = open(misc_dir + pft_rerun_filename, 'w')
    w = csv.writer(pft_rerun)

    # run through all pixels
    for cbf_file in cbf_files:
        ind = cbf_files.index(cbf_file)
        pixel = cbf_file[-8:-4]
        pixel_lst.append(pixel)
        print(pixel)

        # read in fracs and types for pixel
        if int(pixel) in gl_fracs['pixel'].values:
            fracs_at_pixel = gl_fracs.loc[gl_fracs['pixel'] == int(pixel)].values[0][1:]
            types_at_pixel = get_types_at_pixel(gl_fracs, pixel)
        else:
            fracs_at_pixel = np.zeros(len(gl_lbls))
            types_at_pixel = []

        # read in cbf
        cbf_pixel = rwb.read_cbf_file(cbf_dir + cbf_file)
        nsteps = cbf_pixel['nodays']

        ################################################## get PFT forward runs ##################################################
        ##########################################################################################################################

        can_plot_pft = False
        if include_pft:
            pixel_rerun = []
            pft_spec = '5rp_'

            # initialize matrices to hold weighted average of fluxes and pools
            flux_pft_pixel = np.zeros((1, nsteps, n_fluxes))
            pool_pft_pixel = np.zeros((1, nsteps + 1, n_pools))
            #flux_pft_pixel = np.zeros((ens_size, nsteps, n_fluxes))
            #pool_pft_pixel = np.zeros((ens_size, nsteps+1, n_pools))

            # read all forward runs (each pft's run) for a given pixel
            print(types_at_pixel)
            for pft in types_at_pixel:
                suffix = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + '_PFT' + str(int(pft)) + '_forward_' + pixel + '.bin'  #cbf_file[:-8]+'MCMC'+mcmc_id+'_'+n_iter+'_PFT'+str(int(pft))+'_'+pixel+'.bin'
                if (len(glob.glob(output_pft_dir + 'fluxfile_' + suffix)) > 0) & (len(glob.glob(output_pft_dir + 'poolfile_' + suffix)) > 0):
                    print(str(int(pft)))
                    flux_pft = rwb.readbinarymat(output_pft_dir + 'fluxfile_' + suffix, [nsteps, n_fluxes])
                    pool_pft = rwb.readbinarymat(output_pft_dir + 'poolfile_' + suffix, [nsteps + 1, n_pools])
                    #autil.plot_general_timeseries(autil.get_output('NBE', model_id, flux_pft, pool_pft, cbr_data=[], lma_ind=autil.get_nofluxes_nopools_lma(model_id)[2]), 'NBE', cbf_pixel, plot_dir+'timeseries/pft/', model_id + '_MCMC'+mcmc_id + '_'+n_iter + '_' + pixel + '_'+str(int(pft))+'.png')

                    # add each flux and pool matrix (corresponding to each pft) according to pft fractions, as weighted average
                    flux_pft[np.isnan(flux_pft)] = 0.
                    pool_pft[np.isnan(pool_pft)] = 0.
                    if (flux_pft.shape[0] > 0) & (pool_pft.shape[0] > 0):
                        lbl_ind = gl_lbls.index(int(pft))
                        flux_pft_pixel += flux_pft * fracs_at_pixel[lbl_ind]
                        pool_pft_pixel += pool_pft * fracs_at_pixel[lbl_ind]
                        can_plot_pft = True
                    else:
                        pixel_rerun.append(pft)
                else:
                    pixel_rerun.append(pft)

            if len(pixel_rerun) > 0:
                w.writerow([pixel] + pixel_rerun)

        ################################################ get optimal forward runs ################################################
        ##########################################################################################################################

        can_plot_opt = False
        if include_opt:
            # get pixel's convergent chain numbers
            if pixel in conv_chains['pixel'].values:
                best_chains = conv_chains.loc[conv_chains['pixel'] == pixel]['bestchains'].values[0][1:]
                flux_opt, pool_opt = [], []

                # aggregate best chain outputs into one list
                for chain in best_chains:
                    suffix = cbf_file[:-8] + 'MCMC' + mcmc_id + '_' + n_iter + '_' + pixel + '_' + chain + '.bin'
                    if (len(glob.glob(output_opt_dir + 'fluxfile_' + suffix)) > 0) & (len(glob.glob(output_opt_dir + 'poolfile_' + suffix)) > 0):
                        flux_opt.append(rwb.readbinarymat(output_opt_dir + 'fluxfile_' + suffix, [nsteps, n_fluxes]))
                        pool_opt.append(rwb.readbinarymat(output_opt_dir + 'poolfile_' + suffix, [nsteps + 1, n_pools]))
                        can_plot_opt = True

                # stack list elements for plotting
                flux_opt = np.vstack(flux_opt)
                pool_opt = np.vstack(pool_opt)

        ################################################### get EF forward runs ###################################################
        ###########################################################################################################################

        can_plot_ef = False
        if include_ef:
            ef_spec = 'clipped_PLS_soilgrids_poolobs_rescaled_forward_'
            # if 'wpolys' in ef_spec: use '_MCMC'
            # else: use 'MCMC'
            suffix = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + '_' + ef_spec + pixel + '.bin'  #cbf_file[:-8]+'MCMC'+mcmc_id+'_'+n_iter+'_EF_'+pixel+'.bin'
            if (len(glob.glob(output_ef_dir + 'fluxfile_' + suffix)) > 0) & (len(glob.glob(output_ef_dir + 'poolfile_' + suffix)) > 0):
                flux_ef = rwb.readbinarymat(output_ef_dir + 'fluxfile_' + suffix, [nsteps, n_fluxes])
                pool_ef = rwb.readbinarymat(output_ef_dir + 'poolfile_' + suffix, [nsteps + 1, n_pools])
                can_plot_ef = True

        ##################################################### plot and compare ####################################################
        ###########################################################################################################################

        can_decompose = True if (can_plot_opt) & (can_plot_pft) & (can_plot_ef) else False

        # plot optimal and pft predictions together
        output_opt = autil.get_output('NBE', model_id, flux_opt, pool_opt, cbr_data=[], lma_ind=autil.get_nofluxes_nopools_lma(model_id)[2]) if (include_opt) & (can_plot_opt) else np.ones(nsteps) * np.nan
        output_pft = autil.get_output('NBE', model_id, flux_pft_pixel, pool_pft_pixel, cbr_data=[], lma_ind=autil.get_nofluxes_nopools_lma(model_id)[2]) if (include_pft) & (can_plot_pft) else np.ones(nsteps) * np.nan
        output_ef = autil.get_output('NBE', model_id, flux_ef, pool_ef, cbr_data=[], lma_ind=autil.get_nofluxes_nopools_lma(model_id)[2]) if (include_ef) & (can_plot_ef) else np.ones(nsteps) * np.nan

        card_unc[ind], opt_obs_err[ind], pft_obs_err[ind], ef_obs_err[ind], obs_std[ind] = autil.plot_opt_pft_ef_timeseries(output_opt, output_pft, output_ef, 'NBE', pixel, autil.rowcol_to_latlon([pixel]), cbf_pixel, err_v_obs=False, savepath=plot_dir + 'forward_compare/timeseries/' + model_id + '/', title=model_id + '_MCMC' + mcmc_id + '_' + n_iter + '_' + pft_spec + ef_spec + pixel + '.png')

        if can_decompose:
            opt_pft_trend[ind], opt_ef_trend[ind], opt_pft_seas[ind], opt_ef_seas[ind], opt_mean[ind], pft_mean[ind], ef_mean[ind], pft_mean_within_opt_unc[ind], ef_mean_within_opt_unc[ind] = timeseries_decompose(output_opt, output_pft, output_ef, pixel, savepath=plot_dir + 'forward_compare/decomp/' + model_id + '/', savename=model_id + '_MCMC' + mcmc_id + '_' + n_iter + '_' + pft_spec + ef_spec + pixel)

    # close csv for rerun tracking
    pft_rerun.close()

    # plot decomposition results
    plot_decomposed([opt_pft_trend, opt_ef_trend], [opt_pft_seas, opt_ef_seas], [opt_mean, pft_mean, ef_mean], [pft_mean_within_opt_unc, ef_mean_within_opt_unc], savepath=plot_dir + 'forward_compare/decomp/' + model_id + '/', savename=model_id + '_MCMC' + mcmc_id + '_' + n_iter + '_' + pft_spec + ef_spec)

    # plot error maps
    for data, plot_title, vmin, vmax in zip(
            [card_unc, opt_obs_err, pft_obs_err, ef_obs_err, obs_std, opt_obs_err/obs_std, pft_obs_err/obs_std, ef_obs_err/obs_std, pft_obs_err/obs_std - opt_obs_err/obs_std, ef_obs_err/obs_std - opt_obs_err/obs_std, pft_obs_err/obs_std - ef_obs_err/obs_std],
            ['opt_unc', 'opt_err', 'pft_err', 'ef_err', 'obs_std', 'norm_opt_err', 'norm_pft_err', 'norm_ef_err', 'norm_pft_minus_norm_opt_err', 'norm_ef_minus_norm_opt_err', 'norm_pft_minus_norm_ef_err'],
            [0., 0., 0., 0., 0., 0., 0., 0., -1., -1., -1.],
            [0.7, 0.7, 0.7, 0.7, 0., 2., 2., 2., 1., 1., 1.]):

        data_nonan, pixel_lst_nonan = remove_nan(data, pixel_lst)
        stipple = card_unc if (plot_title == 'ef_err') | (plot_title == 'pft_err') else None
        autil.plot_map(nrows=46, ncols=73, land_pixel_list=[file[-8:-4] for file in cbf_files], pixel_value_list=pixel_lst_nonan, value_list=data_nonan, vmin=vmin, vmax=vmax, cmap='bwr', savepath=plot_dir + 'forward_compare/maps/' + model_id + '/', savename=model_id + '_MCMC' + mcmc_id + '_' + n_iter + '_' + pft_spec + ef_spec + plot_title, stipple=stipple)  #vmax=np.nanpercentile(data_nonan, 90)

    # save errors for comparison analysis
    DataFrame(list(zip(pixel_lst, list(ef_obs_err / obs_std))), columns=['pixels', 'norm_mae']).to_pickle(misc_dir + 'mae_pkls/' + model_id + '_MCMC' + mcmc_id + '_' + n_iter + '_' + ef_spec + '.pkl')
    DataFrame(list(zip(pixel_lst, list(pft_obs_err / obs_std))), columns=['pixels', 'norm_mae']).to_pickle(misc_dir + 'mae_pkls/' + model_id + '_MCMC' + mcmc_id + '_' + n_iter + '_' + pft_spec + '.pkl')

    # plot discrete map showing best parameterization (lowest error) for each pixel
    '''best_param_nonan, pixel_lst_nonan = best_param_nonancol([opt_obs_err, pft_obs_err, ef_obs_err], pixel_lst)
    autil.plot_map(nrows=46, ncols=73, land_pixel_list=[file[-8:-4] for file in cbf_files], pixel_value_list=pixel_lst_nonan, value_list=best_param_nonan, cmap=LinearSegmentedColormap.from_list('mycmap', [(0, 'dodgerblue'), (0.5, 'orangered'), (1., 'limegreen')]), savepath=plot_dir+'forward_compare/maps/'+model_id+'/', savename=model_id+'_MCMC'+mcmc_id+'_'+n_iter+'_'+ef_spec+'best_param')'''

    best_param_nonan, pixel_lst_nonan = best_param_nonancol([pft_obs_err, ef_obs_err], pixel_lst)
    autil.plot_map(nrows=46, ncols=73, land_pixel_list=[file[-8:-4] for file in cbf_files], pixel_value_list=pixel_lst_nonan, value_list=best_param_nonan, cmap=LinearSegmentedColormap.from_list('mycmap', [(0, 'orangered'), (1., 'limegreen')]), savepath=plot_dir + 'forward_compare/maps/' + model_id + '/', savename=model_id + '_MCMC' + mcmc_id + '_' + n_iter + '_' + pft_spec + ef_spec + 'best_param')

    rgb_triplets = err_rgb_triplets([opt_obs_err, pft_obs_err, ef_obs_err], pixel_lst)
    autil.plot_map_rgb(nrows=46, ncols=73, land_pixel_list=[file[-8:-4] for file in cbf_files], pixel_value_list=pixel_lst, value_list=rgb_triplets, savepath=plot_dir + 'forward_compare/maps/' + model_id + '/', savename=model_id + '_MCMC' + mcmc_id + '_' + n_iter + '_' + pft_spec + ef_spec + 'rgb')

    ############################################### create resubmission for pft ###############################################
    ###########################################################################################################################

    if write_txt_sh_pft_rerun:
        # set additional directories
        mdf_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_MDF/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_MDF/'
        runmodel_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_GENERAL/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_GENERAL/'
        cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
        cbf_pft_ic_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '_pft_ic/' + model_id + '/'
        cbr_pft_dir = '../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_pft/' + model_id + '/'
        output_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
        output_pft_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '_pft/' + model_id + '/'

        if mcmc_id == '119':
            frac_save_out = str(int(int(n_iter) / 500))
        elif mcmc_id == '3':
            frac_save_out = str(int(int(n_iter) / 500 * 100))  # n_iterations / frac_save_out * 100 will be ensemble size

        # set up which files to rerun
        pft_rerun = read_csv(misc_dir + pft_rerun_filename, header=None, sep=',', names=['pixel'] + gl_lbls)

        txt_filename = 'pft_ic_combined_list_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '_rerun.txt'
        txt_file = open(txt_filename, 'w')
        cl_count, row_count = 1, 0
        for cbf_file in cbf_files:
            pixel = cbf_file[-8:-4]
            if int(pixel) in pft_rerun['pixel'].values:
                pixel_classes = pft_rerun.loc[pft_rerun['pixel'] == int(pixel)].values[0][1:]
                for cl in pixel_classes:
                    if ~np.isnan(cl):
                        f = cbf_file[:-9] + '_PFT' + str(int(cl)) + '_' + pixel
                        txt_file.write('%sCARDAMOM_MDF.exe %s%s %s%s %s 0 %s 0.001 %s 1000' % (mdf_dir, cbf_pft_ic_dir[3:], f + '.cbf', cbr_pft_dir, f + '.cbr', n_iter, frac_save_out, mcmc_id))
                        txt_file.write(' && %sCARDAMOM_RUN_MODEL.exe %s%s %s%s %s%s %s%s %s%s %s%s' % (runmodel_dir, cbf_pft_ic_dir[3:], f + '.cbf', cbr_pft_dir, f + '.cbr', output_pft_dir, 'fluxfile_' + f + '.bin', output_pft_dir, 'poolfile_' + f + '.bin', output_pft_dir, 'edcdfile_' + f + '.bin', output_pft_dir, 'probfile_' + f + '.bin'))

                        cl_count += 1
                        if np.mod(cl_count, 5) == 0:
                            txt_file.write('\n')
                            row_count += 1
                        else:
                            txt_file.write(' && ')
        txt_file.close()

        sh_file = open(txt_filename[:-3] + 'sh', 'w')
        autil.fill_in_sh(sh_file, array_size=row_count, n_hours=10, txt_file=txt_filename, combined=True)

    return
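# NOTE (editor): 'remove_nan' is used in the error-map loop above but is not
# defined in this section. A minimal sketch of the behaviour implied by its
# call signature (drop NaN values and the corresponding pixel ids), offered as
# an assumption rather than the project's implementation:
def remove_nan_sketch(data, pixel_lst):
    data = np.asarray(data, dtype=float)
    keep = ~np.isnan(data)                                   # mask of pixels with a defined value
    data_nonan = data[keep]
    pixel_lst_nonan = [p for p, k in zip(pixel_lst, keep) if k]
    return data_nonan, pixel_lst_nonan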
def main():

    # set run information to read
    model_id = sys.argv[1]
    mcmc_id = sys.argv[2]            # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[3]
    nbe_optimization = sys.argv[4]   # OFF or ON
    ens_size = 500
    assim_type = '_p25adapted'
    suffix = '_clipped_'

    if mcmc_id == '119':
        frac_save_out = str(int(int(n_iter) / 500))
        n_chains_agg = 4
    elif mcmc_id == '3':
        frac_save_out = str(int(int(n_iter) / 500 * 100))  # n_iterations / frac_save_out * 100 will be ensemble size
        n_chains_agg = 2

    # set directories
    cur_dir = os.getcwd() + '/'
    misc_dir = cur_dir + '/../../misc/'
    cbf_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    cbr_ef_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_ef/' + model_id + '/'
    plot_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    # choose which features to use
    include_soilgrids = True
    include_poolobs = True
    include_gl_fracs = False

    # choose which model formulation to use
    train_full_ensemble = False
    rescale = True
    include_interactions = False
    include_squares = False
    include_all_polys = False
    do_feature_selection = False
    do_PLS = True
    n_features_select = int(sys.argv[5])
    write_to_csv = False

    # choose which tasks to run
    opt_feature_select = True
    submit_ic_opt = True
    submit_forward = False

    ############################################################################################################################################
    ############################# develop and train EF models ###################################################################################

    # load list of land pixels
    pixels = list(set([file[-8:-4] for file in glob.glob(cbf_dir + '*.cbf')]))
    pixels.sort()

    # load list of cbrs
    cbr_files = glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter + '_*.cbr')

    # load bestchains for cbr_files
    conv_chains = read_pickle(cbr_dir + model_id + assim_type + '_ALL' + '_MCMC' + mcmc_id + '_' + n_iter + '_best_subset.pkl')
    conv_chains.columns = ['pixel', 'bestchains', 'conv']  # rename columns for easier access

    ic_inds = autil.get_inds_ic(model_id)  # get indices of initial condition parameters

    # load globcover csv for land cover regression comparison
    gl_fracs = read_csv(misc_dir + 'globcover_fracs.csv', header=0)
    n_features_gl = len(gl_fracs.columns) - 1
    suffix_gl = 'gl_'

    # get number of predictors
    n_features = (rwb.read_cbf_file(glob.glob(cbf_dir + '*.cbf')[0])['nomet'] - 3) * 2  # remove 3 corresponding to day number and CO2, multiply by 2 (mean and sd)

    if do_PLS:
        suffix += 'PLS_'

    if include_soilgrids:
        soilgrids = read_csv('../../misc/soilgrids_defined_pixels_manual.csv', header=0)
        n_soilgrids = len(soilgrids.columns) - 1
        n_features += n_soilgrids
        suffix += 'soilgrids_'

    if include_poolobs:
        n_poolobs = 4
        n_features += n_poolobs
        suffix += 'poolobs_'

    if include_gl_fracs:
        n_features += n_features_gl
        suffix += suffix_gl

    # fill X and Y
    n_regr_models = len(parnames)
    X = np.ones((len(pixels), n_features)) * np.nan                        # shape n_samples, n_features
    y = np.ones((n_regr_models, len(pixels))) * np.nan                     # shape n_pars, n_samples
    y_full_ens = np.ones((ens_size, n_regr_models, len(pixels))) * np.nan  # shape n_pars, n_samples

    X_gl = np.ones((len(pixels), n_features_gl)) * np.nan
    y_gl = np.ones((n_regr_models, len(pixels))) * np.nan

    for pixel in pixels:
        if (len(glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter + '_' + pixel + '*.cbr')) > 0) & (pixel in conv_chains['pixel'].values):
            if conv_chains.loc[conv_chains['pixel'] == pixel]['conv'].values[0] == 0:
                continue
            else:
                ind = pixels.index(pixel)
                print(pixel)

                # get met
                cbf_file = glob.glob(cbf_dir + '*' + pixel + '.cbf')[0]
                met = rwb.read_cbf_file(cbf_file)['MET']
                met = met[:, [1, 2, 3, 6, 7, 8]]  # don't use index 0, 5 (day numbers) or 4 (CO2)
                X_end = met.shape[1] * 2
                X[ind, :X_end] = np.concatenate((np.nanmean(met, axis=0), np.nanstd(met, axis=0)))
                #X[ind,:met.shape[1]*12] = fill_X_met_12mo(X[ind,:met.shape[1]*12], met)  #np.nanmean(met, axis=0)

                # append to X if include_soil_canopy_vars
                if include_soilgrids:
                    if (int(pixel) in soilgrids['pixel'].values):
                        X[ind, X_end:(X_end + n_soilgrids)] = soilgrids[soilgrids['pixel'] == int(pixel)].values[0][1:]
                        X_end = X_end + n_soilgrids

                if include_poolobs:
                    lai, agb, som = rwb.read_cbf_file(cbf_file)['OBS']['LAI'], rwb.read_cbf_file(cbf_file)['OBS']['ABGB'], rwb.read_cbf_file(cbf_file)['OBS']['SOM']
                    if (len(lai) > 0) & (len(agb) > 0) & (len(som) > 0):
                        X[ind, X_end:(X_end + n_poolobs)] = np.array([np.nanmean(lai[lai > 0]), np.nanstd(lai[lai > 0]), np.nanmean(agb[agb > 0]), np.nanmean(som[som > 0])])
                        X_end = X_end + n_poolobs

                if include_gl_fracs:
                    if (int(pixel) in gl_fracs['pixel'].values):
                        X[ind, X_end:(X_end + n_features_gl)] = gl_fracs.loc[gl_fracs['pixel'] == int(pixel)].values[0][1:]
                        X_end = X_end + n_features_gl

                # fill globcover X
                if int(pixel) in gl_fracs['pixel'].values:
                    X_gl[ind, :] = gl_fracs.loc[gl_fracs['pixel'] == int(pixel)].values[0][1:]

                # get parameter information
                # get pixel's convergent chain numbers
                best_chains = conv_chains.loc[conv_chains['pixel'] == pixel]['bestchains'].values[0][1:]
                print(best_chains)

                # aggregate bestchains from optimal posteriors
                cbr_data = []
                for chain in best_chains:
                    file = [i for i in cbr_files if pixel + '_' + chain + '.cbr' in i][0]
                    cbr_data.append(autil.modulus_Bday_Fday(rwb.read_cbr_file(file, {'nopars': len(parnames)}), parnames))
                    #cbr_data.append(rwb.read_cbr_file(file, {'nopars': len(parnames)}))
                cbr_data = np.vstack(cbr_data)

                y[:, ind] = np.nanmedian(cbr_data, axis=0)
                y_gl[:, ind] = np.nanmedian(cbr_data, axis=0)

                indices = np.random.choice(cbr_data.shape[0], ens_size, replace=False)  # only take a subset of cbr rows
                y_full_ens[:, :, ind] = cbr_data[indices, :]  #reshape_cbr(cbr_data, ens_size*n_chains_agg)

    if not train_full_ensemble:
        f_bic = open(misc_dir + 'env_filter_manual/fs/bic_fs' + suffix.partition('fs')[0] + model_id + '_MCMC' + mcmc_id + '_' + n_iter + assim_type + '.csv', 'a')
        w_bic = csv.writer(f_bic)

        # EF regressions
        reg_test_preds_list, card_test_preds_list, reg_train_preds_list, card_train_preds_list, pixels_r, suffix, k = run_regressions(X, y, pixels, rescale, include_interactions, include_squares, include_all_polys, do_feature_selection, do_PLS, write_to_csv, w_bic, n_features_select, suffix, ens_size, n_regr_models, n_features)
        f_bic.close()

        # globcover comparison
        '''gl_reg_test_preds_list, gl_card_test_preds_list, gl_reg_train_preds_list, gl_card_train_preds_list, gl_pixels_r, gl_suffix, gl_k = run_regressions(X_gl, y_gl, pixels, rescale, False, False, False, False, False, False, w_bic, n_features_select, suffix_gl, ens_size, n_regr_models, n_features_gl)'''

    else:
        suffix += 'full_ens_'
        icount = 0
        for i in sample(range(y_full_ens.shape[0]), 100):
            print(icount)
            rtest, ctest, rtrain, ctrain, pixels_r, suffix, k = run_regressions(X, y_full_ens[i, :, :], pixels, rescale, include_interactions, include_squares, include_all_polys, do_feature_selection, n_features_select, suffix, ens_size, n_regr_models, n_features)

            reg_test_preds_list = [np.nanmedian(ri, axis=0) for ri in rtest] if icount == 0 else [np.vstack((np.nanmedian(ri, axis=0), rfull)) for ri, rfull in zip(rtest, reg_test_preds_list)]
            card_test_preds_list = np.copy(ctest) if icount == 0 else [np.vstack((ci, cfull)) for ci, cfull in zip(ctest, card_test_preds_list)]
            reg_train_preds_list = [np.nanmedian(ri, axis=0) for ri in rtrain] if icount == 0 else [np.vstack((np.nanmedian(ri, axis=0), rfull)) for ri, rfull in zip(rtrain, reg_train_preds_list)]
            card_train_preds_list = np.copy(ctrain) if icount == 0 else [np.vstack((ci, cfull)) for ci, cfull in zip(ctrain, card_train_preds_list)]
            icount += 1

    # fill csv
    f_test = open(misc_dir + 'env_filter_manual/fs/fs_test' + suffix.partition('fs')[0] + model_id + '_MCMC' + mcmc_id + '_' + n_iter + assim_type + '.csv', 'a')
    wr_test = csv.writer(f_test)
    f_train = open(misc_dir + 'env_filter_manual/fs/fs_train' + suffix.partition('fs')[0] + model_id + '_MCMC' + mcmc_id + '_' + n_iter + assim_type + '.csv', 'a')
    wr_train = csv.writer(f_train)
    f_test_preds = open(misc_dir + 'env_filter_manual/par_preds/par_preds_test' + suffix + model_id + '_MCMC' + mcmc_id + '_' + n_iter + assim_type + '.csv', 'a')
    wr_test_preds = csv.writer(f_test_preds)
    f_train_preds = open(misc_dir + 'env_filter_manual/par_preds/par_preds_train' + suffix + model_id + '_MCMC' + mcmc_id + '_' + n_iter + assim_type + '.csv', 'a')
    wr_train_preds = csv.writer(f_train_preds)

    print('TEST:')
    #plot_scatter_test_pred(card_test_preds_list, reg_test_preds_list, k, pixels_r, parnames, wr_test, wr_test_preds, plot_dir+'env_filter/', 'par_preds_test'+suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)
    #plot_scatter_test_pred(gl_card_test_preds_list, gl_reg_test_preds_list, gl_k, gl_pixels_r, parnames, wr_test, wr_test_preds, plot_dir+'env_filter/', 'par_preds_test'+gl_suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)

    print('. . . . . \n\nTRAIN:')
    #plot_scatter_test_pred(card_train_preds_list, reg_train_preds_list, k, pixels_r, parnames, wr_train, wr_train_preds, plot_dir+'env_filter/', 'par_preds_train'+suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)
    #plot_scatter_test_pred(gl_card_train_preds_list, gl_reg_train_preds_list, gl_k, gl_pixels_r, parnames, wr_train, wr_train_preds, plot_dir+'env_filter/', 'par_preds_train'+gl_suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)

    f_test.close()
    f_train.close()
    f_test_preds.close()
    f_train_preds.close()

    ############################################################################################################################################
    ################################### find optimal number of features for each parameter #####################################################

    if opt_feature_select:
        test_rmse = read_csv(misc_dir + 'env_filter_manual/fs/fs_test' + suffix.partition('fs')[0] + model_id + '_MCMC' + mcmc_id + '_' + n_iter + assim_type + '.csv', header=None)
        test_rmse.columns = [item for sublist in [['n_features_select'], parnames] for item in sublist]
        test_rmse.sort_values('n_features_select')

        train_rmse = read_csv(misc_dir + 'env_filter_manual/fs/fs_train' + suffix.partition('fs')[0] + model_id + '_MCMC' + mcmc_id + '_' + n_iter + assim_type + '.csv', header=None)
        train_rmse.columns = [item for sublist in [['n_features_select'], parnames] for item in sublist]
        train_rmse.sort_values('n_features_select')

        x = test_rmse['n_features_select'].values
        opt_fs = plot_train_test(x, train_rmse, test_rmse, parnames, savepath=plot_dir + 'train_test/', savename=model_id + '_MCMC' + mcmc_id + suffix.partition('fs')[0], norm=False)
        opt_fs = plot_train_test(x, train_rmse, test_rmse, parnames, savepath=plot_dir + 'train_test/', savename=model_id + '_MCMC' + mcmc_id + suffix.partition('fs')[0], norm=True)
        print(opt_fs)

        '''bic_data = read_csv(misc_dir +'env_filter_manual/fs/bic_fs_soilgrids_poolobs_'+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type + '.csv', header=None)
        bic_data.columns = [item for sublist in [['n_features_select'],parnames] for item in sublist]
        bic_data.columns.sort_values('n_features_select')
        x = bic_data['n_features_select'].values
        opt_fs = plot_train_test(x, bic_data, bic_data*np.nan, parnames, savepath=plot_dir+'train_test/', savename='bic_'+model_id+'_MCMC'+mcmc_id+suffix.partition('fs')[0])
        print(opt_fs)'''

    ############################################################################################################################################
    ################################### copy cbfs and substitute pars for IC optimization ######################################################

    # set directories for CARDAMOM runs
    mdf_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_MDF/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_MDF/'
    runmodel_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_GENERAL/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_GENERAL/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbf_ef_ic_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '_ef_ic/' + model_id + '/'
    cbr_ef_dir = '../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_ef/' + model_id + '/'
    output_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
    output_ef_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '_ef/' + model_id + '/'

    # select which pixels to submit
    os.chdir(cbf_dir)
    cbf_files = glob.glob('*.cbf')
    cbf_files.sort()
    os.chdir(cur_dir + '/../')

    if submit_ic_opt:
        txt_filename = 'ef_ic_assim_list_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '.txt'
        txt_file = open(txt_filename, 'w')
        for cbf_file in cbf_files:
            print(cbf_file)
            cbf_data = rwb.read_cbf_file(cbf_dir + cbf_file)
            cbf_pixel = cbf_file[-8:-4]

            if cbf_pixel in pixels_r:
                parpriors = np.concatenate((retrieve_preds(cbf_pixel, opt_fs, suffix, misc_dir + 'env_filter_manual/par_preds/'), np.ones(50 - len(parnames)) * -9999.))
                parpriorunc = np.concatenate((np.ones(len(parnames)) * 1.001, np.ones(50 - len(parnames)) * -9999.))

                # except ICs
                for ic_ind in ic_inds:
                    parpriors[ic_ind] = -9999.
                    parpriorunc[ic_ind] = -9999.

                # except NBE unc
                if nbe_optimization == 'ON':
                    parpriors[len(parnames) - 1] = -9999.
                    parpriorunc[len(parnames) - 1] = -9999.

                cbf_data['PARPRIORS'] = parpriors.reshape(-1, 1)
                cbf_data['PARPRIORUNC'] = parpriorunc.reshape(-1, 1)

                fp = cbf_file[:-9] + suffix.partition('fs')[0] + cbf_pixel
                fa = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + suffix.partition('fs')[0] + 'assim_' + cbf_pixel

                rwb.CARDAMOM_WRITE_BINARY_FILEFORMAT(cbf_data, cbf_ef_ic_dir + fp + '.cbf')

                txt_file.write('%sCARDAMOM_MDF.exe %s%s %s%s %s 0 %s 0.001 %s 1000' % (mdf_dir, cbf_ef_ic_dir[3:], fp + '.cbf', cbr_ef_dir, fa + '.cbr', n_iter, frac_save_out, mcmc_id))
                txt_file.write('\n')
        txt_file.close()

        sh_file = open(txt_filename[:-3] + 'sh', 'w')
        autil.fill_in_sh(sh_file, array_size=len(pixels_r), n_hours=6, txt_file=txt_filename, combined=False)

    if submit_forward:
        txt_filename = 'ef_ic_forward_list_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '.txt'
        txt_file = open(txt_filename, 'w')
        for cbf_file in cbf_files:
            print(cbf_file)
            cbf_data = rwb.read_cbf_file(cbf_dir + cbf_file)
            cbf_pixel = cbf_file[-8:-4]

            if cbf_pixel in pixels_r:
                fa = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + suffix.partition('fs')[0] + 'assim_' + cbf_pixel
                cbr_assim = rwb.read_cbr_file(glob.glob(cbr_ef_dir + fa + '.cbr')[0], {'nopars': len(parnames)})

                ff = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + suffix.partition('fs')[0] + 'forward_' + cbf_pixel
                cbr_forward = retrieve_preds(cbf_pixel, opt_fs, suffix, misc_dir + 'env_filter_manual/par_preds/')
                for ic_ind in ic_inds:
                    cbr_forward[ic_ind] = np.nanmedian(cbr_assim[:, ic_ind])
                cbr_forward = cbr_forward.reshape(1, len(parnames))
                rwb.write_cbr_file(cbr_forward, cbr_ef_dir + ff + '.cbr')

                txt_file.write('%sCARDAMOM_RUN_MODEL.exe %s%s %s%s %s%s %s%s %s%s %s%s' % (runmodel_dir, cbf_dir[3:], cbf_file, cbr_ef_dir, ff + '.cbr', output_ef_dir, 'fluxfile_' + ff + '.bin', output_ef_dir, 'poolfile_' + ff + '.bin', output_ef_dir, 'edcdfile_' + ff + '.bin', output_ef_dir, 'probfile_' + ff + '.bin'))
                txt_file.write('\n')
        txt_file.close()

        sh_file = open(txt_filename[:-3] + 'sh', 'w')
        autil.fill_in_sh(sh_file, array_size=len(pixels_r), n_hours=1, txt_file=txt_filename, combined=False)

    return
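# NOTE (editor): 'run_regressions' is not defined in this section. When do_PLS
# is True it presumably fits a partial-least-squares model per parameter from
# the pixel-level feature matrix X to the posterior medians in y. A minimal,
# self-contained sketch of that idea using scikit-learn; an assumption, not the
# project's actual regression code:
from sklearn.cross_decomposition import PLSRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

def pls_per_parameter_sketch(X, y_row, n_components=2):
    # X: (n_pixels, n_features); y_row: (n_pixels,) posterior medians for one parameter
    keep = ~np.isnan(y_row) & ~np.isnan(X).any(axis=1)       # drop pixels with missing data
    X_train, X_test, y_train, y_test = train_test_split(X[keep], y_row[keep], test_size=0.2)
    scaler = StandardScaler().fit(X_train)                   # the 'rescale' step
    pls = PLSRegression(n_components=n_components)
    pls.fit(scaler.transform(X_train), y_train)
    y_pred_test = pls.predict(scaler.transform(X_test)).ravel()
    return y_pred_test, y_test                               # compare held-out predictions to CARDAMOM medians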
def main():

    model_id = sys.argv[1]
    run_type = sys.argv[2]            # ALL or SUBSET
    mcmc_id = sys.argv[3]             # 119 for normal, 3 for DEMCMC
    nbe_optimization = sys.argv[4]    # 'OFF' or 'ON'
    assim_type = '_p25adapted'

    cur_dir = os.getcwd() + '/'
    mdf_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_MDF/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_MDF/'
    runmodel_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_GENERAL/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_GENERAL/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = '../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    output_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    n_iterations = sys.argv[5]
    runtime_assim = int(sys.argv[6])
    resubmit_num = sys.argv[7]
    n_chains_resubmit = 4
    ens_size = 500
    if mcmc_id == '119':
        frac_save_out = str(int(int(n_iterations) / 500))
    elif mcmc_id == '3':
        frac_save_out = str(int(int(n_iterations) / 500 * 100))  # n_iterations / frac_save_out * 100 will be ensemble size

    # select which pixels to submit
    os.chdir(cbf_dir)
    if run_type == 'ALL':
        cbf_files = glob.glob('*.cbf')
    elif run_type == 'SUBSET_RANDOM':
        cbf_files = sample(glob.glob('*.cbf'), 10)
    elif run_type == 'SUBSET_INPUT':
        cbf_files = select_cbf_files(glob.glob('*.cbf'), ['3809', '3524', '2224', '4170', '1945', '3813', '4054', '3264', '1271', '3457'])
    os.chdir(cur_dir + '/../')
    cbf_files.sort()

    # create one combined submission file with all assimilation and forward commands for each pixel's chain on one line
    txt_filename = 'combined_assim_forward_list_' + model_id + '_' + run_type + assim_type + '_MCMC' + mcmc_id + '_' + n_iterations + '_resubmit' + resubmit_num + '.txt'
    txt_file = open(txt_filename, 'w')

    resubmit_count = 0
    gr_pixels = np.zeros(len(cbf_files)) * np.nan  # list of GR for each pixel, for mapping
    pixels = []
    best_subset = []
    conv_bool_lst = []

    for cbf_file in cbf_files:
        best_subset_pixel = []
        resubmit = False
        print(cbf_file, cbf_files.index(cbf_file))

        cbf_pixel = rwb.read_cbf_file(cur_dir + cbf_dir + cbf_file)
        pixel = cbf_file[-8:-4]

        cbr_files = glob.glob(cur_dir + '../' + cbr_dir + '*MCMC' + mcmc_id + '_' + n_iterations + '_' + pixel + '_*.cbr')
        cbr_files = sorted(cbr_files, key=lambda x: int(x.partition(pixel + '_')[-1].partition('.cbr')[0]))
        if len(cbr_files) >= n_chains_resubmit:
            pixels.append(pixel)
        #cbr_files = cbr_files[:16]  ############ TEMP

        if len(cbr_files) > 0:
            end_chain = int(cbr_files[-1].partition(pixel + '_')[-1].partition('.cbr')[0])
            #print('ENDCHAIN: ' + str(end_chain))
        else:
            end_chain = 0
            resubmit = True

        # get all possible XX member combinations of cbr files
        n_chains_to_converge = n_chains_resubmit
        cbr_files_all_subsets = [list(i) for i in itertools.combinations(cbr_files, n_chains_to_converge)]

        continue_check = True
        for subset in cbr_files_all_subsets:
            if continue_check:
                # read parameters and compute gelman rubin
                cbr_chain_list = []
                chain_nums = ['0']
                for cbr_file in subset:
                    #print(cbr_file[-10:-4])
                    cbr_chain = rwb.read_cbr_file(cbr_file, {'nopars': len(parnames)})
                    cbr_chain = autil.modulus_Bday_Fday(cbr_chain, parnames)
                    chain_nums.append(cbr_file.partition('.cbr')[0].partition(pixel + '_')[-1])  # append chain number

                    if np.shape(cbr_chain)[0] == ens_size:
                        cbr_chain_list.append(cbr_chain)
                        #print(np.shape(cbr_chain))
                    else:
                        print('incorrect ensemble size')
                        resubmit = True

                if len(cbr_chain_list) > 1:
                    gr = autil.gelman_rubin(cbr_chain_list)
                    #print(gr)
                    print('%i/%i' % (sum(gr < 1.2), len(parnames)))  #print('%i of %i parameters converged' % (sum(gr<1.2), len(parnames)))
                    if (np.isnan(gr_pixels[cbf_files.index(cbf_file)])):
                        gr_pixels[cbf_files.index(cbf_file)] = sum(gr < 1.2) / len(parnames)
                        #if len(cbr_files_all_subsets)==1: best_subset_pixel.append(chain_nums)

                    if sum(gr < 1.2) / len(parnames) < 0.9:
                        #print('gr too low')
                        resubmit = True
                        if (sum(gr < 1.2) / len(parnames) >= gr_pixels[cbf_files.index(cbf_file)]):
                            gr_pixels[cbf_files.index(cbf_file)] = sum(gr < 1.2) / len(parnames)
                            best_subset_pixel.append(chain_nums)
                        conv_bool = 0
                    else:
                        resubmit = False
                        continue_check = False
                        gr_pixels[cbf_files.index(cbf_file)] = sum(gr < 1.2) / len(parnames)
                        best_subset_pixel.append(chain_nums)
                        conv_bool = 1
                else:
                    gr = np.nan
                    print('gr undefined')
                    best_subset_pixel.append(chain_nums)
                    conv_bool = 0
                    resubmit = True

        if len(best_subset_pixel) > 0:
            best_subset.append(best_subset_pixel[-1])
            conv_bool_lst.append(conv_bool)

        # write into text file if pixel needs to be resubmitted
        if resubmit:
            first_resubmit_chain = end_chain + 1
            last_resubmit_chain = end_chain + n_chains_resubmit
            for chain in range(first_resubmit_chain, last_resubmit_chain + 1):
                c = '_' + str(chain)
                txt_file.write('%sCARDAMOM_MDF.exe %s%s %s%s %s 0 %s 0.001 %s 1000' % (mdf_dir, cbf_dir[3:], cbf_file, cbr_dir, cbf_file[:-8] + 'MCMC' + mcmc_id + '_' + n_iterations + '_' + cbf_file[-8:-4] + c + '.cbr', n_iterations, frac_save_out, mcmc_id))
                txt_file.write(' && %sCARDAMOM_RUN_MODEL.exe %s%s %s%s %s%s %s%s %s%s %s%s' % (runmodel_dir, cbf_dir[3:], cbf_file, cbr_dir, cbf_file[:-8] + 'MCMC' + mcmc_id + '_' + n_iterations + '_' + cbf_file[-8:-4] + c + '.cbr', output_dir, 'fluxfile_' + cbf_file[:-8] + 'MCMC' + mcmc_id + '_' + n_iterations + '_' + cbf_file[-8:-4] + c + '.bin', output_dir, 'poolfile_' + cbf_file[:-8] + 'MCMC' + mcmc_id + '_' + n_iterations + '_' + cbf_file[-8:-4] + c + '.bin', output_dir, 'edcdfile_' + cbf_file[:-8] + 'MCMC' + mcmc_id + '_' + n_iterations + '_' + cbf_file[-8:-4] + c + '.bin', output_dir, 'probfile_' + cbf_file[:-8] + 'MCMC' + mcmc_id + '_' + n_iterations + '_' + cbf_file[-8:-4] + c + '.bin'))
                txt_file.write(' && ') if chain < last_resubmit_chain else txt_file.write('\n')
            resubmit_count += 1

    txt_file.close()

    sh_file = open(txt_filename[:-3] + 'sh', 'w')
    autil.fill_in_sh(sh_file, array_size=resubmit_count, n_hours=runtime_assim, txt_file=txt_filename, combined=True)

    autil.plot_map(nrows=46, ncols=73, land_pixel_list=pixels, pixel_value_list=pixels, value_list=gr_pixels * 100, savepath=cur_dir + plot_dir + 'maps/', savename='gr_' + model_id + assim_type + '_' + run_type + '_MCMC' + mcmc_id + '_' + n_iterations + '_resubmit' + resubmit_num)

    #print(pixels, best_subset, conv_bool_lst)
    print(len(pixels), len(best_subset), len(conv_bool_lst))
    DataFrame(list(zip(pixels, best_subset, conv_bool_lst))).to_pickle(cur_dir + '../' + cbr_dir + model_id + assim_type + '_' + run_type + '_MCMC' + mcmc_id + '_' + n_iterations + '_best_subset.pkl')

    return
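# NOTE (editor): 'autil.gelman_rubin' is not shown in this section. The
# standard Gelman-Rubin potential scale reduction factor it presumably
# computes, given a list of (n_samples, n_parameters) chains, is sketched below
# as an illustration; the project's exact implementation may differ.
def gelman_rubin_sketch(chains):
    chains = np.stack(chains)                  # (m chains, n samples, n_pars)
    m, n = chains.shape[0], chains.shape[1]
    chain_means = chains.mean(axis=1)          # per-chain means, (m, n_pars)
    chain_vars = chains.var(axis=1, ddof=1)    # per-chain variances, (m, n_pars)
    W = chain_vars.mean(axis=0)                # within-chain variance
    B = n * chain_means.var(axis=0, ddof=1)    # between-chain variance
    var_hat = (n - 1) / n * W + B / n          # pooled estimate of the posterior variance
    return np.sqrt(var_hat / W)                # PSRF per parameter; values near 1 (e.g. < 1.2) indicate convergence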
def main():

    # set run information to read
    model_id = sys.argv[1]
    run_type = sys.argv[2]           # ALL or SUBSET
    mcmc_id = sys.argv[3]            # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[4]
    nbe_optimization = sys.argv[5]   # OFF or ON
    runtime_assim = int(sys.argv[6])
    ens_size = 500
    assim_type = '_p25adapted'

    # set directories
    cur_dir = os.getcwd() + '/'
    mdf_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_MDF/' if nbe_optimization=='OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_MDF/'
    runmodel_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_GENERAL/' if nbe_optimization=='OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_GENERAL/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf'+assim_type+'/' + model_id + '/'
    cbf_ic_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf'+assim_type+'/ic_test/' + model_id + '/'
    cbr_pft_dir = '../../../../../scratch/users/cfamigli/cardamom/files/cbr'+assim_type+'_pft/' + model_id + '/'
    cbr_ic_dir = '../../../../../scratch/users/cfamigli/cardamom/files/cbr'+assim_type+'_pft/ic_test/' + model_id + '/'
    output_ic_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output'+assim_type+'_pft/ic_test/' + model_id + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'

    # get model specific information
    parnames = autil.get_parnames('../../misc/', model_id)
    ic_inds = autil.get_inds_ic(model_id)  # get indices of initial condition parameters

    if mcmc_id=='119':
        frac_save_out = str(int(int(n_iter)/500))
    elif mcmc_id=='3':
        frac_save_out = str(int(int(n_iter)/500*100))  # n_iterations / frac_save_out * 100 will be ensemble size

    # select which pixels to submit
    os.chdir(cbf_dir)
    if run_type=='ALL':
        cbf_files = glob.glob('*.cbf')
    elif run_type=='SUBSET_INPUT':
        cbf_files = select_cbf_files(glob.glob('*.cbf'), ['3809','3524','2224','4170','1945','3813','4054','3264','1271','3457'])
    os.chdir(cur_dir + '/../')
    cbf_files.sort()

    ############################################################################################################################################

    # run through pixel cbfs
    for cbf_file in cbf_files:
        pixel = cbf_file[-8:-4]
        cbf_data = rwb.read_cbf_file(cbf_dir + cbf_file)

        # get list of pft cbrs for pixel
        cbr_files = glob.glob(cbr_pft_dir + '*' + pixel + '*.cbr')
        for cbr_file in cbr_files:
            cbr_data = rwb.read_cbr_file(cbr_file, {'nopars': len(parnames)})

            parpriors = np.concatenate((np.nanmedian(cbr_data, axis=0), np.ones(50-len(parnames))*-9999.))
            parpriorunc = np.concatenate((np.ones(len(parnames))*1.001, np.ones(50-len(parnames))*-9999.))
            parpriors[ic_inds[0]:ic_inds[1]] = -9999.
            parpriorunc[ic_inds[0]:ic_inds[1]] = -9999.

            cbf_data['PARPRIORS'] = parpriors.reshape(-1,1)
            cbf_data['PARPRIORUNC'] = parpriorunc.reshape(-1,1)

            #rwb.CARDAMOM_WRITE_BINARY_FILEFORMAT(cbf_data, cbf_ic_dir + cbr_file.partition(cbr_pft_dir)[-1].partition('cbr')[0]+'cbf')

    ############################################################################################################################################

    txt_filename = 'combined_assim_forward_list_' + model_id + '_' + run_type + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '_ic_test.txt'
    txt_file = open(txt_filename, 'w')

    for cbf_ic_file in glob.glob(cbf_ic_dir + '*.cbf'):
        f = cbf_ic_file.partition(cbf_ic_dir)[-1]
        txt_file.write('%sCARDAMOM_MDF.exe %s%s %s%s %s 0 %s 0.001 %s 1000' % (mdf_dir, cbf_ic_dir[3:], f, cbr_ic_dir, f[:-4] + '.cbr', n_iter, frac_save_out, mcmc_id))
        txt_file.write(' && %sCARDAMOM_RUN_MODEL.exe %s%s %s%s %s%s %s%s %s%s %s%s' % (runmodel_dir, cbf_ic_dir[3:], f, cbr_ic_dir, f[:-4] + '.cbr', output_ic_dir, 'fluxfile_'+ f[:-4] +'.bin', output_ic_dir, 'poolfile_'+ f[:-4] +'.bin', output_ic_dir, 'edcdfile_'+ f[:-4] +'.bin', output_ic_dir, 'probfile_'+ f[:-4] +'.bin'))
        txt_file.write('\n')
    txt_file.close()

    sh_file = open(txt_filename[:-3] + 'sh', 'w')
    autil.fill_in_sh(sh_file, array_size=len(glob.glob(cbf_ic_dir + '*.cbf')), n_hours=runtime_assim, txt_file=txt_filename, combined=True)

    return
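# NOTE (editor): 'select_cbf_files' is used for the SUBSET_INPUT option in
# several of the scripts above but is not defined in this section. A minimal
# sketch of the implied behaviour (keep only the cbf files whose 4-digit pixel
# id appears in the requested list); this is an assumption, not the project's
# implementation.
def select_cbf_files_sketch(all_cbf_files, pixel_ids):
    return [f for f in all_cbf_files if f[-8:-4] in pixel_ids]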