def aggregate_parameter_sets(pixels_dom, all_cbr_files, parnames, ens_size, n_chains_agg, conv_chains_pkl):
    """Pool parameter sets across pixels and return a random subsample of rows.

    For every pixel in ``pixels_dom`` that appears in the ``'pixel'`` column of
    ``conv_chains_pkl``, the cbr file of each of its best chains is read and
    stacked.  Pixels that do not appear contribute a NaN block of
    ``ens_size * n_chains_agg`` rows instead, so NaN rows can end up in the
    returned sample.

    Args:
        pixels_dom: iterable of pixel identifiers to aggregate over.
        all_cbr_files: list of cbr file paths; a file is matched to a chain
            when ``pixel + '_' + chain + '.cbr'`` is a substring of its path.
        parnames: parameter names; its length sets the number of columns.
        ens_size: number of ensemble members per chain.
        n_chains_agg: number of chains per pixel; together with ``ens_size``
            it fixes the number of rows that are sampled and returned.
        conv_chains_pkl: DataFrame with ``'pixel'`` and ``'bestchains'``
            columns.  The first element of each ``'bestchains'`` entry is
            skipped; the remaining elements are used as chain identifiers.

    Returns:
        Array of shape ``(ens_size * n_chains_agg, len(parnames))`` drawn
        without replacement from the pooled rows.

    Raises:
        IndexError: if no path in ``all_cbr_files`` matches a best chain.
        ValueError: from ``np.random.choice`` when fewer rows were pooled
            than are requested.
    """
    n_pars = len(parnames)
    n_rows = ens_size * n_chains_agg
    converged_pixels = conv_chains_pkl['pixel'].values

    par_set_agg = []
    for pixel in pixels_dom:
        if pixel in converged_pixels:
            print(pixel)

            # get pixel's convergent chain numbers
            best_chains = conv_chains_pkl.loc[
                conv_chains_pkl['pixel'] == pixel]['bestchains'].values[0][1:]
            print(best_chains)

            # aggregate bestchains from optimal posteriors
            par_set = []
            for chain in best_chains:
                cbr_file = [
                    path for path in all_cbr_files
                    if pixel + '_' + chain + '.cbr' in path
                ][0]
                par_set.append(
                    autil.modulus_Bday_Fday(
                        rwb.read_cbr_file(cbr_file, {'nopars': n_pars}),
                        parnames))
        else:
            # placeholder block for a pixel without convergence information
            par_set = np.ones((n_rows, n_pars)) * np.nan

        par_set_agg.append(np.vstack(par_set))

    par_set_agg = np.vstack(par_set_agg)
    print(par_set_agg.shape)

    # subsample the pooled rows without replacement
    random_rows = np.random.choice(par_set_agg.shape[0], n_rows, replace=False)
    best_cbrs_sampled = par_set_agg[random_rows, :]
    print(best_cbrs_sampled.shape)
    print(np.nanmedian(best_cbrs_sampled, axis=0))

    return best_cbrs_sampled
def main():
    """Train environmental-filter (EF) regressions and write IC-optimization job lists.

    Command-line arguments (``sys.argv``):
        1. ``model_id``
        2. ``mcmc_id`` -- '119' for normal MCMC, '3' for DEMCMC
        3. ``n_iter`` -- number of iterations, as a string
        4. ``nbe_optimization`` -- 'OFF' or 'ON'
        5. ``n_features_select`` -- integer

    Steps, in order:
        * build the predictor matrix ``X`` (met mean/std plus optional
          soilgrids, pool observations and globcover fractions) and the target
          matrix ``y`` (median of each pixel's best-chain parameters);
        * run ``run_regressions`` on them;
        * read the train/test score csvs and pick ``opt_fs`` with
          ``plot_train_test``;
        * write cbf files with substituted parameter priors plus the matching
          command list / shell file (``submit_ic_opt``), and optionally the
          forward-run equivalents (``submit_forward``).

    Raises:
        ValueError: if ``mcmc_id`` is neither '119' nor '3'.
    """
    # set run information to read
    model_id = sys.argv[1]
    mcmc_id = sys.argv[2]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[3]
    nbe_optimization = sys.argv[4]  # OFF OR ON
    ens_size = 500
    assim_type = '_p25adapted'
    suffix = '_clipped_'

    if mcmc_id == '119':
        frac_save_out = str(int(int(n_iter) / 500))
        n_chains_agg = 4
    elif mcmc_id == '3':
        # n_iterations / frac_save_out * 100 will be ensemble size
        frac_save_out = str(int(int(n_iter) / 500 * 100))
        n_chains_agg = 2
    else:
        # fail early instead of hitting an undefined frac_save_out much later
        raise ValueError("unsupported mcmc_id %r (expected '119' or '3')" % mcmc_id)

    # set directories
    cur_dir = os.getcwd() + '/'
    misc_dir = cur_dir + '/../../misc/'
    cbf_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    cbr_ef_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_ef/' + model_id + '/'
    plot_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    # tag shared by most of the file names written / read below
    run_tag = model_id + '_MCMC' + mcmc_id + '_' + n_iter + assim_type

    # choose which features to use
    include_soilgrids = True
    include_poolobs = True
    include_gl_fracs = False

    # choose which model formulation to use
    train_full_ensemble = False
    rescale = True
    include_interactions = False
    include_squares = False
    include_all_polys = False
    do_feature_selection = False
    do_PLS = True
    n_features_select = int(sys.argv[5])
    write_to_csv = False

    # choose which tasks to run
    opt_feature_select = True
    submit_ic_opt = True
    submit_forward = False

    ############################################################################################################################################
    ############################# develop and train EF models ###################################################################################

    # load list of land pixels
    pixels = sorted({path[-8:-4] for path in glob.glob(cbf_dir + '*.cbf')})

    # load list of cbrs
    cbr_files = glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter + '_*.cbr')

    # load bestchains for cbr_files
    conv_chains = read_pickle(cbr_dir + model_id + assim_type + '_ALL' + '_MCMC' + mcmc_id + '_' + n_iter + '_best_subset.pkl')
    conv_chains.columns = ['pixel', 'bestchains', 'conv']  # rename columns for easier access

    ic_inds = autil.get_inds_ic(model_id)  # get indices of initial condition parameters

    # load globcover csv for land cover regression comparison
    gl_fracs = read_csv(misc_dir + 'globcover_fracs.csv', header=0)
    n_features_gl = len(gl_fracs.columns) - 1
    suffix_gl = 'gl_'

    # get number of predictors:
    # remove 3 corresponding to day number and CO2, multiply by 2 (mean and sd)
    n_features = (rwb.read_cbf_file(glob.glob(cbf_dir + '*.cbf')[0])['nomet'] - 3) * 2

    if do_PLS:
        suffix += 'PLS_'

    if include_soilgrids:
        soilgrids = read_csv('../../misc/soilgrids_defined_pixels_manual.csv', header=0)
        n_soilgrids = len(soilgrids.columns) - 1
        n_features += n_soilgrids
        suffix += 'soilgrids_'

    if include_poolobs:
        n_poolobs = 4
        n_features += n_poolobs
        suffix += 'poolobs_'

    if include_gl_fracs:
        n_features += n_features_gl
        suffix += suffix_gl

    # fill X and Y
    n_regr_models = len(parnames)
    X = np.ones((len(pixels), n_features)) * np.nan  # shape n_samples, n_features
    y = np.ones((n_regr_models, len(pixels))) * np.nan  # shape n_pars, n_samples
    y_full_ens = np.ones((ens_size, n_regr_models, len(pixels))) * np.nan  # shape ens_size, n_pars, n_samples

    X_gl = np.ones((len(pixels), n_features_gl)) * np.nan
    y_gl = np.ones((n_regr_models, len(pixels))) * np.nan

    # pixels is sorted and unique, so the enumerate index equals pixels.index(pixel)
    for ind, pixel in enumerate(pixels):
        has_cbrs = len(glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter + '_' + pixel + '*.cbr')) > 0
        if not (has_cbrs and pixel in conv_chains['pixel'].values):
            continue
        if conv_chains.loc[conv_chains['pixel'] == pixel]['conv'].values[0] == 0:
            # skip pixels flagged as not converged
            continue

        print(pixel)

        # read the pixel's cbf once; both the met drivers and the observations come from it
        cbf_file = glob.glob(cbf_dir + '*' + pixel + '.cbf')[0]
        cbf_contents = rwb.read_cbf_file(cbf_file)

        # get met
        met = cbf_contents['MET']
        met = met[:, [1, 2, 3, 6, 7, 8]]  # don't use index 0, 5 (day numbers) or 4 (Co2)
        X_end = met.shape[1] * 2
        X[ind, :X_end] = np.concatenate((np.nanmean(met, axis=0), np.nanstd(met, axis=0)))
        #X[ind,:met.shape[1]*12] = fill_X_met_12mo(X[ind,:met.shape[1]*12], met)#np.nanmean(met, axis=0)

        # append to X if include_soil_canopy_vars
        # NOTE(review): the column offset X_end advances whenever a feature group is
        # enabled, even if this pixel has no data for it, so columns stay aligned
        # across pixels -- confirm against the original indentation.
        if include_soilgrids:
            if (int(pixel) in soilgrids['pixel'].values):
                X[ind, X_end:(X_end + n_soilgrids)] = soilgrids[soilgrids['pixel'] == int(pixel)].values[0][1:]
            X_end = X_end + n_soilgrids

        if include_poolobs:
            obs = cbf_contents['OBS']
            lai, agb, som = obs['LAI'], obs['ABGB'], obs['SOM']
            if (len(lai) > 0) & (len(agb) > 0) & (len(som) > 0):
                X[ind, X_end:(X_end + n_poolobs)] = np.array([
                    np.nanmean(lai[lai > 0]),
                    np.nanstd(lai[lai > 0]),
                    np.nanmean(agb[agb > 0]),
                    np.nanmean(som[som > 0])
                ])
            X_end = X_end + n_poolobs

        if include_gl_fracs:
            if (int(pixel) in gl_fracs['pixel'].values):
                X[ind, X_end:(X_end + n_features_gl)] = gl_fracs.loc[gl_fracs['pixel'] == int(pixel)].values[0][1:]
            X_end = X_end + n_features_gl

        # fill globcover X
        if int(pixel) in gl_fracs['pixel'].values:
            X_gl[ind, :] = gl_fracs.loc[gl_fracs['pixel'] == int(pixel)].values[0][1:]

        # get parameter information
        # get pixel's convergent chain numbers
        best_chains = conv_chains.loc[conv_chains['pixel'] == pixel]['bestchains'].values[0][1:]
        print(best_chains)

        # aggregate bestchains from optimal posteriors
        cbr_data = []
        for chain in best_chains:
            cbr_file = [path for path in cbr_files if pixel + '_' + chain + '.cbr' in path][0]
            cbr_data.append(
                autil.modulus_Bday_Fday(
                    rwb.read_cbr_file(cbr_file, {'nopars': len(parnames)}),
                    parnames))
            #cbr_data.append(rwb.read_cbr_file(file, {'nopars': len(parnames)}))
        cbr_data = np.vstack(cbr_data)

        y[:, ind] = np.nanmedian(cbr_data, axis=0)
        y_gl[:, ind] = np.nanmedian(cbr_data, axis=0)

        # only take a subset of cbr rows
        indices = np.random.choice(cbr_data.shape[0], ens_size, replace=False)
        y_full_ens[:, :, ind] = cbr_data[indices, :]  #reshape_cbr(cbr_data, ens_size*n_chains_agg)

    if not train_full_ensemble:
        # the path is built before run_regressions replaces `suffix`
        bic_path = misc_dir + 'env_filter_manual/fs/bic_fs' + suffix.partition('fs')[0] + run_tag + '.csv'
        with open(bic_path, 'a') as f_bic:
            w_bic = csv.writer(f_bic)

            # EF regressions
            (reg_test_preds_list, card_test_preds_list, reg_train_preds_list,
             card_train_preds_list, pixels_r, suffix, k) = run_regressions(
                 X, y, pixels, rescale, include_interactions, include_squares,
                 include_all_polys, do_feature_selection, do_PLS, write_to_csv,
                 w_bic, n_features_select, suffix, ens_size, n_regr_models,
                 n_features)

        # globcover comparison
        #gl_reg_test_preds_list, gl_card_test_preds_list, gl_reg_train_preds_list, gl_card_train_preds_list, gl_pixels_r, gl_suffix, gl_k = run_regressions(X_gl, y_gl, pixels, rescale, False, False, False, False, False, False, w_bic, n_features_select, suffix_gl, ens_size, n_regr_models, n_features_gl)

    else:
        suffix += 'full_ens_'
        icount = 0
        for i in sample(range(y_full_ens.shape[0]), 100):
            print(icount)

            # NOTE(review): this call passes 13 positional arguments while the call
            # in the other branch passes 16 (do_PLS, write_to_csv and w_bic are
            # missing here) -- confirm against run_regressions before enabling
            # train_full_ensemble.
            rtest, ctest, rtrain, ctrain, pixels_r, suffix, k = run_regressions(
                X, y_full_ens[i, :, :], pixels, rescale, include_interactions,
                include_squares, include_all_polys, do_feature_selection,
                n_features_select, suffix, ens_size, n_regr_models, n_features)

            reg_test_preds_list = [np.nanmedian(ri, axis=0) for ri in rtest] if icount == 0 else [
                np.vstack((np.nanmedian(ri, axis=0), rfull))
                for ri, rfull in zip(rtest, reg_test_preds_list)
            ]
            card_test_preds_list = np.copy(ctest) if icount == 0 else [
                np.vstack((ci, cfull))
                for ci, cfull in zip(ctest, card_test_preds_list)
            ]

            reg_train_preds_list = [np.nanmedian(ri, axis=0) for ri in rtrain] if icount == 0 else [
                np.vstack((np.nanmedian(ri, axis=0), rfull))
                for ri, rfull in zip(rtrain, reg_train_preds_list)
            ]
            card_train_preds_list = np.copy(ctrain) if icount == 0 else [
                np.vstack((ci, cfull))
                for ci, cfull in zip(ctrain, card_train_preds_list)
            ]

            icount += 1

    # fill csv
    # The files are opened in append mode (which creates them if missing) even though
    # the calls that would write to them are currently commented out.
    fs_tag = suffix.partition('fs')[0]
    fs_test_path = misc_dir + 'env_filter_manual/fs/fs_test' + fs_tag + run_tag + '.csv'
    fs_train_path = misc_dir + 'env_filter_manual/fs/fs_train' + fs_tag + run_tag + '.csv'
    preds_test_path = misc_dir + 'env_filter_manual/par_preds/par_preds_test' + suffix + run_tag + '.csv'
    preds_train_path = misc_dir + 'env_filter_manual/par_preds/par_preds_train' + suffix + run_tag + '.csv'

    with open(fs_test_path, 'a') as f_test, \
            open(fs_train_path, 'a') as f_train, \
            open(preds_test_path, 'a') as f_test_preds, \
            open(preds_train_path, 'a') as f_train_preds:
        wr_test = csv.writer(f_test)
        wr_train = csv.writer(f_train)
        wr_test_preds = csv.writer(f_test_preds)
        wr_train_preds = csv.writer(f_train_preds)

        print('TEST:')
        #plot_scatter_test_pred(card_test_preds_list, reg_test_preds_list, k, pixels_r, parnames, wr_test, wr_test_preds, plot_dir+'env_filter/', 'par_preds_test'+suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)
        #plot_scatter_test_pred(gl_card_test_preds_list, gl_reg_test_preds_list, gl_k, gl_pixels_r, parnames, wr_test, wr_test_preds, plot_dir+'env_filter/', 'par_preds_test'+gl_suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)

        print('. . . . . \n\nTRAIN:')
        #plot_scatter_test_pred(card_train_preds_list, reg_train_preds_list, k, pixels_r, parnames, wr_train, wr_train_preds, plot_dir+'env_filter/', 'par_preds_train'+suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)
        #plot_scatter_test_pred(gl_card_train_preds_list, gl_reg_train_preds_list, gl_k, gl_pixels_r, parnames, wr_train, wr_train_preds, plot_dir+'env_filter/', 'par_preds_train'+gl_suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)

    ############################################################################################################################################
    ################################### find optimal number of features for each parameter #####################################################

    if opt_feature_select:
        score_columns = ['n_features_select'] + list(parnames)

        test_rmse = read_csv(fs_test_path, header=None)
        test_rmse.columns = score_columns
        # sort_values returns a new frame; the result has to be kept for the sort to take effect
        test_rmse = test_rmse.sort_values('n_features_select')

        train_rmse = read_csv(fs_train_path, header=None)
        train_rmse.columns = score_columns
        train_rmse = train_rmse.sort_values('n_features_select')

        x = test_rmse['n_features_select'].values

        # the second call overwrites opt_fs, so the norm=True result is the one used below
        opt_fs = plot_train_test(x, train_rmse, test_rmse, parnames,
                                 savepath=plot_dir + 'train_test/',
                                 savename=model_id + '_MCMC' + mcmc_id + suffix.partition('fs')[0],
                                 norm=False)
        opt_fs = plot_train_test(x, train_rmse, test_rmse, parnames,
                                 savepath=plot_dir + 'train_test/',
                                 savename=model_id + '_MCMC' + mcmc_id + suffix.partition('fs')[0],
                                 norm=True)
        print(opt_fs)

        #bic_data = read_csv(misc_dir +'env_filter_manual/fs/bic_fs_soilgrids_poolobs_'+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type + '.csv', header=None)
        #bic_data.columns = [item for sublist in [['n_features_select'],parnames] for item in sublist]
        #bic_data.columns.sort_values('n_features_select')
        #x = bic_data['n_features_select'].values
        #opt_fs = plot_train_test(x, bic_data, bic_data*np.nan, parnames, savepath=plot_dir+'train_test/', savename='bic_'+model_id+'_MCMC'+mcmc_id+suffix.partition('fs')[0])
        #print(opt_fs)

    ############################################################################################################################################
    ################################### copy cbfs and substitute pars for IC optimization ######################################################

    # set directories for CARDAMOM runs
    mdf_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_MDF/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_MDF/'
    runmodel_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_GENERAL/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_GENERAL/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbf_ef_ic_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '_ef_ic/' + model_id + '/'
    cbr_ef_dir = '../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_ef/' + model_id + '/'
    output_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
    output_ef_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '_ef/' + model_id + '/'

    # select which pixels to submit
    os.chdir(cbf_dir)
    cbf_files = glob.glob('*.cbf')
    cbf_files.sort()
    # everything below runs with the parent of the start directory as working directory
    os.chdir(cur_dir + '/../')

    preds_dir = misc_dir + 'env_filter_manual/par_preds/'

    if submit_ic_opt:
        txt_filename = 'ef_ic_assim_list_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '.txt'
        with open(txt_filename, 'w') as txt_file:
            for cbf_file in cbf_files:
                print(cbf_file)
                cbf_data = rwb.read_cbf_file(cbf_dir + cbf_file)
                cbf_pixel = cbf_file[-8:-4]
                if cbf_pixel not in pixels_r:
                    continue

                # prior vectors have 50 slots; unused slots and exempted parameters hold -9999.
                parpriors = np.concatenate(
                    (retrieve_preds(cbf_pixel, opt_fs, suffix, preds_dir),
                     np.ones(50 - len(parnames)) * -9999.))
                parpriorunc = np.concatenate(
                    (np.ones(len(parnames)) * 1.001,
                     np.ones(50 - len(parnames)) * -9999.))

                # except ICs
                for ic_ind in ic_inds:
                    parpriors[ic_ind] = -9999.
                    parpriorunc[ic_ind] = -9999.

                # except NBE unc
                if nbe_optimization == 'ON':
                    parpriors[len(parnames) - 1] = -9999.
                    parpriorunc[len(parnames) - 1] = -9999.

                cbf_data['PARPRIORS'] = parpriors.reshape(-1, 1)
                cbf_data['PARPRIORUNC'] = parpriorunc.reshape(-1, 1)

                fp = cbf_file[:-9] + suffix.partition('fs')[0] + cbf_pixel
                fa = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + suffix.partition('fs')[0] + 'assim_' + cbf_pixel
                rwb.CARDAMOM_WRITE_BINARY_FILEFORMAT(cbf_data, cbf_ef_ic_dir + fp + '.cbf')

                txt_file.write(
                    '%sCARDAMOM_MDF.exe %s%s %s%s %s 0 %s 0.001 %s 1000' %
                    (mdf_dir, cbf_ef_ic_dir[3:], fp + '.cbf', cbr_ef_dir,
                     fa + '.cbr', n_iter, frac_save_out, mcmc_id))
                txt_file.write('\n')

        # closing an already-closed file is a no-op, so this is safe whether or not
        # fill_in_sh closes the handle itself
        with open(txt_filename[:-3] + 'sh', 'w') as sh_file:
            autil.fill_in_sh(sh_file, array_size=len(pixels_r), n_hours=6,
                             txt_file=txt_filename, combined=False)

    if submit_forward:
        txt_filename = 'ef_ic_forward_list_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '.txt'
        with open(txt_filename, 'w') as txt_file:
            for cbf_file in cbf_files:
                print(cbf_file)
                cbf_data = rwb.read_cbf_file(cbf_dir + cbf_file)
                cbf_pixel = cbf_file[-8:-4]
                if cbf_pixel not in pixels_r:
                    continue

                fa = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + suffix.partition('fs')[0] + 'assim_' + cbf_pixel
                cbr_assim = rwb.read_cbr_file(
                    glob.glob(cbr_ef_dir + fa + '.cbr')[0],
                    {'nopars': len(parnames)})

                ff = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + suffix.partition('fs')[0] + 'forward_' + cbf_pixel
                cbr_forward = retrieve_preds(cbf_pixel, opt_fs, suffix, preds_dir)
                # initial conditions come from the assimilated posterior, not the regression
                for ic_ind in ic_inds:
                    cbr_forward[ic_ind] = np.nanmedian(cbr_assim[:, ic_ind])
                cbr_forward = cbr_forward.reshape(1, len(parnames))

                rwb.write_cbr_file(cbr_forward, cbr_ef_dir + ff + '.cbr')

                txt_file.write(
                    '%sCARDAMOM_RUN_MODEL.exe %s%s %s%s %s%s %s%s %s%s %s%s' %
                    (runmodel_dir, cbf_dir[3:], cbf_file, cbr_ef_dir,
                     ff + '.cbr', output_ef_dir, 'fluxfile_' + ff + '.bin',
                     output_ef_dir, 'poolfile_' + ff + '.bin', output_ef_dir,
                     'edcdfile_' + ff + '.bin', output_ef_dir,
                     'probfile_' + ff + '.bin'))
                txt_file.write('\n')

        with open(txt_filename[:-3] + 'sh', 'w') as sh_file:
            autil.fill_in_sh(sh_file, array_size=len(pixels_r), n_hours=1,
                             txt_file=txt_filename, combined=False)

    return
def main():
    """Compare forward-run output of several model / MCMC configurations at fixed pixels.

    For each hard-coded pixel and each (model_id, mcmc_id, n_iter) configuration,
    the best chains listed in that configuration's ``*_best_subset.pkl`` are read
    (parameters, flux file and pool file), pooled across chains, and turned into
    NBE / LAI / ABGB / GPP predictions with ``autil.get_output``.  Per pixel the
    predictions are plotted against the observations and the mean absolute errors
    are collected; the MAE rankings and MAE plots are produced once all pixels are
    processed.

    Takes no arguments and returns ``None``; all results are plots and prints.
    """
    # get specifications for run to read
    model_ids = ['811', '811', '911', '911']
    assim_type = '_p25adapted'
    ens_size = 500

    # get pixels, ids and number of iterations to read
    pixels = ['3809', '3524', '2224', '4170', '1945', '3813', '4054', '3264', '1271', '3457']
    mcmc_ids = ['119', '3', '3', '119']
    n_iters = ['40000000', '1000000', '1000000', '40000000']

    nbe_mae, lai_mae, abgb_mae = [], [], []

    # directories and labels that do not depend on the pixel or configuration
    cur_dir = os.getcwd() + '/'
    files_root = '../../../../../../scratch/users/cfamigli/cardamom/files/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    save_dir = cur_dir + plot_dir + 'demcmc_mcmc/'
    lbls = [m_id + '_MCMC' + c_id for m_id, c_id in zip(model_ids, mcmc_ids)]

    # run through pixels
    for pixel in pixels:

        # get that pixel's outputs for each MCMCID
        nbe_pred, lai_pred, abgb_pred, gpp_pred = [], [], [], []

        for model_id, mcmc_id, n_iter in zip(model_ids, mcmc_ids, n_iters):

            # set directories
            cbf_dir = files_root + 'cbf' + assim_type + '/' + model_id + '/'
            cbr_dir = files_root + 'cbr' + assim_type + '/' + model_id + '/'
            output_dir = files_root + 'output' + assim_type + '/' + model_id + '/'
            parnames = autil.get_parnames('../../misc/', model_id)

            # looked up once per configuration; indexed as [0] flux dimension,
            # [1] pool dimension, [2] value forwarded to autil.get_output
            model_dims = autil.get_nofluxes_nopools_lma(model_id)

            # read cbf file for that pixel
            cbf_pixel = rwb.read_cbf_file(glob.glob(cbf_dir + '*_' + pixel + '.cbf')[0])

            # read obs and obs unc for that pixel
            # (after the loop these hold the values from the last configuration)
            nbe_obs = cbf_pixel['OBS']['NBE']
            lai_obs = cbf_pixel['OBS']['LAI']
            abgb_obs = cbf_pixel['OBS']['ABGB']
            sif_obs = cbf_pixel['OBS']['GPP']
            nbe_an_unc = cbf_pixel['OBSUNC']['NBE']['annual_unc']
            nbe_seas_unc = cbf_pixel['OBSUNC']['NBE']['seasonal_unc']
            lai_unc = cbf_pixel['OTHER_OBS']['MLAI']['unc']
            abgb_unc = cbf_pixel['OBSUNC']['ABGB']['unc']

            conv_chains_pkl = read_pickle(
                glob.glob(cbr_dir + model_id + assim_type + '*_MCMC' + mcmc_id + '_' + n_iter + '_best_subset.pkl')[0])
            # rename columns for easier access
            conv_chains_pkl.columns = ['pixel', 'bestchains', 'conv']  # if model_id!='911' else ['pixel','bestchains']

            # grab cbrs corresponding to that pixel, MCMCID and number of iterations
            files = glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter + '_' + pixel + '*.cbr')
            files.sort()
            best_chains = conv_chains_pkl.loc[conv_chains_pkl['pixel'] == pixel]['bestchains'].values[0][1:]

            # run through cbrs
            cbr_chain_list = []
            flux_chains, pool_chains = [], []
            for chain in best_chains:
                print(chain)

                # read cbr for one file and transform Bday, Fday
                cbr_path = [path for path in files if pixel + '_' + chain + '.cbr' in path][0]
                cbr_chain = autil.modulus_Bday_Fday(
                    rwb.read_cbr_file(cbr_path, {'nopars': len(parnames)}), parnames)
                print(cbr_chain.shape)

                # read forward run for that cbr
                # (file name = cbr file name with the 'cbr' extension swapped for 'bin')
                bin_name = cbr_path.partition(cbr_dir)[-1][:-3] + 'bin'
                flux_chain = rwb.readbinarymat(
                    output_dir + 'fluxfile_' + bin_name,
                    [cbf_pixel['nodays'], model_dims[0]])
                pool_chain = rwb.readbinarymat(
                    output_dir + 'poolfile_' + bin_name,
                    [cbf_pixel['nodays'] + 1, model_dims[1]])

                # add chain to list for GR calculation
                if np.shape(cbr_chain)[0] == ens_size:
                    cbr_chain_list.append(cbr_chain)

                # add forward run chain to aggregated matrix
                # NOTE(review): forward runs are kept even when the cbr chain fails the
                # ens_size check above, so rows can differ from cbr_pixel -- confirm.
                flux_chains.append(flux_chain)
                pool_chains.append(pool_chain)

            # compute gelman rubin
            if len(cbr_chain_list) > 1:
                gr = autil.gelman_rubin(cbr_chain_list)
                print('%i of %i parameters converged' % (sum(gr < 1.2), len(parnames)))

            cbr_pixel = np.vstack(cbr_chain_list)
            flux_pixel = np.concatenate(flux_chains, axis=0)
            pool_pixel = np.concatenate(pool_chains, axis=0)

            print(pool_pixel.shape)
            print(cbr_pixel.shape)

            # nbe, lai, and abgb predictions at pixel
            # list with elements corresponding to MCMCIDs considered (e.g. first element is MCMCID 119)
            nbe_pred.append(autil.get_output('NBE', model_id, flux_pixel, pool_pixel, cbr_pixel, model_dims[2]))
            # the last column of the LAI and ABGB output is dropped
            lai_pred.append(autil.get_output('LAI', model_id, flux_pixel, pool_pixel, cbr_pixel, model_dims[2])[:, :-1])
            abgb_pred.append(autil.get_output('ABGB', model_id, flux_pixel, pool_pixel, cbr_pixel, model_dims[2])[:, :-1])
            gpp_pred.append(autil.get_output('GPP', model_id, flux_pixel, pool_pixel, cbr_pixel, model_dims[2]))

        # plot time series
        plot_output_ts(cbf_pixel, nbe_pred, nbe_obs, nbe_an_unc, lbls=lbls, var='NBE',
                       savepath=save_dir, title='all_models' + '_NBE_' + pixel + '.png')
        plot_output_ts(cbf_pixel, lai_pred, lai_obs, lai_unc, lbls=lbls, var='LAI',
                       savepath=save_dir, title='all_models' + '_LAI_' + pixel + '.png')
        plot_output_ts(cbf_pixel, gpp_pred, sif_obs, 0, lbls=lbls, var='GPP',
                       savepath=save_dir, title='all_models' + '_GPP_' + pixel + '.png')

        # plot box plots
        plot_dist_compare(nbe_pred, nbe_obs, [nbe_an_unc, nbe_seas_unc], lbls=lbls, var='NBE',
                          savepath=save_dir, title='all_models' + '_NBE_' + pixel + '_dist_')
        plot_dist_compare(lai_pred, lai_obs, lai_unc, lbls=lbls, var='LAI',
                          savepath=save_dir, title='all_models' + '_LAI_' + pixel + '_dist_')
        plot_dist_compare(abgb_pred, abgb_obs, abgb_unc, lbls=lbls, var='ABGB',
                          savepath=save_dir, title='all_models' + '_ABGB_' + pixel + '_dist_')

        # plot obs vs median comparison
        nbe_mae.append([mae_real_numbers_only(f, nbe_obs)[0] for f in nbe_pred])
        lai_mae.append([mae_real_numbers_only(f, lai_obs)[0] for f in lai_pred])
        abgb_mae.append([mae_real_numbers_only(f, abgb_obs)[0] for f in abgb_pred])

    # NOTE(review): placed after the pixel loop (ranking over all pixels); the
    # original indentation of these calls could not be recovered -- confirm.
    print(rank_mae(nbe_mae, lbls))
    print(rank_mae(lai_mae, lbls))
    print(rank_mae(abgb_mae, lbls))

    plot_maes(nbe_mae, pixels, savepath=save_dir, title='all_models_NBE_mae')
    plot_maes(lai_mae, pixels, savepath=save_dir, title='all_models_LAI_mae')
    plot_maes(abgb_mae, pixels, savepath=save_dir, title='all_models_ABGB_mae')

    return
def main():
    """Compute per-pixel NBE and LAI RMSE from a convergent chain subset, then map and save them.

    ``sys.argv[1]`` is the model id.  For every cbf file, combinations of four
    cbr chains are tried in order until one has at least 90% of its parameters
    with a Gelman-Rubin value below 1.2.  For such pixels the matching flux and
    pool files are read, NBE and LAI predictions are derived with
    ``autil.get_output`` and their RMSE against the cbf observations is stored.

    Side effects: two maps written through ``autil.plot_map`` and a pickled
    DataFrame (columns ``pixel``, ``nbe_rmse``, ``lai_rmse``) in the misc
    directory.  Returns ``None``.
    """
    ### set specifications
    model_id = sys.argv[1]
    mcmc_id = '119'
    n_iter = '40000000'
    ens_size = 500
    assim_type = '_longadapted'
    n_chains_to_converge = 4

    ### set directories
    cur_dir = os.getcwd() + '/'
    cbf_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    output_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
    plot_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames(cur_dir + '../../misc/', model_id)

    # looked up once; indexed as [0] flux dimension, [1] pool dimension,
    # [2] value forwarded to autil.get_output
    model_dims = autil.get_nofluxes_nopools_lma(model_id)

    # get list of cbfs
    os.chdir(cbf_dir)
    cbf_files = glob.glob('*.cbf')
    cbf_files.sort()
    os.chdir(cur_dir)

    # initialize lists of pixel names and rmses
    pixels_plot = []
    nbe_rmse, lai_rmse = [], []

    for file_index, cbf_file in enumerate(cbf_files):
        print(cbf_file, file_index)

        cbf_pixel = rwb.read_cbf_file(cbf_dir + cbf_file)
        pixel = cbf_file[-8:-4]

        # chains sorted by the integer chain number found between '<pixel>_' and '.cbr'
        cbr_files = glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter + '_' + pixel + '_*.cbr')
        cbr_files = sorted(cbr_files, key=lambda x: int(x.partition(pixel + '_')[-1].partition('.cbr')[0]))

        # walk the XX member combinations of cbr files lazily and stop at the first
        # convergent one (same order as a materialised list of combinations)
        best_subset = None
        cbr_agg = None
        for subset in itertools.combinations(cbr_files, n_chains_to_converge):

            # read parameters and compute gelman rubin
            cbr_chain_list = []
            for cbr_file in subset:
                cbr_chain = rwb.read_cbr_file(cbr_file, {'nopars': len(parnames)})
                cbr_chain = autil.modulus_Bday_Fday(cbr_chain, parnames)

                # only chains with the full ensemble size take part
                if np.shape(cbr_chain)[0] == ens_size:
                    cbr_chain_list.append(cbr_chain)

            if len(cbr_chain_list) > 1:
                gr = autil.gelman_rubin(cbr_chain_list)

                if sum(gr < 1.2) / len(parnames) >= 0.9:
                    cbr_agg = np.vstack(cbr_chain_list)
                    pixels_plot.append(pixel)
                    best_subset = list(subset)
                    break

        # if there is a convergent subset, read fluxes and pools
        if best_subset is not None:
            convergent_files = [el.partition('.cbr')[0].partition(model_id + '/')[-1] for el in best_subset]

            flux_pixel = []
            pool_pixel = []
            for filename in convergent_files:
                flux_chain = rwb.readbinarymat(
                    output_dir + 'fluxfile_' + filename + '.bin',
                    [cbf_pixel['nodays'], model_dims[0]])
                pool_chain = rwb.readbinarymat(
                    output_dir + 'poolfile_' + filename + '.bin',
                    [cbf_pixel['nodays'] + 1, model_dims[1]])

                if (flux_chain.shape[0] == ens_size) & (pool_chain.shape[0] == ens_size):
                    flux_pixel.append(flux_chain)
                    pool_pixel.append(pool_chain)

            flux_all = np.vstack(flux_pixel)
            pool_all = np.vstack(pool_pixel)
            nbe_pred = autil.get_output('NBE', model_id, flux_all, pool_all, cbr_agg, model_dims[2])
            lai_pred = autil.get_output('LAI', model_id, flux_all, pool_all, cbr_agg, model_dims[2])
            nbe_obs, lai_obs = cbf_pixel['OBS']['NBE'], cbf_pixel['OBS']['LAI']

            pixel_nbe_rmse = rmse_real_numbers_only(nbe_pred, nbe_obs)
            pixel_lai_rmse = rmse_real_numbers_only(lai_pred, lai_obs)
            nbe_rmse.append(pixel_nbe_rmse)
            lai_rmse.append(pixel_lai_rmse)
            print(pixel_nbe_rmse, pixel_lai_rmse)

    land_pixels = [path[-8:-4] for path in glob.glob(cbf_dir + '*.cbf')]
    map_tag = model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter

    autil.plot_map(nrows=46, ncols=73, land_pixel_list=land_pixels,
                   pixel_value_list=pixels_plot, value_list=nbe_rmse,
                   savepath=plot_dir + 'maps/', savename='rmse_nbe_' + map_tag)
    autil.plot_map(nrows=46, ncols=73, land_pixel_list=land_pixels,
                   pixel_value_list=pixels_plot, value_list=lai_rmse,
                   savepath=plot_dir + 'maps/', savename='rmse_lai_' + map_tag)

    # passing the names to the constructor also works when no pixel converged
    # (assigning three names to an empty frame afterwards raises)
    rmse_df = DataFrame(list(zip(pixels_plot, nbe_rmse, lai_rmse)),
                        columns=['pixel', 'nbe_rmse', 'lai_rmse'])
    rmse_df.to_pickle(cur_dir + '../../misc/rmse_' + map_tag + '.pkl')

    #################################################################################################################################################################
    # analyze regionally (disabled)

    #region_mask = Dataset(cur_dir + '../../misc/fourregion_maskarrays.nc')
    #region_mask.set_auto_mask(False)
    #regionmat, lat, lon = region_mask['4region'][:], region_mask['lat'][:], region_mask['lon'][:]
    #lat[0] = -90
    #lat[-1] = 90
    #
    #model_ids = ['811', '911']
    #rmse_dfs = []
    #for model_id in model_ids:
    #    rmse_df = read_pickle(cur_dir + '../../misc/rmse_' + model_id + assim_type+ '_MCMC' + mcmc_id + '_' + n_iter + '.pkl')
    #    rmse_df.columns = ['pixel','nbe_rmse','lai_rmse']
    #
    #    regions = []
    #    for pixel in rmse_df[rmse_df.columns[0]].tolist():
    #        pixlat, pixlon = rwb.rowcol_to_latlon(pixel)
    #        regions.append(regionmat[np.argwhere(lat==pixlat)[0][0], np.argwhere(lon==pixlon)[0][0]])
    #
    #    rmse_df.insert(loc=1, column='region', value=regions)
    #    rmse_dfs.append(rmse_df)
    #
    #print(rmse_dfs[0].groupby('region')['nbe_rmse'].mean(), rmse_dfs[0].groupby('region')['lai_rmse'].mean())
    #print(rmse_dfs[1].groupby('region')['nbe_rmse'].mean(), rmse_dfs[1].groupby('region')['lai_rmse'].mean())

    return
def main():
    """Check per-pixel chain convergence and write a resubmission job list.

    Command-line arguments (read from ``sys.argv``):
        1. model_id
        2. run_type: 'ALL', 'SUBSET_RANDOM' or 'SUBSET_INPUT'
        3. mcmc_id: '119' for normal MCMC, '3' for DEMCMC
        4. nbe_optimization: 'OFF' or 'ON' (selects which CARDAMOM code tree
           the generated commands point at)
        5. n_iterations
        6. runtime_assim: integer passed to ``autil.fill_in_sh`` as n_hours
        7. resubmit_num: tag appended to the output file names

    For every cbf file, each combination of ``n_chains_resubmit`` cbr chains
    is tested with ``autil.gelman_rubin``; the search stops at the first
    combination in which at least 90% of parameters have GR < 1.2.  Pixels
    without such a combination get one line of assimilation + forward-run
    commands in the combined text file.  A matching .sh file, a map of the
    converged fraction and a pickle of the best chain subset per pixel are
    written as well.

    Raises:
        ValueError: if run_type or mcmc_id is not one of the supported values
            (previously this surfaced later as a NameError).
    """
    model_id = sys.argv[1]
    run_type = sys.argv[2]  # ALL, SUBSET_RANDOM or SUBSET_INPUT
    mcmc_id = sys.argv[3]  # 119 for normal, 3 for DEMCMC
    nbe_optimization = sys.argv[4]  # 'OFF' or 'ON'
    assim_type = '_p25adapted'

    if run_type not in ('ALL', 'SUBSET_RANDOM', 'SUBSET_INPUT'):
        raise ValueError('unsupported run_type: ' + run_type)
    if mcmc_id not in ('119', '3'):
        raise ValueError('unsupported mcmc_id: ' + mcmc_id)

    cur_dir = os.getcwd() + '/'
    if nbe_optimization == 'OFF':
        mdf_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_MDF/'
        runmodel_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_GENERAL/'
    else:
        mdf_dir = '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_MDF/'
        runmodel_dir = '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_GENERAL/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = '../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    output_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    n_iterations = sys.argv[5]
    runtime_assim = int(sys.argv[6])
    resubmit_num = sys.argv[7]
    n_chains_resubmit = 4
    ens_size = 500

    if mcmc_id == '119':
        frac_save_out = str(int(int(n_iterations) / 500))
    else:  # mcmc_id == '3'
        # n_iterations / frac_save_out * 100 will be ensemble size
        frac_save_out = str(int(int(n_iterations) / 500 * 100))

    # select which pixels to submit
    os.chdir(cbf_dir)
    if run_type == 'ALL':
        cbf_files = glob.glob('*.cbf')
    elif run_type == 'SUBSET_RANDOM':
        cbf_files = sample(glob.glob('*.cbf'), 10)
    else:  # 'SUBSET_INPUT'
        cbf_files = select_cbf_files(glob.glob('*.cbf'), [
            '3809', '3524', '2224', '4170', '1945', '3813', '4054', '3264',
            '1271', '3457'
        ])
    os.chdir(cur_dir + '/../')

    cbf_files.sort()

    # one combined submission file: all assimilation and forward commands
    # for a pixel's new chains go on a single line
    txt_filename = ('combined_assim_forward_list_' + model_id + '_' +
                    run_type + assim_type + '_MCMC' + mcmc_id + '_' +
                    n_iterations + '_resubmit' + resubmit_num + '.txt')

    resubmit_count = 0
    # converged fraction for each pixel, for mapping
    gr_pixels = np.zeros(len(cbf_files)) * np.nan
    pixels = []
    best_subset = []
    conv_bool_lst = []

    with open(txt_filename, 'w') as txt_file:
        for pixel_idx, cbf_file in enumerate(cbf_files):
            best_subset_pixel = []
            resubmit = False
            print(cbf_file, pixel_idx)

            # The parsed cbf is not used below; the read is kept so that an
            # unreadable cbf still stops the run as before.
            # NOTE(review): drop this call if that is not intended.
            rwb.read_cbf_file(cur_dir + cbf_dir + cbf_file)
            pixel = cbf_file[-8:-4]

            cbr_files = glob.glob(cur_dir + '../' + cbr_dir + '*MCMC' +
                                  mcmc_id + '_' + n_iterations + '_' + pixel +
                                  '_*.cbr')
            # order chains numerically by the chain number in the file name
            cbr_files = sorted(
                cbr_files,
                key=lambda x: int(
                    x.partition(pixel + '_')[-1].partition('.cbr')[0]))

            if len(cbr_files) >= n_chains_resubmit:
                pixels.append(pixel)

            if cbr_files:
                end_chain = int(
                    cbr_files[-1].partition(pixel + '_')[-1].partition('.cbr')[0])
            else:
                end_chain = 0
                resubmit = True

            # all possible n_chains_to_converge-member combinations of chains
            n_chains_to_converge = n_chains_resubmit
            cbr_files_all_subsets = [
                list(i)
                for i in itertools.combinations(cbr_files, n_chains_to_converge)
            ]

            continue_check = True
            for subset in cbr_files_all_subsets:
                if not continue_check:
                    # a converged subset was already found for this pixel
                    continue

                # read parameters and compute gelman rubin
                cbr_chain_list = []
                chain_nums = ['0']
                for cbr_file in subset:
                    cbr_chain = rwb.read_cbr_file(cbr_file,
                                                  {'nopars': len(parnames)})
                    cbr_chain = autil.modulus_Bday_Fday(cbr_chain, parnames)
                    # append chain number
                    chain_nums.append(
                        cbr_file.partition('.cbr')[0].partition(pixel + '_')[-1])

                    if np.shape(cbr_chain)[0] == ens_size:
                        cbr_chain_list.append(cbr_chain)
                    else:
                        print('incorrect ensemble size)')
                        resubmit = True

                if len(cbr_chain_list) > 1:
                    gr = autil.gelman_rubin(cbr_chain_list)
                    n_converged = sum(gr < 1.2)
                    frac_converged = n_converged / len(parnames)
                    print('%i/%i' % (n_converged, len(parnames)))

                    if np.isnan(gr_pixels[pixel_idx]):
                        gr_pixels[pixel_idx] = frac_converged

                    if frac_converged < 0.9:
                        resubmit = True
                        # remember the best non-converged subset seen so far
                        if frac_converged >= gr_pixels[pixel_idx]:
                            gr_pixels[pixel_idx] = frac_converged
                            best_subset_pixel.append(chain_nums)
                            conv_bool = 0
                    else:
                        # converged: stop testing further subsets
                        resubmit = False
                        continue_check = False
                        gr_pixels[pixel_idx] = frac_converged
                        best_subset_pixel.append(chain_nums)
                        conv_bool = 1
                else:
                    print('gr undefined')
                    best_subset_pixel.append(chain_nums)
                    conv_bool = 0
                    resubmit = True

            if best_subset_pixel:
                best_subset.append(best_subset_pixel[-1])
                conv_bool_lst.append(conv_bool)

            # write into text file if pixel needs to be resubmitted
            if resubmit:
                first_resubmit_chain = end_chain + 1
                last_resubmit_chain = end_chain + n_chains_resubmit
                for chain in range(first_resubmit_chain,
                                   last_resubmit_chain + 1):
                    # shared stem of the cbr and output file names
                    stem = (cbf_file[:-8] + 'MCMC' + mcmc_id + '_' +
                            n_iterations + '_' + cbf_file[-8:-4] + '_' +
                            str(chain))
                    txt_file.write(
                        '%sCARDAMOM_MDF.exe %s%s %s%s %s 0 %s 0.001 %s 1000' %
                        (mdf_dir, cbf_dir[3:], cbf_file, cbr_dir,
                         stem + '.cbr', n_iterations, frac_save_out, mcmc_id))
                    txt_file.write(
                        ' && %sCARDAMOM_RUN_MODEL.exe %s%s %s%s %s%s %s%s %s%s %s%s'
                        % (runmodel_dir, cbf_dir[3:], cbf_file, cbr_dir,
                           stem + '.cbr',
                           output_dir, 'fluxfile_' + stem + '.bin',
                           output_dir, 'poolfile_' + stem + '.bin',
                           output_dir, 'edcdfile_' + stem + '.bin',
                           output_dir, 'probfile_' + stem + '.bin'))
                    # chain commands are joined with &&; the last ends the line
                    txt_file.write(
                        ' && ' if chain < last_resubmit_chain else '\n')
                resubmit_count += 1

    with open(txt_filename[:-3] + 'sh', 'w') as sh_file:
        autil.fill_in_sh(sh_file,
                         array_size=resubmit_count,
                         n_hours=runtime_assim,
                         txt_file=txt_filename,
                         combined=True)

    # NOTE(review): gr_pixels has one entry per cbf file while pixels only
    # holds pixels with at least n_chains_resubmit chains; confirm plot_map
    # tolerates the length difference.
    autil.plot_map(nrows=46,
                   ncols=73,
                   land_pixel_list=pixels,
                   pixel_value_list=pixels,
                   value_list=gr_pixels * 100,
                   savepath=cur_dir + plot_dir + 'maps/',
                   savename='gr_' + model_id + assim_type + '_' + run_type +
                   '_MCMC' + mcmc_id + '_' + n_iterations + '_resubmit' +
                   resubmit_num)

    print(len(pixels), len(best_subset), len(conv_bool_lst))
    DataFrame(list(zip(pixels, best_subset, conv_bool_lst))).to_pickle(
        cur_dir + '../' + cbr_dir + model_id + assim_type + '_' + run_type +
        '_MCMC' + mcmc_id + '_' + n_iterations + '_best_subset.pkl')

    return
def main():
    """Scatter-compare per-pixel median parameters from MCMC and DEMCMC runs.

    For each (mcmc_id, n_iter, model_id) triple the best-subset pickle is
    loaded, and for every pixel that has cbr files and an entry in the pickle
    the median over the stacked best chains is stored (all-NaN when the pixel
    is flagged as not converged).  Results for mcmc_id '119' and '3' are then
    passed to ``plot_scatter_compare``.
    """
    # run specifications
    model_ids = ['811', '811']
    assim_type = '_p25adapted'
    ens_size = 500
    mcmc_ids = ['119', '3']
    n_iters = ['40000000', '1000000']

    # directories
    cur_dir = os.getcwd() + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'

    # one NaN-filled slot per pixel for each sampler
    n_pixels = 928
    demcmc_pred = [np.full(34, np.nan) for _ in range(n_pixels)]
    mcmc_pred = [np.full(34, np.nan) for _ in range(n_pixels)]
    pred_by_sampler = {'119': mcmc_pred, '3': demcmc_pred}

    for mcmc_id, n_iter, model_id in zip(mcmc_ids, n_iters, model_ids):
        # directories for this model
        cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
        cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
        output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
        parnames = autil.get_parnames('../../misc/', model_id)

        # sorted pixel ids taken from the cbf file names
        pixels = sorted(cbf[-8:-4] for cbf in glob.glob(cbf_dir + '*.cbf'))

        # best chains per pixel; columns renamed for easier access
        conv_chains = read_pickle(cbr_dir + model_id + assim_type + '_ALL' +
                                  '_MCMC' + mcmc_id + '_' + n_iter +
                                  '_best_subset.pkl')
        conv_chains.columns = ['pixel', 'bestchains', 'conv']

        target = pred_by_sampler.get(mcmc_id)

        for pixel in pixels:
            slot = pixels.index(pixel)
            cbr_pattern = (cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter + '_' +
                           pixel + '*.cbr')

            # skip pixels without cbr files or without a best-subset entry
            has_cbrs = len(glob.glob(cbr_pattern)) > 0
            in_pickle = pixel in conv_chains['pixel'].values
            if not (has_cbrs and in_pickle):
                continue

            # read cbf file for that pixel
            cbf_pixel = rwb.read_cbf_file(
                glob.glob(cbf_dir + '*_' + pixel + '.cbf')[0])

            # cbrs for this pixel, MCMC id and number of iterations
            cbr_files = sorted(glob.glob(cbr_pattern))

            pixel_rows = conv_chains.loc[conv_chains['pixel'] == pixel]
            best_chains = pixel_rows['bestchains'].values[0][1:]
            print(pixel, best_chains)
            conv = pixel_rows['conv'].values[0]

            if conv == 1:
                # aggregate best chains from optimal posteriors
                chain_sets = []
                for chain in best_chains:
                    suffix = pixel + '_' + chain + '.cbr'
                    chain_file = [f for f in cbr_files if suffix in f][0]
                    raw = rwb.read_cbr_file(chain_file,
                                            {'nopars': len(parnames)})
                    chain_sets.append(autil.modulus_Bday_Fday(raw, parnames))
                cbr_data = np.vstack(chain_sets)
            else:
                cbr_data = np.ones((ens_size, len(parnames))) * np.nan

            if target is not None:
                target[slot] = np.nanmedian(cbr_data, axis=0)

    plot_scatter_compare(demcmc_pred, mcmc_pred, parnames,
                         cur_dir + plot_dir + 'demcmc_mcmc/',
                         'par_compare_811')

    return
def _stack_chain_parameters(chain_files, parnames):
    """Read every cbr file in chain_files and stack the parameter sets by row.

    Returns None when chain_files is empty so the caller can tell "no data"
    apart from a real array.
    """
    chains = [
        autil.modulus_Bday_Fday(
            rwb.read_cbr_file(chain_file, {'nopars': len(parnames)}),
            parnames)
        for chain_file in chain_files
    ]
    if not chains:
        return None
    return np.concatenate(chains, axis=0)


def main():
    """Scatter-compare per-pixel median parameters: EF runs vs. optimal runs.

    Command-line arguments (read from ``sys.argv``):
        1. model_id
        2. mcmc_id: '119' for normal MCMC, '3' for DEMCMC
        3. n_iter

    For each cbf file the pixel id is taken from the file name, all matching
    cbr chains are stacked for the optimal run and for the EF run, and the
    per-parameter nanmedian is stored in the pixel's row.  A pixel without
    chain files keeps an all-NaN row; previously it silently reused the
    previous pixel's stacked chains (or failed on the first pixel).
    """
    model_id = sys.argv[1]
    mcmc_id = sys.argv[2]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[3]
    assim_type = '_p25adapted'

    # EF comparison
    ef_spec = 'clipped_PLS_soilgrids_poolobs_rescaled_forward'

    # directories
    cur_dir = os.getcwd() + '/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    cbr_ef_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_ef/' + model_id + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    # get cbfs to run through
    os.chdir(cbf_dir)
    cbf_files = glob.glob('*.cbf')
    cbf_files.sort()
    os.chdir(cur_dir + '/../')

    # one row per cbf file, NaN until a median is available
    opt_preds = np.zeros((len(cbf_files), len(parnames))) * np.nan
    ef_preds = np.zeros((len(cbf_files), len(parnames))) * np.nan

    for row, cbf_file in enumerate(cbf_files):
        pixel = cbf_file[-8:-4]
        print(pixel)

        pixel_chains_opt = autil.find_all_chains(
            glob.glob(cbr_dir + '*_MCMC' + mcmc_id + '_' + n_iter + '_' +
                      pixel + '*.cbr'), pixel)
        pixel_chains_opt.sort()  # filenames

        pixel_chains_ef = autil.find_all_chains(
            glob.glob(cbr_ef_dir + '*_MCMC' + mcmc_id + '_' + n_iter + '_' +
                      ef_spec + '_' + pixel + '.cbr'), pixel)
        pixel_chains_ef.sort()

        cbr_pixel_opt = _stack_chain_parameters(pixel_chains_opt, parnames)
        cbr_pixel_ef = _stack_chain_parameters(pixel_chains_ef, parnames)

        # leave the row as NaN when this pixel has no chains of that kind
        if cbr_pixel_opt is not None:
            opt_preds[row, :] = np.nanmedian(cbr_pixel_opt, axis=0)
        if cbr_pixel_ef is not None:
            ef_preds[row, :] = np.nanmedian(cbr_pixel_ef, axis=0)

    plot_scatter_compare(ef_preds, opt_preds, parnames,
                         plot_dir + 'scatters/',
                         model_id + '_MCMC' + mcmc_id + '_' + n_iter)

    return
def main():
    """Map per-pixel median parameters from the EF forward-run cbr files.

    Command-line arguments (read from ``sys.argv``):
        1. model_id
        2. run_type: 'ALL' uses every pixel found in the cbf directory; any
           other value uses the hard-coded ten-pixel subset
        3. mcmc_id: '119' for normal MCMC, '3' for DEMCMC
        4. n_iter

    For every pixel, all matching cbr chains are stacked and the
    per-parameter nanmedian is stored; a Gelman-Rubin converged fraction is
    computed when more than one chain has the full ensemble size.  One map
    per parameter is then drawn with ``autil.plot_map``.

    A pixel with no chain files keeps an all-NaN row; previously it reused
    the previous pixel's stacked chains (or failed on the first pixel).
    """
    model_id = sys.argv[1]
    run_type = sys.argv[2]  # ALL or SUBSET
    mcmc_id = sys.argv[3]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[4]
    ens_size = 500
    assim_type = '_p25adapted'
    use_bestchains_pkl = False

    cur_dir = os.getcwd() + '/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '_ef_ic/' + model_id + '/'
    cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_ef/' + model_id + '/'
    output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '_ef/' + model_id + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    # load list of land pixels
    if run_type == 'ALL':
        pixels = sorted(
            {cbf[-8:-4] for cbf in glob.glob(cbf_dir + '*.cbf')})
    else:
        pixels = sorted(['3809', '3524', '2224', '4170', '1945', '3813',
                         '4054', '3264', '1271', '3457'])

    # load list of cbrs
    cbr_files = glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter +
                          '_*PLS*forward*.cbr')

    # the best-chains table does not depend on the pixel, so read it once
    conv_chains_pkl = None
    if use_bestchains_pkl:
        conv_chains_pkl = read_pickle(
            glob.glob(cbr_dir + model_id + assim_type + '*_MCMC' + mcmc_id +
                      '_' + n_iter + '_best_subset.pkl')[0])
        # rename columns for easier access
        conv_chains_pkl.columns = ['pixel', 'bestchains', 'conv']

    # converged fraction for each pixel, for mapping
    gr_pixels = np.zeros(len(pixels)) * np.nan
    par_pixels = np.zeros((len(pixels), len(parnames))) * np.nan

    for pixel_idx, pixel in enumerate(pixels):
        print(pixel, pixel_idx)

        pixel_chains = autil.find_all_chains(cbr_files, pixel)
        pixel_chains.sort()  # filenames

        if use_bestchains_pkl:
            if pixel not in conv_chains_pkl['pixel'].values:
                continue
            bestchains = conv_chains_pkl.loc[
                conv_chains_pkl['pixel'] == pixel]['bestchains'].values[0][1:]
            print(bestchains)
            pixel_chains = [
                pixel_chain for pixel_chain in pixel_chains
                if pixel_chain.partition(pixel + '_')[-1][:-4] in bestchains
            ]

        cbf_filename = glob.glob(cur_dir + cbf_dir + '*' + pixel + '.cbf')[0]
        cbf_pixel = rwb.read_cbf_file(cbf_filename)

        all_chains = []  # every chain, for the median
        cbr_chain_list = []  # only full-size chains, for Gelman-Rubin
        # flux/pool output is only collected for optional time-series
        # plotting; it does not feed the maps below
        flux_chains = []
        pool_chains = []
        for pixel_chain in pixel_chains:
            print(pixel_chain)
            cbr_chain = rwb.read_cbr_file(pixel_chain,
                                          {'nopars': len(parnames)})
            cbr_chain = autil.modulus_Bday_Fday(cbr_chain, parnames)
            all_chains.append(cbr_chain)

            # best effort: missing or unreadable output must not stop the run
            try:
                out_stem = pixel_chain.partition(cbr_dir)[-1][:-3] + 'bin'
                flux_chain = rwb.readbinarymat(
                    cur_dir + output_dir + 'fluxfile_' + out_stem,
                    [cbf_pixel['nodays'],
                     autil.get_nofluxes_nopools_lma(model_id)[0]])
                pool_chain = rwb.readbinarymat(
                    cur_dir + output_dir + 'poolfile_' + out_stem,
                    [cbf_pixel['nodays'] + 1,
                     autil.get_nofluxes_nopools_lma(model_id)[1]])
            except Exception as err:
                print('could not read flux/pool output for ' + pixel_chain +
                      ': ' + str(err))
            else:
                flux_chains.append(flux_chain)
                pool_chains.append(pool_chain)

            if np.shape(cbr_chain)[0] == ens_size:
                cbr_chain_list.append(cbr_chain)

        if len(cbr_chain_list) > 1:
            gr = autil.gelman_rubin(cbr_chain_list)
            print('%i of %i parameters converged' %
                  (sum(gr < 1.2), len(parnames)))
            gr_pixels[pixel_idx] = sum(gr < 1.2) / len(parnames)

        # leave the row as NaN when this pixel has no chains
        if all_chains:
            cbr_pixel = np.concatenate(all_chains, axis=0)
            par_pixels[pixel_idx, :] = np.nanmedian(cbr_pixel, axis=0)

    # the land-pixel list is the same for every parameter map
    land_pixels = [
        cbf[-8:-4] for cbf in glob.glob(cur_dir + cbf_dir + '*.cbf')
    ]
    for par in range(len(parnames)):
        autil.plot_map(
            nrows=46,
            ncols=73,
            land_pixel_list=land_pixels,
            pixel_value_list=pixels,
            value_list=par_pixels[:, par],
            vmax=np.nanpercentile(par_pixels[:, par], 90),
            savepath=cur_dir + plot_dir + 'maps/',
            savename='par' + str(par) + '_' + model_id + assim_type +
            '_MCMC' + mcmc_id + '_' + n_iter +
            '_EF_clipped_PLS_soilgrids_poolobs_rescaled_forward')

    return