def main():
    """Partition forward-run uncertainty into structural (Ms) and parametric (Mp) parts.

    Command-line arguments:
        sys.argv[1]: MCMC ID used in the cbr file names.
        sys.argv[2]: iteration count used in the cbr file names.
        sys.argv[3]: number of models to leave out of the randomly sampled suite.

    For every variable/pixel combination the script loops over the sampled
    models, concatenates all chains found for the pixel, skips models where
    fewer than 90% of parameters have Gelman-Rubin < 1.2, and accumulates:
        Ms: variance across models of the per-model median time series.
        Mp: mean across models of the variance of each model's ensemble
            after autil.remove_below_25_above_75 has been applied.
    The time-averaged fractions Ms/(Ms+Mp) and Mp/(Ms+Mp), plus the number of
    contributing models n, are stored per '<pixel>_<var>' row and pickled.
    """
    cur_dir = os.getcwd() + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'

    os.chdir(plot_dir + 'dists/')

    # get list of model ids (prefix of each plot file name)
    models_full = list(set([el.split('_')[0] for el in glob.glob('*.png')]))

    # exclude model 102 for now (original note: "temporary until 102-->101");
    # tolerate it being absent instead of raising ValueError
    if '102' in models_full:
        models_full.remove('102')
    os.chdir(cur_dir)

    # set lists of variables and pixels
    vrs = [
        'NBE', 'cumNBE', 'LAI', 'GPP', 'Reco', 'Rauto', 'Rhet', 'lit', 'root',
        'som', 'wood'
    ]
    pixels = [
        '3809', '3524', '2224', '4170', '1945', '3813', '4054', '3264', '1271',
        '3457'
    ]

    # set MCMC ID
    mcmc_id = sys.argv[1]
    n_iter = sys.argv[2]
    assim_type = '_longadapted'
    nmodels_leave_out = sys.argv[3]
    models = random.sample(models_full,
                           len(models_full) - int(nmodels_leave_out))
    print(models)

    # dataframe will hold model structural uncertainty (Ms) and model
    # parametric uncertainty (Mp) for each pixel-var combination;
    # n is number of models that make up the suite.
    # A list (not a set) keeps the column order deterministic and is accepted
    # by current pandas versions.
    partitioning = DataFrame(columns=['Ms', 'Mp', 'n'])
    df_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/processed_df/'

    gr_thresh = 1.2  # below this value parameters are assumed to be convergent
    nsteps = 228 if assim_type == '_longadapted' else 240

    for var in vrs:
        print('Variable: ' + var)

        for pixel in pixels:
            print('Pixel: ' + pixel)

            # per-model medians of the prediction through time
            meds = np.zeros((len(models), nsteps)) * np.nan
            Mp, n = 0, 0

            for model_ind, model in enumerate(models):
                print(model)

                cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model + '/'
                cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model + '/'
                output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model + '/'
                parnames = autil.get_parnames(cur_dir + '../../misc/', model)

                os.chdir(cur_dir + cbr_dir)
                files = set(
                    glob.glob('*MCMC' + mcmc_id + '_' + n_iter + '_*.cbr'))

                # list of files corresponding to each chain at that pixel,
                # e.g. 2224_1, 2224_2, 2224_3, 2222_4
                pixel_chains = autil.find_all_chains(files, pixel)
                pixel_chains.sort()
                if not pixel_chains:
                    continue

                cbf_pixel = rwb.read_cbf_file(
                    cur_dir + cbf_dir +
                    pixel_chains[0].partition('_MCMC')[0] + '_' + pixel +
                    '.cbf')
                # indexed as [0] fluxes, [1] pools, [2] passed to get_output
                model_dims = autil.get_nofluxes_nopools_lma(model)

                cbr_chain_list = []  # separate cbrs per chain, used for gelman rubin
                flux_chain_list = []
                pool_chain_list = []
                for pixel_chain in pixel_chains:
                    print(pixel_chain)

                    # cbr file for one chain
                    cbr_chain_list.append(
                        rwb.read_cbr_file(pixel_chain,
                                          {'nopars': len(parnames)}))
                    flux_chain_list.append(
                        rwb.readbinarymat(
                            cur_dir + output_dir + 'fluxfile_' +
                            pixel_chain[:-3] + 'bin',
                            [cbf_pixel['nodays'], model_dims[0]]))
                    pool_chain_list.append(
                        rwb.readbinarymat(
                            cur_dir + output_dir + 'poolfile_' +
                            pixel_chain[:-3] + 'bin',
                            [cbf_pixel['nodays'] + 1, model_dims[1]]))

                # concatenate all chains along the ensemble axis
                cbr_pixel = np.concatenate(cbr_chain_list, axis=0)
                flux_pixel = np.concatenate(flux_chain_list, axis=0)
                pool_pixel = np.concatenate(pool_chain_list, axis=0)

                gr = autil.gelman_rubin(cbr_chain_list)
                n_converged = sum(gr < gr_thresh)
                print('%i of %i parameters converged with GR<%.1f' %
                      (n_converged, len(parnames), gr_thresh))

                # don't include nonconvergent runs in analysis
                if n_converged / len(parnames) < .9:
                    continue

                # get forward data for var
                fwd_data = autil.get_output(var, model, flux_pixel,
                                            pool_pixel, cbr_pixel,
                                            model_dims[2])
                if len(fwd_data) > 0:
                    # trim the extra trailing step so the series fits meds
                    if fwd_data.shape[1] > nsteps:
                        fwd_data = fwd_data[:, :-1]

                    fwd_data = autil.remove_outliers(fwd_data)
                    meds[model_ind, :] = np.nanmedian(fwd_data, axis=0)

                    # set values outside of 25th-75th range to nan
                    fwd_data = autil.remove_below_25_above_75(fwd_data)
                    # sum of intra-ensemble variance, only on 25th-75th
                    Mp += np.nanvar(fwd_data, axis=0)
                    n += 1

            Ms = np.nanvar(meds, axis=0)  # inter-median variance
            Mp = Mp / n if n != 0 else float('nan')

            Ms_div_sum = Ms / (Ms + Mp)
            Mp_div_sum = Mp / (Ms + Mp)

            partitioning.loc[pixel + '_' + var] = {
                'Ms': np.nanmean(Ms_div_sum),
                'Mp': np.nanmean(Mp_div_sum),
                'n': n
            }

    print(partitioning.to_string())
    partitioning.sort_index(axis=1).to_pickle(
        cur_dir + df_dir + 'summary' + assim_type + '_MCMC' + mcmc_id + '_' +
        date.today().strftime("%m%d%y") + '_' + str(len(models)) + '.pkl')
def main():
    """Compare ensemble spread or RMSE, and convergence, across fixed run combinations.

    Command-line arguments:
        sys.argv[1]: metric; 'spread' stores the time-mean of |75th - 25th
            percentile| of the forward ensemble, any other value stores the
            RMSE of the ensemble median against the cbf observations.

    Each combination is [model_id, mcmc_id, n_iterations]. For every pixel
    and combination the chains are concatenated, the percentage of parameters
    with Gelman-Rubin < 1.2 is stored in `conv`, and the chosen metric is
    stored per variable in `ens_spread`. Entries stay NaN when no chains or
    no forward output are available. Plots are written with the autil
    plotting helpers.
    """
    combinations = [['811', '119', '40000000'], ['811', '3', '1000000'],
                    ['911', '119', '40000000']]
    assim_type = '_longadapted'
    metric = sys.argv[1]

    vrs = [
        'NBE', 'cumNBE', 'LAI', 'GPP', 'Reco', 'Rauto', 'Rhet', 'lit', 'root',
        'som', 'wood'
    ]
    pixels = [
        '3809', '3524', '2224', '4170', '1945', '3813', '4054', '3264', '1271',
        '3457'
    ]

    ens_spread = np.ones(
        (len(pixels), len(vrs), len(combinations))) * float('nan')
    conv = np.ones((len(pixels), len(combinations))) * float('nan')

    cur_dir = os.getcwd() + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'

    for pixel_ind, pixel in enumerate(pixels):
        for comb_count, comb in enumerate(combinations):
            model_id = comb[0]
            mcmc_id = comb[1]
            it = comb[2]

            cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
            cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
            output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
            parnames = autil.get_parnames(cur_dir + '../../misc/', model_id)

            os.chdir(cur_dir + cbr_dir)
            files = glob.glob('*MCMC' + mcmc_id + '_' + it + '_' + pixel +
                              '*.cbr')
            pixel_chains = autil.find_all_chains(files, pixel)
            pixel_chains.sort()  # filenames

            # for model 911 only the last four chain files are used
            if model_id == '911':
                pixel_chains = pixel_chains[-4:]
            print(pixel_chains)

            # nothing to read for this pixel/combination: leave NaN entries
            if not pixel_chains:
                print('no chains found for pixel ' + pixel + ', skipping')
                continue

            cbf_pixel = rwb.read_cbf_file(
                cur_dir + cbf_dir + pixel_chains[0].partition('_MCMC')[0] +
                '_' + pixel + '.cbf')
            # indexed as [0] fluxes, [1] pools, [2] passed to get_output
            model_dims = autil.get_nofluxes_nopools_lma(model_id)

            cbr_chain_list = []
            flux_chain_list = []
            pool_chain_list = []
            for pixel_chain in pixel_chains:
                print(pixel_chain)
                cbr_chain = rwb.read_cbr_file(pixel_chain,
                                              {'nopars': len(parnames)})
                flux_chain_list.append(
                    rwb.readbinarymat(
                        cur_dir + output_dir + 'fluxfile_' +
                        pixel_chain[:-3] + 'bin',
                        [cbf_pixel['nodays'], model_dims[0]]))
                pool_chain_list.append(
                    rwb.readbinarymat(
                        cur_dir + output_dir + 'poolfile_' +
                        pixel_chain[:-3] + 'bin',
                        [cbf_pixel['nodays'] + 1, model_dims[1]]))
                cbr_chain_list.append(cbr_chain)
                print(np.shape(cbr_chain))

            # concatenate all chains along the ensemble axis
            cbr_pixel = np.concatenate(cbr_chain_list, axis=0)
            flux_pixel = np.concatenate(flux_chain_list, axis=0)
            pool_pixel = np.concatenate(pool_chain_list, axis=0)
            print(np.shape(cbr_pixel))

            gr = autil.gelman_rubin(cbr_chain_list)
            print('%i of %i parameters converged' %
                  (sum(gr < 1.2), len(parnames)))
            conv[pixel_ind,
                 comb_count] = sum(gr < 1.2) / len(parnames) * 100

            for var_ind, var in enumerate(vrs):
                print(var)

                # observations for this variable; -9999 marks missing values.
                # Variables without an observation entry get an all-NaN series.
                try:
                    obs = cbf_pixel['OBS'][var]
                    obs[obs == -9999] = float('nan')
                except (KeyError, IndexError, TypeError, ValueError):
                    obs = np.ones(cbf_pixel['nodays']) * np.nan
                n_obs = np.sum(np.isfinite(obs))

                fwd_data = autil.get_output(var, model_id, flux_pixel,
                                            pool_pixel, cbr_pixel,
                                            model_dims[2])

                # without forward output there is nothing to score; keeping
                # the NaN avoids reusing statistics of the previous variable
                if len(fwd_data) == 0:
                    continue

                if fwd_data.shape[1] > cbf_pixel['nodays']:
                    fwd_data = fwd_data[:, :-1]

                fwd_data = autil.remove_outliers(fwd_data)
                med = np.nanmedian(fwd_data, axis=0)
                ub = np.nanpercentile(fwd_data, 75, axis=0)
                lb = np.nanpercentile(fwd_data, 25, axis=0)

                if metric == 'spread':
                    value = np.nanmean(abs(ub - lb))
                else:
                    value = np.sqrt(np.nansum((med - obs)**2) / n_obs)
                ens_spread[pixel_ind, var_ind, comb_count] = value

    # NOTE: `it` and `model_id` below are the values of the last combination,
    # as in the original script.
    for var in vrs:
        autil.plot_spread_v_iter(
            ens_spread,
            pixels,
            vrs.index(var),
            var,
            it,
            metric,
            cur_dir + plot_dir + 'spread_v_iter',
            'iter_test_compare_' + assim_type + '_' + model_id + '_' + var +
            '_' + metric,
            single_val=True)

    autil.plot_conv_v_iter(conv,
                           pixels,
                           it,
                           cur_dir + plot_dir + 'spread_v_iter',
                           'iter_test_compare' + assim_type + '_' + model_id +
                           '_conv',
                           single_val=True)
def main():
    """Regress median parameter values on land-cover fractions and plot test-set skill.

    Command-line arguments:
        sys.argv[1]: model ID.
        sys.argv[2]: MCMC ID (119 for normal, 3 for DEMCMC).
        sys.argv[3]: iteration count used in the cbr file names.

    One feature row per land pixel is built from the biome-fraction map, one
    target row per parameter from the median over all concatenated chains.
    Each parameter is fitted with a linear regression on a 60/40 train/test
    split and the test predictions are passed to plot_scatter_test_pred.
    """
    # run information to read
    model_id = sys.argv[1]
    mcmc_id = sys.argv[2]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[3]
    ens_size = 500  # not referenced further in this script
    assim_type = '_longadapted'

    # directories
    cur_dir = os.getcwd() + '/'
    misc_dir = cur_dir + '/../../misc/'
    cbf_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    plot_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    # map containing the location of each mstmip pixel on the GEOSCHEM grid
    pixel_nums = np.load(misc_dir + 'mstmip_pixel_nums.npy')

    # map of biome fractions from mstmip
    with np.load(misc_dir + 'mstmip_biome_frac.npz') as archive:
        biome_frac = archive['arr_0']
    n_classes = biome_frac.shape[0]

    # land pixels (four characters before the .cbf suffix) and cbr files
    pixels = list({path[-8:-4] for path in glob.glob(cbf_dir + '*.cbf')})
    files = glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter + '_*.cbr')

    # X has shape (n_samples, n_features); y has shape (n_pars, n_samples)
    n_regr_models = len(parnames)
    X = np.full((len(pixels), n_classes), np.nan)
    y = np.full((n_regr_models, len(pixels)), np.nan)

    for ind, pixel in enumerate(pixels):
        if ind % 10 == 0:
            print(ind)

        # land-cover information: average biome fraction across the mstmip
        # pixels that fall within the coarse pixel
        locs = pixel_nums == float(pixel)
        fracs_at_geos_pixel = no_water_pixels(biome_frac[:, locs])
        X[ind, :] = np.nanmean(fracs_at_geos_pixel, axis=1)

        # parameter information
        pixel_chains = autil.find_all_chains(files, pixel)
        pixel_chains.sort()  # filenames

        # concatenate across chains, then take the per-parameter median
        if len(pixel_chains) > 0:
            chain_arrays = [
                rwb.read_cbr_file(chain_file, {'nopars': len(parnames)})
                for chain_file in pixel_chains
            ]
            cbr_pixel = np.concatenate(chain_arrays, axis=0)
            y[:, ind] = np.nanmedian(cbr_pixel, axis=0)

    # remove nan values so regression runs
    Xr, yr = drop_nan(X, y)

    # one regression model per parameter
    y_test_all_pars = []
    y_pred_all_pars = []
    for regr_model, parname in enumerate(parnames):
        print('running regression for ' + parname + ' . . . ')

        # split train and test sets, 60-40
        X_train, X_test, y_train, y_test = train_test_split(
            Xr, yr[regr_model, :], test_size=0.4)
        y_test_all_pars.append(y_test)

        # fit on the training set, predict on the test set
        fitted = LinearRegression().fit(X_train, y_train)
        y_pred_all_pars.append(fitted.predict(X_test))

    # summary scatter plot
    plot_scatter_test_pred(
        y_test_all_pars, y_pred_all_pars, parnames, plot_dir + 'lc_scat/',
        'par_preds_' + model_id + '_MCMC' + mcmc_id + '_' + n_iter +
        assim_type)
def main():
    """Compute one summary value for a single pixel and append it to a CSV file.

    Command-line arguments:
        sys.argv[1]: model ID.
        sys.argv[2]: run type (ALL or SUBSET); not used by this script.
        sys.argv[3]: MCMC ID (119 for normal, 3 for DEMCMC).
        sys.argv[4]: iteration count used in the cbr file names.
        sys.argv[5]: quantity to write: 'GR', a flux or pool name, or 'PARXX'
            where XX is the 1-based parameter number.
        sys.argv[6]: pixel ID.

    The value written is:
        GR: fraction of parameters with Gelman-Rubin < 1.2, or -9999 when
            fewer than two chains of length `ens_size` are available.
        PARXX: median of that parameter over all concatenated chains.
        otherwise: time-mean of the ensemble median of the requested output.
    For the latter two, -9999 is written unless the converged fraction
    exceeds 0.9.

    Raises:
        IndexError: if no chain files are found for the pixel.
    """
    model_id = sys.argv[1]
    mcmc_id = sys.argv[3]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[4]
    var_to_plot = sys.argv[5]  # GR, a flux or pool, or PARXX
    pixel = sys.argv[6]
    ens_size = 500
    assim_type = '_longadapted'

    cur_dir = os.getcwd() + '/'
    if 'scripts' not in cur_dir:
        cur_dir = cur_dir + 'scripts/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf'+assim_type+'/' + model_id + '/'
    cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr'+assim_type+'/' + model_id + '/'
    output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output'+assim_type+'/' + model_id + '/'
    parnames = autil.get_parnames(cur_dir + '../../misc/', model_id)

    # resolve the cbr directory against cur_dir, like every other path here
    os.chdir(cur_dir + cbr_dir)
    files = glob.glob('*MCMC'+mcmc_id+'_'+n_iter+'_*.cbr')
    print(pixel)

    pixel_chains = autil.find_all_chains(files, pixel)
    pixel_chains.sort()  # filenames
    print(pixel_chains)

    # the cbf and csv file names are derived from the first chain file
    if not pixel_chains:
        raise IndexError('no cbr chains found for pixel ' + pixel)
    file_prefix = pixel_chains[0].partition('_MCMC')[0]

    cbf_pixel = rwb.read_cbf_file(cur_dir + cbf_dir + file_prefix + '_' + pixel + '.cbf')
    # indexed as [0] fluxes, [1] pools, [2] passed to get_output
    model_dims = autil.get_nofluxes_nopools_lma(model_id)

    all_cbr_chains = []
    flux_chains = []
    pool_chains = []
    cbr_chain_list = []  # only full-length chains, used for gelman rubin
    for pixel_chain in pixel_chains:
        print(pixel_chain)
        cbr_chain = rwb.read_cbr_file(pixel_chain, {'nopars': len(parnames)})
        all_cbr_chains.append(cbr_chain)

        flux_chains.append(rwb.readbinarymat(
            cur_dir + output_dir + 'fluxfile_' + pixel_chain[:-3]+'bin',
            [cbf_pixel['nodays'], model_dims[0]]))
        pool_chains.append(rwb.readbinarymat(
            cur_dir + output_dir + 'poolfile_' + pixel_chain[:-3]+'bin',
            [cbf_pixel['nodays']+1, model_dims[1]]))

        if np.shape(cbr_chain)[0] == ens_size:
            cbr_chain_list.append(cbr_chain)
            print(np.shape(cbr_chain))

    # concatenate all chains along the ensemble axis
    cbr_pixel = np.concatenate(all_cbr_chains, axis=0)
    flux_pixel = np.concatenate(flux_chains, axis=0)
    pool_pixel = np.concatenate(pool_chains, axis=0)

    ### COMPUTE GELMAN RUBIN
    if len(cbr_chain_list) > 1:
        gr = autil.gelman_rubin(cbr_chain_list)
        gr_pixel = sum(gr<1.2)/len(parnames)
    else:
        gr_pixel = -9999.

    ### DETERMINE DATA TO WRITE TO FILE
    converged = gr_pixel > 0.9
    if var_to_plot == 'GR':
        data = np.copy(gr_pixel)
    elif not converged:
        data = -9999.
    elif 'PAR' in var_to_plot:
        parnum = int(var_to_plot.partition('PAR')[-1])
        data = np.nanmedian(cbr_pixel[:, parnum-1])
    else:
        fwd_data = autil.get_output(var_to_plot, model_id, flux_pixel,
                                    pool_pixel, cbr_pixel, model_dims[2])
        data = np.nanmean(np.nanmedian(fwd_data, axis=0))

    csv_path = (cur_dir + '../../misc/' + model_id + '_' + file_prefix +
                '_MCMC' + mcmc_id + '_' + n_iter + '_' + var_to_plot + '.csv')
    # newline='' lets the csv module control line endings itself
    with open(csv_path, 'a', newline='') as f:
        csv.writer(f).writerow([pixel, data])
def main():
    """Scatter-compare median parameters from the EF runs against the optimized runs.

    Command-line arguments:
        sys.argv[1]: model ID.
        sys.argv[2]: MCMC ID (119 for normal, 3 for DEMCMC).
        sys.argv[3]: iteration count used in the cbr file names.

    For every cbf file (one per pixel) the matching cbr chains of the
    optimized run and of the EF run are read, passed through
    autil.modulus_Bday_Fday, concatenated, and reduced to per-parameter
    medians. A pixel without chains in one of the two runs keeps a NaN row
    for that run. The two tables are handed to plot_scatter_compare.
    """
    model_id = sys.argv[1]
    mcmc_id = sys.argv[2]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[3]
    assim_type = '_p25adapted'

    # EF comparison
    ef_spec = 'clipped_PLS_soilgrids_poolobs_rescaled_forward'

    # directories
    cur_dir = os.getcwd() + '/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    cbr_ef_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_ef/' + model_id + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    # get cbfs to run through
    os.chdir(cbf_dir)
    cbf_files = glob.glob('*.cbf')
    cbf_files.sort()
    # NOTE(review): the relative cbr/plot paths below are resolved from the
    # parent of the start directory, as in the original - confirm intended.
    os.chdir(cur_dir + '/../')

    opt_preds = np.zeros((len(cbf_files), len(parnames))) * np.nan
    ef_preds = np.zeros((len(cbf_files), len(parnames))) * np.nan

    for row, cbf_file in enumerate(cbf_files):
        pixel = cbf_file[-8:-4]
        print(pixel)

        pixel_chains_opt = autil.find_all_chains(
            glob.glob(cbr_dir + '*_MCMC' + mcmc_id + '_' + n_iter + '_' +
                      pixel + '*.cbr'), pixel)
        pixel_chains_opt.sort()  # filenames

        pixel_chains_ef = autil.find_all_chains(
            glob.glob(cbr_ef_dir + '*_MCMC' + mcmc_id + '_' + n_iter + '_' +
                      ef_spec + '_' + pixel + '.cbr'), pixel)
        pixel_chains_ef.sort()

        # Fill each row only from this pixel's own chains; a pixel without
        # chains keeps its NaN row rather than inheriting the previous
        # pixel's parameters.
        for chain_files, preds in ((pixel_chains_opt, opt_preds),
                                   (pixel_chains_ef, ef_preds)):
            if not chain_files:
                continue
            chains = [
                autil.modulus_Bday_Fday(
                    rwb.read_cbr_file(chain_file, {'nopars': len(parnames)}),
                    parnames) for chain_file in chain_files
            ]
            preds[row, :] = np.nanmedian(np.concatenate(chains, axis=0),
                                         axis=0)

    plot_scatter_compare(ef_preds, opt_preds, parnames,
                         plot_dir + 'scatters/',
                         model_id + '_MCMC' + mcmc_id + '_' + n_iter)
def main():
    """Compare ensemble spread or RMSE, and convergence, between two run configurations.

    Command-line arguments:
        sys.argv[1]: starting model ID.
        sys.argv[2]: run type (ALL or SUBSET); not used by this script.
        sys.argv[3]: metric; 'spread' stores the time-mean of |75th - 25th
            percentile| of the forward ensemble, any other value stores the
            RMSE of the ensemble median against the cbf observations.
        sys.argv[4]: what to compare: 'MCMCID' (MCMC IDs 3 vs 119), 'MODEL'
            (starting model vs 911) or 'NBEUNC' (assimilation type vs its
            '_NBEuncreduced' variant).

    For each pixel, comparison member and iteration count, up to four chains
    are concatenated, the percentage of parameters with Gelman-Rubin < 1.2 is
    stored in `conv`, and the metric is stored per variable in `ens_spread`.
    Entries stay NaN when no chains or no forward output are available.

    Raises:
        ValueError: if sys.argv[4] is not one of the supported comparisons.
    """
    model_id_start = sys.argv[1]
    metric = sys.argv[3]  # spread or RMSE
    assim_type = '_p25adapted'
    compare_between = sys.argv[4]  # MCMCID or MODEL or NBEUNC

    # iteration counts to evaluate, one list per comparison member
    n_iters = [['40000000'], ['40000000']]
    vrs = [
        'NBE', 'cumNBE', 'LAI', 'GPP', 'Reco', 'Rauto', 'Rhet', 'lit', 'root',
        'som', 'wood'
    ]
    pixels = [
        '3809', '3524', '2224', '4170', '1945', '3813', '4054', '3264', '1271',
        '3457'
    ]

    cur_dir = os.getcwd() + '/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id_start + '/'
    cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id_start + '/'
    output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id_start + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id_start)

    if compare_between == 'MCMCID':
        comps = ['3', '119']
    elif compare_between == 'MODEL':
        comps = [model_id_start, '911']
        mcmc_id = '119'
    elif compare_between == 'NBEUNC':
        comps = [assim_type, '_p25adapted_NBEuncreduced']
        mcmc_id = '119'
    else:
        raise ValueError('compare_between must be MCMCID, MODEL or NBEUNC, '
                         'got ' + repr(compare_between))

    ens_spread = [
        np.ones((len(pixels), len(vrs), len(n_iters[0]))) * float('nan'),
        np.ones((len(pixels), len(vrs), len(n_iters[1]))) * float('nan')
    ]
    conv = [
        np.ones((len(pixels), len(n_iters[0]))) * float('nan'),
        np.ones((len(pixels), len(n_iters[1]))) * float('nan')
    ]

    for pixel_ind, pixel in enumerate(pixels):
        for comp in comps:
            # first matching position, as in the original lookup
            comp_ind = comps.index(comp)

            if compare_between == 'MCMCID':
                mcmc_id = comp

            elif compare_between == 'MODEL':
                model_id_start = comp
                cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + comp + '/'
                cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + comp + '/'
                output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + comp + '/'
                parnames = autil.get_parnames(cur_dir + '../../misc/', comp)

            elif compare_between == 'NBEUNC':
                assim_type = comp
                cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + comp + '/' + model_id_start + '/'
                cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + comp + '/' + model_id_start + '/'
                output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output' + comp + '/' + model_id_start + '/'
                parnames = autil.get_parnames(cur_dir + '../../misc/',
                                              model_id_start)

            os.chdir(cur_dir + cbr_dir)
            # indexed as [0] fluxes, [1] pools, [2] passed to get_output
            model_dims = autil.get_nofluxes_nopools_lma(model_id_start)

            for it_ind, it in enumerate(n_iters[comp_ind]):
                files = glob.glob('*MCMC' + mcmc_id + '_' + it + '_' + pixel +
                                  '*.cbr')
                pixel_chains = autil.find_all_chains(files, pixel)
                pixel_chains.sort()  # filenames
                print(pixel_chains)

                # nothing to read for this configuration: leave NaN entries
                if not pixel_chains:
                    print('no chains found for pixel ' + pixel + ', skipping')
                    continue

                cbf_pixel = rwb.read_cbf_file(
                    cur_dir + cbf_dir +
                    pixel_chains[0].partition('_MCMC')[0] + '_' + pixel +
                    '.cbf')

                cbr_chain_list = []
                flux_chain_list = []
                pool_chain_list = []
                # at most the first four chains are used
                for pixel_chain in pixel_chains[:4]:
                    print(pixel_chain)
                    cbr_chain = rwb.read_cbr_file(pixel_chain,
                                                  {'nopars': len(parnames)})
                    flux_chain_list.append(
                        rwb.readbinarymat(
                            cur_dir + output_dir + 'fluxfile_' +
                            pixel_chain[:-3] + 'bin',
                            [cbf_pixel['nodays'], model_dims[0]]))
                    pool_chain_list.append(
                        rwb.readbinarymat(
                            cur_dir + output_dir + 'poolfile_' +
                            pixel_chain[:-3] + 'bin',
                            [cbf_pixel['nodays'] + 1, model_dims[1]]))
                    cbr_chain_list.append(cbr_chain)
                    print(np.shape(cbr_chain))

                # concatenate all chains along the ensemble axis
                cbr_pixel = np.concatenate(cbr_chain_list, axis=0)
                flux_pixel = np.concatenate(flux_chain_list, axis=0)
                pool_pixel = np.concatenate(pool_chain_list, axis=0)
                print(np.shape(cbr_pixel))

                gr = autil.gelman_rubin(cbr_chain_list)
                print('%i of %i parameters converged' %
                      (sum(gr < 1.2), len(parnames)))
                conv[comp_ind][pixel_ind,
                               it_ind] = sum(gr < 1.2) / len(parnames) * 100

                for var_ind, var in enumerate(vrs):
                    print(var)

                    # observations for this variable; -9999 marks missing
                    # values. Variables without an observation entry get an
                    # all-NaN series.
                    try:
                        obs = cbf_pixel['OBS'][var]
                        obs[obs == -9999] = float('nan')
                    except (KeyError, IndexError, TypeError, ValueError):
                        obs = np.ones(cbf_pixel['nodays']) * np.nan
                    n_obs = np.sum(np.isfinite(obs))

                    fwd_data = autil.get_output(var, model_id_start,
                                                flux_pixel, pool_pixel,
                                                cbr_pixel, model_dims[2])

                    # score only when forward output exists, so statistics
                    # of a previous variable are never reused
                    if len(fwd_data) > 0:
                        if fwd_data.shape[1] > cbf_pixel['nodays']:
                            fwd_data = fwd_data[:, :-1]

                        fwd_data = autil.remove_outliers(fwd_data)
                        med = np.nanmedian(fwd_data, axis=0)
                        ub = np.nanpercentile(fwd_data, 75, axis=0)
                        lb = np.nanpercentile(fwd_data, 25, axis=0)

                        if metric == 'spread':
                            value = np.nanmean(abs(ub - lb))
                        else:
                            value = np.sqrt(
                                np.nansum((med - obs)**2) / n_obs)
                        ens_spread[comp_ind][pixel_ind, var_ind,
                                             it_ind] = value

                    print(ens_spread[comp_ind][pixel_ind, var_ind, it_ind])

    print(ens_spread)

    # NOTE: `assim_type` and `model_id_start` below carry the values of the
    # last comparison member, as in the original script.
    for var in vrs:
        autil.plot_spread_v_iter(
            ens_spread,
            pixels,
            vrs.index(var),
            var,
            n_iters,
            metric,
            cur_dir + plot_dir + 'spread_v_iter',
            'iter_test' + assim_type + '_' + compare_between + '_' +
            model_id_start + '_' + var + '_' + metric,
            single_val=True)

    autil.plot_conv_v_iter(conv,
                           pixels,
                           n_iters,
                           cur_dir + plot_dir + 'spread_v_iter',
                           'iter_test' + assim_type + '_' + compare_between +
                           '_' + model_id_start + '_conv',
                           single_val=True)
def main():
    """Map the median value of every parameter from the EF forward runs.

    Command-line arguments:
        sys.argv[1]: model ID.
        sys.argv[2]: run type; 'ALL' uses every pixel with a cbf file, any
            other value uses the fixed ten-pixel subset.
        sys.argv[3]: MCMC ID (119 for normal, 3 for DEMCMC).
        sys.argv[4]: iteration count used in the cbr file names.

    For every pixel the chains are read, passed through
    autil.modulus_Bday_Fday and concatenated; the per-parameter median is
    stored in `par_pixels`. The fraction of parameters with Gelman-Rubin
    < 1.2 is stored in `gr_pixels` when at least two chains of length
    `ens_size` exist. Pixels without chains keep NaN. One map per parameter
    is written with autil.plot_map.
    """
    model_id = sys.argv[1]
    run_type = sys.argv[2]  # ALL or SUBSET
    mcmc_id = sys.argv[3]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[4]
    ens_size = 500
    assim_type = '_p25adapted'
    use_bestchains_pkl = False

    cur_dir = os.getcwd() + '/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf'+assim_type+'_ef_ic/' + model_id + '/'
    cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr'+assim_type+'_ef/' + model_id + '/'
    output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output'+assim_type+'_ef/' + model_id + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    # load list of land pixels
    if run_type == 'ALL':
        pixels = list(set([file[-8:-4] for file in glob.glob(cbf_dir + '*.cbf')]))
    else:
        pixels = ['3809','3524','2224','4170','1945','3813','4054','3264','1271','3457']
    pixels.sort()

    # load list of cbrs
    cbr_files = glob.glob(cbr_dir+'*MCMC'+mcmc_id+'_'+n_iter+'_*PLS*forward*.cbr')

    # optional table of best chains per pixel; read once, not once per pixel
    conv_chains_pkl = None
    if use_bestchains_pkl:
        conv_chains_pkl = read_pickle(glob.glob(cbr_dir + model_id + assim_type + '*_MCMC'+mcmc_id + '_'+n_iter+'_best_subset.pkl')[0])
        conv_chains_pkl.columns = ['pixel','bestchains','conv']  # rename columns for easier access

    gr_pixels = np.zeros(len(pixels))*np.nan  # GR fraction per pixel, for mapping
    par_pixels = np.zeros((len(pixels), len(parnames)))*np.nan
    # indexed as [0] fluxes, [1] pools
    model_dims = autil.get_nofluxes_nopools_lma(model_id)

    for pixel_ind, pixel in enumerate(pixels):
        print(pixel, pixel_ind)

        pixel_chains = autil.find_all_chains(cbr_files, pixel)
        pixel_chains.sort()  # filenames

        if use_bestchains_pkl:
            if pixel not in conv_chains_pkl['pixel'].values:
                continue
            bestchains = conv_chains_pkl.loc[conv_chains_pkl['pixel']==pixel]['bestchains'].values[0][1:]
            print(bestchains)
            pixel_chains = [pixel_chain for pixel_chain in pixel_chains if pixel_chain.partition(pixel+'_')[-1][:-4] in bestchains]

        cbf_filename = glob.glob(cur_dir + cbf_dir + '*'+pixel+'.cbf')[0]
        cbf_pixel = rwb.read_cbf_file(cbf_filename)

        all_cbr_chains = []
        cbr_chain_list = []  # only full-length chains, used for gelman rubin
        # Concatenated model output; only needed by the optional time-series
        # plotting, so a failed read is reported but does not stop the run.
        flux_pixel = None
        pool_pixel = None
        for pixel_chain in pixel_chains:
            print(pixel_chain)
            cbr_chain = rwb.read_cbr_file(pixel_chain, {'nopars': len(parnames)})
            cbr_chain = autil.modulus_Bday_Fday(cbr_chain, parnames)
            all_cbr_chains.append(cbr_chain)

            # pixel_chain carries the cbr directory prefix; strip it to get
            # the bare file name used by the flux/pool output files
            chain_stem = pixel_chain.partition(cbr_dir)[-1][:-3]
            try:
                flux_chain = rwb.readbinarymat(cur_dir + output_dir + 'fluxfile_' + chain_stem+'bin', [cbf_pixel['nodays'], model_dims[0]])
                pool_chain = rwb.readbinarymat(cur_dir + output_dir + 'poolfile_' + chain_stem+'bin', [cbf_pixel['nodays']+1, model_dims[1]])
                flux_pixel = np.copy(flux_chain) if flux_pixel is None else np.concatenate((flux_pixel, flux_chain), axis=0)
                pool_pixel = np.copy(pool_chain) if pool_pixel is None else np.concatenate((pool_pixel, pool_chain), axis=0)
            except Exception as err:
                print('could not read flux/pool output for ' + pixel_chain + ': ' + str(err))

            if np.shape(cbr_chain)[0] == ens_size:
                cbr_chain_list.append(cbr_chain)

        if len(cbr_chain_list) > 1:
            gr = autil.gelman_rubin(cbr_chain_list)
            print('%i of %i parameters converged' % (sum(gr<1.2), len(parnames)))
            gr_pixels[pixel_ind] = sum(gr<1.2)/len(parnames)

        # Store medians only when this pixel has chains of its own; otherwise
        # the row stays NaN rather than inheriting the previous pixel's values.
        if all_cbr_chains:
            cbr_pixel = np.concatenate(all_cbr_chains, axis=0)
            par_pixels[pixel_ind,:] = np.nanmedian(cbr_pixel, axis=0)

    # the land pixel list is the same for every map, so build it once
    land_pixel_list = [file[-8:-4] for file in glob.glob(cur_dir + cbf_dir + '*.cbf')]
    for par in range(len(parnames)):
        autil.plot_map(nrows=46, ncols=73, land_pixel_list=land_pixel_list,
                       pixel_value_list=pixels, value_list=par_pixels[:,par],
                       vmax=np.nanpercentile(par_pixels[:,par], 90),
                       savepath=cur_dir+plot_dir+'maps/',
                       savename='par'+str(par)+'_' + model_id +assim_type+ '_MCMC' + mcmc_id +'_'+ n_iter+'_EF_clipped_PLS_soilgrids_poolobs_rescaled_forward')