def edit_101(infile, outfile, ID, inds_shift, inds_del):
    """Apply a combined ID / MET / parameter-prior edit to a CBF file.

    The following edits are applied, in order, to the structure read from
    ``infile`` before it is written to ``outfile``:

    1. ``cbf['ID']`` is replaced by ``ID``.
    2. The last two columns of ``cbf['MET']`` are dropped.
    3. The k-th defined parameter prior (value other than -9999, in index
       order) and its uncertainty are moved to index ``inds_shift[k]``.
    4. Every index in ``inds_del`` is reset to -9999 in both
       ``PARPRIORS`` and ``PARPRIORUNC``.

    Args:
        infile: Path of the CBF file to read.
        outfile: Path of the CBF file to write.
        ID: New value for the ``ID`` entry.
        inds_shift: Destination indices, one per defined prior. Extra
            entries beyond the number of defined priors are ignored.
        inds_del: Indices whose prior and uncertainty are blanked after the
            shift.

    Raises:
        IndexError: If there are more defined priors than entries in
            ``inds_shift``.
    """
    cbf = rwb.read_cbf_file(infile)
    cbf['ID'] = ID
    cbf['MET'] = cbf['MET'][:, :-2]

    priors = cbf['PARPRIORS']
    uncs = cbf['PARPRIORUNC']

    # Snapshot (index, prior, uncertainty) for every defined prior BEFORE
    # writing anything. Iterating over the live arrays while assigning into
    # them re-visits values that were shifted forward, and locating the
    # source slot by value picks the wrong slot when two priors are equal.
    moved = [
        (src, np.copy(prior), np.copy(unc))
        for src, (prior, unc) in enumerate(zip(priors, uncs))
        if prior != -9999
    ]
    if len(moved) > len(inds_shift):
        raise IndexError(
            'inds_shift has %d entries but %d parameter priors are defined'
            % (len(inds_shift), len(moved)))

    # Clear every source slot first so a destination that coincides with
    # another prior's source is not wiped after being filled.
    for src, _, _ in moved:
        priors[src] = -9999
        uncs[src] = -9999
    for dst, (_, prior, unc) in zip(inds_shift, moved):
        priors[dst] = prior
        uncs[dst] = unc

    for ind in inds_del:
        priors[ind] = -9999
        uncs[ind] = -9999

    rwb.CARDAMOM_WRITE_BINARY_FILEFORMAT(cbf, outfile)
    return
def delete_model_parpriors(infile, outfile, ind_out):
    """Blank selected parameter priors in a CBF file.

    Reads ``infile``, sets ``PARPRIORS`` and ``PARPRIORUNC`` to -9999 at
    every index listed in ``ind_out`` and writes the result to ``outfile``.

    Args:
        infile: Path of the CBF file to read.
        outfile: Path of the CBF file to write.
        ind_out: Iterable of integer indices to blank.
    """
    cbf = rwb.read_cbf_file(infile)

    for ind in ind_out:
        # Index with the loop variable, not the whole list: the previous
        # form relied on array fancy indexing and repeated the same
        # assignment once per element.
        cbf['PARPRIORS'][ind] = -9999
        cbf['PARPRIORUNC'][ind] = -9999

    rwb.CARDAMOM_WRITE_BINARY_FILEFORMAT(cbf, outfile)
    return
def edit_model_fire(infile, outfile):
    """Blank MET column 6 and the MFire observation in a CBF file.

    The shape of the MET array is printed, then column 6 of ``MET`` and the
    ``mean`` / ``unc`` entries of ``OTHER_OBS['MFire']`` are set to -9999
    and the structure is written to ``outfile``.
    (Presumably MET column 6 is the fire driver -- confirm against the CBF
    layout.)
    """
    cbf_data = rwb.read_cbf_file(infile)
    print(cbf_data['MET'].shape)

    missing = -9999
    cbf_data['MET'][:, 6] = missing

    mfire_obs = cbf_data['OTHER_OBS']['MFire']
    mfire_obs['mean'] = missing
    mfire_obs['unc'] = missing

    rwb.CARDAMOM_WRITE_BINARY_FILEFORMAT(cbf_data, outfile)
def shift_model_parpriors(infile, outfile, inds_out):
    """Move the defined parameter priors of a CBF file to new indices.

    The k-th defined prior (value other than -9999, in index order) and its
    uncertainty are moved to index ``inds_out[k]``; the slot it came from is
    reset to -9999 unless something was moved into it. The result is written
    to ``outfile``.

    Args:
        infile: Path of the CBF file to read.
        outfile: Path of the CBF file to write.
        inds_out: Destination indices, one per defined prior. Extra entries
            beyond the number of defined priors are ignored.

    Raises:
        IndexError: If there are more defined priors than entries in
            ``inds_out``.
    """
    cbf = rwb.read_cbf_file(infile)
    priors = cbf['PARPRIORS']
    uncs = cbf['PARPRIORUNC']

    # Snapshot (index, prior, uncertainty) for every defined prior BEFORE
    # writing anything. Iterating over the live arrays while assigning into
    # them re-visits values that were shifted forward, and locating the
    # source slot by value picks the wrong slot when two priors are equal.
    moved = [
        (src, np.copy(prior), np.copy(unc))
        for src, (prior, unc) in enumerate(zip(priors, uncs))
        if prior != -9999
    ]
    if len(moved) > len(inds_out):
        raise IndexError(
            'inds_out has %d entries but %d parameter priors are defined'
            % (len(inds_out), len(moved)))

    # Clear every source slot first so a destination that coincides with
    # another prior's source is not wiped after being filled.
    for src, _, _ in moved:
        priors[src] = -9999
        uncs[src] = -9999
    for dst, (_, prior, unc) in zip(inds_out, moved):
        priors[dst] = prior
        uncs[dst] = unc

    rwb.CARDAMOM_WRITE_BINARY_FILEFORMAT(cbf, outfile)
    return
def main():
    """Train per-parameter regressions on pixel features and queue CARDAMOM runs.

    Command-line arguments (all read from ``sys.argv``):
        1. model_id
        2. mcmc_id ('119' or '3')
        3. n_iter
        4. nbe_optimization ('OFF' or 'ON')
        5. n_features_select (converted to int)

    Stages, in order:
        * Build the feature matrix ``X`` (one row per pixel: mean and std of
          selected MET columns, plus soilgrids and pool-observation features
          when enabled) and the target matrix ``y`` (per-parameter median of
          the stacked best-chain cbr samples).
        * Call ``run_regressions`` and open the train/test CSV files.
        * When ``opt_feature_select`` is set, read the train/test CSVs and
          call ``plot_train_test`` to obtain ``opt_fs``.
        * When ``submit_ic_opt`` / ``submit_forward`` are set, write edited
          cbf / cbr files plus a job list (.txt) and a shell file (.sh).

    All feature/model/task switches are hard-coded booleans in the body.
    """
    # set run information to read
    model_id = sys.argv[1]
    mcmc_id = sys.argv[2]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[3]
    nbe_optimization = sys.argv[4]  # OFF OR ON
    ens_size = 500
    assim_type = '_p25adapted'
    suffix = '_clipped_'

    # NOTE(review): for any other mcmc_id neither frac_save_out nor
    # n_chains_agg is assigned; frac_save_out is read later in the
    # submit_ic_opt stage. n_chains_agg is not read by any live code below.
    if mcmc_id == '119':
        frac_save_out = str(int(int(n_iter) / 500))
        n_chains_agg = 4
    elif mcmc_id == '3':
        frac_save_out = str(int(
            int(n_iter) / 500 *
            100))  # n_iterations/ frac_save_out * 100 will be ensemble size
        n_chains_agg = 2

    # set directories
    cur_dir = os.getcwd() + '/'
    misc_dir = cur_dir + '/../../misc/'
    cbf_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    cbr_ef_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_ef/' + model_id + '/'
    plot_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    # choose which features to use
    include_soilgrids = True
    include_poolobs = True
    include_gl_fracs = False

    # choose which model formulation to use
    train_full_ensemble = False
    rescale = True
    include_interactions = False
    include_squares = False
    include_all_polys = False
    do_feature_selection = False
    do_PLS = True
    n_features_select = int(sys.argv[5])
    write_to_csv = False

    # choose which tasks to run
    opt_feature_select = True
    submit_ic_opt = True
    submit_forward = False

    ##########################################################################
    ####################### develop and train EF models ######################

    # load list of land pixels (pixel id = characters [-8:-4] of each cbf name)
    pixels = list(set([file[-8:-4] for file in glob.glob(cbf_dir + '*.cbf')]))
    pixels.sort()

    # load list of cbrs
    cbr_files = glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter +
                          '_*.cbr')

    # load bestchains for cbr_files
    conv_chains = read_pickle(cbr_dir + model_id + assim_type + '_ALL' +
                              '_MCMC' + mcmc_id + '_' + n_iter +
                              '_best_subset.pkl')
    conv_chains.columns = ['pixel', 'bestchains',
                           'conv']  # rename columns for easier access

    ic_inds = autil.get_inds_ic(
        model_id)  # get indices of initial condition parameters

    # load globcover csv for land cover regression comparison
    gl_fracs = read_csv(misc_dir + 'globcover_fracs.csv', header=0)
    n_features_gl = len(gl_fracs.columns) - 1
    suffix_gl = 'gl_'

    # get number of predictors
    n_features = (
        rwb.read_cbf_file(glob.glob(cbf_dir + '*.cbf')[0])['nomet'] - 3
    ) * 2  # remove 3 corresponding to day number and CO2, multiply by 2 (mean and sd)

    if do_PLS:
        suffix += 'PLS_'

    if include_soilgrids:
        soilgrids = read_csv('../../misc/soilgrids_defined_pixels_manual.csv',
                             header=0)
        n_soilgrids = len(soilgrids.columns) - 1
        n_features += n_soilgrids
        suffix += 'soilgrids_'

    if include_poolobs:
        # four pool features: LAI mean, LAI std, ABGB mean, SOM mean
        n_poolobs = 4
        n_features += n_poolobs
        suffix += 'poolobs_'

    if include_gl_fracs:
        n_features += n_features_gl
        suffix += suffix_gl

    # fill X and Y (everything starts as NaN; skipped pixels stay NaN)
    n_regr_models = len(parnames)
    X = np.ones(
        (len(pixels), n_features)) * np.nan  # shape n_samples, n_features
    y = np.ones(
        (n_regr_models, len(pixels))) * np.nan  # shape n_pars, n_samples
    y_full_ens = np.ones((ens_size, n_regr_models, len(pixels))
                         ) * np.nan  # shape ens_size, n_pars, n_samples
    X_gl = np.ones((len(pixels), n_features_gl)) * np.nan
    y_gl = np.ones((n_regr_models, len(pixels))) * np.nan

    for pixel in pixels:
        # only pixels that have at least one matching cbr file and an entry
        # in the best-chain table are used
        if (len(
                glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter + '_' +
                          pixel + '*.cbr')) >
                0) & (pixel in conv_chains['pixel'].values):
            if conv_chains.loc[conv_chains['pixel'] ==
                               pixel]['conv'].values[0] == 0:
                # conv == 0: skip this pixel
                continue
            else:
                ind = pixels.index(pixel)
                print(pixel)

                # get met
                cbf_file = glob.glob(cbf_dir + '*' + pixel + '.cbf')[0]
                met = rwb.read_cbf_file(cbf_file)['MET']
                met = met[:, [1, 2, 3, 6, 7, 8
                              ]]  # don't use index 0, 5 (day numbers) or 4 (Co2)
                # X_end tracks the next free feature column of this row
                X_end = met.shape[1] * 2
                X[ind, :X_end] = np.concatenate(
                    (np.nanmean(met, axis=0), np.nanstd(met, axis=0)))
                #X[ind,:met.shape[1]*12] = fill_X_met_12mo(X[ind,:met.shape[1]*12], met)#np.nanmean(met, axis=0)

                # append to X if include_soil_canopy_vars
                if include_soilgrids:
                    if (int(pixel) in soilgrids['pixel'].values):
                        X[ind, X_end:(X_end + n_soilgrids)] = soilgrids[
                            soilgrids['pixel'] == int(pixel)].values[0][1:]
                    X_end = X_end + n_soilgrids

                if include_poolobs:
                    # NOTE(review): the same cbf file is re-read once per
                    # observation type here
                    lai, agb, som = rwb.read_cbf_file(
                        cbf_file)['OBS']['LAI'], rwb.read_cbf_file(
                            cbf_file)['OBS']['ABGB'], rwb.read_cbf_file(
                                cbf_file)['OBS']['SOM']

                    if (len(lai) > 0) & (len(agb) > 0) & (len(som) > 0):
                        # only strictly positive observations enter the stats
                        X[ind, X_end:(X_end + n_poolobs)] = np.array([
                            np.nanmean(lai[lai > 0]),
                            np.nanstd(lai[lai > 0]),
                            np.nanmean(agb[agb > 0]),
                            np.nanmean(som[som > 0])
                        ])
                    X_end = X_end + n_poolobs

                if include_gl_fracs:
                    if (int(pixel) in gl_fracs['pixel'].values):
                        X[ind, X_end:(X_end + n_features_gl)] = gl_fracs.loc[
                            gl_fracs['pixel'] == int(pixel)].values[0][1:]
                    X_end = X_end + n_features_gl

                # fill globcover X
                if int(pixel) in gl_fracs['pixel'].values:
                    X_gl[ind, :] = gl_fracs.loc[gl_fracs['pixel'] == int(
                        pixel)].values[0][1:]

                # get parameter information
                # get pixel's convergent chain numbers (first entry is dropped)
                best_chains = conv_chains.loc[
                    conv_chains['pixel'] == pixel]['bestchains'].values[0][1:]
                print(best_chains)

                # aggregate bestchains from optimal posteriors
                cbr_data = []
                for chain in best_chains:
                    file = [
                        i for i in cbr_files
                        if pixel + '_' + chain + '.cbr' in i
                    ][0]
                    cbr_data.append(
                        autil.modulus_Bday_Fday(
                            rwb.read_cbr_file(file,
                                              {'nopars': len(parnames)}),
                            parnames))
                    #cbr_data.append(rwb.read_cbr_file(file, {'nopars': len(parnames)}))
                cbr_data = np.vstack(cbr_data)

                y[:, ind] = np.nanmedian(cbr_data, axis=0)
                y_gl[:, ind] = np.nanmedian(cbr_data, axis=0)

                # NOTE(review): replace=False means this raises when cbr_data
                # has fewer than ens_size rows
                indices = np.random.choice(
                    cbr_data.shape[0], ens_size,
                    replace=False)  # only take a subset of cbr rows
                y_full_ens[:, :, ind] = cbr_data[
                    indices, :]  #reshape_cbr(cbr_data, ens_size*n_chains_agg)

    if not train_full_ensemble:
        f_bic = open(
            misc_dir + 'env_filter_manual/fs/bic_fs' +
            suffix.partition('fs')[0] + model_id + '_MCMC' + mcmc_id + '_' +
            n_iter + assim_type + '.csv', 'a')
        w_bic = csv.writer(f_bic)

        # EF regressions (note: suffix is rebound from the return value)
        reg_test_preds_list, card_test_preds_list, reg_train_preds_list, card_train_preds_list, pixels_r, suffix, k = run_regressions(
            X, y, pixels, rescale, include_interactions, include_squares,
            include_all_polys, do_feature_selection, do_PLS, write_to_csv,
            w_bic, n_features_select, suffix, ens_size, n_regr_models,
            n_features)

        f_bic.close()

        # globcover comparison
        '''gl_reg_test_preds_list, gl_card_test_preds_list, gl_reg_train_preds_list, gl_card_train_preds_list, gl_pixels_r, gl_suffix, gl_k = run_regressions(X_gl, y_gl, pixels, rescale, False, False, False, False, False, False, w_bic, n_features_select, suffix_gl, ens_size, n_regr_models, n_features_gl)'''

    else:
        suffix += 'full_ens_'
        icount = 0
        for i in sample(range(y_full_ens.shape[0]), 100):
            print(icount)
            # NOTE(review): this call passes 13 positional arguments while
            # the call in the branch above passes 16 (do_PLS, write_to_csv
            # and w_bic are absent) -- confirm against run_regressions'
            # signature before enabling train_full_ensemble.
            rtest, ctest, rtrain, ctrain, pixels_r, suffix, k = run_regressions(
                X, y_full_ens[i, :, :], pixels, rescale, include_interactions,
                include_squares, include_all_polys, do_feature_selection,
                n_features_select, suffix, ens_size, n_regr_models,
                n_features)

            # first draw initialises the lists; later draws are stacked on top
            reg_test_preds_list = [np.nanmedian(
                ri, axis=0) for ri in rtest] if icount == 0 else [
                    np.vstack((np.nanmedian(ri, axis=0), rfull))
                    for ri, rfull in zip(rtest, reg_test_preds_list)
                ]
            card_test_preds_list = np.copy(ctest) if icount == 0 else [
                np.vstack((ci, cfull))
                for ci, cfull in zip(ctest, card_test_preds_list)
            ]

            reg_train_preds_list = [np.nanmedian(
                ri, axis=0) for ri in rtrain] if icount == 0 else [
                    np.vstack((np.nanmedian(ri, axis=0), rfull))
                    for ri, rfull in zip(rtrain, reg_train_preds_list)
                ]
            card_train_preds_list = np.copy(ctrain) if icount == 0 else [
                np.vstack((ci, cfull))
                for ci, cfull in zip(ctrain, card_train_preds_list)
            ]

            icount += 1

    # fill csv
    # The four files are opened in append mode and closed below; the only
    # calls that would use the writers are currently commented out.
    f_test = open(
        misc_dir + 'env_filter_manual/fs/fs_test' +
        suffix.partition('fs')[0] + model_id + '_MCMC' + mcmc_id + '_' +
        n_iter + assim_type + '.csv', 'a')
    wr_test = csv.writer(f_test)

    f_train = open(
        misc_dir + 'env_filter_manual/fs/fs_train' +
        suffix.partition('fs')[0] + model_id + '_MCMC' + mcmc_id + '_' +
        n_iter + assim_type + '.csv', 'a')
    wr_train = csv.writer(f_train)

    f_test_preds = open(
        misc_dir + 'env_filter_manual/par_preds/par_preds_test' + suffix +
        model_id + '_MCMC' + mcmc_id + '_' + n_iter + assim_type + '.csv',
        'a')
    wr_test_preds = csv.writer(f_test_preds)

    f_train_preds = open(
        misc_dir + 'env_filter_manual/par_preds/par_preds_train' + suffix +
        model_id + '_MCMC' + mcmc_id + '_' + n_iter + assim_type + '.csv',
        'a')
    wr_train_preds = csv.writer(f_train_preds)

    print('TEST:')
    #plot_scatter_test_pred(card_test_preds_list, reg_test_preds_list, k, pixels_r, parnames, wr_test, wr_test_preds, plot_dir+'env_filter/', 'par_preds_test'+suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)
    #plot_scatter_test_pred(gl_card_test_preds_list, gl_reg_test_preds_list, gl_k, gl_pixels_r, parnames, wr_test, wr_test_preds, plot_dir+'env_filter/', 'par_preds_test'+gl_suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)

    print('. . . . . \n\nTRAIN:')
    #plot_scatter_test_pred(card_train_preds_list, reg_train_preds_list, k, pixels_r, parnames, wr_train, wr_train_preds, plot_dir+'env_filter/', 'par_preds_train'+suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)
    #plot_scatter_test_pred(gl_card_train_preds_list, gl_reg_train_preds_list, gl_k, gl_pixels_r, parnames, wr_train, wr_train_preds, plot_dir+'env_filter/', 'par_preds_train'+gl_suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)

    f_test.close()
    f_train.close()
    f_test_preds.close()
    f_train_preds.close()

    ##########################################################################
    ########## find optimal number of features for each parameter ###########

    if opt_feature_select:

        test_rmse = read_csv(misc_dir + 'env_filter_manual/fs/fs_test' +
                             suffix.partition('fs')[0] + model_id + '_MCMC' +
                             mcmc_id + '_' + n_iter + assim_type + '.csv',
                             header=None)
        # first column is n_features_select, then one column per parameter
        test_rmse.columns = [
            item for sublist in [['n_features_select'], parnames]
            for item in sublist
        ]
        # NOTE(review): the sorted frame is discarded (DataFrame.sort_values
        # returns a new object unless inplace=True), so test_rmse keeps its
        # file order.
        test_rmse.sort_values('n_features_select')

        train_rmse = read_csv(misc_dir + 'env_filter_manual/fs/fs_train' +
                              suffix.partition('fs')[0] + model_id + '_MCMC' +
                              mcmc_id + '_' + n_iter + assim_type + '.csv',
                              header=None)
        train_rmse.columns = [
            item for sublist in [['n_features_select'], parnames]
            for item in sublist
        ]
        # NOTE(review): same as above, the result is discarded.
        train_rmse.sort_values('n_features_select')

        x = test_rmse['n_features_select'].values

        # the norm=False result is overwritten by the norm=True call
        opt_fs = plot_train_test(x,
                                 train_rmse,
                                 test_rmse,
                                 parnames,
                                 savepath=plot_dir + 'train_test/',
                                 savename=model_id + '_MCMC' + mcmc_id +
                                 suffix.partition('fs')[0],
                                 norm=False)
        opt_fs = plot_train_test(x,
                                 train_rmse,
                                 test_rmse,
                                 parnames,
                                 savepath=plot_dir + 'train_test/',
                                 savename=model_id + '_MCMC' + mcmc_id +
                                 suffix.partition('fs')[0],
                                 norm=True)
        print(opt_fs)
        '''bic_data = read_csv(misc_dir +'env_filter_manual/fs/bic_fs_soilgrids_poolobs_'+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type + '.csv', header=None)
        bic_data.columns = [item for sublist in [['n_features_select'],parnames] for item in sublist]
        bic_data.columns.sort_values('n_features_select')
        x = bic_data['n_features_select'].values
        opt_fs = plot_train_test(x, bic_data, bic_data*np.nan, parnames, savepath=plot_dir+'train_test/', savename='bic_'+model_id+'_MCMC'+mcmc_id+suffix.partition('fs')[0])
        print(opt_fs)'''

    ##########################################################################
    ########## copy cbfs and substitute pars for IC optimization ############

    # set directories for CARDAMOM runs
    # (cbf_dir and cbr_ef_dir are rebound here to paths relative to the
    # working directory instead of cur_dir-prefixed ones)
    mdf_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_MDF/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_MDF/'
    runmodel_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_GENERAL/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_GENERAL/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbf_ef_ic_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '_ef_ic/' + model_id + '/'
    cbr_ef_dir = '../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_ef/' + model_id + '/'
    output_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
    output_ef_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '_ef/' + model_id + '/'

    # select which pixels to submit
    os.chdir(cbf_dir)
    cbf_files = glob.glob('*.cbf')
    cbf_files.sort()
    os.chdir(cur_dir + '/../')

    if submit_ic_opt:
        # NOTE(review): opt_fs only exists when opt_feature_select is True
        txt_filename = 'ef_ic_assim_list_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '.txt'
        txt_file = open(txt_filename, 'w')

        for cbf_file in cbf_files:
            print(cbf_file)
            cbf_data = rwb.read_cbf_file(cbf_dir + cbf_file)
            cbf_pixel = cbf_file[-8:-4]

            if cbf_pixel in pixels_r:

                # predicted parameters, padded with -9999 to length 50
                parpriors = np.concatenate(
                    (retrieve_preds(cbf_pixel, opt_fs, suffix,
                                    misc_dir + 'env_filter_manual/par_preds/'),
                     np.ones(50 - len(parnames)) * -9999.))
                parpriorunc = np.concatenate(
                    (np.ones(len(parnames)) * 1.001,
                     np.ones(50 - len(parnames)) * -9999.))

                # except ICs
                for ic_ind in ic_inds:
                    parpriors[ic_ind] = -9999.
                    parpriorunc[ic_ind] = -9999.

                # except NBE unc
                if nbe_optimization == 'ON':
                    parpriors[len(parnames) - 1] = -9999.
                    parpriorunc[len(parnames) - 1] = -9999.

                cbf_data['PARPRIORS'] = parpriors.reshape(-1, 1)
                cbf_data['PARPRIORUNC'] = parpriorunc.reshape(-1, 1)

                # fp: name of the edited cbf; fa: name of the cbr to produce
                fp = cbf_file[:-9] + suffix.partition('fs')[0] + cbf_pixel
                fa = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + suffix.partition(
                    'fs')[0] + 'assim_' + cbf_pixel
                rwb.CARDAMOM_WRITE_BINARY_FILEFORMAT(
                    cbf_data, cbf_ef_ic_dir + fp + '.cbf')
                txt_file.write(
                    '%sCARDAMOM_MDF.exe %s%s %s%s %s 0 %s 0.001 %s 1000' %
                    (mdf_dir, cbf_ef_ic_dir[3:], fp + '.cbf', cbr_ef_dir,
                     fa + '.cbr', n_iter, frac_save_out, mcmc_id))
                txt_file.write('\n')

        txt_file.close()

        # NOTE(review): sh_file is not closed in this function; presumably
        # autil.fill_in_sh closes it -- verify.
        sh_file = open(txt_filename[:-3] + 'sh', 'w')
        autil.fill_in_sh(sh_file,
                         array_size=len(pixels_r),
                         n_hours=6,
                         txt_file=txt_filename,
                         combined=False)

    if submit_forward:
        txt_filename = 'ef_ic_forward_list_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '.txt'
        txt_file = open(txt_filename, 'w')

        for cbf_file in cbf_files:
            print(cbf_file)
            cbf_data = rwb.read_cbf_file(cbf_dir + cbf_file)
            cbf_pixel = cbf_file[-8:-4]

            if cbf_pixel in pixels_r:

                fa = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + suffix.partition(
                    'fs')[0] + 'assim_' + cbf_pixel
                cbr_assim = rwb.read_cbr_file(
                    glob.glob(cbr_ef_dir + fa + '.cbr')[0],
                    {'nopars': len(parnames)})

                ff = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + suffix.partition(
                    'fs')[0] + 'forward_' + cbf_pixel
                cbr_forward = retrieve_preds(
                    cbf_pixel, opt_fs, suffix,
                    misc_dir + 'env_filter_manual/par_preds/')
                # initial-condition parameters come from the assimilation
                # output (median over rows) instead of the predictions
                for ic_ind in ic_inds:
                    cbr_forward[ic_ind] = np.nanmedian(cbr_assim[:, ic_ind])
                cbr_forward = cbr_forward.reshape(1, len(parnames))

                rwb.write_cbr_file(cbr_forward, cbr_ef_dir + ff + '.cbr')
                txt_file.write(
                    '%sCARDAMOM_RUN_MODEL.exe %s%s %s%s %s%s %s%s %s%s %s%s' %
                    (runmodel_dir, cbf_dir[3:], cbf_file, cbr_ef_dir,
                     ff + '.cbr', output_ef_dir, 'fluxfile_' + ff + '.bin',
                     output_ef_dir, 'poolfile_' + ff + '.bin', output_ef_dir,
                     'edcdfile_' + ff + '.bin', output_ef_dir,
                     'probfile_' + ff + '.bin'))
                txt_file.write('\n')

        txt_file.close()

        # NOTE(review): sh_file is not closed in this function; presumably
        # autil.fill_in_sh closes it -- verify.
        sh_file = open(txt_filename[:-3] + 'sh', 'w')
        autil.fill_in_sh(sh_file,
                         array_size=len(pixels_r),
                         n_hours=1,
                         txt_file=txt_filename,
                         combined=False)

    return
def edit_model_met_shape(infile, outfile, n_met_to_drop):
    """Drop trailing MET columns from a CBF file.

    Reads ``infile``, removes the last ``n_met_to_drop`` columns of
    ``cbf['MET']`` and writes the result to ``outfile``.

    Args:
        infile: Path of the CBF file to read.
        outfile: Path of the CBF file to write.
        n_met_to_drop: Number of trailing columns to remove. ``0`` leaves
            ``MET`` untouched. A negative value is passed to the slice
            unchanged, which keeps only the first ``abs(n_met_to_drop)``
            columns.
    """
    cbf = rwb.read_cbf_file(infile)

    # A stop of -0 is 0, so slicing with n_met_to_drop == 0 would remove
    # every column instead of none; skip the slice in that case.
    if n_met_to_drop != 0:
        cbf['MET'] = cbf['MET'][:, :-n_met_to_drop]

    rwb.CARDAMOM_WRITE_BINARY_FILEFORMAT(cbf, outfile)
    return
def edit_model_met(infile, outfile):
    """Write a copy of a CBF file with the last two MET columns removed."""
    cbf_data = rwb.read_cbf_file(infile)

    met = cbf_data['MET']
    cbf_data['MET'] = met[:, :-2]

    rwb.CARDAMOM_WRITE_BINARY_FILEFORMAT(cbf_data, outfile)
def edit_model_id(infile, outfile, ID):
    """Write a copy of a CBF file whose ``ID`` entry is replaced by ``ID``."""
    cbf_data = rwb.read_cbf_file(infile)
    cbf_data.__setitem__('ID', ID)
    rwb.CARDAMOM_WRITE_BINARY_FILEFORMAT(cbf_data, outfile)