def main():
    """Partition prediction variance into structural (Ms) and parametric (Mp) shares.

    Command-line arguments (``sys.argv``):
        1: MCMC ID that appears in the ``.cbr`` file names.
        2: iteration count that appears in the ``.cbr`` file names.
        3: number of models to randomly leave out of the model suite.

    For every variable/pixel pair, each sampled model whose chains pass the
    Gelman-Rubin check contributes (a) the median of its forward run through
    time and (b) the variance of its forward run after
    ``autil.remove_below_25_above_75``.  Ms is the variance across the model
    medians, Mp the mean of the per-model variances.  Both are normalized by
    ``Ms + Mp``, averaged through time, printed, and pickled to the
    ``processed_df`` directory.
    """
    cur_dir = os.getcwd() + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'

    os.chdir(plot_dir + 'dists/')

    # get list of model ids: the part of each plot file name before the first '_'
    models_full = list({el.split('_')[0] for el in glob.glob('*.png')})

    # NOTE(review): the original comment read "remove 101, temporary until
    # 102-->101" while the code removes '102' -- confirm which is intended.
    # list.remove raises ValueError if '102' is not among the plot files.
    models_full.remove('102')
    os.chdir(cur_dir)

    # set lists of variables and pixels
    vrs = [
        'NBE', 'cumNBE', 'LAI', 'GPP', 'Reco', 'Rauto', 'Rhet', 'lit', 'root',
        'som', 'wood'
    ]
    pixels = [
        '3809', '3524', '2224', '4170', '1945', '3813', '4054', '3264', '1271',
        '3457'
    ]

    # set MCMC ID
    mcmc_id = sys.argv[1]
    n_iter = sys.argv[2]
    assim_type = '_longadapted'

    nmodels_leave_out = sys.argv[3]
    models = random.sample(models_full,
                           len(models_full) - int(nmodels_leave_out))
    print(models)

    # dataframe will hold model structural uncertainty (Ms) and model
    # parametric uncertainty (Mp) for each pixel-var combination;
    # n is number of models that make up the suite.
    # Columns are given as a list: a set has no defined order and is not a
    # valid ``columns`` argument for the DataFrame constructor.
    partitioning = DataFrame(columns=['Ms', 'Mp', 'n'])
    df_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/processed_df/'

    nsteps = 228 if assim_type == '_longadapted' else 240
    gr_thresh = 1.2  # below this value parameters are assumed to be convergent

    for var in vrs:
        print('Variable: ' + var)

        for pixel in pixels:
            print('Pixel: ' + pixel)

            # median prediction through time for every model (NaN = model not used)
            meds = np.full((len(models), nsteps), np.nan)
            Mp, n = 0, 0

            for model_ind, model in enumerate(models):
                print(model)

                cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model + '/'
                cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model + '/'
                output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model + '/'
                parnames = autil.get_parnames(cur_dir + '../../misc/', model)

                os.chdir(cur_dir + cbr_dir)
                files = set(
                    glob.glob('*MCMC' + mcmc_id + '_' + n_iter + '_*.cbr'))

                # list of files corresponding to each chain at that pixel,
                # e.g. 2224_1, 2224_2, 2224_3, 2224_4
                pixel_chains = autil.find_all_chains(files, pixel)
                pixel_chains.sort()

                if len(pixel_chains) == 0:
                    continue

                cbf_pixel = rwb.read_cbf_file(
                    cur_dir + cbf_dir +
                    pixel_chains[0].partition('_MCMC')[0] + '_' + pixel +
                    '.cbf')
                # indexed below as [0] fluxes, [1] pools, [2] passed to get_output
                model_dims = autil.get_nofluxes_nopools_lma(model)

                # separate arrays for each chain; the cbr list is also used
                # for gelman rubin
                cbr_chain_list, flux_chain_list, pool_chain_list = [], [], []
                for pixel_chain in pixel_chains:
                    print(pixel_chain)
                    cbr_chain_list.append(
                        rwb.read_cbr_file(pixel_chain,
                                          {'nopars': len(parnames)}))
                    flux_chain_list.append(
                        rwb.readbinarymat(
                            cur_dir + output_dir + 'fluxfile_' +
                            pixel_chain[:-3] + 'bin',
                            [cbf_pixel['nodays'], model_dims[0]]))
                    pool_chain_list.append(
                        rwb.readbinarymat(
                            cur_dir + output_dir + 'poolfile_' +
                            pixel_chain[:-3] + 'bin',
                            [cbf_pixel['nodays'] + 1, model_dims[1]]))

                # concatenate all chains along the first axis
                cbr_pixel = np.concatenate(cbr_chain_list, axis=0)
                flux_pixel = np.concatenate(flux_chain_list, axis=0)
                pool_pixel = np.concatenate(pool_chain_list, axis=0)

                gr = autil.gelman_rubin(cbr_chain_list)
                n_converged = sum(gr < gr_thresh)
                print('%i of %i parameters converged with GR<%.1f' %
                      (n_converged, len(parnames), gr_thresh))

                # don't include nonconvergent runs in analysis
                if n_converged / len(parnames) < .9:
                    continue

                # get forward data for var
                fwd_data = autil.get_output(var, model, flux_pixel,
                                            pool_pixel, cbr_pixel,
                                            model_dims[2])
                if len(fwd_data) == 0:
                    continue

                # drop the last time step when there is one more than nsteps
                if fwd_data.shape[1] > nsteps:
                    fwd_data = fwd_data[:, :-1]

                fwd_data = autil.remove_outliers(fwd_data)
                meds[model_ind, :] = np.nanmedian(fwd_data, axis=0)

                # set values outside of 25th-75th range to nan, then add the
                # intra-ensemble variance of what is left
                fwd_data = autil.remove_below_25_above_75(fwd_data)
                Mp += np.nanvar(fwd_data, axis=0)
                n += 1

            Ms = np.nanvar(meds, axis=0)  # inter-median variance
            Mp = Mp / n if n != 0 else float('nan')

            Ms_div_sum = Ms / (Ms + Mp)
            Mp_div_sum = Mp / (Ms + Mp)

            partitioning.loc[pixel + '_' + var] = {
                'Ms': np.nanmean(Ms_div_sum),
                'Mp': np.nanmean(Mp_div_sum),
                'n': n
            }

    print(partitioning.to_string())
    partitioning.sort_index(
        axis=1).to_pickle(cur_dir + df_dir + 'summary' + assim_type + '_MCMC' +
                          mcmc_id + '_' + date.today().strftime("%m%d%y") +
                          '_' + str(len(models)) + '.pkl')
示例#2
0
def main():
    """Compare ensemble spread (or RMSE) and convergence across fixed runs.

    Command-line arguments (``sys.argv``):
        1: metric.  ``'spread'`` stores the time-mean of the 75th-25th
           percentile difference; any other value stores the RMSE of the
           ensemble median against the observations held in the cbf file.

    Each entry of ``combinations`` is ``[model_id, mcmc_id, iterations]``.
    For every pixel and combination the chains are read and concatenated,
    the percentage of parameters with Gelman-Rubin below 1.2 is stored, and
    the chosen metric is computed per variable.  Results are handed to the
    ``autil`` plotting helpers.
    """
    combinations = [['811', '119', '40000000'], ['811', '3', '1000000'],
                    ['911', '119', '40000000']]
    assim_type = '_longadapted'
    metric = sys.argv[1]

    vrs = [
        'NBE', 'cumNBE', 'LAI', 'GPP', 'Reco', 'Rauto', 'Rhet', 'lit', 'root',
        'som', 'wood'
    ]
    pixels = [
        '3809', '3524', '2224', '4170', '1945', '3813', '4054', '3264', '1271',
        '3457'
    ]

    # NaN marks pixel/variable/combination cells that were never filled
    ens_spread = np.full((len(pixels), len(vrs), len(combinations)), np.nan)
    conv = np.full((len(pixels), len(combinations)), np.nan)

    cur_dir = os.getcwd() + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'

    for pixel_ind, pixel in enumerate(pixels):

        for comb_count, (model_id, mcmc_id, it) in enumerate(combinations):

            cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
            cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
            output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
            parnames = autil.get_parnames(cur_dir + '../../misc/', model_id)

            os.chdir(cur_dir + cbr_dir)
            files = glob.glob('*MCMC' + mcmc_id + '_' + it + '_' + pixel +
                              '*.cbr')
            pixel_chains = autil.find_all_chains(files, pixel)
            pixel_chains.sort()  # filenames
            if model_id == '911':
                # only the last four (sorted) chain files are used for 911
                pixel_chains = pixel_chains[-4:]
            print(pixel_chains)

            # raises IndexError when no chain file matched this pixel
            cbf_pixel = rwb.read_cbf_file(
                cur_dir + cbf_dir + pixel_chains[0].partition('_MCMC')[0] +
                '_' + pixel + '.cbf')
            # indexed below as [0] fluxes, [1] pools, [2] passed to get_output
            model_dims = autil.get_nofluxes_nopools_lma(model_id)

            cbr_chain_list = []
            for chain_ind, pixel_chain in enumerate(pixel_chains):
                print(pixel_chain)
                first_chain = chain_ind == 0

                cbr_chain = rwb.read_cbr_file(pixel_chain,
                                              {'nopars': len(parnames)})
                cbr_pixel = np.copy(cbr_chain) if first_chain else np.concatenate(
                    (cbr_pixel, cbr_chain), axis=0)

                flux_chain = rwb.readbinarymat(
                    cur_dir + output_dir + 'fluxfile_' + pixel_chain[:-3] +
                    'bin', [cbf_pixel['nodays'], model_dims[0]])
                pool_chain = rwb.readbinarymat(
                    cur_dir + output_dir + 'poolfile_' + pixel_chain[:-3] +
                    'bin', [cbf_pixel['nodays'] + 1, model_dims[1]])

                flux_pixel = np.copy(flux_chain) if first_chain else np.concatenate(
                    (flux_pixel, flux_chain), axis=0)
                pool_pixel = np.copy(pool_chain) if first_chain else np.concatenate(
                    (pool_pixel, pool_chain), axis=0)

                cbr_chain_list.append(cbr_chain)
                print(np.shape(cbr_chain))
                print(np.shape(cbr_pixel))

            gr = autil.gelman_rubin(cbr_chain_list)
            n_converged = sum(gr < 1.2)
            print('%i of %i parameters converged' %
                  (n_converged, len(parnames)))
            conv[pixel_ind, comb_count] = n_converged / len(parnames) * 100

            for var_ind, var in enumerate(vrs):
                print(var)

                # Variables without usable observations fall back to an
                # all-NaN series.  Only lookup/assignment failures are
                # caught so that interrupts are no longer swallowed.
                try:
                    obs = cbf_pixel['OBS'][var]
                    obs[obs == -9999] = float('nan')
                except (KeyError, IndexError, TypeError, ValueError):
                    obs = np.ones(cbf_pixel['nodays']) * np.nan
                n_obs = np.sum(np.isfinite(obs))

                fwd_data = autil.get_output(var, model_id, flux_pixel,
                                            pool_pixel, cbr_pixel,
                                            model_dims[2])

                if len(fwd_data) > 0:
                    # drop the last time step when there is one more than nodays
                    if fwd_data.shape[1] > cbf_pixel['nodays']:
                        fwd_data = fwd_data[:, :-1]

                    fwd_data = autil.remove_outliers(fwd_data)
                    med = np.nanmedian(fwd_data, axis=0)
                    ub = np.nanpercentile(fwd_data, 75, axis=0)
                    lb = np.nanpercentile(fwd_data, 25, axis=0)

                    if metric == 'spread':
                        value = np.nanmean(abs(ub - lb))
                    else:
                        value = np.sqrt(np.nansum((med - obs)**2) / n_obs)
                    ens_spread[pixel_ind, var_ind, comb_count] = value

    # NOTE: ``it`` and ``model_id`` below hold the values of the LAST
    # combination processed by the loops above.
    for var_ind, var in enumerate(vrs):
        autil.plot_spread_v_iter(
            ens_spread,
            pixels,
            var_ind,
            var,
            it,
            metric,
            cur_dir + plot_dir + 'spread_v_iter',
            'iter_test_compare_' + assim_type + '_' + model_id + '_' + var +
            '_' + metric,
            single_val=True)

    autil.plot_conv_v_iter(conv,
                           pixels,
                           it,
                           cur_dir + plot_dir + 'spread_v_iter',
                           'iter_test_compare' + assim_type + '_' + model_id +
                           '_conv',
                           single_val=True)
示例#3
0
def main():
    """Regress median model parameters on biome fractions and plot the fit.

    Command-line arguments (``sys.argv``):
        1: model ID.
        2: MCMC ID (119 for normal, 3 for DEMCMC).
        3: iteration count that appears in the ``.cbr`` file names.

    For each pixel that has a cbf file, the biome fractions of the matching
    cells are averaged into one feature row of ``X`` and the per-parameter
    median over all of the pixel's chains becomes one column of ``y``.  One
    linear regression per parameter is fitted on a 60/40 train/test split
    and the test-set predictions are passed to ``plot_scatter_test_pred``.
    """
    # set run information to read
    model_id = sys.argv[1]
    mcmc_id = sys.argv[2]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[3]
    assim_type = '_longadapted'

    # set directories
    cur_dir = os.getcwd() + '/'
    misc_dir = cur_dir + '/../../misc/'
    cbf_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    plot_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    # load map containing the location of each mstmip pixel on the GEOSCHEM grid
    pixel_nums = np.load(misc_dir + 'mstmip_pixel_nums.npy')

    # load map of biome fractions from mstmip
    with np.load(misc_dir + 'mstmip_biome_frac.npz') as data:
        biome_frac = data['arr_0']
    n_classes = biome_frac.shape[0]

    # load list of land pixels: characters [-8:-4] of every cbf path, deduplicated
    pixels = list({path[-8:-4] for path in glob.glob(cbf_dir + '*.cbf')})

    # load list of cbrs
    files = glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter + '_*.cbr')

    # fill X and Y; rows/columns stay NaN for pixels without usable data
    n_regr_models = len(parnames)
    X = np.full((len(pixels), n_classes), np.nan)  # shape n_samples, n_features
    y = np.full((n_regr_models, len(pixels)), np.nan)  # shape n_pars, n_samples
    for ind, pixel in enumerate(pixels):
        if ind % 10 == 0:
            print(ind)

        # get lc information
        locs = pixel_nums == float(pixel)
        fracs_at_geos_pixel = no_water_pixels(biome_frac[:, locs])
        # average biome fraction across mstmip pixels within coarse pixel
        X[ind, :] = np.nanmean(fracs_at_geos_pixel, axis=1)

        # get parameter information
        pixel_chains = autil.find_all_chains(files, pixel)
        pixel_chains.sort()  # filenames

        # concatenate across chains, then take the median of each parameter
        if len(pixel_chains) > 0:
            cbr_pixel = np.concatenate([
                rwb.read_cbr_file(pixel_chain, {'nopars': len(parnames)})
                for pixel_chain in pixel_chains
            ], axis=0)
            y[:, ind] = np.nanmedian(cbr_pixel, axis=0)

    # remove nan values so regression runs
    Xr, yr = drop_nan(X, y)

    # set up regression models
    y_test_all_pars, y_pred_all_pars = [], []
    for regr_model in range(n_regr_models):
        print('running regression for ' + parnames[regr_model] + ' . . . ')
        # split train and test sets, 60-40 (a fresh random split per parameter)
        X_train, X_test, y_train, y_test = train_test_split(Xr,
                                                            yr[regr_model, :],
                                                            test_size=0.4)
        y_test_all_pars.append(y_test)

        # fit regression model on train
        regr = LinearRegression().fit(X_train, y_train)

        # make predictions on test set
        y_pred_all_pars.append(regr.predict(X_test))

    # make summary scatter plot
    plot_scatter_test_pred(
        y_test_all_pars, y_pred_all_pars, parnames, plot_dir + 'lc_scat/',
        'par_preds_' + model_id + '_MCMC' + mcmc_id + '_' + n_iter +
        assim_type)
示例#4
0
def main():
    """Append one pixel's summary value for a chosen quantity to a CSV file.

    Command-line arguments (``sys.argv``):
        1: model ID.
        2: run type (ALL or SUBSET); accepted for interface compatibility
           but not used by this script.
        3: MCMC ID (119 for normal, 3 for DEMCMC).
        4: iteration count that appears in the ``.cbr`` file names.
        5: quantity to write: ``GR``, a flux or pool name, or ``PARXX``
           (1-based parameter number).
        6: pixel ID.

    The value written is the fraction of parameters with Gelman-Rubin below
    1.2 (``GR``), the median of one parameter (``PARXX``), or the time-mean
    of the ensemble median of a flux/pool.  ``-9999.`` is written when fewer
    than two full-size chains exist or when 90% or less of the parameters
    converged.
    """
    model_id = sys.argv[1]
    mcmc_id = sys.argv[3]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[4]
    var_to_plot = sys.argv[5]  # GR, a flux or pool, or PARXX
    pixel = sys.argv[6]
    ens_size = 500
    assim_type = '_longadapted'

    cur_dir = os.getcwd() + '/'
    if 'scripts' not in cur_dir:
        cur_dir = cur_dir + 'scripts/'

    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf'+assim_type+'/' + model_id + '/'
    cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr'+assim_type+'/' + model_id + '/'
    output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output'+assim_type+'/' + model_id + '/'
    parnames = autil.get_parnames(cur_dir + '../../misc/', model_id)

    # Resolve the cbr directory against cur_dir, like every other path in
    # this script, so the 'scripts/' adjustment above applies here too.
    os.chdir(cur_dir + cbr_dir)
    files = glob.glob('*MCMC'+mcmc_id+'_'+n_iter+'_*.cbr')
    print(pixel)

    pixel_chains = autil.find_all_chains(files, pixel)
    pixel_chains.sort()  # filenames
    print(pixel_chains)

    # raises IndexError when no chain file matched this pixel
    file_prefix = pixel_chains[0].partition('_MCMC')[0]
    cbf_pixel = rwb.read_cbf_file(cur_dir + cbf_dir + file_prefix+'_'+pixel+'.cbf')
    # indexed below as [0] fluxes, [1] pools, [2] passed to get_output
    model_dims = autil.get_nofluxes_nopools_lma(model_id)

    cbr_all, flux_all, pool_all = [], [], []
    cbr_chain_list = []  # only full-size chains, used for Gelman-Rubin
    for pixel_chain in pixel_chains:
        print(pixel_chain)
        cbr_chain = rwb.read_cbr_file(pixel_chain, {'nopars': len(parnames)})
        cbr_all.append(cbr_chain)

        flux_all.append(rwb.readbinarymat(cur_dir + output_dir + 'fluxfile_' + pixel_chain[:-3]+'bin', [cbf_pixel['nodays'], model_dims[0]]))
        pool_all.append(rwb.readbinarymat(cur_dir + output_dir + 'poolfile_' + pixel_chain[:-3]+'bin', [cbf_pixel['nodays']+1, model_dims[1]]))

        if np.shape(cbr_chain)[0]==ens_size:
            cbr_chain_list.append(cbr_chain)
            print(np.shape(cbr_chain))

    # every chain (full-size or not) enters the concatenated ensembles
    cbr_pixel = np.concatenate(cbr_all, axis=0)
    flux_pixel = np.concatenate(flux_all, axis=0)
    pool_pixel = np.concatenate(pool_all, axis=0)

    ### COMPUTE GELMAN RUBIN
    if len(cbr_chain_list)>1:
        gr = autil.gelman_rubin(cbr_chain_list)
        gr_pixel = sum(gr<1.2)/len(parnames)
    else:
        gr_pixel = -9999.

    ### DETERMINE DATA TO WRITE TO FILE
    if var_to_plot == 'GR':
        data = np.copy(gr_pixel)
    elif 'PAR' in var_to_plot:
        parnum = int(var_to_plot.partition('PAR')[-1])
        if gr_pixel>0.9:
            data = np.nanmedian(cbr_pixel[:,parnum-1])
        else:
            data = -9999.
    else:
        if gr_pixel>0.9:
            data = np.nanmean(np.nanmedian(autil.get_output(var_to_plot, model_id, flux_pixel, pool_pixel, cbr_pixel, model_dims[2]), axis=0))
        else:
            data = -9999.

    # newline='' is what the csv module expects from files it writes to
    with open(cur_dir + '../../misc/' + model_id + '_' + file_prefix + '_MCMC' + mcmc_id + '_' + n_iter + '_' + var_to_plot + '.csv','a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([pixel, data])
def main():
    """Scatter-compare median parameters from optimized and EF-based runs.

    Command-line arguments (``sys.argv``):
        1: model ID.
        2: MCMC ID (119 for normal, 3 for DEMCMC).
        3: iteration count that appears in the ``.cbr`` file names.

    For every cbf file of the model, the per-parameter median over all
    chains is computed once for the regular cbr directory and once for the
    ``_ef`` cbr directory.  Pixels without chain files keep NaN rows, so no
    values are carried over from a previously processed pixel.
    """
    model_id = sys.argv[1]
    mcmc_id = sys.argv[2]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[3]
    assim_type = '_p25adapted'

    # EF comparison
    ef_spec = 'clipped_PLS_soilgrids_poolobs_rescaled_forward'

    # directories
    cur_dir = os.getcwd() + '/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    cbr_ef_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_ef/' + model_id + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    # get cbfs to run through
    os.chdir(cbf_dir)
    cbf_files = glob.glob('*.cbf')
    cbf_files.sort()
    # NOTE(review): this lands in the PARENT of the start directory, and the
    # relative cbr/plot paths below are resolved from there -- confirm that
    # this is intended.
    os.chdir(cur_dir + '/../')

    # one row per cbf file; NaN marks pixels without chains
    opt_preds = np.full((len(cbf_files), len(parnames)), np.nan)
    ef_preds = np.full((len(cbf_files), len(parnames)), np.nan)

    def median_over_chains(chain_files):
        """Return per-parameter medians over all chains, or None if there are none."""
        if len(chain_files) == 0:
            return None
        chains = [
            autil.modulus_Bday_Fday(
                rwb.read_cbr_file(chain_file, {'nopars': len(parnames)}),
                parnames) for chain_file in chain_files
        ]
        return np.nanmedian(np.concatenate(chains, axis=0), axis=0)

    for row, cbf_file in enumerate(cbf_files):

        pixel = cbf_file[-8:-4]
        print(pixel)

        pixel_chains_opt = autil.find_all_chains(
            glob.glob(cbr_dir + '*_MCMC' + mcmc_id + '_' + n_iter + '_' +
                      pixel + '*.cbr'), pixel)
        pixel_chains_opt.sort()  # filenames

        pixel_chains_ef = autil.find_all_chains(
            glob.glob(cbr_ef_dir + '*_MCMC' + mcmc_id + '_' + n_iter + '_' +
                      ef_spec + '_' + pixel + '.cbr'), pixel)
        pixel_chains_ef.sort()

        opt_median = median_over_chains(pixel_chains_opt)
        if opt_median is not None:
            opt_preds[row, :] = opt_median

        ef_median = median_over_chains(pixel_chains_ef)
        if ef_median is not None:
            ef_preds[row, :] = ef_median

    plot_scatter_compare(ef_preds, opt_preds, parnames, plot_dir + 'scatters/',
                         model_id + '_MCMC' + mcmc_id + '_' + n_iter)
def main():
    """Compare ensemble spread (or RMSE) and convergence between two setups.

    Command-line arguments (``sys.argv``):
        1: starting model ID.
        2: run type (ALL or SUBSET); accepted for interface compatibility
           but not used by this script.
        3: metric.  ``'spread'`` stores the time-mean of the 75th-25th
           percentile difference; any other value stores the RMSE of the
           ensemble median against the observations held in the cbf file.
        4: what to compare: ``MCMCID``, ``MODEL`` or ``NBEUNC``.

    The two compared setups differ in MCMC ID, model ID, or assimilation
    type depending on argument 4.  For each pixel, setup and iteration
    count, at most the first four chains are read, the percentage of
    parameters with Gelman-Rubin below 1.2 is stored, and the metric is
    computed per variable.  Results go to the ``autil`` plotting helpers.

    Raises:
        ValueError: if argument 4 is not one of the three supported values.
    """
    model_id_start = sys.argv[1]
    metric = sys.argv[3]  # spread or RMSE
    assim_type = '_p25adapted'
    compare_between = sys.argv[4]  # MCMCID or MODEL or NBEUNC

    # iteration counts to read, one list per compared setup
    n_iters = [['40000000'], ['40000000']]
    vrs = [
        'NBE', 'cumNBE', 'LAI', 'GPP', 'Reco', 'Rauto', 'Rhet', 'lit', 'root',
        'som', 'wood'
    ]
    pixels = [
        '3809', '3524', '2224', '4170', '1945', '3813', '4054', '3264', '1271',
        '3457'
    ]

    cur_dir = os.getcwd() + '/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id_start + '/'
    cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id_start + '/'
    output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id_start + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id_start)

    if compare_between == 'MCMCID':
        comps = ['3', '119']  # mcmc_id is taken from comps inside the loop
    elif compare_between == 'MODEL':
        comps = [model_id_start, '911']
        mcmc_id = '119'
    elif compare_between == 'NBEUNC':
        comps = [assim_type, '_p25adapted_NBEuncreduced']
        mcmc_id = '119'
    else:
        raise ValueError(
            "compare_between must be 'MCMCID', 'MODEL' or 'NBEUNC', got %r" %
            compare_between)

    # one array per compared setup; NaN marks cells that were never filled
    ens_spread = [
        np.full((len(pixels), len(vrs), len(n_iters[0])), np.nan),
        np.full((len(pixels), len(vrs), len(n_iters[1])), np.nan)
    ]
    conv = [
        np.full((len(pixels), len(n_iters[0])), np.nan),
        np.full((len(pixels), len(n_iters[1])), np.nan)
    ]

    for pixel_ind, pixel in enumerate(pixels):

        # enumerate (rather than comps.index) keeps the two setups apart
        # even if both entries of comps happen to be equal
        for comp_ind, comp in enumerate(comps):
            if compare_between == 'MCMCID':
                mcmc_id = comp
            elif compare_between == 'MODEL':
                model_id_start = comp
                cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + comp + '/'
                cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + comp + '/'
                output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + comp + '/'
                parnames = autil.get_parnames(cur_dir + '../../misc/', comp)
            elif compare_between == 'NBEUNC':
                assim_type = comp
                cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + comp + '/' + model_id_start + '/'
                cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + comp + '/' + model_id_start + '/'
                output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output' + comp + '/' + model_id_start + '/'
                parnames = autil.get_parnames(cur_dir + '../../misc/',
                                              model_id_start)

            os.chdir(cur_dir + cbr_dir)
            for it_ind, it in enumerate(n_iters[comp_ind]):
                files = glob.glob('*MCMC' + mcmc_id + '_' + it + '_' + pixel +
                                  '*.cbr')
                pixel_chains = autil.find_all_chains(files, pixel)
                pixel_chains.sort()  # filenames
                print(pixel_chains)

                # raises IndexError when no chain file matched this pixel
                cbf_pixel = rwb.read_cbf_file(
                    cur_dir + cbf_dir + pixel_chains[0].partition('_MCMC')[0] +
                    '_' + pixel + '.cbf')
                # indexed below as [0] fluxes, [1] pools, [2] passed to get_output
                model_dims = autil.get_nofluxes_nopools_lma(model_id_start)

                cbr_chain_list = []
                # at most the first four (sorted) chains are used
                for chain_ind, pixel_chain in enumerate(pixel_chains[:4]):
                    print(pixel_chain)
                    first_chain = chain_ind == 0

                    cbr_chain = rwb.read_cbr_file(pixel_chain,
                                                  {'nopars': len(parnames)})
                    cbr_pixel = np.copy(
                        cbr_chain) if first_chain else np.concatenate(
                            (cbr_pixel, cbr_chain), axis=0)

                    flux_chain = rwb.readbinarymat(
                        cur_dir + output_dir + 'fluxfile_' + pixel_chain[:-3] +
                        'bin', [cbf_pixel['nodays'], model_dims[0]])
                    pool_chain = rwb.readbinarymat(
                        cur_dir + output_dir + 'poolfile_' + pixel_chain[:-3] +
                        'bin', [cbf_pixel['nodays'] + 1, model_dims[1]])

                    flux_pixel = np.copy(
                        flux_chain) if first_chain else np.concatenate(
                            (flux_pixel, flux_chain), axis=0)
                    pool_pixel = np.copy(
                        pool_chain) if first_chain else np.concatenate(
                            (pool_pixel, pool_chain), axis=0)

                    cbr_chain_list.append(cbr_chain)
                    print(np.shape(cbr_chain))
                    print(np.shape(cbr_pixel))

                gr = autil.gelman_rubin(cbr_chain_list)
                n_converged = sum(gr < 1.2)
                print('%i of %i parameters converged' %
                      (n_converged, len(parnames)))
                conv[comp_ind][pixel_ind,
                               it_ind] = n_converged / len(parnames) * 100

                for var_ind, var in enumerate(vrs):
                    print(var)

                    # Variables without usable observations fall back to an
                    # all-NaN series.  Only lookup/assignment failures are
                    # caught so that interrupts are no longer swallowed.
                    try:
                        obs = cbf_pixel['OBS'][var]
                        obs[obs == -9999] = float('nan')
                    except (KeyError, IndexError, TypeError, ValueError):
                        obs = np.ones(cbf_pixel['nodays']) * np.nan
                    n_obs = np.sum(np.isfinite(obs))

                    fwd_data = autil.get_output(var, model_id_start,
                                                flux_pixel, pool_pixel,
                                                cbr_pixel, model_dims[2])

                    if len(fwd_data) > 0:
                        # drop the last time step when there is one more
                        # than nodays
                        if fwd_data.shape[1] > cbf_pixel['nodays']:
                            fwd_data = fwd_data[:, :-1]

                        fwd_data = autil.remove_outliers(fwd_data)
                        med = np.nanmedian(fwd_data, axis=0)
                        ub = np.nanpercentile(fwd_data, 75, axis=0)
                        lb = np.nanpercentile(fwd_data, 25, axis=0)

                        if metric == 'spread':
                            value = np.nanmean(abs(ub - lb))
                        else:
                            value = np.sqrt(
                                np.nansum((med - obs)**2) / n_obs)
                        ens_spread[comp_ind][pixel_ind, var_ind,
                                             it_ind] = value
                        print(ens_spread[comp_ind][pixel_ind, var_ind,
                                                   it_ind])

    # NOTE: ``assim_type`` and ``model_id_start`` below hold the values left
    # by the LAST compared setup (they are reassigned inside the loop for
    # NBEUNC and MODEL respectively).
    print(ens_spread)
    for var_ind, var in enumerate(vrs):
        autil.plot_spread_v_iter(
            ens_spread,
            pixels,
            var_ind,
            var,
            n_iters,
            metric,
            cur_dir + plot_dir + 'spread_v_iter',
            'iter_test' + assim_type + '_' + compare_between + '_' +
            model_id_start + '_' + var + '_' + metric,
            single_val=True)

    autil.plot_conv_v_iter(conv,
                           pixels,
                           n_iters,
                           cur_dir + plot_dir + 'spread_v_iter',
                           'iter_test' + assim_type + '_' + compare_between +
                           '_' + model_id_start + '_conv',
                           single_val=True)
示例#7
0
def main():
    """Stack the parameter chains of every pixel and map per-parameter medians.

    Command-line arguments (read from ``sys.argv``):
        1. model_id -- model identifier; selects the cbf/cbr/output directories.
        2. run_type -- 'ALL' uses every pixel that has a cbf file; any other
           value uses a fixed ten-pixel subset.
        3. mcmc_id  -- MCMC identifier (119 for normal, 3 for DEMCMC).
        4. n_iter   -- iteration count, used to match cbr file names.

    For each pixel the matching cbr chains are read and stacked, chains with
    exactly ``ens_size`` rows are passed to ``autil.gelman_rubin``, and the
    NaN-aware median of every parameter is stored. One map per parameter is
    then drawn with ``autil.plot_map``. Pixels without any chain or without a
    cbf file are reported and left as NaN.
    """
    model_id = sys.argv[1]
    run_type = sys.argv[2]  # ALL or SUBSET
    mcmc_id = sys.argv[3]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[4]
    ens_size = 500
    assim_type = '_p25adapted'
    use_bestchains_pkl = False

    cur_dir = os.getcwd() + '/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '_ef_ic/' + model_id + '/'
    cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_ef/' + model_id + '/'
    output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '_ef/' + model_id + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    # load list of land pixels
    if run_type == 'ALL':
        pixels = list({path[-8:-4] for path in glob.glob(cbf_dir + '*.cbf')})
    else:
        pixels = ['3809', '3524', '2224', '4170', '1945', '3813', '4054', '3264', '1271', '3457']
    pixels.sort()

    # load list of cbrs
    cbr_files = glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter + '_*PLS*forward*.cbr')

    # The best-chain table does not depend on the pixel, so read it once.
    conv_chains_pkl = None
    if use_bestchains_pkl:
        conv_chains_pkl = read_pickle(glob.glob(cbr_dir + model_id + assim_type + '*_MCMC' + mcmc_id + '_' + n_iter + '_best_subset.pkl')[0])
        conv_chains_pkl.columns = ['pixel', 'bestchains', 'conv']  # rename columns for easier access

    gr_pixels = np.zeros(len(pixels)) * np.nan  # fraction of converged parameters for each pixel, for mapping
    par_pixels = np.zeros((len(pixels), len(parnames))) * np.nan  # median of each parameter for each pixel

    for ind, pixel in enumerate(pixels):
        print(pixel, ind)

        pixel_chains = autil.find_all_chains(cbr_files, pixel)
        pixel_chains.sort()  # filenames

        if conv_chains_pkl is not None:
            if pixel not in conv_chains_pkl['pixel'].values:
                continue
            bestchains = conv_chains_pkl.loc[conv_chains_pkl['pixel'] == pixel]['bestchains'].values[0][1:]
            print(bestchains)
            pixel_chains = [pixel_chain for pixel_chain in pixel_chains if pixel_chain.partition(pixel + '_')[-1][:-4] in bestchains]

        # Without this guard the stacked chains of the previous pixel would be
        # reused (or a NameError raised on the first pixel).
        if not pixel_chains:
            print('No cbr chains found for pixel ' + pixel + '; skipping')
            continue

        cbf_matches = glob.glob(cur_dir + cbf_dir + '*' + pixel + '.cbf')
        if not cbf_matches:
            print('No cbf file found for pixel ' + pixel + '; skipping')
            continue
        cbf_filename = cbf_matches[0]
        cbf_pixel = rwb.read_cbf_file(cbf_filename)

        cbr_chain_list = []  # only chains with ens_size rows, passed to gelman_rubin
        cbr_parts, flux_parts, pool_parts = [], [], []  # per-chain arrays, always reset per pixel
        for pixel_chain in pixel_chains:
            print(pixel_chain)
            cbr_chain = rwb.read_cbr_file(pixel_chain, {'nopars': len(parnames)})
            cbr_chain = autil.modulus_Bday_Fday(cbr_chain, parnames)
            cbr_parts.append(cbr_chain)
            #autil.plot_par_histograms(cbr_chain, parnames=parnames, savepath=cur_dir+plot_dir+'dists/', title=model_id+'_'+pixel_chain[:-3]+'png')

            # Flux/pool outputs are optional: a chain whose files cannot be
            # read is reported and left out instead of aborting the run.
            chain_stem = pixel_chain.partition(cbr_dir)[-1][:-3]
            try:
                dims = autil.get_nofluxes_nopools_lma(model_id)
                flux_chain = rwb.readbinarymat(cur_dir + output_dir + 'fluxfile_' + chain_stem + 'bin', [cbf_pixel['nodays'], dims[0]])
                pool_chain = rwb.readbinarymat(cur_dir + output_dir + 'poolfile_' + chain_stem + 'bin', [cbf_pixel['nodays'] + 1, dims[1]])
            except Exception as e:
                print('Could not read flux/pool output for ' + pixel_chain + ': ' + str(e))
            else:
                #autil.plot_flux_pool_timeseries(cbf_pixel, cbr_chain, flux_chain, pool_chain, autil.get_nofluxes_nopools_lma(model_id)[2], savepath=cur_dir+plot_dir+'timeseries/', title=model_id+'_'+pixel_chain[:-3]+'png')
                flux_parts.append(flux_chain)
                pool_parts.append(pool_chain)

            if np.shape(cbr_chain)[0] == ens_size:
                cbr_chain_list.append(cbr_chain)

        cbr_pixel = np.concatenate(cbr_parts, axis=0)
        # flux_pixel / pool_pixel are only consumed by the optional time-series
        # plot below; they are None when no chain had readable output.
        flux_pixel = np.concatenate(flux_parts, axis=0) if flux_parts else None
        pool_pixel = np.concatenate(pool_parts, axis=0) if pool_parts else None

        if len(cbr_chain_list) > 1:
            gr = autil.gelman_rubin(cbr_chain_list)
            n_converged = sum(gr < 1.2)
            print('%i of %i parameters converged' % (n_converged, len(parnames)))
            gr_pixels[ind] = n_converged / len(parnames)

        par_pixels[ind, :] = np.nanmedian(cbr_pixel, axis=0)
        #autil.plot_par_histograms(cbr_pixel, parnames=parnames, savepath=cur_dir+plot_dir+'dists/', title=model_id+assim_type+'_MCMC'+mcmc_id+'_'+cbf_filename.partition(cbf_dir)[-1][:-4]+'.png')
        #autil.plot_flux_pool_timeseries(cbf_pixel, cbr_pixel, flux_pixel, pool_pixel, autil.get_nofluxes_nopools_lma(model_id)[2], savepath=cur_dir+plot_dir+'timeseries/', title=model_id+assim_type+'_MCMC'+mcmc_id+'_'+cbf_filename.partition(cbf_dir)[-1][:-4]+'.png')

    # The land-pixel list and the file-name suffix are the same for every
    # parameter, so build them once instead of re-globbing per map.
    land_pixel_list = [path[-8:-4] for path in glob.glob(cur_dir + cbf_dir + '*.cbf')]
    savename_suffix = '_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '_EF_clipped_PLS_soilgrids_poolobs_rescaled_forward'

    #vmax = [None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,210,200,215,6600,195,24000,None,None,None,900,None,None,None,None,None,None,None] #np.nanpercentile(par_pixels[:,par], 90)
    for par in range(len(parnames)):
        autil.plot_map(nrows=46, ncols=73, land_pixel_list=land_pixel_list, pixel_value_list=pixels, value_list=par_pixels[:, par], vmax=np.nanpercentile(par_pixels[:, par], 90), savepath=cur_dir + plot_dir + 'maps/', savename='par' + str(par) + savename_suffix)
    #autil.plot_map(nrows=46, ncols=73, land_pixel_list=[file[-8:-4] for file in glob.glob(cur_dir + cbf_dir + '*.cbf')], pixel_value_list=pixels, value_list=np.ones(len(pixels)), savepath=cur_dir+plot_dir+'maps/', title='test_pixels.png')
    #autil.plot_map(nrows=46, ncols=73, land_pixel_list=[file[-8:-4] for file in glob.glob(cur_dir + cbf_dir + '*.cbf')], pixel_value_list=pixels, value_list=gr_pixels*100, savepath=cur_dir+plot_dir+'maps/', savename='gr_' + model_id + assim_type+ '_' +run_type+ '_MCMC' + mcmc_id + '_' + n_iter)

    return