def test_param2res(self):
    """Check that ``make_param2res`` and ``make_param2res_2`` agree.

    Both forward models are built from the same constant parameters
    (``self.cpa``) and evaluated at ``self.epa0``.  The two solutions are
    plotted into ``solutions.pdf`` (current working directory) and must
    agree within a relative tolerance of ``1e-2``.
    """
    const_params = self.cpa
    param2res = make_param2res(const_params)
    param2res_2 = make_param2res_2(const_params)
    res = param2res(self.epa0)
    res_2 = param2res_2(self.epa0)

    # NOTE: no trailing comma after the call -- a trailing comma would wrap
    # the result in a 1-tuple instead of passing the indices themselves.
    day_indices = month_2_day_index(range(self.pa.number_of_months))

    fig = plt.figure()
    plot_solutions(
        fig,
        times=day_indices,
        var_names=Observables._fields,
        tup=(res, res_2),
    )
    fig.savefig('solutions.pdf')
    # Close the figure so repeated test runs do not accumulate open figures.
    plt.close(fig)

    self.assertTrue(np.allclose(res, res_2, rtol=1e-2))
def test_param2res_vs_sym(self):
    """Check that ``make_param2res`` and ``make_param2res_sym`` agree.

    Both forward models are built from the same constant parameters
    (``self.cpa``) and evaluated at ``self.epa0``.  The two solutions are
    plotted into ``solutions.pdf`` (current working directory) and must
    agree within a relative tolerance of ``1e-2``.
    """
    # The site data previously loaded here via get_example_site_vars was
    # never used by this test, so the (unused) load has been dropped.
    const_params = self.cpa
    param2res = make_param2res(const_params)
    param2res_sym = make_param2res_sym(const_params)
    res = param2res(self.epa0)
    res_sym = param2res_sym(self.epa0)

    # NOTE: no trailing comma after the call -- a trailing comma would wrap
    # the result in a 1-tuple instead of passing the indices themselves.
    day_indices = month_2_day_index(range(self.pa.number_of_months))

    fig = plt.figure()
    plot_solutions(
        fig,
        times=day_indices,
        var_names=Observables._fields,
        tup=(res, res_sym),
    )
    fig.savefig('solutions.pdf')
    # Close the figure so repeated test runs do not accumulate open figures.
    plt.close(fig)

    self.assertTrue(np.allclose(res, res_sym, rtol=1e-2))
def main(client):
    """Run the data-assimilation workflow with ten parallel MCMC chains.

    Steps performed:

    1. Read ``dataPath`` from ``config.json`` and load the site data.
    2. Run (or load from ``cable_demo_da_aa.csv``) a demo MCMC run that
       uses a uniform proposer.
    3. Estimate a covariance matrix from the demo samples and build a
       multivariate-normal proposer from it.
    4. Run ten formal chains through ``client`` and store chain 5 as CSV.
    5. Post-process chain 5: histograms, scatter matrix and the solution
       for the mean parameter vector (written as PDF files to the current
       working directory).

    The sample arrays are laid out as ``(n_parameters, n_samples)``: the
    mean parameter vector is computed with ``axis=1`` and the number of
    accepted samples is taken from ``shape[1]``.

    Args:
        client: Object offering ``map`` and ``gather``, used as
            ``client.gather(client.map(func, iterable))``.  Presumably a
            ``dask.distributed.Client`` -- confirm against the caller.
    """
    from model_specific_helpers import (
        get_example_site_vars,
        make_param_filter_func,
        make_weighted_cost_func,
        make_param2res,
        make_param2res_2,
        UnEstimatedParameters,
        EstimatedParameters,
        Observables,
    )
    from general_helpers import (
        make_uniform_proposer,
        make_multivariate_normal_proposer,
        mcmc,
        make_feng_cost_func,
        plot_solutions,
    )

    # fixme:
    # Put the (relative or absolute) location of your data into a small file
    # called 'config.json'. In my case the content looks like this:
    # {"dataPath": "/home/data/yuanyuan"}
    # DO NOT add the file to the repository. It is not only model- but also
    # site-specific, so you are likely to have one for every model on every
    # computer you run this code on.
    # (This example uses an absolute path starting with a '/'.)
    with Path('config.json').open(mode='r') as f:
        conf_dict = json.load(f)

    dataPath = Path(conf_dict['dataPath'])

    # fixme:
    # Note that the function is imported from model_specific_helpers, which
    # means that you have to provide your version of this function, which
    # will most likely return different variables.
    npp, rh, clitter, csoil, cveg, cleaf, croot, cwood = get_example_site_vars(
        dataPath)

    # Combine the observations into a single array which is later used as
    # input to the cost function.
    # nyears = 140
    nyears = 10
    tot_len = 12 * nyears
    obs = np.stack([cleaf, croot, cwood, clitter, csoil, rh],
                   axis=1)[0:tot_len, :]

    # leaf, root, wood, metabolic, structural, CWD, microbial, slow, passive
    # fixme
    c_min = np.array([
        0.09, 0.09, 0.09, 0.01, 0.01,
        1 / (2 * 365), 1 / (365 * 10), 1 / (60 * 365),
        0.1 / (0.1 * 365), 0.06 / (0.137 * 365),
        0.06 / (5 * 365), 0.06 / (222.22 * 365),
        clitter[0] / 100, clitter[0] / 100,
        csoil[0] / 100, csoil[0] / 2,
    ])
    c_max = np.array([
        1, 1, 0.21, 1, 1,
        1 / (0.3 * 365), 1 / (0.8 * 365), 1 / 365,
        1 / (365 * 0.1), 0.6 / (365 * 0.137),
        0.6 / (365 * 5), 0.6 / (222.22 * 365),
        clitter[0], clitter[0],
        csoil[0] / 3, csoil[0],
    ])

    # fixme
    # This function is model specific: it discards parameter proposals
    # where beta1 and beta2 add up to more than 0.99.
    isQualified = make_param_filter_func(c_max, c_min)
    uniform_prop = make_uniform_proposer(
        c_min,
        c_max,
        # NOTE(review): the meaning of D is defined by
        # make_uniform_proposer -- see general_helpers.
        D=100.0,
        filter_func=isQualified,
    )

    cpa = UnEstimatedParameters(
        C_leaf_0=cleaf[0],
        C_root_0=croot[0],
        C_wood_0=cwood[0],
        clitter_0=clitter[0],
        csoil_0=csoil[0],
        rh_0=rh[0],
        npp=npp,
        number_of_months=tot_len,
        clay=0.2028,
        silt=0.2808,
        lig_wood=0.4,
        f_wood2CWD=1,
        f_metlit2mic=0.45,
    )
    param2res = make_param2res(cpa)
    epa_0 = EstimatedParameters(
        beta_leaf=0.15,
        beta_root=0.2,
        lig_leaf=0.15,
        f_leaf2metlit=0.28,
        f_root2metlit=0.6,
        k_leaf=1 / 365,
        k_root=1 / (365 * 5),
        k_wood=1 / (365 * 40),
        k_metlit=0.5 / (365 * 0.1),
        k_mic=0.3 / (365 * 0.137),
        k_slowsom=0.3 / (365 * 5),
        k_passsom=0.3 / (222.22 * 365),
        C_metlit_0=0.05,
        CWD_0=0.1,
        C_mic_0=1,
        C_passom_0=5,
    )

    # It is sensible to use the same cost function for both the demo and
    # the formal run, so we define it here for both.
    # costfunction = make_feng_cost_func(obs)
    costfunction = make_weighted_cost_func(obs)

    # Look for data from the demo run and use it to compute the covariance
    # matrix if necessary.
    demo_aa_path = dataPath.joinpath('cable_demo_da_aa.csv')
    demo_aa_j_path = dataPath.joinpath('cable_demo_da_j_aa.csv')
    if not demo_aa_path.exists():
        print("Did not find demo run results. Will perform demo run")
        C_demo, J_demo = mcmc(
            initial_parameters=epa_0,
            proposer=uniform_prop,
            param2res=param2res,
            costfunction=costfunction,
            nsimu=10000,
        )
        # Save the parameters and cost function values for postprocessing.
        pd.DataFrame(C_demo).to_csv(demo_aa_path, sep=',')
        pd.DataFrame(J_demo).to_csv(demo_aa_j_path, sep=',')
    else:
        print("""Found {p} from a previous demo run.
If you also want to recreate the demo output then move the file!
""".format(p=demo_aa_path))
        # to_csv above writes the row index as the first column;
        # index_col=0 strips it again so that the loaded array has the same
        # shape as the one returned by mcmc.
        C_demo = pd.read_csv(demo_aa_path, index_col=0).to_numpy()
        J_demo = pd.read_csv(demo_aa_j_path, index_col=0).to_numpy()

    # Build a new proposer based on a multivariate normal distribution using
    # the estimated covariance of the previous run. Only the samples after
    # the first tenth of the accepted parameters are used to compute the
    # covariance matrix for the formal run.
    n_accept = C_demo.shape[1]
    covv = np.cov(C_demo[:, int(n_accept / 10):])

    normal_prop = make_multivariate_normal_proposer(covv=covv,
                                                    filter_func=isQualified)

    # Look for data from the formal run and use it for postprocessing
    # formal_aa_path = dataPath.joinpath('cable_formal_da_aa.csv')
    # formal_aa_j_path = dataPath.joinpath('cable_formal_da_j_aa.csv')
    # if not formal_aa_path.exists():
    #     print("Did not find results. Will perform formal run")
    #     C_formal, J_formal = mcmc(
    #         initial_parameters=epa_0,
    #         proposer=normal_prop,
    #         param2res=param2res,
    #         costfunction=costfunction,
    #         nsimu=100
    #     )
    #     pd.DataFrame(C_formal).to_csv(formal_aa_path,sep=',')
    #     pd.DataFrame(J_formal).to_csv(formal_aa_j_path,sep=',')
    # else:
    #     print("""Found {p} from a previous demo run.
    #     If you also want recreate the output then move the file!
    #     """.format(p = formal_aa_path))
    # C_formal = pd.read_csv(formal_aa_path).to_numpy()
    # J_formal = pd.read_csv(formal_aa_j_path).to_numpy()

    # Define the parallel mcmc wrapper; the argument is only the chain
    # number handed in by client.map and is not used by the chain itself.
    def parallel_mcmc(_):
        return mcmc(
            initial_parameters=epa_0,
            proposer=normal_prop,
            param2res=param2res,
            costfunction=costfunction,
            nsimu=20000,
        )

    print("before map")
    print("Client: ", client)

    # Run 10 chains; every element of ``chains`` is a (C, J) pair.
    n_chains = 10
    chains = client.gather(client.map(parallel_mcmc, range(n_chains)))

    print("after map")
    print("Client: ", client)

    # Write the output of chain 5 as a test.
    c_form5, j_form5 = chains[4]
    formal_c_path = dataPath.joinpath('chain5_pmcmc_c.csv')
    formal_j_path = dataPath.joinpath('chain5_pmcmc_j.csv')
    pd.DataFrame(c_form5).to_csv(formal_c_path, sep=',')
    pd.DataFrame(j_form5).to_csv(formal_j_path, sep=',')

    # Use the output csv files for postprocessing. index_col=0 drops the row
    # index written by to_csv; without it the index would be treated as an
    # additional sample and distort the mean computed below.
    C_formal = pd.read_csv(formal_c_path, index_col=0).to_numpy()
    J_formal = pd.read_csv(formal_j_path, index_col=0).to_numpy()

    # POSTPROCESSING
    #
    # The 'solution' of the inverse problem is actually the (joint) posterior
    # probability distribution of the parameters, which we approximate by the
    # histogram consisting of the mcmc generated samples.
    # This joint distribution contains as much information as all its
    # (infinitely many) projections to curves through the parameter space
    # combined. Unfortunately, for this very reason, a joint distribution of
    # more than two parameters is very difficult to visualize in its
    # entirety.
    # to do:
    #   a) make a movie of color coded samples of the a priori distribution
    #      of the parameters.
    #   b) the same for the a posteriori distribution.
    # Therefore the following visualizations have to be considered with
    # caution:
    # 1.
    # The (usual) histograms of the values of a SINGLE parameter can be very
    # misleading since e.g. we can not see that certain parameter
    # combinations only occur together. In fact this decomposition is only
    # appropriate for INDEPENDENT distributions of parameters, in which case
    # the joint distribution would be the product of the distributions of
    # the single parameters. This is however not even to be expected if our
    # prior probability distribution can be decomposed in this way (due to
    # the fact that the Metropolis-Hastings algorithm does not produce
    # independent samples).

    # Row i of C_formal holds all samples of parameter i, so every column of
    # the DataFrame is the sample series of one named parameter.
    df = pd.DataFrame({
        name: C_formal[i, :]
        for i, name in enumerate(EstimatedParameters._fields)
    })
    subplots = df.hist()
    fig = subplots[0, 0].figure
    fig.set_figwidth(15)
    fig.set_figheight(15)
    fig.savefig('histograms.pdf')
    plt.close(fig)

    # As the next best thing we can create a matrix of plots containing all
    # projections to possible parameter tuples (like the pairs plot in the R
    # package FME), but 16x16 plots are too much for one page.
    # However the plot shows that we are dealing with a lot of collinearity
    # for this parameter set.
    subplots = pd.plotting.scatter_matrix(df)
    fig = subplots[0, 0].figure
    fig.set_figwidth(15)
    fig.set_figheight(15)
    fig.savefig('scatter_matrix.pdf')
    plt.close(fig)

    # 2.
    # Another way to get an idea of the quality of the parameter estimation
    # is to plot trajectories.
    # A possible aggregation of this histogram to a single parameter vector
    # is the mean, which is an estimator of the expected value of the
    # desired distribution.
    sol_mean = param2res(np.mean(C_formal, axis=1))

    fig = plt.figure()
    plot_solutions(
        fig,
        times=range(sol_mean.shape[0]),
        var_names=Observables._fields,
        tup=(sol_mean, obs),
        names=('mean', 'obs'),
    )
    fig.savefig('solutions.pdf')
    plt.close(fig)
tsl=tsl, # C_wood_0=C_wood[0], # C_leaf_0=C_leaf[0], # C_root_0=C_root[0], # C_litter_0=C_litter[0], # C_soil_0=C_soil[0], # rh_0=rh[0], # ra_0=ra[0], # npp=npp, # number_of_months=tot_len, # mrso=mrso, # tsl=tsl, beta_fruit=0.1) #print(cpa) param2res = make_param2res(cpa) c_min = np.array( EstimatedParameters( beta_sapwood1=0, beta_sapwood2=0, beta_leaf=0, beta_root=0, f_sapwood1_heartwood1=0.001, f_sapwood2_heartwood2=0.001, f_wood1_liiter1=0.001, # 2 f_wood2_liiter2=0.001, # 2 f_leaf_liiter3=0.001, # 2 f_root_liiter4=0.001, # 2 f_fruit_liiter3=0.001, # 2 f_litter1_surface_som=0.1, # 2
cpa = UnEstimatedParameters(C_leaf_0=cleaf[0], C_root_0=croot[0], C_wood_0=cwood[0], clitter_0=clitter[0], csoil_0=csoil[0], rh_0=rh[0], npp=npp, number_of_months=tot_len, clay=0.2028, silt=0.2808, lig_wood=0.4, f_wood2CWD=1, f_metlit2mic=0.45) param2res = make_param2res( cpa ) #pa=[beta1,beta2, lig_leaf, f41,f42, kleaf,kroot,kwood,kmet,kmic, kslow,kpass, cmet_init, cstr_init, cmic_init, cpassive_init ] pa = [ 0.15, 0.2, 0.15, 0.28, 0.6, 1 / 365, 1 / (365 * 5), 1 / (365 * 40), 0.5 / (365 * 0.1), 0.3 / (365 * 0.137), 0.3 / (365 * 5), 0.3 / (222.22 * 365), 0.05, 0.1, 1, 5 ] epa_0 = EstimatedParameters( beta_leaf=0.15, beta_root=0.2, lig_leaf=0.15, f_leaf2metlit=0.28, f_root2metlit=0.6, k_leaf=1 / 365, k_root=1 / (365 * 5), k_wood=1 / (365 * 40),
def test_mcmc(self):
    """Smoke-test the two-stage MCMC workflow on the example site data.

    A short demo run (uniform proposer, ``nsimu=200``) is performed unless
    ``cable_demo_da_aa.csv`` already exists in the current working
    directory, in which case its content is reused.  The demo samples are
    used to estimate a covariance matrix for a multivariate-normal proposer
    which drives a second ("formal") run.  Results are written as CSV files
    and the solution for the mean parameter vector is plotted to
    ``solutions.pdf``.  The test contains no assertions; it only checks
    that the workflow runs through.
    """
    dataPath = self.dataPath
    npp, rh, clitter, csoil, cveg, cleaf, croot, cwood = get_example_site_vars(
        dataPath)

    nyears = 10
    tot_len = 12 * nyears
    obs = np.stack([cleaf, croot, cwood, clitter, csoil, rh],
                   axis=1)[0:tot_len, :]
    c_min = np.array([
        0.09, 0.09, 0.09, 0.01, 0.01,
        1 / (2 * 365), 1 / (365 * 10), 1 / (60 * 365),
        0.1 / (0.1 * 365), 0.06 / (0.137 * 365),
        0.06 / (5 * 365), 0.06 / (222.22 * 365),
        clitter[0] / 100, clitter[0] / 100,
        csoil[0] / 100, csoil[0] / 2,
    ])
    c_max = np.array([
        1, 1, 0.21, 1, 1,
        1 / (0.3 * 365), 1 / (0.8 * 365), 1 / 365,
        1 / (365 * 0.1), 0.6 / (365 * 0.137),
        0.6 / (365 * 5), 0.6 / (222.22 * 365),
        clitter[0], clitter[0],
        csoil[0] / 3, csoil[0],
    ])

    isQualified = make_param_filter_func(c_max, c_min)
    uniform_prop = make_uniform_proposer(
        c_min,
        c_max,
        # D=10.0,
        D=20.0,
        filter_func=isQualified,
    )

    cpa = UnEstimatedParameters(
        C_leaf_0=cleaf[0],
        C_root_0=croot[0],
        C_wood_0=cwood[0],
        clitter_0=clitter[0],
        csoil_0=csoil[0],
        rh_0=rh[0],
        npp=npp,
        number_of_months=tot_len,
        clay=0.2028,
        silt=0.2808,
        lig_wood=0.4,
        f_wood2CWD=1,
        f_metlit2mic=0.45,
    )
    param2res = make_param2res(cpa)
    # Order of the parameters in the original list-based code:
    # pa=[beta1, beta2, lig_leaf, f41, f42, kleaf, kroot, kwood, kmet, kmic,
    #     kslow, kpass, cmet_init, cstr_init, cmic_init, cpassive_init]
    epa_0 = EstimatedParameters(
        beta_leaf=0.15,
        beta_root=0.2,
        lig_leaf=0.15,
        f_leaf2metlit=0.28,
        f_root2metlit=0.6,
        k_leaf=1 / 365,
        k_root=1 / (365 * 5),
        k_wood=1 / (365 * 40),
        k_metlit=0.5 / (365 * 0.1),
        k_mic=0.3 / (365 * 0.137),
        k_slowsom=0.3 / (365 * 5),
        k_passsom=0.3 / (222.22 * 365),
        C_metlit_0=0.05,
        C_CWD_0=0.1,
        C_mic_0=1,
        C_passom_0=5,
    )

    # Demo run: reuse cached results from the working directory if present.
    demo_aa_path = Path('cable_demo_da_aa.csv')
    demo_aa_j_path = Path('cable_demo_da_j_aa.csv')
    if not demo_aa_path.exists():
        print("did not find demo run results. Will perform demo run")
        nsimu_demo = 200
        C_demo, J_demo = mcmc(
            initial_parameters=epa_0,
            proposer=uniform_prop,
            param2res=param2res,
            # costfunction=make_weighted_cost_func(obs)
            # costfunction=make_feng_cost_func(obs),
            costfunction=make_jon_cost_func(obs),
            nsimu=nsimu_demo,
        )
        # Save the parameters and cost function values for postprocessing.
        pd.DataFrame(C_demo).to_csv(demo_aa_path, sep=',')
        pd.DataFrame(J_demo).to_csv(demo_aa_j_path, sep=',')
    else:
        print("""Found {p} from a previous demo run.
If you also want to recreate the demo output move the file!
""".format(p=demo_aa_path))
        # to_csv above writes the row index as the first column;
        # index_col=0 strips it again so that the cached array has the same
        # shape as the one returned by mcmc.
        C_demo = pd.read_csv(demo_aa_path, index_col=0).to_numpy()
        J_demo = pd.read_csv(demo_aa_j_path, index_col=0).to_numpy()

    # Build a new proposer based on a multivariate normal distribution using
    # the estimated covariance of the parameter values of the previous run.
    # First we check how many accepted parameters we got ...
    n_accept = C_demo.shape[1]
    # ... and then use part of them (everything after the first tenth) to
    # compute a covariance matrix for the formal run.
    covv = np.cov(C_demo[:, int(n_accept / 10):])

    normal_prop = make_multivariate_normal_proposer(
        covv=covv,
        filter_func=isQualified,
    )
    C_formal, J_formal = mcmc(
        initial_parameters=epa_0,
        proposer=normal_prop,
        param2res=param2res,
        # costfunction=make_weighted_cost_func(obs)
        # costfunction=make_feng_cost_func(obs),
        costfunction=make_jon_cost_func(obs),
        nsimu=200,
    )
    # Save the parameters and cost function values for postprocessing.
    formal_aa_path = Path('cable_formal_da_aa.csv')
    formal_aa_j_path = Path('cable_formal_da_j_aa.csv')
    pd.DataFrame(C_formal).to_csv(formal_aa_path, sep=',')
    pd.DataFrame(J_formal).to_csv(formal_aa_j_path, sep=',')

    # Mean over the samples (axis 1) gives one value per parameter.
    sol_mean = param2res(np.mean(C_formal, axis=1))
    fig = plt.figure()
    plot_solutions(
        fig,
        times=range(sol_mean.shape[0]),
        var_names=Observables._fields,
        tup=(sol_mean, obs),
        names=('mean', 'obs'),
    )
    fig.savefig('solutions.pdf')
    # Close the figure so repeated test runs do not accumulate open figures.
    plt.close(fig)
def test_forward_simulation(self):
    """Run the forward model with initial parameters and plot the result.

    The site data is loaded from the ``dataPath`` entry of the module-level
    ``conf_dict``.  The model solution for ``epa0`` is plotted together with
    the observations into ``solutions.pdf``.  Loading and simulation times
    are printed.  The test contains no assertions; it only checks that the
    forward simulation runs through.
    """
    # Original note: compare stored monthly timesteps (although the
    # computation happens in daily steps).
    t0 = time()
    npp, rh, ra, csoil, cveg = get_example_site_vars(
        Path(conf_dict['dataPath']))
    print("data_loaded after", time() - t0)
    print([a.shape for a in (npp, rh, ra, csoil, cveg)])

    # Single source of truth for the simulation length: used for the
    # observation slice, the model parameters and the time axis of the plot.
    nyears = 320
    # nyears = 2
    obs_tup = Observables(
        c_veg=cveg,
        c_soil=csoil,
        a_respiration=monthly_to_yearly(ra),
        h_respiration=monthly_to_yearly(rh),
    )
    obs = np.stack(obs_tup, axis=1)[0:nyears, :]

    epa0 = EstimatedParameters(
        beta_leaf=0.15,                # 0
        beta_root=0.2,                 # 1
        k_leaf=1 / 365,                # 2
        k_root=1 / (365 * 5),          # 3
        k_wood=1 / (365 * 40),         # 4
        k_cwd=1 / (365 * 5),           # 5
        k_samet=0.5 / (365 * 0.1),     # 6
        k_sastr=0.5 / (365 * 0.1),     # 7
        k_samic=0.3 / (365 * 0.137),   # 8
        k_slmet=0.3 / (365),           # 9
        k_slstr=0.3 / (365),           # 10
        k_slmic=0.3 / (365),           # 11
        k_slow=0.3 / (365 * 5),        # 12
        k_arm=0.3 / (222 * 365),       # 13
        f_samet_leaf=0.3,              # 14
        f_slmet_root=0.3,              # 15
        f_samic_cwd=0.3,               # 16
        C_leaf_0=cveg[0] / 5,          # 17
        C_root_0=cveg[0] / 5,          # 18
        C_cwd_0=cveg[0] / 50,          # 19
        C_samet_0=cveg[0] / 300,       # 20
        C_sastr_0=cveg[0] / 300,       # 21
        C_samic_0=cveg[0] / 500,       # 22
        C_slmet_0=csoil[0] / 10,       # 23
        C_slstr_0=csoil[0] / 10,       # 24
        C_slmic_0=csoil[0] / 10,       # 25
        C_slow_0=csoil[0] / 10,        # 26
    )
    cpa = UnEstimatedParameters(
        C_soil_0=csoil[0],
        C_veg_0=cveg[0],
        rh_0=rh[0],
        ra_0=ra[0],
        npp=npp,
        clay=0.2028,
        silt=0.2808,
        # Was hard-coded as 320; tie it to nyears so that the model output
        # and the observation slice always have the same length.
        nyears=nyears,
    )
    param2res = make_param2res(cpa)
    t1 = time()
    res = param2res(epa0)
    print(time() - t1)

    # from IPython import embed; embed()
    fig = plt.figure()
    plot_solutions(
        fig,
        times=np.arange(nyears),
        var_names=Observables._fields,
        tup=(res, obs),
        names=["solution with initial params", "observations"],
    )
    fig.savefig('solutions.pdf')
    # Close the figure so repeated test runs do not accumulate open figures.
    plt.close(fig)