def ABCpmc_HOD(T, eps_val, N_part=1000, prior_name='first_try',
               observables=None, data_dict=None, output_dir=None):
    ''' ABC-PMC implementation.

    Parameters
    ----------
    - T : Number of iterations
    - eps_val : Initial tolerance value(s) for the multi-distance epsilon
    - N_part : Number of particles
    - prior_name : Name of the prior range passed to PriorRange
    - observables : list of observables. Options are 'nbar', 'gmf', 'xi'
        (default ['nbar', 'xi'])
    - data_dict : dictionary that specifies the observation keywords
        (default {'Mr': 21})
    - output_dir : directory for output files (default util.dat_dir())
    '''
    # avoid mutable default arguments shared across calls
    if observables is None:
        observables = ['nbar', 'xi']
    if data_dict is None:
        data_dict = {'Mr': 21}
    if output_dir is None:
        output_dir = util.dat_dir()

    # Initializing the vector of observables and inverse covariance matrix
    if observables == ['xi']:
        fake_obs = Data.data_xi(**data_dict)
        fake_obs_cov = Data.data_cov(**data_dict)[1:16, 1:16]
        xi_Cii = np.diag(fake_obs_cov)
    elif observables == ['nbar', 'xi']:
        fake_obs = np.hstack(
            [Data.data_nbar(**data_dict), Data.data_xi(**data_dict)])
        fake_obs_cov = Data.data_cov(**data_dict)[:16, :16]
        Cii = np.diag(fake_obs_cov)
        xi_Cii = Cii[1:]
        nbar_Cii = Cii[0]
    elif observables == ['nbar', 'gmf']:
        fake_obs = np.hstack(
            [Data.data_nbar(**data_dict), Data.data_gmf(**data_dict)])
        fake_obs_cov = Data.data_cov('nbar_gmf', **data_dict)
        Cii = np.diag(fake_obs_cov)
        gmf_Cii = Cii[1:]
        nbar_Cii = Cii[0]

    # True HOD parameters
    data_hod_dict = Data.data_hod_param(Mr=data_dict['Mr'])
    data_hod = np.array([
        data_hod_dict['logM0'],                 # log M0
        np.log(data_hod_dict['sigma_logM']),    # log(sigma)
        data_hod_dict['logMmin'],               # log Mmin
        data_hod_dict['alpha'],                 # alpha
        data_hod_dict['logM1']                  # log M1
        ])

    # Priors
    prior_min, prior_max = PriorRange(prior_name)
    prior = abcpmc.TophatPrior(prior_min, prior_max)
    prior_range = np.zeros((len(prior_min), 2))
    prior_range[:, 0] = prior_min
    prior_range[:, 1] = prior_max

    # simulator
    our_model = HODsim(Mr=data_dict['Mr'])    # initialize model
    kwargs = {'prior_range': prior_range, 'observables': observables}

    def simz(tt):
        # forward model; dump the crashing inputs so a failure can be reproduced
        sim = our_model.sum_stat(tt, **kwargs)
        if sim is None:
            pickle.dump(tt, open("simz_crash_theta.p", 'wb'))
            pickle.dump(kwargs, open('simz_crash_kwargs.p', 'wb'))
            raise ValueError('Simulator is giving NonetType')
        return sim

    def multivariate_rho(datum, model):
        # component-wise chi^2-like distances, one entry per observable
        dists = []
        if observables == ['nbar', 'xi']:
            dist_nbar = (datum[0] - model[0])**2. / nbar_Cii
            dist_xi = np.sum((datum[1:] - model[1:])**2. / xi_Cii)
            dists = [dist_nbar, dist_xi]
        elif observables == ['nbar', 'gmf']:
            dist_nbar = (datum[0] - model[0])**2. / nbar_Cii
            dist_gmf = np.sum((datum[1:] - model[1:])**2. / gmf_Cii)
            dists = [dist_nbar, dist_gmf]
        elif observables == ['xi']:
            dist_xi = np.sum((datum - model)**2. / xi_Cii)
            dists = [dist_xi]
        return np.array(dists)

    mpi_pool = mpi_util.MpiPool()
    abcpmc_sampler = abcpmc.Sampler(
            N=N_part,                # N_particles
            Y=fake_obs,              # data
            postfn=simz,             # simulator
            dist=multivariate_rho,   # distance function
            pool=mpi_pool)
    abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal
    eps = abcpmc.MultiConstEps(T, eps_val)
    pools = []
    # truncate the tolerance log at the start of the run
    f = open("abc_tolerance.dat", "w")
    f.close()
    eps_str = ''
    for pool in abcpmc_sampler.sample(prior, eps):
        # append the tolerance to file only when it changes
        new_eps_str = '\t'.join(eps(pool.t).astype('str')) + '\n'
        if eps_str != new_eps_str:
            f = open("abc_tolerance.dat", "a")
            eps_str = new_eps_str
            f.write(eps_str)
            f.close()

        print("T:{0},ratio: {1:>.4f}".format(pool.t, pool.ratio))
        print(eps(pool.t))

        # plot theta
        plot_thetas(pool.thetas, pool.ws, pool.t,
                    Mr=data_dict["Mr"], truths=data_hod,
                    plot_range=prior_range, observables=observables,
                    output_dir=output_dir)

        # HACK: hardcoded restart state at t == 3, only valid for the
        # nbar+xi Mr=21 run on mercer -- remove or parameterize for other runs
        if (pool.t < 4) and (pool.t > 2):
            pool.thetas = np.loadtxt(
                "/home/mj/abc/halo/dat/gold/nbar_xi_Mr21_theta_t3.mercer.dat")
            pool.ws = np.loadtxt(
                "/home/mj/abc/halo/dat/gold/nbar_xi_Mr21_w_t3.mercer.dat")
            eps.eps = [1.12132735353, 127.215586776]

        # write theta and w to file
        theta_file = ''.join([
            output_dir, util.observable_id_flag(observables),
            '_Mr', str(data_dict["Mr"]), '_theta_t', str(pool.t),
            '.mercer.dat'])
        w_file = ''.join([
            output_dir, util.observable_id_flag(observables),
            '_Mr', str(data_dict["Mr"]), '_w_t', str(pool.t),
            '.mercer.dat'])
        np.savetxt(theta_file, pool.thetas)
        np.savetxt(w_file, pool.ws)

        # shrink the tolerance: aggressively (median) early on, then gentler
        # percentiles once the particle proposal kicks in
        if pool.t < 3:
            eps.eps = np.percentile(np.atleast_2d(pool.dists), 50, axis=0)
        elif (pool.t > 2) and (pool.t < 20):
            eps.eps = np.percentile(np.atleast_2d(pool.dists), 75, axis=0)
            abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal
        else:
            eps.eps = np.percentile(np.atleast_2d(pool.dists), 90, axis=0)
            abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal

        pools.append(pool)

    return pools
def ABCpmc_HOD(T, eps_val, N_part=1000, prior_name='first_try',
               observables=None, abcrun=None, data_dict=None):
    ''' ABC-PMC implementation.

    Parameters
    ----------
    - T : Number of iterations
    - eps_val : Starting tolerance for the constant-epsilon threshold
    - N_part : Number of particles
    - prior_name : Name of the prior range passed to PriorRange
    - observables : list of observables. Options are 'nbar', 'gmf', 'xi'
        (default ['nbar', 'xi'])
    - abcrun : Name of the ABC run; tags every output file (required)
    - data_dict : dictionary that specifies the observation keywords
        (default {'Mr': 21, 'b_normal': 0.25})
    '''
    # avoid mutable default arguments shared across calls
    if observables is None:
        observables = ['nbar', 'xi']
    if data_dict is None:
        data_dict = {'Mr': 21, 'b_normal': 0.25}
    if abcrun is None:
        raise ValueError("Specify the name of the abcrun!")

    # Initializing the vector of observables and inverse covariance matrix
    fake_obs, Cii_list = getObvs(observables, **data_dict)

    # True HOD parameters
    data_hod_dict = Data.data_hod_param(Mr=data_dict['Mr'])
    data_hod = np.array([
        data_hod_dict['logM0'],                 # log M0
        np.log(data_hod_dict['sigma_logM']),    # log(sigma)
        data_hod_dict['logMmin'],               # log Mmin
        data_hod_dict['alpha'],                 # alpha
        data_hod_dict['logM1']                  # log M1
        ])

    # Priors
    prior_min, prior_max = PriorRange(prior_name)
    prior = abcpmc.TophatPrior(prior_min, prior_max)
    prior_range = np.zeros((len(prior_min), 2))
    prior_range[:, 0] = prior_min
    prior_range[:, 1] = prior_max

    # Simulator
    our_model = ABC_HODsim(Mr=data_dict['Mr'],
                           b_normal=data_dict['b_normal'])  # initialize model
    kwargs = {'prior_range': prior_range, 'observables': observables}

    def simz(tt):
        # forward model; dump the crashing inputs so a failure can be reproduced
        sim = our_model(tt, **kwargs)
        if sim is None:
            pickle.dump(tt, open(util.crash_dir() + "simz_crash_theta.p", 'wb'))
            pickle.dump(kwargs, open(util.crash_dir() + 'simz_crash_kwargs.p', 'wb'))
            raise ValueError('Simulator is giving NonetType')
        return sim

    def multivariate_rho(model, datum):
        # component-wise chi^2-like distances, one entry per observable
        dists = []
        if observables == ['nbar', 'xi']:
            nbar_Cii = Cii_list[0]
            xi_Cii = Cii_list[1]
            dist_nbar = (datum[0] - model[0])**2. / nbar_Cii
            dist_xi = np.sum((datum[1:] - model[1:])**2. / xi_Cii)
            dists = [dist_nbar, dist_xi]
        elif observables == ['nbar', 'gmf']:
            nbar_Cii = Cii_list[0]
            gmf_Cii = Cii_list[1]
            dist_nbar = (datum[0] - model[0])**2. / nbar_Cii
            # omitting the first GMF bin in the model ([1:])
            # NOTE(review): model[1][1:] takes element 1 of the model and then
            # drops its first bin -- verify against the simulator's output shape
            dist_gmf = np.sum((datum[1:] - model[1][1:])**2. / gmf_Cii)
            dists = [dist_nbar, dist_gmf]
        elif observables == ['xi']:
            xi_Cii = Cii_list[0]
            dist_xi = np.sum((datum - model)**2. / xi_Cii)
            dists = [dist_xi]
        return np.array(dists)

    # output file names, tagged with the run name
    tolerance_file = lambda name: ''.join([
        util.abc_dir(), "abc_tolerance", '.', name, '.dat'])
    theta_file = lambda tt, name: ''.join([
        util.abc_dir(), util.observable_id_flag(observables),
        '_theta_t', str(tt), '.', name, '.dat'])
    w_file = lambda tt, name: ''.join([
        util.abc_dir(), util.observable_id_flag(observables),
        '_w_t', str(tt), '.', name, '.dat'])
    dist_file = lambda tt, name: ''.join([
        util.abc_dir(), util.observable_id_flag(observables),
        '_dist_t', str(tt), '.', name, '.dat'])

    def launch(eps_start, init_pool=None):
        # run the ABC-PMC sampler starting from tolerance eps_start
        print(eps_start)
        eps = abcpmc.ConstEps(T, eps_start)
        mpi_pool = mpi_util.MpiPool()
        pools = []
        abcpmc_sampler = abcpmc.Sampler(
                N=N_part,                # N_particles
                Y=fake_obs,              # data
                postfn=simz,             # simulator
                dist=multivariate_rho,   # distance function
                pool=mpi_pool)
        abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal
        # truncate the tolerance log at the start of the run
        f = open(tolerance_file(abcrun), "w")
        f.close()
        eps_str = ''
        for pool in abcpmc_sampler.sample(prior, eps):
            # append the tolerance to file only when it changes
            new_eps_str = '\t'.join(np.array(pool.eps).astype('str')) + '\n'
            if eps_str != new_eps_str:
                f = open(tolerance_file(abcrun), "a")
                eps_str = new_eps_str
                f.write(eps_str)
                f.close()

            print("T:{0},ratio: {1:>.4f}".format(pool.t, pool.ratio))
            print(pool.eps)

            # write theta, w, and rhos to file
            np.savetxt(theta_file(pool.t, abcrun), pool.thetas)
            np.savetxt(w_file(pool.t, abcrun), pool.ws)
            np.savetxt(dist_file(pool.t, abcrun), pool.dists)

            # plot theta
            plot_thetas(pool.thetas, pool.ws, pool.t,
                        truths=data_hod, plot_range=prior_range,
                        theta_filename=theta_file(pool.t, abcrun),
                        output_dir=util.abc_dir())

            # shrink the tolerance to the median distance of the pool
            eps.eps = np.median(np.atleast_2d(pool.dists), axis=0)
            pools.append(pool)
        abcpmc_sampler.close()
        return pools

    print("Initial launch of the sampler")
    pools = launch(eps_val)
    # BUGFIX: previously the pools returned by launch() were discarded
    return pools
def plot_mcmc(Nwalkers, Niter=1000, Nchains_burn=200, Mr=21, truths=None,
              observables=['nbar', 'xi'], plot_range=None):
    ''' Plot the MCMC chains: a corner plot of the posterior samples and a
    per-parameter trace plot of every walker.
    '''
    if truths is None:
        # default "true" values: the fiducial HOD parameters for this Mr
        hod = Data.data_hod_param(Mr=Mr)
        truths = np.array([
            hod['logM0'],               # log M0
            np.log(hod['sigma_logM']),  # log(sigma)
            hod['logMmin'],             # log Mmin
            hod['alpha'],               # alpha
            hod['logM1']                # log M1
            ])
    if plot_range is None:
        # default plotting range: the prior range
        p_min, p_max = PriorRange(None)
        plot_range = np.zeros((len(p_min), 2))
        plot_range[:, 0] = p_min
        plot_range[:, 1] = p_max

    # chain files
    chain_file = ''.join([util.dat_dir(),
                          util.observable_id_flag(observables),
                          '_Mr', str(Mr), '.mcmc_chain.dat'])
    sample = np.loadtxt(chain_file)

    # one label per HOD parameter, shared by both figures
    param_labels = [
        r'$\mathtt{\log\;M_{0}}$', r'$\mathtt{\log\;\sigma_{\logM}}$',
        r'$\mathtt{\log\;M_{min}}$', r'$\mathtt{\alpha}$',
        r'$\mathtt{\log\;M_{1}}$']

    # Posterior Likelihood Corner Plot (burn-in steps discarded)
    fig = corner.corner(
        sample[Nchains_burn * Nwalkers:],
        truths=truths,
        truth_color='#ee6a50',
        labels=param_labels,
        label_kwargs={'fontsize': 25},
        range=plot_range,
        quantiles=[0.16, 0.5, 0.84],
        show_titles=True,
        title_args={"fontsize": 12},
        plot_datapoints=True,
        fill_contours=True,
        levels=[0.68, 0.95],
        color='b',
        bins=16,
        smooth=1.0)
    fig_file = ''.join([util.fig_dir(),
                        util.observable_id_flag(observables),
                        '_Mr', str(Mr), '.Niter', str(Niter),
                        '.Nburn', str(Nchains_burn),
                        '.mcmc_samples.test.png'])
    plt.savefig(fig_file)
    plt.close()

    # MCMC Chain plot: (step, walker, parameter) trace per parameter
    Ndim = len(sample[0])
    Nchain = len(sample) / Nwalkers
    chain_ensemble = sample.reshape(Nchain, Nwalkers, Ndim)
    fig, axes = plt.subplots(5, 1, sharex=True, figsize=(10, 12))
    for ii in xrange(5):
        axes[ii].plot(chain_ensemble[:, :, ii], color="k", alpha=0.4)
        axes[ii].yaxis.set_major_locator(MaxNLocator(5))
        axes[ii].axhline(truths[ii], color="#888888", lw=2)
        # mark where the burn-in ends
        axes[ii].vlines(Nchains_burn, plot_range[ii, 0], plot_range[ii, 1],
                        colors='#ee6a50', linewidth=4, alpha=1)
        axes[ii].set_ylim([plot_range[ii, 0], plot_range[ii, 1]])
        axes[ii].set_xlim(0, 6000)
        axes[ii].set_ylabel(param_labels[ii], fontsize=25)
    axes[4].set_xlabel("Step Number", fontsize=25)
    fig.tight_layout(h_pad=0.0)
    fig_file = ''.join([util.fig_dir(),
                        util.observable_id_flag(observables),
                        '_Mr', str(Mr), '.Niter', str(Niter),
                        '.Nburn', str(Nchains_burn),
                        '.mcmc_time.test.png'])
    plt.savefig(fig_file)
    plt.close()
def ABCpmc_HOD(T, eps_val, N_part=1000, prior_name='first_try',
               observables=None, data_dict=None, output_dir=None):
    ''' ABC-PMC implementation.

    Parameters
    ----------
    - T : Number of iterations
    - eps_val : Initial tolerance value(s) for the multi-distance epsilon
    - N_part : Number of particles
    - prior_name : Name of the prior range passed to PriorRange
    - observables : list of observables. Options are 'nbar', 'gmf', 'xi'
        (default ['nbar', 'xi'])
    - data_dict : dictionary that specifies the observation keywords
        (default {'Mr': 21})
    - output_dir : directory for output files (default util.dat_dir())
    '''
    # avoid mutable default arguments shared across calls
    if observables is None:
        observables = ['nbar', 'xi']
    if data_dict is None:
        data_dict = {'Mr': 21}
    if output_dir is None:
        output_dir = util.dat_dir()

    # Initializing the vector of observables and inverse covariance matrix
    if observables == ['xi']:
        fake_obs = Data.data_xi(**data_dict)
        fake_obs_cov = Data.data_cov(**data_dict)[1:16, 1:16]
        xi_Cii = np.diag(fake_obs_cov)
    elif observables == ['nbar', 'xi']:
        fake_obs = np.hstack(
            [Data.data_nbar(**data_dict), Data.data_xi(**data_dict)])
        fake_obs_cov = Data.data_cov(**data_dict)[:16, :16]
        Cii = np.diag(fake_obs_cov)
        xi_Cii = Cii[1:]
        nbar_Cii = Cii[0]
    elif observables == ['nbar', 'gmf']:
        fake_obs = np.hstack(
            [Data.data_nbar(**data_dict), Data.data_gmf(**data_dict)])
        fake_obs_cov = Data.data_cov('nbar_gmf', **data_dict)
        Cii = np.diag(fake_obs_cov)
        gmf_Cii = Cii[1:]
        nbar_Cii = Cii[0]

    # True HOD parameters
    data_hod_dict = Data.data_hod_param(Mr=data_dict['Mr'])
    data_hod = np.array([
        data_hod_dict['logM0'],                 # log M0
        np.log(data_hod_dict['sigma_logM']),    # log(sigma)
        data_hod_dict['logMmin'],               # log Mmin
        data_hod_dict['alpha'],                 # alpha
        data_hod_dict['logM1']                  # log M1
        ])

    # Priors
    prior_min, prior_max = PriorRange(prior_name)
    prior = abcpmc.TophatPrior(prior_min, prior_max)
    prior_range = np.zeros((len(prior_min), 2))
    prior_range[:, 0] = prior_min
    prior_range[:, 1] = prior_max

    # simulator
    our_model = HODsim(Mr=data_dict['Mr'])    # initialize model
    kwargs = {'prior_range': prior_range, 'observables': observables}

    def simz(tt):
        # forward model; dump the crashing inputs so a failure can be reproduced
        sim = our_model.sum_stat(tt, **kwargs)
        if sim is None:
            pickle.dump(tt, open("simz_crash_theta.p", 'wb'))
            pickle.dump(kwargs, open('simz_crash_kwargs.p', 'wb'))
            raise ValueError('Simulator is giving NonetType')
        return sim

    def multivariate_rho(datum, model):
        # component-wise chi^2-like distances, one entry per observable
        dists = []
        if observables == ['nbar', 'xi']:
            dist_nbar = (datum[0] - model[0])**2. / nbar_Cii
            dist_xi = np.sum((datum[1:] - model[1:])**2. / xi_Cii)
            dists = [dist_nbar, dist_xi]
        elif observables == ['nbar', 'gmf']:
            dist_nbar = (datum[0] - model[0])**2. / nbar_Cii
            dist_gmf = np.sum((datum[1:] - model[1:])**2. / gmf_Cii)
            dists = [dist_nbar, dist_gmf]
        elif observables == ['xi']:
            dist_xi = np.sum((datum - model)**2. / xi_Cii)
            dists = [dist_xi]
        # compute the array once instead of twice (debug print + return)
        dists_arr = np.array(dists)
        print(dists_arr)
        return dists_arr

    mpi_pool = mpi_util.MpiPool()
    abcpmc_sampler = abcpmc.Sampler(
            N=N_part,                # N_particles
            Y=fake_obs,              # data
            postfn=simz,             # simulator
            dist=multivariate_rho,   # distance function
            pool=mpi_pool)
    abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal
    eps = abcpmc.MultiConstEps(T, eps_val)
    pools = []
    # truncate the tolerance log at the start of the run
    f = open("abc_tolerance.dat", "w")
    f.close()
    eps_str = ''
    for pool in abcpmc_sampler.sample(prior, eps):
        # append the tolerance to file only when it changes
        new_eps_str = '\t'.join(eps(pool.t).astype('str')) + '\n'
        if eps_str != new_eps_str:
            f = open("abc_tolerance.dat", "a")
            eps_str = new_eps_str
            f.write(eps_str)
            f.close()

        print("T:{0},ratio: {1:>.4f}".format(pool.t, pool.ratio))
        print(eps(pool.t))

        # plot theta
        plot_thetas(pool.thetas, pool.ws, pool.t,
                    Mr=data_dict["Mr"], truths=data_hod,
                    plot_range=prior_range, observables=observables,
                    output_dir=output_dir)

        # write theta and w to file
        theta_file = ''.join([
            output_dir, util.observable_id_flag(observables),
            '_Mr', str(data_dict["Mr"]), '_theta_t', str(pool.t),
            '.mercer.dat'])
        w_file = ''.join([
            output_dir, util.observable_id_flag(observables),
            '_Mr', str(data_dict["Mr"]), '_w_t', str(pool.t),
            '.mercer.dat'])
        np.savetxt(theta_file, pool.thetas)
        np.savetxt(w_file, pool.ws)

        # shrink the tolerance: aggressively (median) early on, then gentler
        # percentiles once the particle proposal kicks in
        if pool.t < 3:
            eps.eps = np.percentile(np.atleast_2d(pool.dists), 50, axis=0)
        elif (pool.t > 2) and (pool.t < 20):
            eps.eps = np.percentile(np.atleast_2d(pool.dists), 75, axis=0)
            abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal
        else:
            eps.eps = np.percentile(np.atleast_2d(pool.dists), 90, axis=0)
            abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal

        pools.append(pool)

    return pools
def mcmc_mpi(Nwalkers, Nchains, observables=None, data_dict=None,
             prior_name='first_try', mcmcrun=None, continue_chain=False):
    ''' Standard MCMC implementaion

    Parameters
    -----------
    - Nwalkers : Number of walkers
    - Nchains : Number of MCMC chain iterations to run
    - observables : list of observables. Options are:
        ['nbar','xi'],['nbar','gmf'],['xi'] (default ['nbar','xi'])
    - data_dict : dictionary that specifies the observation keywords
        (default {'Mr': 21, 'b_normal': 0.25})
    - prior_name : Name of the prior range passed to PriorRange
    - mcmcrun : Name of the MCMC run; tags the chain output file
    - continue_chain : If True and the chain file exists, resume from the end
        of the existing chain (previously referenced as an undefined name)
    '''
    # avoid mutable default arguments shared across calls
    if observables is None:
        observables = ['nbar', 'xi']
    if data_dict is None:
        data_dict = {'Mr': 21, 'b_normal': 0.25}
    # BUGFIX: `Niter` was an undefined name (NameError at runtime); the
    # intended iteration count is the otherwise-unused Nchains argument
    Niter = Nchains

    # Initializing the vector of observables and inverse covariance matrix
    if observables == ['xi']:
        fake_obs = Data.data_xi(**data_dict)
        fake_obs_icov = Data.data_cov(inference='mcmc', **data_dict)[1:16, 1:16]
    if observables == ['nbar', 'xi']:
        fake_obs = np.hstack(
            [Data.data_nbar(**data_dict), Data.data_xi(**data_dict)])
        fake_obs_icov = Data.data_cov(inference='mcmc', **data_dict)[:16, :16]
    if observables == ['nbar', 'gmf']:
        ##### FIRST BIN OF GMF DROPPED ###############
        # CAUTION: hardcoded
        fake_obs = np.hstack(
            [Data.data_nbar(**data_dict), Data.data_gmf(**data_dict)[1:]])
        fake_obs_icov = np.zeros((10, 10))
        # Covariance matrix being adjusted accordingly
        fake_obs_icov[1:, 1:] = Data.data_cov(inference='mcmc',
                                              **data_dict)[17:, 17:]
        fake_obs_icov[0, 1:] = Data.data_cov(inference='mcmc',
                                             **data_dict)[0, 17:]
        fake_obs_icov[1:, 0] = Data.data_cov(inference='mcmc',
                                             **data_dict)[17:, 0]
        fake_obs_icov[0, 0] = Data.data_cov(inference='mcmc',
                                            **data_dict)[0, 0]

    # True HOD parameters
    data_hod_dict = Data.data_hod_param(Mr=data_dict['Mr'])
    data_hod = np.array([
        data_hod_dict['logM0'],                 # log M0
        np.log(data_hod_dict['sigma_logM']),    # log(sigma)
        data_hod_dict['logMmin'],               # log Mmin
        data_hod_dict['alpha'],                 # alpha
        data_hod_dict['logM1']                  # log M1
        ])
    Ndim = len(data_hod)

    # Priors
    prior_min, prior_max = PriorRange(prior_name)
    prior_range = np.zeros((len(prior_min), 2))
    prior_range[:, 0] = prior_min
    prior_range[:, 1] = prior_max

    # mcmc chain output file
    chain_file = ''.join([util.mcmc_dir(),
                          util.observable_id_flag(observables),
                          '.', mcmcrun, '.mcmc_chain.dat'])

    if os.path.isfile(chain_file) and continue_chain:
        print('Continuing previous MCMC chain!')
        sample = np.loadtxt(chain_file)
        # Number of chains left to finish
        Nchain = Niter - (len(sample) / Nwalkers)
        if Nchain <= 0:
            raise ValueError('chain file already contains %i iterations' % Niter)
        print('%i iterations left to finish' % Nchain)
        # Initializing Walkers from the end of the chain
        pos0 = sample[-Nwalkers:]
    else:
        # new chain
        f = open(chain_file, 'w')
        f.close()
        Nchain = Niter
        # Initializing Walkers: small Gaussian ball around the true HOD values
        random_guess = data_hod
        pos0 = np.repeat(random_guess, Nwalkers).reshape(Ndim, Nwalkers).T + \
            5.e-2 * np.random.randn(Ndim * Nwalkers).reshape(Nwalkers, Ndim)

    # Initializing MPIPool; worker processes wait for tasks then exit
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Initializing the emcee sampler
    hod_kwargs = {
        'prior_range': prior_range,
        'data': fake_obs,
        'data_icov': fake_obs_icov,
        'observables': observables,
        'Mr': data_dict['Mr']
        }
    sampler = emcee.EnsembleSampler(Nwalkers, Ndim, lnPost,
                                    pool=pool, kwargs=hod_kwargs)

    # run the sampler, appending every step's walker positions to the chain file
    for result in sampler.sample(pos0, iterations=Nchain, storechain=False):
        position = result[0]
        f = open(chain_file, 'a')
        for k in range(position.shape[0]):
            output_str = '\t'.join(position[k].astype('str')) + '\n'
            f.write(output_str)
        f.close()
    pool.close()
def mcmc_ipython_par(Nwalkers, Nchains_burn, Nchains_pro,
                     observables=None, data_dict=None,
                     prior_name='first_try', threads=1):
    ''' Standard MCMC implementaion parallelized over an ipcluster.

    Parameters
    -----------
    - Nwalkers : Number of walkers
    - Nchains_burn : Number of burn-in chains
    - Nchains_pro : Number of production chains
    - observables : list of observables. Options are 'nbar', 'gmf', 'xi'
        (default ['nbar', 'xi'])
    - data_dict : dictionary that specifies the observation keywords
        (default {'Mr': 20, 'Nmock': 500})
    - prior_name : Name of the prior range passed to PriorRange
    - threads : unused here; parallelism comes from the ipcluster view
    '''
    # avoid mutable default arguments shared across calls
    if observables is None:
        observables = ['nbar', 'xi']
    if data_dict is None:
        data_dict = {'Mr': 20, 'Nmock': 500}

    # data observables
    fake_obs = []       # list of observables
    fake_obs_cov = []
    for obv in observables:
        if obv == 'nbar':
            data_nbar, data_nbar_var = Data.data_nbar(**data_dict)
            fake_obs.append(data_nbar)
            fake_obs_cov.append(data_nbar_var)
        if obv == 'gmf':
            data_gmf, data_gmf_sigma = Data.data_gmf(**data_dict)
            fake_obs.append(data_gmf)
            # BUGFIX: previously appended data_gmf (the data vector itself)
            # instead of its uncertainty
            fake_obs_cov.append(data_gmf_sigma)
        if obv == 'xi':
            # import xir and full covariance matrix of xir
            data_xi, data_xi_cov = Data.data_xi_full_cov(**data_dict)
            data_xi_invcov = Data.data_xi_inv_cov(**data_dict)
            fake_obs.append(data_xi)
            fake_obs_cov.append(data_xi_invcov)

    # True HOD parameters
    data_hod_dict = Data.data_hod_param(Mr=data_dict['Mr'])
    data_hod = np.array([
        data_hod_dict['logM0'],                 # log M0
        np.log(data_hod_dict['sigma_logM']),    # log(sigma)
        data_hod_dict['logMmin'],               # log Mmin
        data_hod_dict['alpha'],                 # alpha
        data_hod_dict['logM1']                  # log M1
        ])
    Ndim = len(data_hod)

    # Priors
    prior_min, prior_max = PriorRange(prior_name)
    prior_range = np.zeros((len(prior_min), 2))
    prior_range[:, 0] = prior_min
    prior_range[:, 1] = prior_max

    # Initializing Walkers: small Gaussian ball around a hand-picked guess
    random_guess = np.array([11., np.log(.4), 11.5, 1.0, 13.5])
    pos0 = np.repeat(random_guess, Nwalkers).reshape(Ndim, Nwalkers).T + \
        1e-1 * np.random.randn(Ndim * Nwalkers).reshape(Nwalkers, Ndim)

    # Initializing the emcee sampler
    hod_kwargs = {
        'prior_range': prior_range,
        'data': fake_obs,
        'data_cov': fake_obs_cov,
        'observables': observables,
        'Mr': data_dict['Mr']
        }

    # Set up the interface to the ipcluster.
    c = Client()
    view = c[:]
    view.push({"lnPost": lnPost})
    # Modules necessary in posterior calculation should be called here
    view.execute("import numpy as np")
    view.execute("from hod_sim import HODsimulator")

    # Setting up the Sampler
    sampler = emcee.EnsembleSampler(Nwalkers, Ndim, lnPost,
                                    kwargs=hod_kwargs, pool=view)

    # Setting up a file for saving the chains
    chain_file = ''.join([util.dat_dir(),
                          util.observable_id_flag(observables),
                          '_Mr', str(data_dict["Mr"]),
                          '_theta.mcmc_chain.dat'])
    f = open(chain_file, "w")
    f.close()

    # Running the Sampler and writing out the chains
    for result in sampler.sample(pos0,
                                 iterations=Nchains_burn + Nchains_pro,
                                 storechain=False):
        position = result[0]
        f = open(chain_file, "a")
        for k in range(position.shape[0]):
            output_str = '\t'.join(position[k].astype('str')) + '\n'
            f.write(output_str)
        f.close()
def mcmc_mpi(
    Nwalkers,
    Nchains,
    observables=None,
    data_dict=None,
    prior_name="first_try",
    mcmcrun=None,
    continue_chain=False,
):
    """ Standard MCMC implementaion

    Parameters
    -----------
    - Nwalkers : Number of walkers
    - Nchains : Number of MCMC chain iterations to run
    - observables : list of observables. Options are:
        ['nbar','xi'],['nbar','gmf'],['xi'] (default ['nbar','xi'])
    - data_dict : dictionary that specifies the observation keywords
        (default {'Mr': 21, 'b_normal': 0.25})
    - prior_name : Name of the prior range passed to PriorRange
    - mcmcrun : Name of the MCMC run; tags the chain output file
    - continue_chain : If True and the chain file exists, resume from the end
        of the existing chain (previously referenced as an undefined name)
    """
    # avoid mutable default arguments shared across calls
    if observables is None:
        observables = ["nbar", "xi"]
    if data_dict is None:
        data_dict = {"Mr": 21, "b_normal": 0.25}
    # BUGFIX: `Niter` was an undefined name (NameError at runtime); the
    # intended iteration count is the otherwise-unused Nchains argument
    Niter = Nchains

    # Initializing the vector of observables and inverse covariance matrix
    if observables == ["xi"]:
        fake_obs = Data.data_xi(**data_dict)
        fake_obs_icov = Data.data_cov(inference="mcmc", **data_dict)[1:16, 1:16]
    if observables == ["nbar", "xi"]:
        fake_obs = np.hstack([Data.data_nbar(**data_dict), Data.data_xi(**data_dict)])
        fake_obs_icov = Data.data_cov(inference="mcmc", **data_dict)[:16, :16]
    if observables == ["nbar", "gmf"]:
        ##### FIRST BIN OF GMF DROPPED ###############
        # CAUTION: hardcoded
        fake_obs = np.hstack([Data.data_nbar(**data_dict), Data.data_gmf(**data_dict)[1:]])
        fake_obs_icov = np.zeros((10, 10))
        # Covariance matrix being adjusted accordingly
        fake_obs_icov[1:, 1:] = Data.data_cov(inference="mcmc", **data_dict)[17:, 17:]
        fake_obs_icov[0, 1:] = Data.data_cov(inference="mcmc", **data_dict)[0, 17:]
        fake_obs_icov[1:, 0] = Data.data_cov(inference="mcmc", **data_dict)[17:, 0]
        fake_obs_icov[0, 0] = Data.data_cov(inference="mcmc", **data_dict)[0, 0]

    # True HOD parameters
    data_hod_dict = Data.data_hod_param(Mr=data_dict["Mr"])
    data_hod = np.array(
        [
            data_hod_dict["logM0"],  # log M0
            np.log(data_hod_dict["sigma_logM"]),  # log(sigma)
            data_hod_dict["logMmin"],  # log Mmin
            data_hod_dict["alpha"],  # alpha
            data_hod_dict["logM1"],  # log M1
        ]
    )
    Ndim = len(data_hod)

    # Priors
    prior_min, prior_max = PriorRange(prior_name)
    prior_range = np.zeros((len(prior_min), 2))
    prior_range[:, 0] = prior_min
    prior_range[:, 1] = prior_max

    # mcmc chain output file
    chain_file = "".join(
        [util.mcmc_dir(), util.observable_id_flag(observables), ".", mcmcrun, ".mcmc_chain.dat"]
    )

    if os.path.isfile(chain_file) and continue_chain:
        print("Continuing previous MCMC chain!")
        sample = np.loadtxt(chain_file)
        # Number of chains left to finish
        Nchain = Niter - (len(sample) / Nwalkers)
        if Nchain <= 0:
            raise ValueError("chain file already contains %i iterations" % Niter)
        print("%i iterations left to finish" % Nchain)
        # Initializing Walkers from the end of the chain
        pos0 = sample[-Nwalkers:]
    else:
        # new chain
        f = open(chain_file, "w")
        f.close()
        Nchain = Niter
        # Initializing Walkers: small Gaussian ball around the true HOD values
        random_guess = data_hod
        pos0 = np.repeat(random_guess, Nwalkers).reshape(Ndim, Nwalkers).T + 5.0e-2 * np.random.randn(
            Ndim * Nwalkers
        ).reshape(Nwalkers, Ndim)

    # Initializing MPIPool; worker processes wait for tasks then exit
    pool = MPIPool()
    if not pool.is_master():
        pool.wait()
        sys.exit(0)

    # Initializing the emcee sampler
    hod_kwargs = {
        "prior_range": prior_range,
        "data": fake_obs,
        "data_icov": fake_obs_icov,
        "observables": observables,
        "Mr": data_dict["Mr"],
    }
    sampler = emcee.EnsembleSampler(Nwalkers, Ndim, lnPost, pool=pool, kwargs=hod_kwargs)

    # run the sampler, appending every step's walker positions to the chain file
    for result in sampler.sample(pos0, iterations=Nchain, storechain=False):
        position = result[0]
        f = open(chain_file, "a")
        for k in range(position.shape[0]):
            output_str = "\t".join(position[k].astype("str")) + "\n"
            f.write(output_str)
        f.close()
    pool.close()
def plot_mcmc(Nwalkers, Niter=1000, Nchains_burn=200, Mr=21, truths=None,
              observables=['nbar', 'xi'], plot_range=None):
    ''' Plot MCMC chains: corner plot of the posterior after burn-in plus a
    walker trace plot for each of the five HOD parameters.
    '''
    if truths is None:
        # fall back to the fiducial HOD parameters for this Mr
        hod_params = Data.data_hod_param(Mr=Mr)
        truths = np.array([
            hod_params['logM0'],                # log M0
            np.log(hod_params['sigma_logM']),   # log(sigma)
            hod_params['logMmin'],              # log Mmin
            hod_params['alpha'],                # alpha
            hod_params['logM1']                 # log M1
            ])
    if plot_range is None:
        # fall back to the prior range
        lo, hi = PriorRange(None)
        plot_range = np.zeros((len(lo), 2))
        plot_range[:, 0] = lo
        plot_range[:, 1] = hi

    # chain files
    chain_file = ''.join([util.dat_dir(),
                          util.observable_id_flag(observables),
                          '_Mr', str(Mr), '.mcmc_chain.dat'])
    sample = np.loadtxt(chain_file)

    # axis labels shared by the corner plot and the trace plot
    axis_labels = [
        r'$\mathtt{\log\;M_{0}}$', r'$\mathtt{\log\;\sigma_{\logM}}$',
        r'$\mathtt{\log\;M_{min}}$', r'$\mathtt{\alpha}$',
        r'$\mathtt{\log\;M_{1}}$']

    # Posterior Likelihood Corner Plot (first Nchains_burn steps dropped)
    fig = corner.corner(
        sample[Nchains_burn * Nwalkers:],
        truths=truths,
        truth_color='#ee6a50',
        labels=axis_labels,
        label_kwargs={'fontsize': 25},
        range=plot_range,
        quantiles=[0.16, 0.5, 0.84],
        show_titles=True,
        title_args={"fontsize": 12},
        plot_datapoints=True,
        fill_contours=True,
        levels=[0.68, 0.95],
        color='b',
        bins=16,
        smooth=1.0)
    fig_file = ''.join([util.fig_dir(),
                        util.observable_id_flag(observables),
                        '_Mr', str(Mr), '.Niter', str(Niter),
                        '.Nburn', str(Nchains_burn),
                        '.mcmc_samples.test.png'])
    plt.savefig(fig_file)
    plt.close()

    # MCMC Chain plot: reshape flat samples to (step, walker, parameter)
    Ndim = len(sample[0])
    Nchain = len(sample) / Nwalkers
    chain_ensemble = sample.reshape(Nchain, Nwalkers, Ndim)
    fig, axes = plt.subplots(5, 1, sharex=True, figsize=(10, 12))
    for idx in xrange(5):
        axes[idx].plot(chain_ensemble[:, :, idx], color="k", alpha=0.4)
        axes[idx].yaxis.set_major_locator(MaxNLocator(5))
        axes[idx].axhline(truths[idx], color="#888888", lw=2)
        # vertical marker at the end of the burn-in phase
        axes[idx].vlines(Nchains_burn, plot_range[idx, 0], plot_range[idx, 1],
                         colors='#ee6a50', linewidth=4, alpha=1)
        axes[idx].set_ylim([plot_range[idx, 0], plot_range[idx, 1]])
        axes[idx].set_xlim(0, 6000)
        axes[idx].set_ylabel(axis_labels[idx], fontsize=25)
    axes[4].set_xlabel("Step Number", fontsize=25)
    fig.tight_layout(h_pad=0.0)
    fig_file = ''.join([util.fig_dir(),
                        util.observable_id_flag(observables),
                        '_Mr', str(Mr), '.Niter', str(Niter),
                        '.Nburn', str(Nchains_burn),
                        '.mcmc_time.test.png'])
    plt.savefig(fig_file)
    plt.close()
def mcmc_ipython_par(Nwalkers, Nchains_burn, Nchains_pro,
                     observables=None, data_dict=None,
                     prior_name='first_try', threads=1):
    ''' Standard MCMC implementaion parallelized over an ipcluster.

    Parameters
    -----------
    - Nwalkers : Number of walkers
    - Nchains_burn : Number of burn-in chains
    - Nchains_pro : Number of production chains
    - observables : list of observables. Options are 'nbar', 'gmf', 'xi'
        (default ['nbar', 'xi'])
    - data_dict : dictionary that specifies the observation keywords
        (default {'Mr': 20, 'Nmock': 500})
    - prior_name : Name of the prior range passed to PriorRange
    - threads : unused here; parallelism comes from the ipcluster view
    '''
    # avoid mutable default arguments shared across calls
    if observables is None:
        observables = ['nbar', 'xi']
    if data_dict is None:
        data_dict = {'Mr': 20, 'Nmock': 500}

    # data observables
    fake_obs = []       # list of observables
    fake_obs_cov = []
    for obv in observables:
        if obv == 'nbar':
            data_nbar, data_nbar_var = Data.data_nbar(**data_dict)
            fake_obs.append(data_nbar)
            fake_obs_cov.append(data_nbar_var)
        if obv == 'gmf':
            data_gmf, data_gmf_sigma = Data.data_gmf(**data_dict)
            fake_obs.append(data_gmf)
            # BUGFIX: previously appended data_gmf (the data vector itself)
            # instead of its uncertainty
            fake_obs_cov.append(data_gmf_sigma)
        if obv == 'xi':
            # import xir and full covariance matrix of xir
            data_xi, data_xi_cov = Data.data_xi_full_cov(**data_dict)
            data_xi_invcov = Data.data_xi_inv_cov(**data_dict)
            fake_obs.append(data_xi)
            fake_obs_cov.append(data_xi_invcov)

    # True HOD parameters
    data_hod_dict = Data.data_hod_param(Mr=data_dict['Mr'])
    data_hod = np.array([
        data_hod_dict['logM0'],                 # log M0
        np.log(data_hod_dict['sigma_logM']),    # log(sigma)
        data_hod_dict['logMmin'],               # log Mmin
        data_hod_dict['alpha'],                 # alpha
        data_hod_dict['logM1']                  # log M1
        ])
    Ndim = len(data_hod)

    # Priors
    prior_min, prior_max = PriorRange(prior_name)
    prior_range = np.zeros((len(prior_min), 2))
    prior_range[:, 0] = prior_min
    prior_range[:, 1] = prior_max

    # Initializing Walkers: small Gaussian ball around a hand-picked guess
    random_guess = np.array([11., np.log(.4), 11.5, 1.0, 13.5])
    pos0 = np.repeat(random_guess, Nwalkers).reshape(Ndim, Nwalkers).T + \
        1e-1 * np.random.randn(Ndim * Nwalkers).reshape(Nwalkers, Ndim)

    # Initializing the emcee sampler
    hod_kwargs = {
        'prior_range': prior_range,
        'data': fake_obs,
        'data_cov': fake_obs_cov,
        'observables': observables,
        'Mr': data_dict['Mr']
        }

    # Set up the interface to the ipcluster.
    c = Client()
    view = c[:]
    view.push({"lnPost": lnPost})
    # Modules necessary in posterior calculation should be called here
    view.execute("import numpy as np")
    view.execute("from hod_sim import HODsimulator")

    # Setting up the Sampler
    sampler = emcee.EnsembleSampler(Nwalkers, Ndim, lnPost,
                                    kwargs=hod_kwargs, pool=view)

    # Setting up a file for saving the chains
    chain_file = ''.join([util.dat_dir(),
                          util.observable_id_flag(observables),
                          '_Mr', str(data_dict["Mr"]),
                          '_theta.mcmc_chain.dat'])
    f = open(chain_file, "w")
    f.close()

    # Running the Sampler and writing out the chains
    for result in sampler.sample(pos0,
                                 iterations=Nchains_burn + Nchains_pro,
                                 storechain=False):
        position = result[0]
        f = open(chain_file, "a")
        for k in range(position.shape[0]):
            output_str = '\t'.join(position[k].astype('str')) + '\n'
            f.write(output_str)
        f.close()