def test_sample(self):
    """Sample two PMC iterations with a constant distance and check each pool.

    The distance function always returns 1.0 and the threshold is fixed at
    10; the test expects every yielded pool to report a ratio of 1.0 and to
    carry one theta, one distance and one weight per particle.
    """
    n_particles = 10
    n_iterations = 2
    threshold = 10
    fixed_dist = 1.0

    def simulate(theta):
        return None

    def distance(X, Y):
        return fixed_dist

    tophat = abcpmc.TophatPrior([0], [100])
    sampler = abcpmc.Sampler(n_particles, 0, simulate, distance)
    schedule = abcpmc.ConstEps(n_iterations, threshold)

    for step, pool in enumerate(sampler.sample(tophat, schedule)):
        assert pool is not None

        # Bookkeeping fields of the pool.
        assert pool.t == step
        assert pool.ratio == 1.0
        assert pool.eps == threshold

        # Per-particle arrays.
        assert len(pool.thetas) == n_particles
        assert np.all(pool.thetas != 0.0)
        assert len(pool.dists) == n_particles
        assert np.all(pool.dists == fixed_dist)
        assert len(pool.ws) == n_particles
        assert np.allclose(np.sum(pool.ws), 1.0)

    # The generator must have produced exactly `n_iterations` pools.
    assert step + 1 == n_iterations
def sample(T, eps_val, eps_min):
    """Run ABC-PMC with the OLCM proposal and an adaptive scalar threshold.

    Parameters
    ----------
    T : number of iterations handed to ``abcpmc.ConstEps``.
    eps_val : starting threshold.
    eps_min : floor below which the threshold is never lowered.

    Returns
    -------
    list of the pools yielded by the sampler, in iteration order.

    Relies on the module-level names ``data``, ``simz``, ``distance``,
    ``mpi_pool``, ``prior`` and ``plot_thetas``.
    """
    abcpmc_sampler = abcpmc.Sampler(
        N=1000, Y=data, postfn=simz, dist=distance, pool=mpi_pool)
    abcpmc_sampler.particle_proposal_cls = abcpmc.OLCMParticleProposal

    eps = abcpmc.ConstEps(T, eps_val)
    report = "T: {0}, eps: {1:>.4f}, ratio: {2:>.4f}"

    pools = []
    for pool in abcpmc_sampler.sample(prior, eps):
        print(report.format(pool.t, eps(pool.t), pool.ratio))
        plot_thetas(pool.thetas, pool.ws, pool.t)

        # Median of the distances for the first five iterations,
        # 75th percentile afterwards.
        quantile = 50 if pool.t < 5 else 75
        tolerance = np.percentile(pool.dists, quantile)
        if tolerance < eps_min:
            tolerance = eps_min
        eps.eps = tolerance

        pools.append(pool)

    # The sampler is intentionally left open here (close() was disabled).
    return pools
def sample(T, eps_val, eps_min):
    """Run ABC-PMC with a per-distance threshold that follows the median.

    Parameters
    ----------
    T : number of iterations handed to ``abcpmc.MultiConstEps``.
    eps_val : unused; the starting thresholds are hard-coded to
        ``[1.e6, 1.e6]``.
    eps_min : unused; the lower clamp on the thresholds is disabled.

    Returns
    -------
    list of the pools yielded by the sampler, in iteration order.

    Relies on the module-level names ``data``, ``simz``, ``distance``,
    ``mpi_pool``, ``prior`` and ``plot_thetas``.
    """
    abcpmc_sampler = abcpmc.Sampler(
        N=100, Y=data, postfn=simz, dist=distance, pool=mpi_pool)
    abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal
    eps = abcpmc.MultiConstEps(T, [1.e6, 1.e6])

    pools = []
    for pool in abcpmc_sampler.sample(prior, eps):
        print("T: {0}, ratio: {1:>.4f}".format(pool.t, pool.ratio))
        # Single-argument print() is valid on both Python 2 and Python 3.
        print(eps(pool.t))
        plot_thetas(pool.thetas, pool.ws, pool.t)

        # Next thresholds: column-wise median of the accepted distances.
        # (The original had an if/else on pool.t whose two branches were
        # identical, so a single assignment is equivalent.)
        eps.eps = np.median(np.atleast_2d(pool.dists), axis=0)

        pools.append(pool)

    # The sampler is intentionally left open here (close() was disabled).
    return pools
def abc(pewl, name=None, niter=None, npart=None, restart=None):
    """Run ABC-PMC inference and write every pool to ``abc_dir``.

    Parameters
    ----------
    pewl : pool object handed to ``abcpmc.Sampler`` as ``pool``.
    name : unused.
    niter : number of iterations handed to ``abcpmc.ConstEps``.
    npart : number of particles; overwritten by the number of rows in the
        restart theta file when ``restart`` is given.
    restart : iteration index to restart from, or None to start fresh. When
        given, ``theta.t<restart>.dat``, ``rho.t<restart>.dat`` and
        ``w.t<restart>.dat`` are read from ``abc_dir``.

    Returns
    -------
    None

    Relies on the module-level names ``abc_dir``, ``prior_min``,
    ``prior_max``, ``x_obs``, ``eps0``, ``dem``, ``dustInfer``,
    ``_sumstat_model_wrap`` and ``_distance_metric_wrap``.
    """
    if restart is not None:
        # Rebuild the pool of iteration `restart` from the files on disk.
        theta_init = np.loadtxt(
            os.path.join(abc_dir, 'theta.t%i.dat' % restart))
        rho_init = np.loadtxt(
            os.path.join(abc_dir, 'rho.t%i.dat' % restart))
        w_init = np.loadtxt(
            os.path.join(abc_dir, 'w.t%i.dat' % restart))
        init_pool = abcpmc.PoolSpec(restart, None, None, theta_init, rho_init, w_init)
        npart = len(theta_init)
        print('%i particles' % npart)
    else:
        init_pool = None

    # --- inference with ABC-PMC below ---
    prior = abcpmc.TophatPrior(prior_min, prior_max)

    abcpmc_sampler = abcpmc.Sampler(
        N=npart,                        # N_particles
        Y=x_obs,                        # data
        postfn=_sumstat_model_wrap,     # simulator
        dist=_distance_metric_wrap,     # distance metric
        pool=pewl,
        postfn_kwargs={'dem': dem}
    )

    # threshold
    eps = abcpmc.ConstEps(niter, eps0)
    print('eps0', eps.eps)

    try:
        for pool in abcpmc_sampler.sample(prior, eps, pool=init_pool):
            # np.atleast_1d lets a scalar threshold be formatted as well as
            # a vector one (the median update below yields a scalar when
            # pool.dists is one-dimensional).
            eps_str = ", ".join(
                ["{0:>.4f}".format(e) for e in np.atleast_1d(pool.eps)])
            print("T: {0}, eps: [{1}], ratio: {2:>.4f}".format(pool.t, eps_str, pool.ratio))

            for i, (mean, std) in enumerate(zip(*abcpmc.weighted_avg_and_std(pool.thetas, pool.ws, axis=0))):
                print(u" theta[{0}]: {1:>.4f} \u00B1 {2:>.4f}".format(i, mean,std))
            print('dist', pool.dists)

            # write out theta, weights, and distances to file
            dustInfer.writeABC('eps', pool, abc_dir=abc_dir)
            dustInfer.writeABC('theta', pool, abc_dir=abc_dir)
            dustInfer.writeABC('w', pool, abc_dir=abc_dir)
            dustInfer.writeABC('rho', pool, abc_dir=abc_dir)

            # update epsilon based on median thresholding
            eps.eps = np.median(pool.dists, axis=0)
            print('eps%i' % pool.t, eps.eps)
            print('----------------------------------------')
    finally:
        # Close the sampler even when an iteration raises, so the pool is
        # not left behind on error.
        abcpmc_sampler.close()
    return None
def test_new_particle(self):
    """Draw one particle by rejection sampling when the distance is below eps.

    Prior, simulator and distance are constants (1, 1 and 0.5); with a
    threshold of 1 the test expects the wrapper to return those values
    after a single attempt.
    """
    threshold = 1
    expected_theta = 1
    expected_dist = 0.5

    def draw_prior():
        return 1

    def simulate(theta):
        return expected_theta

    def distance(x, y):
        return expected_dist

    sampler = abcpmc.Sampler(2, None, simulate, distance)
    wrapper = abcpmc.sampler._RejectionSamplingWrapper(
        sampler, threshold, draw_prior)

    theta, dist_value, attempts = wrapper(0)

    assert expected_theta == theta
    assert expected_dist == dist_value
    assert attempts == 1
def test_sample(self):
    """Check that sampling yields T pools holding N particles each."""
    n_particles = 10
    n_iterations = 2

    def simulate(theta):
        return None

    def distance(X, Y):
        return 0

    tophat = abcpmc.TophatPrior([0], [100])
    sampler = abcpmc.Sampler(n_particles, 0, simulate, distance)
    schedule = abcpmc.ConstEps(n_iterations, 10)

    for step, pool in enumerate(sampler.sample(tophat, schedule)):
        assert pool is not None
        assert len(pool.thetas) == n_particles

    # The generator must have produced exactly `n_iterations` pools.
    assert step + 1 == n_iterations
def test_new_particle_multidist(self):
    """Rejection sampling with two distances and a per-distance threshold.

    The mocked distance function returns four pairs in turn; the test
    expects the wrapper to need all four calls before returning.
    """
    eps = 1.
    threshold = [eps, eps]
    expected_theta = 1

    # Successive return values of the mocked distance function.
    distances = [
        [eps * 2, eps * 2],
        [eps / 2, eps * 2],
        [eps * 2, eps / 2],
        [eps, eps],
    ]
    mocked_dist = Mock(side_effect=distances)

    def draw_prior():
        return 1

    def simulate(theta):
        return expected_theta

    sampler = abcpmc.Sampler(2, None, simulate, mocked_dist)
    wrapper = abcpmc.sampler._RejectionSamplingWrapper(
        sampler, threshold, draw_prior)

    result = wrapper(0)
    attempts = result[2]
    assert len(result) == 3
    assert attempts == len(distances)
def test_propose(self):
    """KNN proposal: `_get_sigma(thetas[0], 2)` is expected to equal 0.

    The pool holds thetas [[1], [1], [2]]; the first two entries are
    identical.
    """
    threshold = 1
    const_theta = 1
    const_dist = 0.5

    def simulate(theta):
        return const_theta

    def distance(x, y):
        return const_dist

    sampler = abcpmc.Sampler(1, None, simulate, distance)

    thetas = np.array([[1], [1], [2]])
    pool = abcpmc.sampler.PoolSpec(
        1,
        threshold,
        1,
        thetas,
        np.array([1, 1, 2]),    # dists
        np.array([1, 1, 1]),    # weights
    )
    proposal = abcpmc.sampler.KNNParticleProposal(sampler, threshold, pool, {})

    expected_sigma = 0
    assert proposal._get_sigma(thetas[0], 2) == expected_sigma
def sample(T, eps_val, eps_min):
    """Run ABC-PMC on a five-parameter top-hat prior and dump each pool.

    Parameters
    ----------
    T : number of iterations handed to ``abcpmc.ConstEps``.
    eps_val : unused; the starting thresholds are hard-coded to
        ``[1.e13, 1.e13]``.
    eps_min : unused; the lower clamp on the thresholds is disabled.

    Returns
    -------
    list of the pools yielded by the sampler, in iteration order.

    Side effects: after every iteration the thetas and weights of the pool
    are written below ``/home/mj/public_html/``.

    Relies on the module-level names ``data``, ``simz``, ``distance``,
    ``mpi_pool`` and ``plot_thetas``.
    """
    prior = abcpmc.TophatPrior([10., np.log(.1), 11.02, .8, 13.],
                               [13., np.log(.7), 13.02, 1.3, 14.])
    abcpmc_sampler = abcpmc.Sampler(
        N=1000, Y=data, postfn=simz, dist=distance, pool=mpi_pool)
    # The proposal class is set once; the original re-assigned the same
    # class inside the loop, which had no effect.
    abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal
    eps = abcpmc.ConstEps(T, [1.e13, 1.e13])

    pools = []
    for pool in abcpmc_sampler.sample(prior, eps):
        print("T:{0},ratio: {1:>.4f}".format(pool.t, pool.ratio))
        # Single-argument print() is valid on both Python 2 and Python 3.
        print(eps(pool.t))
        plot_thetas(pool.thetas, pool.ws, pool.t)

        # The original referenced `t`, `theta` and `w`, none of which is
        # defined in this function; the values being saved are the current
        # pool's iteration index, thetas and weights.
        np.savetxt(
            "/home/mj/public_html/nbar_gmf5_Mr20_theta_t" + str(pool.t) + ".dat",
            pool.thetas)
        np.savetxt(
            "/home/mj/public_html/nbar_gmf5_Mr20_w_t" + str(pool.t) + ".dat",
            pool.ws)

        # Loosen the percentile used for the next threshold as the run
        # progresses: 50 for t < 3, 75 for 3 <= t < 20, 90 afterwards.
        if pool.t < 3:
            quantile = 50
        elif pool.t < 20:
            quantile = 75
        else:
            quantile = 90
        eps.eps = np.percentile(np.atleast_2d(pool.dists), quantile, axis=0)

        pools.append(pool)

    # The sampler is intentionally left open here (close() was disabled).
    return pools
def test_propose_multidist(self):
    """Particle proposal with two distances and a per-distance threshold.

    The mocked distance function returns four pairs in turn; the test
    expects the proposal to need all four calls before returning.
    """
    eps = 1.
    threshold = [eps, eps]
    const_theta = 1

    # Successive return values of the mocked distance function.
    distances = [
        [eps * 2, eps * 2],
        [eps / 2, eps * 2],
        [eps * 2, eps / 2],
        [eps, eps],
    ]
    mocked_dist = Mock(side_effect=distances)

    def simulate(theta):
        return const_theta

    sampler = abcpmc.Sampler(2, None, simulate, mocked_dist)

    previous_pool = abcpmc.sampler.PoolSpec(
        1,
        threshold,
        1,
        np.array([[0.5], [1]]),     # thetas
        None,                       # dists
        np.array([0.75, 0.25]),     # weights
    )
    proposal = abcpmc.sampler.ParticleProposal(
        sampler, threshold, previous_pool, {})

    result = proposal(0)
    attempts = result[2]
    assert len(result) == 3
    assert attempts == len(distances)
def test_propose(self):
    """ParticleProposal: check sigma and one proposed particle.

    With thetas [[0.5], [1]] and weights [0.75, 0.25] the test expects
    `_get_sigma` to return 0.25, and a proposed theta within five sigma of
    1 that is accepted on the first attempt.
    """
    threshold = 1
    const_theta = 1
    const_dist = 0.5

    def simulate(theta):
        return const_theta

    def distance(x, y):
        return const_dist

    sampler = abcpmc.Sampler(2, None, simulate, distance)

    previous_pool = abcpmc.sampler.PoolSpec(
        1,
        threshold,
        1,
        np.array([[0.5], [1]]),     # thetas
        None,                       # dists
        np.array([0.75, 0.25]),     # weights
    )
    proposal = abcpmc.sampler.ParticleProposal(
        sampler, threshold, previous_pool, {})

    expected_sigma = 0.25
    assert proposal._get_sigma(None) == expected_sigma

    new_theta, new_dist, attempts = proposal(0)
    lower = const_theta - 5 * expected_sigma
    upper = const_theta + 5 * expected_sigma
    assert new_theta > lower and new_theta < upper
    assert const_dist == new_dist
    assert attempts == 1
def test_get_sigma_multidist(self):
    """OLCM proposal sigma with two distances per particle.

    The expected value is the variance of the first theta alone
    (``np.var(thetas[:1])``).
    """
    eps = 1.
    threshold = [eps, eps]
    const_theta = 1
    const_dist = 0.5

    def simulate(theta):
        return const_theta

    def distance(x, y):
        return const_dist

    sampler = abcpmc.Sampler(1, None, simulate, distance)

    # One row of two distances per particle.
    dists = np.array([
        [eps / 2, eps / 2],
        [eps / 2, eps / 2],
        [eps * 2, eps * 2],
        [eps / 2, eps * 2],
        [eps * 2, eps / 2],
    ])
    thetas = np.array([[1], [1], [2], [2], [2]])
    weights = np.array([1] * len(dists))

    previous_pool = abcpmc.sampler.PoolSpec(
        1, threshold, 1, thetas, dists, weights)
    proposal = abcpmc.sampler.OLCMParticleProposal(
        sampler, threshold, previous_pool, {})

    expected_sigma = np.var(thetas[:1])
    assert proposal._get_sigma(thetas[0]) == expected_sigma
if __name__ == '__main__':
    import time

    # Diagnostic kept for reference: variability of the distances at the
    # correct parameters.
    #distances = [MSE(data, generate_data_quick([theta1_fid,theta2_fid])) for _ in range(1000)]
    #sns.distplot(distances, axlabel="distances", )
    #plt.title("Variablility of distance from simulations")
    #plt.savefig('./Figures/multivariate_gaussian/variability_of_distances.png')
    #plt.show()
    #plt.close()
    # Shows variability is between roughly 1 sigma upper/lower bounds: 2.5 and 3.5

    threads = 10

    # Sampler with 5000 particles.
    # The sampler HAS to be created in the __main__ thread, otherwise
    # multiprocessing does not work (and it may still fail even then).
    sampler = abcpmc.Sampler(
        N=5000,
        Y=data,
        postfn=generate_data_quick,
        dist=summary_statistic_distance,
        threads=threads,
    )

    # Optional: customize the proposal creation.
    # Here: Optimal Local Covariance Matrix kernel (Filippi et al. 2012).
    sampler.particle_proposal_cls = abcpmc.OLCMParticleProposal

    # Time the whole run.
    t0 = time.time()
    pools = launch(threads)
    elapsed = time.time() - t0
    print("took %.2f seconds" % elapsed)

    postprocessing(pools)
def ABC(T, eps_input, Npart=1000, cen_tf=None, cen_prior_name=None, cen_abcrun=None):
    ''' ABC-PMC implementation for the satellite quenching-delay model.

    Parameters
    ----------
    T : (int)
        Number of iterations

    eps_input : (float)
        Starting epsilon threshold value

    Npart : (int)
        Number of particles

    cen_tf :
        Passed through to ABCInherit and AssignCenSFR.

    cen_prior_name : (string)
        String that specifies what prior to use. Must be a string: it is
        joined into the cache file name.

    cen_abcrun : (string)
        String that specifies abc run information. Must be a string: it is
        joined into the cache and output file names.

    Returns
    -------
    list of the pools yielded by the sampler, in iteration order.

    Side effects: per-iteration theta, weight and distance files plus an
    epsilon log are written below ``code_dir() + 'dat/pmc_abc/'``; the
    central-assigned satellite catalog is cached as a pickle.
    '''
    abcinh = ABCInherit(cen_tf, abcrun=cen_abcrun, prior_name=cen_prior_name)

    # Data (Group Catalog Satellite fQ)
    grpcat = GroupCat(Mrcut=18, position='satellite')
    grpcat.Read()
    qfrac = Fq()
    m_bin = np.array([9.7, 10.1, 10.5, 10.9, 11.3])
    M_mid = 0.5 * (m_bin[:-1] + m_bin[1:])

    sfq = qfrac.Classify(grpcat.mass, grpcat.sfr, np.median(grpcat.z),
                         sfms_prop=abcinh.sim_kwargs['sfr_prop']['sfms'])
    ngal, _ = np.histogram(grpcat.mass, bins=m_bin)
    ngal_q, _ = np.histogram(grpcat.mass[sfq == 'quiescent'], bins=m_bin)
    # Summary statistic: quiescent fraction per mass bin.
    data_sum = [M_mid, ngal_q.astype('float') / ngal.astype('float')]

    # Simulator
    cen_assigned_sat_file = ''.join([
        '/data1/hahn/pmc_abc/pickle/', 'satellite', '.cenassign', '.',
        cen_abcrun, '_ABC', '.', cen_prior_name, '_prior', '.p'
    ])

    # Build the catalog once and cache it; later runs load the cache.
    # `with` closes the file handles the original left open.
    # NOTE(review): pickle.load must only be used on trusted files.
    if not os.path.isfile(cen_assigned_sat_file):
        sat_cen = AssignCenSFR(cen_tf, abcrun=cen_abcrun, prior_name=cen_prior_name)
        with open(cen_assigned_sat_file, 'wb') as cache:
            pickle.dump(sat_cen, cache)
    else:
        with open(cen_assigned_sat_file, 'rb') as cache:
            sat_cen = pickle.load(cache)

    def Simz(tt):
        # Simulator (forward model): tt = (slope, offset) of the delay.
        tqdel_dict = {'name': 'explin', 'm': tt[0], 'b': tt[1]}
        sat_evol = EvolveSatSFR(sat_cen, tqdelay_dict=tqdel_dict)

        sfq_sim = qfrac.Classify(sat_evol.mass, sat_evol.sfr, sat_evol.zsnap,
                                 sfms_prop=sat_evol.sfms_prop)
        ngal_sim, _ = np.histogram(sat_evol.mass, bins=m_bin)
        ngal_q_sim, _ = np.histogram(sat_evol.mass[sfq_sim == 'quiescent'], bins=m_bin)
        sim_sum = [
            M_mid, ngal_q_sim.astype('float') / ngal_sim.astype('float')
        ]
        return sim_sum

    # Priors
    prior_min = [-11.75, 2.]
    prior_max = [-10.25, 4.]
    prior = abcpmc.TophatPrior(prior_min, prior_max)    # ABCPMC prior object

    def rho(simum, datum):
        # Sum of squared differences between the quiescent fractions.
        datum_dist = datum[1]
        simum_dist = simum[1]
        drho = np.sum((datum_dist - simum_dist)**2)
        return drho

    abcrun_flag = cen_abcrun + '_central'

    def _pool_file(kind, pewl):
        # Output file for one quantity ('theta', 'w' or 'dist') of pool `pewl`.
        return ''.join([
            code_dir(), 'dat/pmc_abc/', 'Satellite.tQdelay.', kind, '_t',
            str(pewl), '_', abcrun_flag, '.dat'
        ])

    theta_file = lambda pewl: _pool_file('theta', pewl)
    w_file = lambda pewl: _pool_file('w', pewl)
    dist_file = lambda pewl: _pool_file('dist', pewl)
    eps_file = ''.join([
        code_dir(), 'dat/pmc_abc/Satellite.tQdelay.epsilon_', abcrun_flag,
        '.dat'
    ])

    eps = abcpmc.ConstEps(T, eps_input)
    try:
        mpi_pool = mpi_util.MpiPool()
        abcpmc_sampler = abcpmc.Sampler(
            N=Npart,        # N_particles
            Y=data_sum,     # data
            postfn=Simz,    # simulator
            dist=rho,       # distance function
            pool=mpi_pool)
    except AttributeError:
        # Fall back to a sampler without an external pool.
        abcpmc_sampler = abcpmc.Sampler(
            N=Npart,        # N_particles
            Y=data_sum,     # data
            postfn=Simz,    # simulator
            dist=rho)       # distance function
    abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal

    pools = []
    # Truncate (or create) the epsilon log.
    with open(eps_file, "w"):
        pass
    eps_str = ''
    for pool in abcpmc_sampler.sample(prior, eps, pool=None):
        # print() calls below are written so that they produce the same
        # output on Python 2 and Python 3.
        print('----------------------------------------')
        print('eps  {0}'.format(pool.eps))
        new_eps_str = '\t' + str(pool.eps) + '\n'
        if eps_str != new_eps_str:  # if eps is different, open file and append
            eps_str = new_eps_str
            with open(eps_file, "a") as f:
                f.write(eps_str)

        print("T:{0},ratio: {1:>.4f}".format(pool.t, pool.ratio))
        print(eps(pool.t))

        # write theta, weights, and distances to file
        np.savetxt(theta_file(pool.t), pool.thetas,
                   header='tQdelay_slope, tQdelay_offset')
        np.savetxt(w_file(pool.t), pool.ws)
        np.savetxt(dist_file(pool.t), pool.dists)

        # update epsilon based on median thresholding
        eps.eps = np.median(pool.dists)
        pools.append(pool)

    return pools
# threads > 1 gives a Broken pipe error for an unknown reason.
threads = 1

# Diagnostic kept for reference: variability of the distances at the
# correct parameters.
# distances = [std(data, create_new_sample(means)) for _ in range(1000)]
# sns.distplot(distances, axlabel="distances", )
# plt.title("Variablility of distance from simulations")
# plt.savefig('./Figures/abcpmc/variability_of_distances.png')
# plt.show()
# plt.close()
# time.sleep(5)

# Sampler with 1000 particles.
# The sampler HAS to be created in the __main__ thread, otherwise
# multiprocessing does not work.
sampler = abcpmc.Sampler(
    N=1000,
    Y=data,
    postfn=create_new_sample,
    dist=dist_measure,
    threads=threads,
)

# Optional: customize the proposal creation.
# Here: Optimal Local Covariance Matrix kernel (Filippi et al. 2012).
sampler.particle_proposal_cls = abcpmc.OLCMParticleProposal
# sampler = create_sampler(threads)

# Time the whole run.
t0 = time.time()
pools = launch(threads)
elapsed = time.time() - t0
print("took %.2f seconds" % elapsed)

postprocessing(pools)
if __name__ == '__main__':
    import time

    # Diagnostic kept for reference: variability of the distances at the
    # correct parameters.
    # distances = [MSE(data, generate_data_quick([theta1_fid,theta2_fid])) for _ in range(5000)]
    # sns.distplot(distances, axlabel="distances", )
    # plt.title("Variablility of distance from simulations")
    # plt.savefig('./Figures/multivariate_gaussian/variability_of_distances.png')
    # plt.show()
    # plt.close()
    # Shows variability is between roughly 1 sigma upper/lower bounds: 2.5 and 3.5

    threads = 10

    # Sampler with 5000 particles.
    # The sampler HAS to be created in the __main__ thread, otherwise
    # multiprocessing does not work (and it may still fail even then).
    sampler = abcpmc.Sampler(
        N=5000,
        Y=data,
        postfn=generate_data_quick,
        dist=std,
        threads=threads,
    )

    # Optional: customize the proposal creation.
    # Here: Optimal Local Covariance Matrix kernel (Filippi et al. 2012).
    sampler.particle_proposal_cls = abcpmc.OLCMParticleProposal

    # Time the whole run.
    t0 = time.time()
    pools = launch(threads)
    elapsed = time.time() - t0
    print("took %.2f seconds" % elapsed)

    postprocessing(pools)
def main_abcpmc_MUSIC2(conf, test=False):
    """Set up an abcpmc sampler from a config object and launch it.

    `conf` is read both by item access (``conf['section']['key']``) and via
    ``getint`` / ``getfloat``. Sections used: ``paths``, ``metrics``,
    ``flavor``, ``prior``, ``pmc``, ``mp`` and ``simulation``.

    Original author's notes: eps_start is actually important, as the next
    iteration will only start once the number of computed trials within
    these boundaries reaches Nw; in one case twice as many particles as Nw
    had to be drawn and computed. About the threads: 14-16 threads are
    fine, as more threads won't be fully used and just sit in the task
    queue.

    With ``test=True`` the function only evaluates the distance metrics
    between the simulated and the real survey and returns 0. It also
    returns 0 when the prior type is unknown; otherwise it returns None
    after `launch` finishes.
    """
    paths = conf['paths']

    # Loads the real data to compare with (and if necessary also test data)
    data = iom.unpickleObject(paths['surveyreal'])

    if test:
        dataMUSIC2 = iom.unpickleObject(paths['surveysim'])
        print(type(dataMUSIC2.Rmodel), paths['surveysim'])
        surmet.abcpmc_dist_severalMetrices(
            dataMUSIC2, data,
            metrics=json.loads(conf['metrics']['used']),
            delal=False,
            stochdrop=conf['flavor']['stochdrop'],
            phoenixdrop=conf['flavor']['phoenixdrop'],
            outpath='/data/')
        return 0

    # The abcpmc part starts: define thetas, i.e. the parameter values to
    # be inferred, and their priors.
    prior_type = conf['prior']['type']
    if prior_type == 'tophat':
        bounds = json.loads(conf['prior']['bounds'])
        prior = abcpmc.TophatPrior(bounds[0], bounds[1])
    elif prior_type == 'gaussian':
        means = json.loads(conf['prior']['means'])
        COV = json.loads(conf['prior']['covariance'])
        prior = abcpmc.GaussianPrior(mu=means, sigma=COV)
    else:
        print('inference_abcpmc::main_abcpmc_MUSIC2: prior %s is unknown!' % (conf['prior']['type']))
        return 0

    eps = abcpmc.ConstEps(conf.getint('pmc', 'T'),
                          json.loads(conf['metrics']['eps_startlimits']))

    n_walkers = conf.getint('pmc', 'Nw')
    if test:
        # NOTE(review): unreachable, because test=True returns above.
        # Kept so the dummy simulator/metric pair stays wired up.
        simulator = testrand
        metric = testmetric
    else:
        simulator = partial(music2run.main_ABC,
                            parfile=conf['simulation']['parfile'])
        metric = partial(surmet.abcpmc_dist_severalMetrices,
                         metrics=json.loads(conf['metrics']['used']),
                         outpath=conf['paths']['abcpmc'],
                         stochdrop=conf['flavor']['stochdrop'],
                         phoenixdrop=conf['flavor']['phoenixdrop'])
    sampler = abcpmc.Sampler(
        N=n_walkers,
        Y=data,
        postfn=simulator,
        dist=metric,
        threads=conf.getint('mp', 'Nthreads'),
        maxtasksperchild=conf.getint('mp', 'maxtasksperchild'))

    # Prepares the file for counting
    with open(conf['paths']['abcpmc'] + 'count.txt', 'w+') as counter_file:
        counter_file.write('0')

    sampler.particle_proposal_cls = abcpmc.OLCMParticleProposal

    # For comparison, the AstroABC equivalent would be:
    #   sampler = astroabc.ABC_class(Ndim,walkers,data,tlevels,niter,priors,**prop)
    #   sampler.sample(music2run.main_astroABC)

    # To resume from earlier pools:
    #   startfrom=iom.unpickleObject('/data/ClusterBuster-Output/MUSIC_NVSS02_Test01/launch_pools')
    #   pool = startfrom[-1]
    pool = None
    launch(sampler, prior, conf.getfloat('pmc', 'alpha'), eps,
           surveypath=conf['paths']['abcpmc'], pool=pool)
def FixedTauABC(T, eps_input, fixtau='satellite', Npart=1000, prior_name='try0',
        observables=['fqz_multi'], abcrun=None, restart=False, t_restart=None,
        eps_restart=None, **sim_kwargs):
    ''' Run ABC-PMC analysis for central galaxy SFH model with *FIXED*
    quenching timescale.

    Parameters
    ----------
    T : (int)
        Number of iterations

    eps_input : (float or list)
        Starting epsilon threshold value. A list must have one entry per
        observable; a float is only accepted with a single observable.

    fixtau : (string)
        Name of the fixed quenching timescale; also part of the output
        file names.

    Npart : (int)
        Number of particles

    prior_name : (string)
        String that specifies what prior to use.

    observables : (list of strings)
        Observables handed to DataSummary and SimSummary. The default list
        is never modified here.

    abcrun : (string)
        String that specifies abc run information

    restart : (bool)
        If True, restart from the pool files of iteration `t_restart`.

    t_restart : (int)
        Iteration to restart from; required when `restart` is True.

    eps_restart :
        Unused.

    **sim_kwargs :
        Passed through to MakeABCrun.

    Returns
    -------
    list of the pools yielded by the sampler, in iteration order.

    Raises
    ------
    ValueError
        If `eps_input` does not match `observables`, or if `restart` is
        set without `t_restart`.
    '''
    if isinstance(eps_input, list):
        if len(eps_input) != len(observables):
            raise ValueError(
                'eps_input has %i entries for %i observables'
                % (len(eps_input), len(observables)))
    if len(observables) > 1 and isinstance(eps_input, float):
        raise ValueError(
            'eps_input must be a list when there is more than one observable')

    # output abc run details
    sfinherit_kwargs, abcrun_flag = MakeABCrun(
        abcrun=abcrun, Niter=T, Npart=Npart, prior_name=prior_name,
        eps_val=eps_input, restart=restart, **sim_kwargs)

    # Data
    data_sum = DataSummary(observables=observables)

    # Priors
    prior_min, prior_max = PriorRange(prior_name)
    prior = abcpmc.TophatPrior(prior_min, prior_max)    # ABCPMC prior object

    def Simz(tt):
        # Simulator (forward model)
        gv_slope = tt[0]
        gv_offset = tt[1]
        fudge_slope = tt[2]
        fudge_offset = tt[3]

        # Shallow copy: the nested 'sfr_prop' / 'evol_prop' dicts are shared
        # with sfinherit_kwargs, and the three keys below are overwritten in
        # full on every call.
        sim_kwargs = sfinherit_kwargs.copy()
        sim_kwargs['sfr_prop']['gv'] = {'slope': gv_slope, 'fidmass': 10.5, 'offset': gv_offset}
        sim_kwargs['evol_prop']['fudge'] = {'slope': fudge_slope, 'fidmass': 10.5, 'offset': fudge_offset}
        sim_kwargs['evol_prop']['tau'] = {'name': fixtau}

        sim_output = SimSummary(observables=observables, **sim_kwargs)
        return sim_output

    def _pool_file(kind, pewl):
        # Output file for one quantity ('theta', 'w' or 'dist') of pool `pewl`.
        return ''.join([code_dir(),
            'dat/pmc_abc/', 'CenQue_', kind, '_t', str(pewl), '_', abcrun_flag,
            '.fixedtau.', fixtau, '.dat'])

    theta_file = lambda pewl: _pool_file('theta', pewl)
    w_file = lambda pewl: _pool_file('w', pewl)
    dist_file = lambda pewl: _pool_file('dist', pewl)
    eps_file = ''.join([code_dir(),
        'dat/pmc_abc/epsilon_', abcrun_flag, '.fixedtau.', fixtau, '.dat'])

    distfn = RhoFq

    if restart:
        if t_restart is None:
            raise ValueError('t_restart is required when restart is set')
        last_thetas = np.loadtxt(theta_file(t_restart))
        last_ws = np.loadtxt(w_file(t_restart))
        last_dist = np.loadtxt(dist_file(t_restart))

        init_pool = abcpmc.PoolSpec(t_restart, None, None, last_thetas, last_dist, last_ws)
    else:
        init_pool = None

    eps = abcpmc.ConstEps(T, eps_input)
    try:
        mpi_pool = mpi_util.MpiPool()
        abcpmc_sampler = abcpmc.Sampler(
            N=Npart,        # N_particles
            Y=data_sum,     # data
            postfn=Simz,    # simulator
            dist=distfn,    # distance function
            pool=mpi_pool)
    except AttributeError:
        # Fall back to a sampler without an external pool.
        abcpmc_sampler = abcpmc.Sampler(
            N=Npart,        # N_particles
            Y=data_sum,     # data
            postfn=Simz,    # simulator
            dist=distfn)    # distance function
    abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal

    pools = []
    if init_pool is None:
        # Fresh run: truncate (or create) the epsilon log. On restart the
        # existing log is appended to instead.
        with open(eps_file, "w"):
            pass
    eps_str = ''
    for pool in abcpmc_sampler.sample(prior, eps, pool=init_pool):
        # print() calls below are written so that they produce the same
        # output on Python 2 and Python 3.
        print('----------------------------------------')
        print('eps  {0}'.format(pool.eps))
        new_eps_str = str(pool.eps)+'\t'+str(pool.ratio)+'\n'
        if eps_str != new_eps_str:  # if eps is different, open file and append
            eps_str = new_eps_str
            with open(eps_file, "a") as f:
                f.write(eps_str)

        print("T:{0},ratio: {1:>.4f}".format(pool.t, pool.ratio))
        print(eps(pool.t))

        # write theta, weights, and distances to file
        np.savetxt(theta_file(pool.t), pool.thetas,
            header='gv_slope, gv_offset, fudge_slope, fudge_offset')
        np.savetxt(w_file(pool.t), pool.ws)
        np.savetxt(dist_file(pool.t), pool.dists)

        # update epsilon based on median thresholding
        if len(observables) == 1:
            eps.eps = np.median(pool.dists)
        else:
            # One median per distance column; computed once, then printed
            # and stored.
            median_dists = np.median(np.atleast_2d(pool.dists), axis = 0)
            print(median_dists)
            eps.eps = median_dists
        print('----------------------------------------')
        pools.append(pool)

    return pools
def config(self):
    """Read the ABC-PMC options and build threshold, prior and sampler.

    Reads the sampler options through ``self.read_ini``, loads the data via
    ``self.load_data()``, takes the prior limits and start values from
    ``self.pipeline.varied_params`` and stores the resulting
    ``abcpmc`` objects on ``self`` (``eps``, ``prior``, ``sampler``).
    Also publishes ``self.pipeline`` as the module-level ``abc_pipeline``.

    Raises
    ------
    ValueError
        If abcpmc is not installed, if ``set_prior`` is neither 'uniform'
        nor 'gaussian', or if the covariance file for the Gaussian prior
        does not have one column per varied parameter.
    """
    try:
        import abcpmc
    except ImportError:
        raise ValueError("To use ABC PMC you need to install it with pip install abcpmc")

    global abc_pipeline
    abc_pipeline = self.pipeline

    self.threshold = self.read_ini("threshold",str, 'LinearEps')
    self.metric_kw = self.read_ini("metric",str, 'chi2') #mean, chi2 or other
    if self.metric_kw =='other':
        # only for the 'other' metric; first and last character are dropped
        self.distance_func = self.read_ini("distance_func",str, None)
        self.metric = self.distance_func[1:-1]
    self.epimax = self.read_ini('epimax', float,5.0)
    self.epimin = self.read_ini('epimin',float, 1.0)
    self.part_prop = self.read_ini("particle_prop",str,'weighted_cov')
    self.set_prior = self.read_ini("set_prior",str,'uniform')
    self.param_cov = self.read_ini("param_cov_file",str,'None')
    self.knn = self.read_ini("num_nn",int, 10)
    self.npart = self.read_ini("npart",int,100)
    self.niter = self.read_ini("niter",int,2)
    self.ngauss = self.read_ini("ngauss",int,4)
    self.run_multigauss = self.read_ini("run_multigauss",bool,False)
    self.diag_cov = self.read_ini("diag_cov",bool,False)
    self.ndim = len(self.pipeline.varied_params)

    #options for decreasing threshold
    if self.threshold == 'ConstEps':
        self.eps = abcpmc.ConstEps(self.niter, self.epimax)
    elif self.threshold == 'ExpEps':
        self.eps = abcpmc.ExponentialEps(self.niter, self.epimax,self.epimin)
    else:
        self.eps = abcpmc.LinearEps(self.niter, self.epimax, self.epimin)

    print("\nRunning ABC PMC")
    print("with %d particles, %s prior, %s threshold, %d iterations over (%f,%f), %s kernal \n" % (self.npart,self.set_prior,self.threshold,self.niter,self.epimax,self.epimin,self.part_prop))

    #Initial positions for all of the parameters
    self.p0 = np.array([param.start for param in self.pipeline.varied_params])

    #Data file is read for use in dist() for each step
    #parameter covariance used in the prior
    self.data, self.cov, self.invcov = self.load_data()

    #At the moment the same prior (with variable hyperparameters) is
    # used for all parameters - would be nice to change this to be more flexible
    self.pmin = np.zeros(self.ndim)
    self.pmax = np.zeros(self.ndim)
    for i,pi in enumerate(self.pipeline.varied_params):
        self.pmin[i] = pi.limits[0]
        self.pmax[i] = pi.limits[1]

    if self.set_prior.lower() == 'uniform':
        self.prior = abcpmc.TophatPrior(self.pmin,self.pmax)
    elif self.set_prior.lower() == 'gaussian':
        sigma2 = np.atleast_2d(np.loadtxt(self.param_cov))
        ncols = sigma2.shape[1]
        if ncols != self.ndim:
            # Both values must be passed as one tuple: the original applied
            # `%` to the first value only, which raised TypeError ("not
            # enough arguments for format string") instead of this error.
            raise ValueError("Cov matrix for Gaussian prior has %d columns for %d params" % (ncols, self.ndim))
        self.prior = abcpmc.GaussianPrior(self.p0, sigma2)
    else:
        raise ValueError("Please set the ABC option 'set_prior' to either 'uniform' or 'gaussian'. At the moment only 'uniform' works in the general case.")

    #create sampler
    self.sampler = abcpmc.Sampler(N=self.npart, Y=self.data, postfn=abc_model, dist=self.dist)

    #set particle proposal kernal
    # NOTE(review): this assigns on the Sampler *class*, so it affects every
    # Sampler instance in the process - confirm that is intended.
    abcpmc.Sampler.particle_proposal_kwargs = {}
    if self.part_prop == 'KNN':
        abcpmc.Sampler.particle_proposal_kwargs = {'k':self.knn}
        self.sampler.particle_proposal_cls = abcpmc.KNNParticleProposal
    elif self.part_prop == 'OLCM':
        self.sampler.particle_proposal_cls = abcpmc.OLCMParticleProposal

    self.converged = False
def ABCpmc_HOD(T, eps_val, N_part=1000, prior_name='first_try',
               observables=['nbar', 'xi'], data_dict={'Mr': 21},
               output_dir=None):
    ''' ABC-PMC implementation for the HOD model.

    Parameters
    ----------
    - T : Number of iterations
    - eps_val : Starting thresholds handed to abcpmc.MultiConstEps
    - N_part : Number of particles
    - prior_name : Name handed to PriorRange to obtain the prior limits
    - observables : list of observables. Supported combinations are
      ['xi'], ['nbar', 'xi'] and ['nbar', 'gmf']. The default list is
      never modified here.
    - data_dict : dictionary that specifies the observation keywords. The
      default dict is never modified here.
    - output_dir : prefix for the theta/weight files; util.dat_dir() when
      None

    Returns
    -------
    list of the pools yielded by the sampler, in iteration order.

    Raises
    ------
    ValueError
        If `observables` is not one of the supported combinations, or (from
        the simulator) if the model returns None.

    Side effects: writes "abc_tolerance.dat" in the working directory and
    one theta and one weight file per iteration under `output_dir`.
    '''
    if output_dir is None:
        output_dir = util.dat_dir()

    # Initializing the vector of observables and the diagonal of the
    # covariance matrix
    if observables == ['xi']:
        fake_obs = Data.data_xi(**data_dict)
        fake_obs_cov = Data.data_cov(**data_dict)[1:16, 1:16]
        xi_Cii = np.diag(fake_obs_cov)
    elif observables == ['nbar', 'xi']:
        fake_obs = np.hstack(
            [Data.data_nbar(**data_dict), Data.data_xi(**data_dict)])
        fake_obs_cov = Data.data_cov(**data_dict)[:16, :16]
        Cii = np.diag(fake_obs_cov)
        xi_Cii = Cii[1:]
        nbar_Cii = Cii[0]
    elif observables == ['nbar', 'gmf']:
        fake_obs = np.hstack(
            [Data.data_nbar(**data_dict), Data.data_gmf(**data_dict)])
        fake_obs_cov = Data.data_cov('nbar_gmf', **data_dict)
        Cii = np.diag(fake_obs_cov)
        gmf_Cii = Cii[1:]
        nbar_Cii = Cii[0]
    else:
        # Fail here with a clear message instead of a NameError on
        # `fake_obs` further down.
        raise ValueError(
            "unsupported observables {0!r}; expected ['xi'], "
            "['nbar', 'xi'] or ['nbar', 'gmf']".format(observables))

    # True HOD parameters
    data_hod_dict = Data.data_hod_param(Mr=data_dict['Mr'])
    data_hod = np.array([
        data_hod_dict['logM0'],                 # log M0
        np.log(data_hod_dict['sigma_logM']),    # log(sigma)
        data_hod_dict['logMmin'],               # log Mmin
        data_hod_dict['alpha'],                 # alpha
        data_hod_dict['logM1']                  # log M1
    ])

    # Priors
    prior_min, prior_max = PriorRange(prior_name)
    prior = abcpmc.TophatPrior(prior_min, prior_max)
    prior_range = np.zeros((len(prior_min), 2))
    prior_range[:, 0] = prior_min
    prior_range[:, 1] = prior_max

    # simulator
    our_model = HODsim(Mr=data_dict['Mr'])  # initialize model
    kwargs = {'prior_range': prior_range, 'observables': observables}

    def simz(tt):
        sim = our_model.sum_stat(tt, **kwargs)
        if sim is None:
            # Dump the offending inputs for post-mortem before failing;
            # `with` makes sure both files are flushed and closed.
            with open("simz_crash_theta.p", 'wb') as dump:
                pickle.dump(tt, dump)
            with open('simz_crash_kwargs.p', 'wb') as dump:
                pickle.dump(kwargs, dump)
            raise ValueError('Simulator is giving NonetType')
        return sim

    def multivariate_rho(datum, model):
        # One variance-weighted squared distance per observable.
        dists = []
        if observables == ['nbar', 'xi']:
            dist_nbar = (datum[0] - model[0])**2. / nbar_Cii
            dist_xi = np.sum((datum[1:] - model[1:])**2. / xi_Cii)
            dists = [dist_nbar, dist_xi]
        elif observables == ['nbar', 'gmf']:
            dist_nbar = (datum[0] - model[0])**2. / nbar_Cii
            dist_gmf = np.sum((datum[1:] - model[1:])**2. / gmf_Cii)
            dists = [dist_nbar, dist_gmf]
        elif observables == ['xi']:
            dist_xi = np.sum((datum - model)**2. / xi_Cii)
            dists = [dist_xi]
        return np.array(dists)

    mpi_pool = mpi_util.MpiPool()
    abcpmc_sampler = abcpmc.Sampler(
        N=N_part,               # N_particles
        Y=fake_obs,             # data
        postfn=simz,            # simulator
        dist=multivariate_rho,  # distance function
        pool=mpi_pool)
    # The proposal class is set once; the original re-assigned the same
    # class inside the loop, which had no effect.
    abcpmc_sampler.particle_proposal_cls = abcpmc.ParticleProposal

    eps = abcpmc.MultiConstEps(T, eps_val)
    pools = []
    # Truncate (or create) the tolerance log.
    with open("abc_tolerance.dat", "w"):
        pass
    eps_str = ''
    for pool in abcpmc_sampler.sample(prior, eps):
        new_eps_str = '\t'.join(eps(pool.t).astype('str')) + '\n'
        if eps_str != new_eps_str:  # if eps is different, open file and append
            eps_str = new_eps_str
            with open("abc_tolerance.dat", "a") as f:
                f.write(eps_str)

        print("T:{0},ratio: {1:>.4f}".format(pool.t, pool.ratio))
        # Single-argument print() is valid on both Python 2 and Python 3.
        print(eps(pool.t))

        # plot theta
        plot_thetas(pool.thetas, pool.ws, pool.t,
                    Mr=data_dict["Mr"],
                    truths=data_hod,
                    plot_range=prior_range,
                    observables=observables,
                    output_dir=output_dir)

        if (pool.t < 4) and (pool.t > 2):
            # Hard-coded override of the t == 3 pool with a stored run.
            # NOTE(review): if PoolSpec is a namedtuple these attribute
            # assignments raise AttributeError, and rebinding would not
            # reach the sampler's own copy of the pool anyway - confirm
            # this block is still wanted.
            pool.thetas = np.loadtxt(
                "/home/mj/abc/halo/dat/gold/nbar_xi_Mr21_theta_t3.mercer.dat")
            pool.ws = np.loadtxt(
                "/home/mj/abc/halo/dat/gold/nbar_xi_Mr21_w_t3.mercer.dat")
            # This value is overwritten by the percentile update below
            # before it is ever read.
            eps.eps = [1.12132735353, 127.215586776]

        # write theta and w to file
        theta_file = ''.join([
            output_dir, util.observable_id_flag(observables), '_Mr',
            str(data_dict["Mr"]), '_theta_t', str(pool.t), '.mercer.dat'
        ])
        w_file = ''.join([
            output_dir, util.observable_id_flag(observables), '_Mr',
            str(data_dict["Mr"]), '_w_t', str(pool.t), '.mercer.dat'
        ])
        np.savetxt(theta_file, pool.thetas)
        np.savetxt(w_file, pool.ws)

        # Loosen the percentile used for the next thresholds as the run
        # progresses: 50 for t < 3, 75 for 3 <= t < 20, 90 afterwards.
        if pool.t < 3:
            quantile = 50
        elif pool.t < 20:
            quantile = 75
        else:
            quantile = 90
        eps.eps = np.percentile(np.atleast_2d(pool.dists), quantile, axis=0)

        pools.append(pool)

    # The sampler is intentionally left open here (close() was disabled).
    return pools
# Toy problem: recover the means of a 4-d Gaussian with ABC-PMC.
size = 5000
sigma = np.eye(4) * 0.25
means = np.array([1.1, 1.5, 1.1, 1.5])
data = np.random.multivariate_normal(means, sigma, size)
print(data)

#-------


def dist(x, y):
    """Distance: sum of the absolute differences of the column means."""
    mean_x = np.mean(x, axis=0)
    mean_y = np.mean(y, axis=0)
    return np.sum(np.abs(mean_x - mean_y))


def postfn(theta):
    """Model: `size` draws from a Gaussian with mean `theta`."""
    return np.random.multivariate_normal(theta, sigma, size)


eps = abcpmc.LinearEps(10, 5, 0.75)
# Prior: our best guess, slightly offset and wider than the truth.
prior = abcpmc.GaussianPrior(means * 1.1, sigma * 2)
sampler = abcpmc.Sampler(N=10, Y=data, postfn=postfn, dist=dist)

for pool in sampler.sample(prior, eps):
    print("T: {0}, eps: {1:>.4f}, ratio: {2:>.4f}".format(
        pool.t, pool.eps, pool.ratio))

    # Unweighted mean and standard deviation of each parameter.
    theta_means = np.mean(pool.thetas, axis=0)
    theta_stds = np.std(pool.thetas, axis=0)
    for i, (mean, std) in enumerate(zip(theta_means, theta_stds)):
        print(u" theta[{0}]: {1:>.4f} \u00B1 {2:>.4f}".format(i, mean, std))