# Imports inferred from the names used below; the configuration constants
# (vel_error, phase_space_info_mode, num_subsamples, nsteps_deterministic,
# nsteps_mcmc, nwalkers_mcmc, initial_disp_mcmc, nthreads_mcmc) and the helpers
# (ModelParams, sampleMissingData, deterministicSearchFnc, monteCarloSearchFnc)
# are defined elsewhere in the original script.
import sys
import numpy
import scipy.optimize
import matplotlib.pyplot
from matplotlib import pyplot
import emcee
import corner
import agama


class ModelSearcher:
    '''
    Class that encompasses the computation of likelihood for the given parameters,
    and implements model-searching algorithms (deterministic and MCMC)
    '''
    def __init__(self, filename):
        self.filename = filename
        self.model = ModelParams(filename)
        try:
            self.particles = numpy.loadtxt(filename)[:, 0:6]
        except Exception as ex:
            print(str(ex) + "\nYou need to run this script from a directory containing "
                "files from the Gaia Challenge spherical/triaxial mock data.")
            exit()
        if vel_error != 0:
            print("Assumed error of %g km/s in velocity" % vel_error)
        if phase_space_info_mode <= 5:
            self.particles[:, 2] *= numpy.nan     # remove z-coordinate
        if phase_space_info_mode <= 3:
            self.particles[:, 3:5] *= numpy.nan   # remove vx and vy
        if phase_space_info_mode != 6 or vel_error != 0:
            self.samples, self.weights = sampleMissingData(
                numpy.hstack((self.particles, numpy.ones((self.particles.shape[0], 3)) * vel_error)),
                num_subsamples)
        # check if we may restart the search from already existing parameters
        try:
            self.values = numpy.loadtxt(self.filename + ".best")
            if self.values.ndim == 1:   # only one set of parameters - this occurs after the deterministic search
                self.values = self.values[:-1]     # the last column is the likelihood, strip it
            else:                       # a number of MCMC walkers, each with its own set of parameters
                self.values = self.values[:, :-1]
            print("Loaded from saved file: (nwalkers,nparams)=%s" % str(self.values.shape))
        except Exception:   # no saved parameters yet - start the search from scratch
            self.values = None

    def modelLikelihood(self, params):
        '''
        Compute the likelihood of the model (DF+potential specified by scaled params)
        against the data (array of Nx6 position/velocity coordinates of tracer particles).
        This is the function to be maximized; if parameters are outside the allowed range,
        it returns -infinity
        '''
        prior = self.model.prior(params)
        if prior == -numpy.inf:
            print("Out of range")
            return prior
        try:
            # compute log-likelihood of the DF with the given params against an array of actions
            pot = self.model.createPotential(params)
            df  = self.model.createDF(params)
            if phase_space_info_mode == 6:   # actions of tracer particles
                if self.particles.shape[0] > 2000:
                    # create an action finder object for a faster evaluation
                    actions = agama.ActionFinder(pot)(self.particles)
                else:
                    actions = agama.actions(self.particles, pot)
                df_val = df(actions)   # values of DF for these actions
            else:
                # have full phase space info for resampled input particles (missing components are filled in)
                af = agama.ActionFinder(pot)
                actions = af(self.samples)   # actions of resampled tracer particles
                # compute values of DF for these actions, multiplied by sample weights
                df_val = df(actions) * self.weights
                # compute the weighted sum of likelihoods of all samples for a single particle,
                # replacing the improbable samples (with NaN as likelihood) with zeroes
                df_val = numpy.sum(numpy.nan_to_num(df_val.reshape(-1, num_subsamples)), axis=1)
            loglike = numpy.sum(numpy.log(df_val))
            if numpy.isnan(loglike):
                loglike = -numpy.inf
            loglike += prior
            print("LogL=%.8g" % loglike)
            return loglike
        except ValueError as err:
            print("Exception %s" % err)
            return -numpy.inf

    def deterministicSearch(self):
        '''
        Do a deterministic search to find the best-fit parameters of potential and
        distribution function. Perform several iterations of search, to avoid getting
        stuck in a local minimum, until the log-likelihood ceases to improve
        '''
        if self.values is None:       # just started
            self.values = self.model.initValues   # get the first guess from the model-scaling object
        elif self.values.ndim == 2:   # entire ensemble of values (after MCMC)
            self.values = self.values[0, :]       # leave only one set of values from the ensemble
        prevloglike = -deterministicSearchFnc(self.values, self)   # initial likelihood
        while True:
            print('Starting deterministic search')
            result = scipy.optimize.minimize(deterministicSearchFnc,
                self.values, args=(self,), method='Nelder-Mead',
                options=dict(maxfev=nsteps_deterministic, disp=True))
            self.values = result.x
            loglike = -result.fun
            print('result=', result.x, 'LogL=', loglike)
            # store the latest best-fit parameters and their likelihood
            numpy.savetxt(self.filename + '.best',
                numpy.hstack((self.values, loglike)).reshape(1, -1), fmt='%.8g')
            if loglike - prevloglike < 1.0:
                print('Converged')
                return
            else:
                print('Improved log-likelihood by', loglike - prevloglike)
            prevloglike = loglike

    def monteCarloSearch(self):
        '''
        Explore the parameter space around the best-fit values using the MCMC method
        '''
        if self.values.ndim == 1:
            # initial coverage of parameter space (dispersion around the current best-fit values)
            nparams  = len(self.values)
            ensemble = numpy.empty((nwalkers_mcmc, nparams))
            for i in range(nwalkers_mcmc):
                while True:   # ensure that we initialize walkers with feasible values
                    walker = self.values + (numpy.random.randn(nparams) * initial_disp_mcmc if i > 0 else 0)
                    prob   = monteCarloSearchFnc(walker, self)
                    if numpy.isfinite(prob):
                        ensemble[i, :] = walker
                        break
                    print('*', end='')
            self.values = ensemble
        else:
            # check that all walkers have finite likelihood
            prob = numpy.zeros((self.values.shape[0], 1))
            for i in range(self.values.shape[0]):
                prob[i, 0] = monteCarloSearchFnc(self.values[i, :], self)
                if not numpy.isfinite(prob[i, 0]):
                    print('Invalid parameters for %d-th walker (likelihood is bogus)' % i)
                else:
                    print(prob[i, 0])

        nwalkers, nparams = self.values.shape
        sampler = emcee.EnsembleSampler(nwalkers, nparams, monteCarloSearchFnc,
            args=(self,), threads=nthreads_mcmc)
        prevmaxloglike = None
        while True:   # run several passes until convergence
            print('Starting MCMC')
            sampler.run_mcmc(self.values, nsteps_mcmc)
            # restart the next pass from the latest values in the Markov chain
            self.values = sampler.chain[:, -1, :]
            # store the latest best-fit parameters and their likelihood,
            # and the entire chain for the last nsteps_mcmc steps
            numpy.savetxt(self.filename + '.best',
                numpy.hstack((self.values, sampler.lnprobability[:, -1].reshape(-1, 1))), fmt='%.8g')
            numpy.savetxt(self.filename + '.chain',
                numpy.hstack((sampler.chain[:, -nsteps_mcmc:].reshape(-1, nparams),
                sampler.lnprobability[:, -nsteps_mcmc:].reshape(-1, 1))), fmt='%.8g')
            print("Acceptance fraction:", numpy.mean(sampler.acceptance_fraction))   # should be in the range 0.2-0.5
            print("Autocorrelation time:", sampler.acor)   # should be considerably shorter than the total number of steps
            maxloglike = numpy.max (sampler.lnprobability[:, -nsteps_mcmc:])
            avgloglike = numpy.mean(sampler.lnprobability[:, -nsteps_mcmc:])   # avg log-likelihood during the pass
            avgparams  = numpy.array([numpy.mean(sampler.chain[:, -nsteps_mcmc:, i]) for i in range(nparams)])
            rmsparams  = numpy.array([numpy.std (sampler.chain[:, -nsteps_mcmc:, i]) for i in range(nparams)])
            print("Max log-likelihood= %.8g, avg log-likelihood= %.8g" % (maxloglike, avgloglike))
            for i in range(nparams):
                sorted_values = numpy.sort(sampler.chain[:, -nsteps_mcmc:, i], axis=None)
                print("Parameter %20s avg= %8.5g; one-sigma range = (%8.5f, %8.5f)"
                    % (self.model.labels[i], avgparams[i],
                    sorted_values[int(len(sorted_values) * 0.16)],
                    sorted_values[int(len(sorted_values) * 0.84)]))
            # plot the chain evolution and the posterior distribution + correlations between parameters
            self.plot(sampler.chain, sampler.lnprobability, self.model.labels)
            # check for convergence
            if prevmaxloglike is not None:
                if maxloglike - prevmaxloglike < 1.0 and \
                    abs(avgloglike - prevavgloglike) < 1.0 and \
                    numpy.all(avgparams - prevavgparams < 0.1) and \
                    numpy.all(rmsparams - prevrmsparams < 0.1):
                    print("Converged")
                    return
            prevmaxloglike = maxloglike
            prevavgloglike = avgloglike
            prevavgparams  = avgparams
            prevrmsparams  = rmsparams

    def plot(self, chain, loglike, labels):
        '''
        Show the time evolution of parameters carried by the ensemble of walkers
        (time = number of MC steps), and the posterior distribution of parameters
        for the last nsteps_mcmc steps only
        '''
        ndim = chain.shape[2]
        fig, axes = matplotlib.pyplot.subplots(ndim + 1, 1, sharex=True, figsize=(20, 15))
        for i in range(ndim):
            axes[i].plot(chain[:, :, i].T, color='k', alpha=0.5)
            axes[i].set_ylabel(self.model.labels[i])
        # last panel shows the evolution of log-likelihood for the ensemble of walkers
        axes[-1].plot(loglike.T, color='k', alpha=0.5)
        axes[-1].set_ylabel('log(L)')
        maxloglike = numpy.max(loglike)
        axes[-1].set_ylim(maxloglike - 3 * ndim, maxloglike)   # restrict the range of log-likelihood around its maximum
        fig.tight_layout(h_pad=0.)
        matplotlib.pyplot.savefig(self.filename + "_chain.png")
        try:
            corner.corner(chain[:, -nsteps_mcmc:].reshape((-1, chain.shape[2])),
                quantiles=[0.16, 0.5, 0.84], labels=labels)
            matplotlib.pyplot.savefig(self.filename + "_posterior.png")
        except ValueError as err:
            print("Can't plot posterior distribution:", err)

    def run(self):
        if self.values is None:   # first attempt a deterministic search to find the best-fit params
            self.deterministicSearch()
        self.monteCarloSearch()
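# Example usage (a minimal sketch, not part of the original excerpt: the driver code
# that instantiates ModelSearcher is not shown here, so this call sequence is an
# assumption based on the methods defined above):
#     searcher = ModelSearcher(sys.argv[1])   # data file name given on the command line
#     searcher.run()   # deterministic search first (when starting from scratch), then MCMC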
# (tail of the plot_profiles function; its beginning is not included in this excerpt,
# and its signature is inferred from the calls in the main program below)
    #axes[1,indx].legend(loc='lower left')
    axes[1,indx].set_xlim(rmin, rmax)
    axes[1,indx].set_ylim(densmin, densmax)
    axes[1,indx].set_xlabel('$r$')
    axes[1,indx].set_ylabel(r'$\rho$')
    axes[1,indx].text((rmin*rmax)**0.5, densmin*2, label, ha='center')

################ MAIN PROGRAM ##################
#base = "gs010_bs050_rcrs100_rarcinf_core_0400mpc3_df"
if len(sys.argv) <= 1:
    print("Provide the data file name as the command-line argument")
    exit()
agama.setUnits(mass=1, length=1, velocity=1)
base  = sys.argv[1]
model = ModelParams(base)
rmin, rmax     = 0.01, 100.
velmin, velmax = 0., 40.
radii    = numpy.logspace(numpy.log10(rmin), numpy.log10(rmax), 25)
midradii = (radii[1:] * radii[:-1])**0.5
xyz      = numpy.vstack((radii, numpy.zeros_like(radii), numpy.zeros_like(radii))).T

# plot the inferred density of dark matter and its log-slope as functions of radius
fig, axes = pyplot.subplots(2, 3, figsize=(12,8))
plot_profiles("6"+base+"/"+base+"_1000_0.dat",     0, '6d, no errors')
plot_profiles("5"+base+"/"+base+"_1000_0_err.dat", 1, r'5d, $\delta v$=2 km/s')
plot_profiles("3"+base+"/"+base+"_1000_0_err.dat", 2, r'3d, $\delta v$=2 km/s')
fig.tight_layout()
pyplot.savefig(base+"_darkmatter.png")
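# A minimal sketch (not in the original script) of how the log-slope of a density
# profile could be computed with agama, e.g. for the dark-matter panels above;
# `dens` is a hypothetical stand-in for whatever density model plot_profiles
# evaluates, assumed to expose the standard agama Density.density(points) method.
def density_log_slope(dens, radii, xyz):
    rho = dens.density(xyz)   # density sampled at the points along the x-axis
    # d ln(rho) / d ln(r) between adjacent radii, attributed to the geometric
    # midpoints of the radial grid (the midradii array defined above)
    return numpy.diff(numpy.log(rho)) / numpy.diff(numpy.log(radii))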