def test_sampler_seed():
    nwalkers = 32
    ndim = 3
    nsteps = 25
    np.random.seed(456)
    coords = np.random.randn(nwalkers, ndim)

    sampler1 = EnsembleSampler(nwalkers, ndim, normal_log_prob, seed=1234)
    sampler2 = EnsembleSampler(nwalkers, ndim, normal_log_prob, seed=2)
    sampler3 = EnsembleSampler(nwalkers, ndim, normal_log_prob, seed=1234)
    sampler4 = EnsembleSampler(
        nwalkers, ndim, normal_log_prob, seed=deepcopy(sampler1._random)
    )
    for sampler in (sampler1, sampler2, sampler3, sampler4):
        sampler.run_mcmc(coords, nsteps)

    for k in ["get_chain", "get_log_prob"]:
        attr1 = getattr(sampler1, k)()
        attr2 = getattr(sampler2, k)()
        attr3 = getattr(sampler3, k)()
        attr4 = getattr(sampler4, k)()
        assert not np.allclose(attr1, attr2), "inconsistent {0}".format(k)
        np.testing.assert_allclose(
            attr1, attr3, err_msg="inconsistent {0}".format(k)
        )
        np.testing.assert_allclose(
            attr1, attr4, err_msg="inconsistent {0}".format(k)
        )
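# The tests in this file call `normal_log_prob` without defining it here.
# A minimal stand-in consistent with how it is used (an isotropic standard
# Gaussian) might look like the sketch below; this is an assumption, not the
# original definition:
def normal_log_prob_example(params):
    # Log-density of a standard normal, up to an additive constant.
    return -0.5 * np.sum(params**2)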
def test_blob_shape(backend):
    with backend() as be:
        np.random.seed(42)

        nblobs = 5
        model = BlobLogProb(lambda x: np.random.randn(nblobs))
        coords = np.random.randn(32, 3)
        nwalkers, ndim = coords.shape

        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        nsteps = 10
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers, nblobs)

        model = BlobLogProb(lambda x: np.random.randn())
        be.reset(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers)

        # HDF and FITS backends don't support the object type
        if backend in (backends.TempHDFBackend, backends.TempFITSBackend):
            return

        model = BlobLogProb(lambda x: "face")
        be.reset(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers)

        model = BlobLogProb(lambda x: (np.random.randn(nblobs), "face"))
        be.reset(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers, 2)
def test_sampler_generator():
    nwalkers = 32
    ndim = 3
    nsteps = 5
    np.random.seed(456)
    coords = np.random.randn(nwalkers, ndim)

    seed1 = np.random.default_rng(1)
    sampler1 = EnsembleSampler(nwalkers, ndim, normal_log_prob, seed=seed1)
    sampler1.run_mcmc(coords, nsteps)

    seed2 = np.random.default_rng(1)
    sampler2 = EnsembleSampler(nwalkers, ndim, normal_log_prob, seed=seed2)
    sampler2.run_mcmc(coords, nsteps)

    np.testing.assert_allclose(sampler1.get_chain(), sampler2.get_chain())
    np.testing.assert_allclose(sampler1.get_log_prob(),
                               sampler2.get_log_prob())
def run_emcee(x, lnprob, args, nwalkers, nruns, fudge, chain_name, burns,
              pool=None, nthreads=1, namearray=None, resume=False, w=False):
    ndim = len(x)
    namearray = [] if namearray is None else namearray
    p0 = []
    if resume:
        p0, ndone = resume_file(chain_name, ndim, nwalkers)
        nruns -= ndone
        n = (ndone + burns) // nwalkers
    else:
        # Scatter the walkers around x by up to +/- fudge/2 decades.
        for i in range(nwalkers):
            shuffle = 10 ** (fudge * (np.random.rand(ndim) - 0.5))
            p0 += [list(shuffle * x)]
        initiate_file(chain_name, ndim, blob_list=namearray, w=w)
        n = 0
    iterations = nruns // nwalkers

    if pool is not None:
        sampler = EnsembleSampler(nwalkers, ndim, lnprob, args=args,
                                  pool=pool)
    else:
        sampler = EnsembleSampler(nwalkers, ndim, lnprob, args=args,
                                  threads=nthreads)

    # emcee 2.x API: `storechain` (renamed `store` in emcee 3) and
    # tuple-style results (position, log-prob, random state, blobs).
    for result in sampler.sample(p0, iterations=iterations, storechain=False):
        n += 1
        if n > burns / nwalkers:
            position = result[0]
            logl = result[1]
            with fFITS(chain_name, 'rw') as fits:
                for k in range(position.shape[0]):
                    output = {'lp': np.array([logl[k]]),
                              'x': np.array([position[k]])}
                    for i in range(len(namearray)):
                        blob = result[3][k][i]
                        output[namearray[i]] = np.array([blob])
                    if np.isfinite(logl[k]):
                        fits['MCMC'].append(output)
    if pool is not None:
        pool.close()
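# Hedged usage sketch for run_emcee; every name and value below (my_lnprob,
# the file name, the blob label) is illustrative, not from the original
# source. Note that the function targets the emcee 2.x API (`threads`,
# `storechain`, tuple-style results); in emcee 3 the keyword is `store` and
# sample() yields State objects instead of tuples.
#
#     with Pool(4) as pool:
#         run_emcee(x=np.ones(3), lnprob=my_lnprob, args=(), nwalkers=32,
#                   nruns=3200, fudge=0.5, chain_name="chain.fits",
#                   burns=320, pool=pool, namearray=["teff"])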
def make_sampler(self):
    ens_samp = EnsembleSampler(self.nwalkers,
                               len(list(self.model.parameters)),
                               self.model.lnposterior,
                               threads=self.threads, args=[self.data])
    if self.seed is not None:
        seed_state = np.random.mtrand.RandomState(self.seed).get_state()
        ens_samp.random_state = seed_state
    return ens_samp
def sample_orbit(sampler, nsteps=0, theta0=None, processes=None):
    """Run the MCMC sampler.

    Note: For improved parallel performance this function is not implemented
    as a class method of MCMCSampler.
    """
    with Pool(processes) as pool:
        worker = EnsembleSampler(sampler.nwalkers, sampler.ndim,
                                 sampler.objective,
                                 backend=sampler.backend, pool=pool)
        if worker.backend.iteration == 0:
            logger.info("Starting new run")
            if theta0 is None:
                theta = np.array([[prior.draw() for prior in sampler.priors]
                                  for n in range(sampler.nwalkers)])
            else:
                theta = theta0
        else:
            logger.info("Resuming last run")
            theta = worker._previous_state
        assert theta is not None
        if nsteps is not None:
            assert nsteps >= 0
        worker.run_mcmc(theta, nsteps, progress=True)
        logger.info("finished MCMC run")
    return worker
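# Hedged usage sketch for sample_orbit. The `sampler` argument is assumed to
# be a configuration object exposing nwalkers, ndim, objective, backend, and
# priors (the attributes the body above reads); the names below are
# illustrative:
#
#     worker = sample_orbit(mcmc_config, nsteps=5000, processes=4)
#     chain = worker.get_chain(flat=True)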
def run_burn_in(self, pool=None):
    # Initialise sampler for burn-in
    self.burn_in_sampler = EnsembleSampler(
        self.p['mcmc']['walkers_initial'],
        len(self.likelihood.mu),
        self.likelihood,
        pool=pool
    )

    # Record start time
    self.burn_start_time = dt.now()

    # Initialise walkers
    self.walker_init()

    # Run the sampler and write progress to file
    for i, a in enumerate(
            self.burn_in_sampler.sample(
                self.pre_burn_position,
                iterations=self.p['mcmc']['burn_in_iterations'])):
        if check_master(pool):
            with open(self.prog_fname, 'w') as f:
                f.write(self.write_progress(
                    i, self.p['mcmc']['burn_in_iterations'],
                    self.burn_start_time, 'B'))

    # Save the chain
    self.burn_chain = self.burn_in_sampler.chain
def run_sampling(self, pool=None):
    # Take the time at the start of sampling
    self.sample_start_time = dt.now()

    # Respawn the walkers from the final burn-in position
    self.redistribute_walkers()

    # Initialise new sampler for final chain
    self.final_sampler = EnsembleSampler(
        self.p['mcmc']['walkers_initial'] * self.p['mcmc']['walkers_factor'],
        len(self.likelihood.mu),
        self.likelihood,
        pool=pool
    )

    # Run the sampler and write progress to file
    for i, a in enumerate(
            self.final_sampler.sample(
                self.post_burn_position,
                iterations=(self.p['mcmc']['final_iterations'] + 10))):
        if check_master(pool):
            with open(self.prog_fname, 'w') as f:
                f.write(self.write_progress(
                    i, self.p['mcmc']['final_iterations'] + 10,
                    self.sample_start_time, 'S'))

    # Record the finish time
    self.sample_finish_time = dt.now()

    # Prune the chain to remove dead walkers and drop second burn-in
    self.format_chain()
def sample_emcee(model, data, nwalkers, nsamples, walker_initial_pos,
                 threads='auto', cleanup_threads=True, seed=None):
    sampler = EnsembleSampler(nwalkers, len(list(model.parameters)),
                              model.lnposterior,
                              threads=autothreads(threads), args=[data])
    if seed is not None:
        np.random.seed(seed)
        seed_state = np.random.mtrand.RandomState(seed).get_state()
        sampler.random_state = seed_state
    sampler.run_mcmc(walker_initial_pos, nsamples)
    if sampler.pool is not None and cleanup_threads:
        sampler.pool.terminate()
        sampler.pool.join()
    return sampler
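# make_sampler and sample_emcee above seed the emcee 2.x RNG by assigning a
# RandomState state tuple to `sampler.random_state`. The test functions
# earlier in this file instead pass `seed=` to EnsembleSampler directly,
# which is the newer interface; a one-line sketch:
#
#     sampler = EnsembleSampler(nwalkers, ndim, log_prob, seed=1234)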
def do_mcmc(self, nwalker=100, nburn=50, nchain=50, threads=1,
            set_prior=True):
    # initial walkers for MCMC
    ndim = 2
    pinit = np.zeros((nwalker, ndim))
    pinit[:, 0] = np.random.uniform(-10, -2, nwalker)
    pinit[:, 1] = np.random.uniform(np.log10(self.lc.dt_min / 10),
                                    np.log10(self.lc.dt_tot * 10), nwalker)

    # start sampling
    sampler = EnsembleSampler(nwalker, ndim, self.lnprob,
                              args=(self.lc, set_prior), threads=threads)
    # burn-in
    pos, prob, state = sampler.run_mcmc(pinit, nburn)
    sampler.reset()
    # actual samples
    sampler.run_mcmc(pos, nchain, rstate0=state)

    self.sampler = sampler
    self.flatchain = sampler.flatchain
    self.lnprobability = sampler.lnprobability
def test_errors(backend, nwalkers=32, ndim=3, nsteps=5, seed=1234):
    # Set up the random number generator.
    np.random.seed(seed)

    with backend() as be:
        # Initialize the ensemble, proposal, and sampler.
        coords = np.random.randn(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, normal_log_prob,
                                  backend=be)

        # Test for not running.
        with pytest.raises(AttributeError):
            sampler.chain
        with pytest.raises(AttributeError):
            sampler.lnprobability

        # What about not storing the chain.
        sampler.run_mcmc(coords, nsteps, store=False)
        with pytest.raises(AttributeError):
            sampler.chain

        # Now what about if we try to continue using the sampler with an
        # ensemble of a different shape.
        sampler.run_mcmc(coords, nsteps, store=False)

        coords2 = np.random.randn(nwalkers, ndim + 1)
        with pytest.raises(ValueError):
            list(sampler.run_mcmc(coords2, nsteps))
def test_blob_shape(backend, blob_spec):
    # HDF backends don't support the object type
    hdf_able, ragged, blob_shape, func = blob_spec
    if backend in (backends.TempHDFBackend,) and not hdf_able:
        return

    with backend() as be:
        np.random.seed(42)

        model = BlobLogProb(func)
        coords = np.random.randn(32, 3)
        nwalkers, ndim = coords.shape
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        nsteps = 10

        if ragged:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", DeprecationWarning)
                sampler.run_mcmc(coords, nsteps)
        else:
            sampler.run_mcmc(coords, nsteps)

        shape = [nsteps, nwalkers]
        if isinstance(blob_shape, tuple):
            shape += blob_shape
        elif blob_shape > 0:
            shape += [blob_shape]

        assert sampler.get_blobs().shape == tuple(shape)
        if not hdf_able:
            assert sampler.get_blobs().dtype == np.dtype("object")
def run_sampler(backend, nwalkers=32, ndim=3, nsteps=25, seed=1234,
                thin_by=1):
    np.random.seed(seed)
    coords = np.random.randn(nwalkers, ndim)
    sampler = EnsembleSampler(nwalkers, ndim, normal_log_prob,
                              backend=backend)
    sampler.run_mcmc(coords, nsteps, thin_by=thin_by)
    return sampler
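# A minimal sketch of driving the run_sampler helper above with emcee's
# in-memory backend (assumes emcee >= 3, where emcee.backends.Backend is the
# default in-memory store, and that `backends` is imported from emcee):
def example_backend_run():
    be = backends.Backend()
    sampler = run_sampler(be)
    # get_chain() returns an array of shape (nsteps, nwalkers, ndim)
    return sampler.get_chain().shape  # (25, 32, 3)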
def fitMcmc(self, u, v, *theta0, **kwargs):
    """!
    @brief Markov chain Monte Carlo fit method
    @param u <b>np_1darray</b> Rank data vector
    @param v <b>np_1darray</b> Rank data vector
    @param theta0 Initial guess for copula parameter list
    @return <b>tuple</b> :
            (<b>np_array</b> Array of MLE fit copula parameters,
             <b>np_2darray</b> sample array of shape (nparams, nsamples))
    """
    from emcee import EnsembleSampler
    wgts = kwargs.pop("weights", np.ones(len(u)))
    rotation = 0
    ln_prob = lambda theta: self._ln_prior(*theta, **kwargs) + \
        self._ln_like(u, v, wgts, rotation, *theta)
    if None in theta0:
        params0 = self.theta0
    else:
        params0 = theta0
    ndim = len(params0)
    ngen = kwargs.get("ngen", 200)
    nburn = kwargs.get("nburn", 100)
    nwalkers = kwargs.get("nwalkers", 50)
    # initialize walkers in a Gaussian ball around theta0
    pos_0 = [np.array(params0) +
             1e-6 * np.asarray(params0) * np.random.randn(ndim)
             for i in range(nwalkers)]
    emcee_mcmc = EnsembleSampler(nwalkers, ndim, ln_prob)
    emcee_mcmc.run_mcmc(pos_0, ngen)
    samples = emcee_mcmc.chain[:, nburn:, :].reshape((-1, ndim))
    res = np.mean(samples, axis=0)
    self._fittedParams = res
    return res, samples
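# Hedged usage sketch for fitMcmc. The copula object and rank vectors are
# illustrative names, not from the original source:
#
#     params, samples = my_copula.fitMcmc(u_ranks, v_ranks, 2.0,
#                                         nwalkers=50, ngen=200, nburn=100)
#
# The returned `params` is the mean of the post-burn-in samples, and
# `samples` has shape (nwalkers * (ngen - nburn), ndim).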
def sample_mcmc(self, niter: int = 500, thin: int = 5, repeats: int = 1,
                npop: int = None, population=None, label='MCMC sampling',
                reset=True, leave=True, save=False, use_tqdm: bool = True):
    if save and self.result_dir is None:
        raise ValueError('The MCMC sampler is set to save the results, '
                         'but the result directory is not set.')
    if self.sampler is None:
        if population is not None:
            pop0 = population
        elif hasattr(self, '_local_minimization') and \
                self._local_minimization is not None:
            pop0 = multivariate_normal(self._local_minimization.x,
                                       diag(full(len(self.ps), 0.001 ** 2)),
                                       size=npop)
        elif self.de is not None:
            pop0 = self.de.population.copy()
        else:
            raise ValueError('Sample MCMC needs an initial population.')
        self.sampler = EnsembleSampler(pop0.shape[0], pop0.shape[1],
                                       self.lnposterior, vectorize=True)
    else:
        pop0 = self.sampler.chain[:, -1, :].copy()

    for i in tqdm(range(repeats), desc='MCMC sampling',
                  disable=(not use_tqdm)):
        if reset or i > 0:
            self.sampler.reset()
        for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin),
                      total=niter,
                      desc='Run {:d}/{:d}'.format(i + 1, repeats),
                      leave=False, disable=(not use_tqdm)):
            pass
        if save:
            self.save(self.result_dir)
        pop0 = self.sampler.chain[:, -1, :].copy()
def runsample(self, sed_obs, sed_obs_err, vpi_obs, vpi_obs_err,
              Lvpi=1.0, Lprior=1.0, nsteps=(1000, 1000, 2000), p0try=None):
    ndim = 4                # 4 stands for [Teff, logg, Av, DM]
    nwalkers = len(p0try)   # number of chains

    for i in range(len(nsteps)):
        if i == 0:
            # initialize sampler
            sampler = EnsembleSampler(
                nwalkers, ndim, costfun,
                args=(self.r, self.p_bounds, self.Alambda,
                      sed_obs, sed_obs_err, vpi_obs, vpi_obs_err,
                      Lvpi, Lprior))
            # guess Av and DM for p0try
            p0try = np.array([initial_guess(_, self.r, self.Alambda,
                                            sed_obs, sed_obs_err)
                              for _ in p0try])
            # run sampler
            pos, _, __ = sampler.run_mcmc(p0try, nsteps[i])
        else:
            # generate new p
            p_rand = random_p(
                sampler, nloopmax=1000, method="mle", costfun=costfun,
                args=(self.r, self.p_bounds, self.Alambda,
                      sed_obs, sed_obs_err, vpi_obs, vpi_obs_err,
                      Lvpi, Lprior))
            # reset sampler
            sampler.reset()
            # run at new p
            pos1, lnprob1, rstate1 = sampler.run_mcmc(p_rand, nsteps[i])
    return sampler
def run_sampler(
    backend,
    nwalkers=32,
    ndim=3,
    nsteps=25,
    seed=1234,
    thin=None,
    thin_by=1,
    progress=False,
    store=True,
):
    np.random.seed(seed)
    coords = np.random.randn(nwalkers, ndim)
    np.random.seed(None)
    sampler = EnsembleSampler(nwalkers, ndim, normal_log_prob,
                              backend=backend, seed=seed)
    sampler.run_mcmc(
        coords,
        nsteps,
        thin=thin,
        thin_by=thin_by,
        progress=progress,
        store=store,
    )
    return sampler
def lt_taum(pTeff, plogLstar, grid_name='MIST', ntrials=10000, burn=0,
            nwalkers=10):
    # set up parser
    parser = argparse.ArgumentParser(
        description="Given a set of MCMC samples of T, log L, use "
                    "scipy.kde to approximate the density field.")
    parser.add_argument("--config", default="config.yaml",
                        help="The config file specifying everything we need.")
    args = parser.parse_args()

    with open(args.config) as f:
        config = yaml.safe_load(f)

    # collate the Teff, logLstar samples (presumed independent here)
    TlL_samples = np.column_stack((np.log10(pTeff), plogLstar))

    # initialize MCMC walkers
    ndim = 2
    age_low, age_high = 0.2, 20.     # in Myr
    Mstar_low, Mstar_high = 0.1, 3.  # in Msun
    p0 = np.array([
        np.log10(1e6 * np.random.uniform(age_low, age_high, nwalkers)),
        np.log10(np.random.uniform(Mstar_low, Mstar_high, nwalkers))]).T

    # KDE for Teff, logLstar
    samples = TlL_samples.T
    kernel = gaussian_kde(samples)

    # define the likelihood function
    def lnprob(p, grid):
        age, mass = p
        # if ((age < 0.0) or (mass < 0.0)):
        #     return -np.inf
        # smooth interpolation in H-R diagram
        temp = grid.interp_T(p)
        lL = grid.interp_lL(p)
        # land outside the grid, you get a NaN; convert to -np.inf to sample
        if np.isnan(temp) or np.isnan(lL):
            return -np.inf
        # evaluate the KDE kernel
        lnp = kernel.logpdf([temp, lL])
        # return the log-likelihood
        return lnp

    # *** sample the {age, Mstar} posterior

    # assign the model grid
    grid = model_dict[grid_name](**config[grid_name])

    # initialize and run the EMCEE sampler
    sampler = EnsembleSampler(nwalkers, ndim, lnprob, args=[grid])
    pos, prob, state = sampler.run_mcmc(p0, ntrials)

    # flatten the resulting chain to give joint samples of {age, Mstar}
    ptauMstar = (sampler.chain[:, burn:, :]).reshape(-1, ndim)

    return ptauMstar
def test_hybrid_sampling(pipe):
    n_walkers, p0, hybrid_lnpost = get_walkers(pipe, lnpost_fn=lnpost)
    n_walkers *= 2
    p0 = np.concatenate([p0, p0])
    with pipe.worker_mode:
        if pipe._is_controller:
            sampler = EnsembleSampler(n_walkers, pipe._modellink._n_par,
                                      hybrid_lnpost, args=[pipe])
            sampler.run_mcmc(p0, 10)
def test_infinite_iterations(backend, nwalkers=32, ndim=3):
    with backend() as be:
        coords = np.random.randn(nwalkers, ndim)
        for state in islice(
                EnsembleSampler(nwalkers, ndim, normal_log_prob,
                                backend=be).sample(coords, iterations=None,
                                                   store=False),
                10):
            pass
def test_infinite_iterations_store(backend, nwalkers=32, ndim=3):
    with backend() as be:
        coords = np.random.randn(nwalkers, ndim)
        with pytest.raises(ValueError):
            next(EnsembleSampler(nwalkers, ndim, normal_log_prob,
                                 backend=be).sample(coords, iterations=None,
                                                    store=True))
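# Design note on the two tests above: iterations=None turns sample() into an
# open-ended generator, which only makes sense when the backend does not
# need to preallocate storage. That is why the islice-bounded loop uses
# store=False, and why the store=True variant is expected to raise a
# ValueError.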
def __init__(self, lnpost, p0, keys, nwalkers=120):
    self.lnpost = lnpost
    self.sampler = EnsembleSampler(nwalkers, p0.shape[1], lnpost, threads=15)
    self.p0 = p0
    self.p = p0
    self.keys = keys
    self.ndim = len(keys)
def __init__(self, lnpost, p0, nwalkers=120):
    """init"""
    self.lnpost = lnpost
    # Note: the blobs dtype must be specified here, otherwise an error
    # occurs. A structured alternative would be [("lnlike", float)]; a
    # structured dtype must be a list of tuples (not a tuple of tuples).
    blobs_dtype = float
    self.sampler = EnsembleSampler(nwalkers, p0.shape[1], lnpost,
                                   blobs_dtype=blobs_dtype)
    self.p0 = p0
    self.p_last = p0
    self.ndim = p0.shape[1]
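# A minimal runnable sketch of the blobs_dtype mechanism used above, with a
# structured dtype (assumes emcee >= 3; the log-posterior and all names are
# illustrative):
def _lnpost_with_blob(p):
    lnp = -0.5 * np.sum(p**2)
    return lnp, lnp  # the second value is stored as a named blob

def example_blob_dtype():
    nwalkers, ndim = 8, 2
    sampler = EnsembleSampler(nwalkers, ndim, _lnpost_with_blob,
                              blobs_dtype=[("lnlike", float)])
    sampler.run_mcmc(np.random.randn(nwalkers, ndim), 10)
    # get_blobs() returns a structured array of shape (nsteps, nwalkers)
    return sampler.get_blobs()["lnlike"]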
def test_shapes(backend, moves, nwalkers=32, ndim=3, nsteps=10, seed=1234):
    # Set up the random number generator.
    np.random.seed(seed)

    with backend() as be:
        # Initialize the ensemble, moves and sampler.
        coords = np.random.randn(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, normal_log_prob,
                                  moves=moves, backend=be)

        # Run the sampler.
        sampler.run_mcmc(coords, nsteps)
        chain = sampler.get_chain()
        assert len(chain) == nsteps, "wrong number of steps"

        tau = sampler.get_autocorr_time(quiet=True)
        assert tau.shape == (ndim,)

        # Check the shapes.
        with pytest.warns(DeprecationWarning):
            assert sampler.chain.shape == (
                nwalkers,
                nsteps,
                ndim,
            ), "incorrect coordinate dimensions"
        with pytest.warns(DeprecationWarning):
            assert sampler.lnprobability.shape == (
                nwalkers,
                nsteps,
            ), "incorrect probability dimensions"
        assert sampler.get_chain().shape == (
            nsteps,
            nwalkers,
            ndim,
        ), "incorrect coordinate dimensions"
        assert sampler.get_log_prob().shape == (
            nsteps,
            nwalkers,
        ), "incorrect probability dimensions"
        assert sampler.acceptance_fraction.shape == (
            nwalkers,
        ), "incorrect acceptance fraction dimensions"

        # Check the shape of the flattened coords.
        assert sampler.get_chain(flat=True).shape == (
            nsteps * nwalkers,
            ndim,
        ), "incorrect coordinate dimensions"
        assert sampler.get_log_prob(flat=True).shape == (
            nsteps * nwalkers,
        ), "incorrect probability dimensions"
def test_vectorize():
    def lp_vec(p):
        return -0.5 * np.sum(p**2, axis=1)

    np.random.seed(42)
    nwalkers, ndim = 32, 3
    coords = np.random.randn(nwalkers, ndim)
    sampler = EnsembleSampler(nwalkers, ndim, lp_vec, vectorize=True)
    sampler.run_mcmc(coords, 10)

    assert sampler.get_chain().shape == (10, nwalkers, ndim)
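# Design note on vectorize=True: emcee passes a whole 2-D block of walker
# coordinates to the log-probability in a single call and expects an array
# of per-walker values back, which is why lp_vec sums over axis=1. This
# trades per-walker call overhead for one vectorized evaluation.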
def _get_sampler(self, **kwargs):
    # This is bad, but I have to access this before passing kwargs,
    # otherwise nwalkers is passed twice.
    if "nwalkers" in kwargs:
        del kwargs["nwalkers"]
    return EnsembleSampler(
        log_prob_fn=self.likelihood,
        ndim=self.nparams,
        nwalkers=self.nwalkers,
        **kwargs,
    )
def test_overwrite(seed=1234):
    np.random.seed(seed)

    def ll(x):
        return -0.5 * np.sum(x**2)

    nwalkers = 64
    p0 = np.random.normal(size=(nwalkers, 1))
    init = np.copy(p0)

    sampler = EnsembleSampler(nwalkers, 1, ll)
    sampler.run_mcmc(p0, 10)
    assert np.allclose(init, p0)
def mcmc(self, n_walkers, n_iter, n_burnin, lnprob, args, pos0, chain_labels,
         pool=None, progress=True, out_file=None):
    """
    PARAMETERS
    ----------
    `n_walkers` (int): the number of walkers to use
    `n_iter` (int): the number of sample iterations to perform post burn-in
    `n_burnin` (int): the number of burn-in steps to perform
    `lnprob` (func): function returning the log-posterior probability
    `args` (tuple): arguments to be passed to `lnprob`
    `pos0` (list-like): list of initial walker positions
    `chain_labels` (list of str): list of column labels for the sample chains
    `out_file` (str, optional): the user has the option to save the sample
        chains and blobs to a csv or pickle file. This is the path to the
        output file.

    RETURNS
    -------
    `output`: a pandas DataFrame containing all the sample chains and blobs
    """
    n_dim = len(chain_labels)
    sampler = EnsembleSampler(n_walkers, n_dim, lnprob, args=args, pool=pool,
                              blobs_dtype=[("star", pd.Series)])

    # Burn-in phase
    if n_burnin != 0:
        print("Burn-in phase...", end="\r")
        pos, prob, state, blobs = sampler.run_mcmc(pos0, n_burnin)
        sampler.reset()
    else:
        pos = pos0

    # Sampling phase
    pos, prob, state, blobs = sampler.run_mcmc(pos, n_iter,
                                               progress=progress)

    samples = pd.DataFrame(sampler.flatchain, columns=chain_labels)
    blobs = sampler.get_blobs(flat=True)
    blobs = pd.concat(blobs["star"], axis=1).T
    output = pd.concat([samples, blobs], axis=1)

    if out_file is not None:
        if "csv" in out_file:
            output.to_csv(out_file, index=False)
        else:
            output.to_pickle(out_file)

    return sampler, output
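# Hedged usage sketch for the mcmc method above. The log-posterior must
# return (lnp, star) where `star` is a pandas Series, to match
# blobs_dtype=[("star", pd.Series)]; all names here are illustrative:
#
#     sampler, output = fitter.mcmc(n_walkers=64, n_iter=1000, n_burnin=200,
#                                   lnprob=my_lnprob, args=(data,),
#                                   pos0=initial_positions,
#                                   chain_labels=["mass", "age"],
#                                   out_file="chains.csv")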
def sample(self, niter=500, thin=5, label='MCMC sampling', reset=False):
    """MCMC sampling using emcee"""
    if self.sampler is None:
        self.sampler = EnsembleSampler(self.de.n_pop, self.de.n_par,
                                       self.lnposterior)
        pop0 = self.de.population
    else:
        pop0 = self.sampler.chain[:, -1, :].copy()
    if reset:
        self.sampler.reset()
    for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin),
                  total=niter, desc=label):
        pass
def __call__(self, nw=None, nt=None, nb=None, ns=None):
    if nw is None:
        nw = self.nWalkers
    else:
        self.nWalkers = nw
        self._initial_parameters()
    if nt is None:
        nt = self.nThreads
    if nb is None:
        nb = self.nBurnin
    if ns is None:
        ns = self.nSteps

    # setup emcee sampler
    sampler = EnsembleSampler(nw, self.nDim, self.lnProb, threads=nt)

    if nb:
        # Run burn-in steps
        pos, prob, state = sampler.run_mcmc(self.pos0, nb)
        # Reset the chain to remove the burn-in samples
        sampler.reset()
        # from the final position in burn-in chain, sample for nsteps
        sampler.run_mcmc(pos, ns, rstate0=state)
    else:
        # sample for nsteps
        sampler.run_mcmc(self.pos0, ns)

    samples = sampler.flatchain
    lnprobs = sampler.flatlnprobability
    indxs = np.where(lnprobs > -float_info.max)[0]
    if self.scale == 'linear':
        samples = samples[indxs]
    elif self.scale == 'log':
        samples = np.power(10, samples[indxs])
    else:
        raise Exception("prior scale must be set")
    lnprobs = lnprobs[indxs]

    Xmin = max(lnprobs)
    indmin = np.where(lnprobs == Xmin)[0][0]
    vals = samples[indmin]

    return vals, samples, lnprobs