import warnings

import numpy as np

from emcee import EnsembleSampler, backends


def test_blob_shape(backend):
    with backend() as be:
        np.random.seed(42)
        nblobs = 5
        model = BlobLogProb(lambda x: np.random.randn(nblobs))
        coords = np.random.randn(32, 3)
        nwalkers, ndim = coords.shape
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        nsteps = 10
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers, nblobs)

        model = BlobLogProb(lambda x: np.random.randn())
        be.reset(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers)

        # HDF and FITS backends don't support the object type
        if backend in (backends.TempHDFBackend, backends.TempFITSBackend):
            return

        model = BlobLogProb(lambda x: "face")
        be.reset(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers)

        model = BlobLogProb(lambda x: (np.random.randn(nblobs), "face"))
        be.reset(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers, 2)
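# The tests in this file rely on a BlobLogProb helper that is not shown here.
# A minimal sketch of what it plausibly looks like, assuming the usual emcee
# test-suite convention: a callable that returns a Gaussian log-probability
# together with whatever blob the wrapped function produces.
class BlobLogProb:
    def __init__(self, blob_function):
        self.blob_function = blob_function

    def __call__(self, params):
        # First return value is the log-probability; the rest are the blobs.
        return -0.5 * np.sum(params ** 2), self.blob_function(params)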
def test_blob_shape(backend):
    with backend() as be:
        np.random.seed(42)
        nblobs = 5
        model = BlobLogProb(lambda x: np.random.randn(nblobs))
        coords = np.random.randn(32, 3)
        nwalkers, ndim = coords.shape
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        nsteps = 10
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers, nblobs)

        model = BlobLogProb(lambda x: np.random.randn())
        be.reset(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers)

        # HDF backends don't support the object type
        if backend in (backends.TempHDFBackend,):
            return

        model = BlobLogProb(lambda x: "face")
        be.reset(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers)

        model = BlobLogProb(lambda x: (np.random.randn(nblobs), "face"))
        be.reset(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        sampler.run_mcmc(coords, nsteps)
        assert sampler.get_blobs().shape == (nsteps, nwalkers, 2)
def test_blob_shape(backend, blob_spec):
    # HDF backends don't support the object type
    hdf_able, ragged, blob_shape, func = blob_spec
    if backend in (backends.TempHDFBackend,) and not hdf_able:
        return

    with backend() as be:
        np.random.seed(42)
        model = BlobLogProb(func)
        coords = np.random.randn(32, 3)
        nwalkers, ndim = coords.shape
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        nsteps = 10

        if ragged:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", DeprecationWarning)
                sampler.run_mcmc(coords, nsteps)
        else:
            sampler.run_mcmc(coords, nsteps)

        shape = [nsteps, nwalkers]
        if isinstance(blob_shape, tuple):
            shape += blob_shape
        elif blob_shape > 0:
            shape += [blob_shape]

        assert sampler.get_blobs().shape == tuple(shape)
        if not hdf_able:
            assert sampler.get_blobs().dtype == np.dtype("object")
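# One plausible way to supply blob_spec for the test above; the exact
# parametrization in the original suite may differ. Each tuple is
# (hdf_able, ragged, blob_shape, func), matching the shapes asserted in the
# earlier test versions. A ragged case (func returning arrays of varying
# length) would set ragged=True; it is omitted here because its exact
# warning behavior depends on the numpy version.
import pytest


@pytest.fixture(params=[
    (True, False, 5, lambda x: np.random.randn(5)),        # fixed-length vector blob
    (True, False, 0, lambda x: np.random.randn()),         # scalar blob
    (False, False, 0, lambda x: "face"),                   # string blob -> object dtype
    (False, False, 2, lambda x: (np.random.randn(5), "face")),  # mixed tuple blob
])
def blob_spec(request):
    return request.param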
# Method of a larger class; assumes `import pandas as pd` and
# `from emcee import EnsembleSampler` at module level.
def mcmc(self, n_walkers, n_iter, n_burnin, lnprob, args, pos0, chain_labels,
         pool=None, progress=True, out_file=None):
    """
    PARAMETERS
    ----------
    `n_walkers` (int): the number of walkers to use
    `n_iter` (int): the number of sample iterations to perform post burn-in
    `n_burnin` (int): the number of burn-in steps to perform
    `lnprob` (func): function returning the log-posterior probability
    `args` (tuple): arguments to be passed to `lnprob`
    `pos0` (list-like): list of initial walker positions
    `chain_labels` (list of str): list of column labels for the sample chains
    `pool` (optional): pool object passed through to `EnsembleSampler`
    `progress` (bool, optional): whether to show a progress bar while sampling
    `out_file` (str, optional): the user has the option to save the sample
        chains and blobs to a csv or pickle file. This is the path to the
        output file.

    RETURNS
    -------
    `sampler`: the `EnsembleSampler` instance
    `output`: a pandas DataFrame containing all the sample chains and blobs
    """
    n_dim = len(chain_labels)
    sampler = EnsembleSampler(n_walkers, n_dim, lnprob, args=args, pool=pool,
                              blobs_dtype=[("star", pd.Series)])

    # Burn-in phase
    if n_burnin != 0:
        print("Burn-in phase...", end="\r")
        state = sampler.run_mcmc(pos0, n_burnin)
        pos = state.coords
        sampler.reset()
    else:
        pos = pos0

    # Sampling phase
    sampler.run_mcmc(pos, n_iter, progress=progress)

    samples = pd.DataFrame(sampler.flatchain, columns=chain_labels)
    blobs = sampler.get_blobs(flat=True)
    blobs = pd.concat(list(blobs["star"]), axis=1).T
    output = pd.concat([samples, blobs], axis=1)

    if out_file is not None:
        if "csv" in out_file:
            output.to_csv(out_file, index=False)
        else:
            output.to_pickle(out_file)

    return sampler, output
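# A minimal usage sketch for the mcmc() method above. The toy model and the
# `fitter` instance (of whatever class this method belongs to) are
# illustrative assumptions, not from the original code.
import numpy as np
import pandas as pd


def lnprob(theta, data):
    # Toy Gaussian log-posterior; the pd.Series is stored as the "star" blob.
    lnpost = -0.5 * np.sum((data - theta[0]) ** 2)
    star = pd.Series({"mu": theta[0], "lnpost": lnpost})
    return lnpost, star


data = np.random.randn(100)
pos0 = [np.random.randn(1) for _ in range(16)]  # 16 walkers, ndim = 1
sampler, output = fitter.mcmc(n_walkers=16, n_iter=200, n_burnin=50,
                              lnprob=lnprob, args=(data,), pos0=pos0,
                              chain_labels=["mu"])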
def test_blob_shape(backend, blob_spec):
    # HDF backends don't support the object type
    if backend in (backends.TempHDFBackend,) and not blob_spec[0]:
        return

    with backend() as be:
        np.random.seed(42)
        model = BlobLogProb(blob_spec[2])
        coords = np.random.randn(32, 3)
        nwalkers, ndim = coords.shape
        sampler = EnsembleSampler(nwalkers, ndim, model, backend=be)
        nsteps = 10
        sampler.run_mcmc(coords, nsteps)

        shape = [nsteps, nwalkers]
        if blob_spec[1] > 0:
            shape += [blob_spec[1]]

        assert sampler.get_blobs().shape == tuple(shape)
import gzip
import os
import pickle
from multiprocessing import Pool
from warnings import warn

import numpy as np
from emcee import EnsembleSampler
from tqdm import tqdm


class Sampler:
    """Wrapper of emcee.EnsembleSampler."""

    def __init__(self, lnpost, p0, nwalkers=120, blobs_dtype=float):
        self.lnpost = lnpost
        # NOTE: blobs_dtype must be specified, otherwise an error happens,
        # and it must be a list of tuples (not a tuple of tuples),
        # e.g. [("lnlike", float)].
        self.sampler = EnsembleSampler(
            nwalkers, p0.shape[1], lnpost, blobs_dtype=blobs_dtype
        )
        self.p0 = p0
        self.p_last = p0
        self.ndim = p0.shape[1]

    def reset_sampler(self):
        self.sampler.reset()

    def sample(self, n_sample, burnin=False, use_pool=False):
        """Execute MCMC for the given number of iteration steps."""
        desc = "burnin" if burnin else "sample"
        with Pool() as pool:
            self.sampler.pool = pool if use_pool else None
            iteration = tqdm(
                self.sampler.sample(self.p_last, iterations=n_sample),
                total=n_sample, desc=desc
            )
            for _ret in iteration:
                self.p_last = _ret.coords  # if uses_emcee3 else _ret[0]  # for emcee2
                lnposts = _ret.log_prob   # if uses_emcee3 else _ret[1]  # for emcee2
                iteration.set_postfix(lnpost_min=np.min(lnposts),
                                      lnpost_max=np.max(lnposts),
                                      lnpost_mean=np.mean(lnposts))
        if burnin:
            self.reset_sampler()

    def get_chain(self, **kwargs):
        return self.sampler.get_chain(**kwargs)

    def get_log_prob(self, **kwargs):
        return self.sampler.get_log_prob(**kwargs)

    def get_blobs(self, **kwargs):
        return self.sampler.get_blobs(**kwargs)

    def get_last_sample(self, **kwargs):
        return self.sampler.get_last_sample(**kwargs)

    def _save(self, fname_base):
        np.save(fname_base + "_chain.npy", self.get_chain())
        np.save(fname_base + "_lnprob.npy", self.get_log_prob())
        np.save(fname_base + "_lnlike.npy", self.get_blobs())

    def save(self, fname_base):
        """
        Save MCMC results into "<fname_base>_chain/lnprob/lnlike.npy".
        If fname_base is like "your_directory/your_prefix", create
        "your_directory" before saving.
        """
        dirname = os.path.dirname(fname_base)
        if dirname != "" and not os.path.isdir(dirname):
            os.mkdir(dirname)
        self._save(fname_base)

    def save_pickle(self, fname_base, overwrite=False):
        fname = fname_base + "_.gz"
        if os.path.exists(fname):
            if overwrite:
                warn(f"{fname} already exists. It will be overwritten.")
            else:
                raise RuntimeError(
                    f"{fname} already exists. If you want to overwrite it, "
                    'set "overwrite=True".'
                )
        data = pickle.dumps(self)
        with gzip.open(fname, mode="wb") as fp:
            fp.write(data)
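# A minimal usage sketch for the Sampler wrapper above. The toy lnpost and
# the output prefix are illustrative assumptions, not from the original code.
def lnpost(theta):
    lnlike = -0.5 * np.sum(theta ** 2)
    return lnlike, lnlike  # second value is stored as the "lnlike" blob


p0 = np.random.randn(120, 3)  # (nwalkers, ndim)
s = Sampler(lnpost, p0, nwalkers=120, blobs_dtype=[("lnlike", float)])
s.sample(500, burnin=True)  # burn-in, then reset the chain
s.sample(2000)              # production run
s.save("results/run1")      # writes results/run1_chain.npy etc.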