def test_post_prior(tmpdir):
    # Generate original chain
    info: InputDict = {
        "output": os.path.join(tmpdir, "gaussian"), "force": True,
        "params": info_params, "sampler": info_sampler,
        "likelihood": {"one": None}, "prior": {"gaussian": sampled_pdf}}
    info_post: InputDict = {
        "output": info["output"], "force": True,
        "post": {"suffix": "foo", 'skip': 0.1,
                 "remove": {"prior": {"gaussian": None}},
                 "add": {"prior": {"target": target_pdf_prior}}}}
    _, sampler = run(info)
    if mpi.is_main_process():
        mcsamples_in = loadMCSamples(info["output"], settings={'ignore_rows': 0.1})
        target_mean, target_cov = mpi.share(_get_targets(mcsamples_in))
    else:
        target_mean, target_cov = mpi.share()
    for mem in [False, True]:
        post(info_post, sample=sampler.products()["sample"] if mem else None)
        # Load with GetDist and compare
        if mpi.is_main_process():
            mcsamples = loadMCSamples(
                info_post["output"] + _post_ + info_post["post"]["suffix"])
            new_mean = mcsamples.mean(["a", "b"])
            new_cov = mcsamples.getCovMat().matrix
            mpi.share((new_mean, new_cov))
        else:
            new_mean, new_cov = mpi.share()
        assert np.allclose(new_mean, target_mean)
        assert np.allclose(new_cov, target_cov)
def __init__(self, *args, **kwargs):
    if is_main_process():
        Output.__init__(self, *args, **kwargs)
    if more_than_one_process():
        to_broadcast = ("folder", "prefix", "kind", "ext", "_resuming",
                        "prefix_regexp_str")
        values = share_mpi([getattr(self, var) for var in to_broadcast]
                           if is_main_process() else None)
        for name, var in zip(to_broadcast, values):
            setattr(self, name, var)
def is_installed(cls, **kwargs):
    log = get_logger(cls.__name__)
    if not kwargs.get("code", True):
        return True
    check = kwargs.get("check", True)
    func = log.info if check else log.error
    path: Optional[str] = kwargs["path"]
    if path is not None and path.lower() == "global":
        path = None
    if path and not kwargs.get("allow_global"):
        if is_main_process():
            log.info("Importing *local* PolyChord from '%s'.", path)
        if not os.path.exists(path):
            if is_main_process():
                func("The given folder does not exist: '%s'", path)
            return False
        poly_build_path = cls.get_import_path(path)
        if not poly_build_path:
            return False
    elif not path:
        if is_main_process():
            log.info("Importing *global* PolyChord.")
        poly_build_path = None
    else:
        if is_main_process():
            log.info(
                "Importing *auto-installed* PolyChord (but defaulting to *global*).")
        poly_build_path = cls.get_import_path(path)
    cls._poly_build_path = poly_build_path
    try:
        # TODO: add min_version when polychord module version available
        return load_module('pypolychord', path=poly_build_path, min_version=None)
    except ModuleNotFoundError:
        if path is not None and path.lower() != "global":
            log.error("Couldn't find the PolyChord python interface at '%s'. "
                      "Are you sure it has been installed there?", path)
        elif not check:
            log.error("Could not import global PolyChord installation. "
                      "Specify a Cobaya or PolyChord installation path, "
                      "or install the PolyChord Python interface globally with "
                      "'cd /path/to/polychord/ ; python setup.py install'")
        return False
    except ImportError as e:
        log.error("Couldn't load the PolyChord python interface in %s:\n%s",
                  poly_build_path or "global", e)
        return False
    except VersionCheckError as e:
        log.error(str(e))
        return False
def __init__(self, info_sampler: SamplerDict, model: Model,
             output: Optional[Output] = None,
             packages_path: Optional[str] = None, name: Optional[str] = None):
    """
    Actual initialization of the class. Loads the default and input information and
    calls the custom ``initialize`` method.

    [Do not modify this one.]
    """
    self._model = model
    self._output = output
    self._updated_info = deepcopy_where_possible(info_sampler)
    super().__init__(info_sampler, packages_path=packages_path,
                     name=name, initialize=False, standalone=False)
    if not model.parameterization.sampled_params():
        self.mpi_warning("No sampled parameters requested! "
                         "This will fail for non-mock samplers.")
    # Load checkpoint info, if resuming
    if self.output.is_resuming() and not isinstance(self, Minimizer):
        checkpoint_info = None
        if mpi.is_main_process():
            try:
                checkpoint_info = yaml_load_file(self.checkpoint_filename())
                if self.get_name() not in checkpoint_info["sampler"]:
                    raise LoggedError(
                        self.log, "Checkpoint file found at '%s' "
                                  "but it corresponds to a different sampler.",
                        self.checkpoint_filename())
            except (IOError, TypeError):
                pass
        checkpoint_info = mpi.share_mpi(checkpoint_info)
        if checkpoint_info:
            self.set_checkpoint_info(checkpoint_info)
            self.mpi_info("Resuming from previous sample!")
    elif not isinstance(self, Minimizer) and mpi.is_main_process():
        try:
            output.delete_file_or_folder(self.checkpoint_filename())
            output.delete_file_or_folder(self.progress_filename())
        except (OSError, TypeError):
            pass
    self._set_rng()
    self.initialize()
    model.set_cache_size(self._get_requested_cache_size())
    # Add to the updated info some values which are only available after initialization
    self._updated_info["version"] = self.get_version()
def _run(self):
    """
    Runs the sampler.
    """
    self.mpi_info(
        "Sampling!" +
        (" (NB: no accepted step will be saved until %d burn-in samples "
         % self.burn_in.value + "have been obtained)"
         if self.burn_in.value else ""))
    self.n_steps_raw = 0
    last_output = 0
    last_n = self.n()
    while last_n < self.max_samples and not self.converged:
        self.get_new_sample()
        self.n_steps_raw += 1
        if self.output_every.unit:
            # if output_every in sec, print some info and dump at fixed time intervals
            now = datetime.datetime.now()
            now_sec = now.timestamp()
            if now_sec >= last_output + self.output_every.value:
                self.do_output(now)
                last_output = now_sec
        if self.current_point.weight == 1:
            # have added new point
            # Callback function
            n = self.n()
            if n != last_n:
                # and actually added
                last_n = n
                if (hasattr(self, "callback_function_callable") and
                        not (max(n, 1) % self.callback_every.value) and
                        self.current_point.weight == 1):
                    self.callback_function_callable(self)
                    self.last_point_callback = len(self.collection)
                # Checking convergence and (optionally) learning
                # the covmat of the proposal
                if self.check_all_ready():
                    self.check_convergence_and_learn_proposal()
                    if is_main_process():
                        self.i_learn += 1
    if last_n == self.max_samples:
        self.log.info("Reached maximum number of accepted steps allowed. "
                      "Stopping.")
    # Make sure the last batch of samples ( < output_every (not in sec)) are written
    self.collection.out_update()
    if more_than_one_process():
        Ns = (lambda x: np.array(get_mpi_comm().gather(x)))(self.n())
        if not is_main_process():
            Ns = []
    else:
        Ns = [self.n()]
    self.mpi_info("Sampling complete after %d accepted steps.", sum(Ns))
def _load_covmat(self, prefer_load_old, auto_params=None):
    if prefer_load_old and os.path.exists(self.covmat_filename()):
        if is_main_process():
            covmat = np.atleast_2d(np.loadtxt(self.covmat_filename()))
        else:
            covmat = None
        covmat = share_mpi(covmat)
        self.mpi_info("Covariance matrix from previous sample.")
        return covmat, []
    else:
        return share_mpi(self.initial_proposal_covmat(auto_params=auto_params)
                         if is_main_process() else None)
def lock_error(self):
    if not self.has_lock():
        assert self.lock_error_file
        try:
            # make lock_err so process holding lock can check
            # another process had an error
            with open(self.lock_error_file, 'wb'):
                pass
        except OSError:
            pass
    if mpi.get_mpi():
        import mpi4py
    else:
        mpi4py = None
    if mpi.is_main_process() and use_portalocker() is None:
        self.log.warning('install "portalocker" for better file lock control.')
    raise LoggedError(self.log,
                      "File %s is locked.\nYou may be running multiple jobs with "
                      "the same output when you intended to run with MPI. "
                      "Check that mpi4py is correctly installed and "
                      "configured (using the same mpi as mpirun/mpiexec); "
                      "e.g. try the test at\n"
                      "https://cobaya.readthedocs.io/en/latest/installation."
                      "html#mpi-parallelization-optional-but-encouraged\n" +
                      ("Your current mpi4py config is:\n %s" % mpi4py.get_config()
                       if mpi4py is not None
                       else "mpi4py is NOT currently installed."),
                      self.lock_file)
def reload_updated_info(self, cache=False,
                        use_cache=False) -> Optional[InputDict]:
    if mpi.is_main_process():
        if use_cache and hasattr(self, "_old_updated_info"):
            return self._old_updated_info
        try:
            if os.path.isfile(self.dump_file_updated):
                loaded = load_info_dump(self.dump_file_updated)
            else:
                loaded = yaml_load_file(self.file_updated)  # type: ignore
            if cache:
                self._old_updated_info = deepcopy_where_possible(loaded)
            return loaded
        except IOError:
            if cache:
                self._old_updated_info = None
            return None
    else:
        # Only the cached version is available in non-main processes
        if not use_cache:
            raise LoggedError(self.log, "Cannot call `reload_updated_info` from "
                                        "non-main process unless cached version "
                                        "(`use_cache=True`) requested.")
        return getattr(self, "_old_updated_info", None)
def products(self):
    r"""
    Returns a dictionary containing:

    - ``minimum``: :class:`OnePoint` that maximizes the posterior or likelihood
      (depending on ``ignore_prior``).

    - ``result_object``: instance of the results class of
      `scipy <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html>`_
      or `pyBOBYQA
      <https://numericalalgorithmsgroup.github.io/pybobyqa/build/html/userguide.html>`_.

    - ``M``: inverse of the affine transform matrix (see below).
      ``None`` if no transformation applied.

    - ``X0``: offset of the affine transform matrix (see below).
      ``None`` if no transformation applied.

    If non-trivial ``M`` and ``X0`` are returned, this means that the minimizer has
    been working on an affine-transformed parameter space :math:`x^\prime`, from
    which the real space points can be obtained as :math:`x = M x^\prime + X_0`.
    This inverse transformation needs to be applied to the coordinates appearing
    inside the ``result_object``.
    """
    if is_main_process():
        return {"minimum": self.minimum, "result_object": self.result,
                "M": self._inv_affine_transform_matrix,
                "X0": self._affine_transform_baseline}
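# A minimal sketch (not part of the module) of how the affine back-transformation
# described in the docstring above could be applied to a point taken from
# ``result_object``. The helper name ``to_real_space`` and the variable ``x_prime``
# are illustrative, not part of the original code.
import numpy as np


def to_real_space(products, x_prime):
    """Map a point from the minimizer's transformed space back to parameter space."""
    M, X0 = products["M"], products["X0"]
    if M is None:
        # No affine transformation was applied
        return np.asarray(x_prime)
    # x = M x' + X_0, as stated in the docstring
    return np.asarray(M).dot(x_prime) + X0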
def check_force_resume(cls, output, info=None):
    """
    Performs the necessary checks on existing files if resuming or forcing
    (including deleting some output files when forcing).
    """
    if not output:
        return
    resuming: Optional[bool]
    if mpi.is_main_process():
        resuming = False
        if output.force:
            cls.delete_output_files(output, info=info)
        elif any(find_with_regexp(regexp, root or output.folder)
                 for (regexp, root) in cls.output_files_regexps(
                     output=output, info=info, minimal=True)):
            if output.is_resuming():
                output.log.info("Found an old sample. Resuming.")
                resuming = True
            else:
                raise LoggedError(
                    output.log,
                    "Delete the previous output manually, automatically "
                    "('-%s', '--%s', '%s: True')" % ("force"[0], "force", "force") +
                    " or request resuming ('-%s', '--%s', '%s: True')" %
                    ("resume"[0], "resume", "resume"))
        else:
            if output.is_resuming():
                output.log.info(
                    "Did not find an old sample. Cleaning up and starting anew.")
            # Clean up old files, and set resuming=False, regardless of requested value
            cls.delete_output_files(output, info=info)
    else:
        resuming = None
    output.set_resuming(resuming)
def test_minimize_gaussian(tmpdir):
    maxloglik = 0
    for method in reversed(valid_methods):
        NoisyCovLike.noise = 0.005 if method == 'bobyqa' else 0
        info: InputDict = {'likelihood': {'like': NoisyCovLike},
                           "sampler": {"minimize": {"ignore_prior": True,
                                                    "method": method}}}
        products = run(info).sampler.products()
        error = abs(maxloglik - -products["minimum"]["minuslogpost"])
        assert error < 0.01
        info['output'] = os.path.join(tmpdir, 'testmin')
        products = run(info, force=True).sampler.products()
        if mpi.is_main_process():
            from getdist.types import BestFit
            res = BestFit(info['output'] + '.bestfit').getParamDict()
            assert np.isclose(res["loglike"], products["minimum"]["minuslogpost"])
            for p, v in list(res.items())[:-2]:
                assert np.isclose(products["minimum"][p], v)
def random_cov(ranges, O_std_min=1e-2, O_std_max=1, n_modes=1, mpi_warn=True):
    """
    Returns a random covariance matrix, with standard deviations sampled
    log-uniformly from the length of the parameter ranges times ``O_std_min`` and
    ``O_std_max``, and uniformly sampled correlation coefficients between
    ``rho_min`` and ``rho_max``.

    The output of this function can be used directly as the value of the option
    ``cov`` of the :class:`likelihoods.gaussian`.

    If ``n_modes>1``, returns a list of such matrices.
    """
    if not is_main_process() and mpi_warn:
        print("WARNING! Using with MPI: "
              "different processes will produce different random results.")
    dim = len(ranges)
    scales = np.array([r[1] - r[0] for r in ranges])
    cov = []
    for i in range(n_modes):
        stds = scales * 10 ** (uniform.rvs(size=dim, loc=np.log10(O_std_min),
                                           scale=np.log10(O_std_max / O_std_min)))
        this_cov = np.diag(stds).dot(
            (random_correlation.rvs(dim * stds / sum(stds)) if dim > 1 else np.eye(1))
            .dot(np.diag(stds)))
        # Symmetrize (numerical noise is usually introduced in the last step)
        cov += [(this_cov + this_cov.T) / 2]
    if n_modes == 1:
        cov = cov[0]
    return cov
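# Illustrative usage sketch (not part of the module): draw a random mean and
# covariance for two parameters and place them in a ``gaussian_mixture`` likelihood
# block, mirroring what ``info_random_gaussian_mixture`` below does. The parameter
# ranges are made up for the example.
ranges = [[0., 1.], [-0.5, 0.5]]
mean = random_mean(ranges, mpi_warn=False)
cov = random_cov(ranges, O_std_min=1e-2, O_std_max=0.5, mpi_warn=False)
info_like = {"gaussian_mixture": {"means": [mean], "covs": [cov]}}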
def write_checkpoint(self):
    if is_main_process() and self.output:
        checkpoint_filename = self.checkpoint_filename()
        self.dump_covmat(self.proposer.get_covariance())
        checkpoint_info = {"sampler": {self.get_name(): dict([
            ("converged", self.converged),
            ("Rminus1_last", self.Rminus1_last),
            ("burn_in", (self.burn_in.value  # initial: repeat burn-in if not finished
                         if not self.n() and self.burn_in_left else
                         0)),  # to avoid overweighting last point of prev. run
            ("mpi_size", get_mpi_size())])}}
        yaml_dump_file(checkpoint_filename, checkpoint_info, error_if_exists=False)
        if not self.progress.empty:
            with open(self.progress_filename(), "a",
                      encoding="utf-8") as progress_file:
                fmts = {"N": lambda x: "{:9d}".format(x)}
                # TODO: next one is ignored when added to the dict
                # "acceptance_rate": lambda x: "{:15.8g}".format(x)}
                progress_file.write(self.progress.tail(1).to_string(
                    header=False, index=False, formatters=fmts) + "\n")
        self.log.debug("Dumped checkpoint and progress info, and current covmat.")
def bib_script():
    from cobaya.mpi import is_main_process
    if not is_main_process():
        return
    warn_deprecation()
    # Parse arguments and launch
    import argparse
    parser = argparse.ArgumentParser(
        prog="cobaya bib",
        description="Prints bibliography to be cited for a component or input file.")
    parser.add_argument(
        "components_or_files", action="store", nargs="+",
        metavar="component_name or input_file.yaml",
        help="Component(s) or input file(s) whose bib info is requested.")
    kind_opt, kind_opt_ishort = "kind", 0
    parser.add_argument(
        "-" + kind_opt[kind_opt_ishort], "--" + kind_opt, action="store", nargs=1,
        default=None, metavar="component_kind",
        help=("If a component name is given, kind of component whose bib is "
              "requested: " + ", ".join(['%s' % kind for kind in kinds]) + ". " +
              "Use only when the component name is not unique "
              "(otherwise it would fail)."))
    arguments = parser.parse_args()
    # Case of files
    are_yaml = [(os.path.splitext(f)[1] in _yaml_extensions)
                for f in arguments.components_or_files]
    if all(are_yaml):
        infos = [load_input(f) for f in arguments.components_or_files]
        print(prettyprint_bib(*get_bib_info(*infos)))
    elif not any(are_yaml):
        if arguments.kind:
            arguments.kind = arguments.kind[0].lower()
        for component in arguments.components_or_files:
            try:
                print(create_banner(component, symbol=_default_symbol,
                                    length=_default_length))
                print(get_bib_component(component, arguments.kind))
                return
            except Exception:
                if not arguments.kind:
                    print("Specify its kind with '--%s [component_kind]' " % kind_opt +
                          "(NB: all requested components must have the same kind, "
                          "or be requested separately).")
                print("")
    else:
        print("Give either a list of input yaml files, "
              "or of component names (not a mix of them).")
        return 1
    return
def run(self):
    """
    Prepares the posterior function and calls ``PolyChord``'s ``run`` function.
    """
    # Prepare the polychord likelihood
    def loglikelihood(params_values):
        result = self.model.logposterior(params_values)
        loglikes = result.loglikes
        if len(loglikes) != self.n_likes:
            loglikes = np.full(self.n_likes, np.nan)
        derived = result.derived
        if len(derived) != self.n_derived:
            derived = np.full(self.n_derived, np.nan)
        derived = list(derived) + list(result.logpriors) + list(loglikes)
        return max(loglikes.sum(), self.pc_settings.logzero), derived

    def prior(cube):
        theta = np.empty_like(cube)
        for i, xi in enumerate(np.array(cube)[self.ordering]):
            theta[i] = self.model.prior.pdf[i].ppf(xi)
        return theta

    if is_main_process():
        self.dump_paramnames(self.raw_prefix)
    sync_processes()
    self.mpi_info("Calling PolyChord...")
    self.pc.run_polychord(loglikelihood, self.nDims, self.nDerived,
                          self.pc_settings, prior, self.dumper)
    self.process_raw_output()
def _load_covmat(self, from_old_chain, default_not_found=None, auto_params=None):
    if from_old_chain and os.path.exists(self.covmat_filename()):
        if is_main_process():
            covmat = np.atleast_2d(np.loadtxt(self.covmat_filename()))
        else:
            covmat = None
        covmat = share_mpi(covmat)
        self.mpi_info("Covariance matrix from checkpoint.")
        return covmat, []
    elif default_not_found is not None:
        return default_not_found, []
    else:
        return share_mpi(self.initial_proposal_covmat(auto_params=auto_params)
                         if is_main_process() else None)
def info_random_gaussian_mixture(ranges, n_modes=1, input_params_prefix="",
                                 output_params_prefix="", O_std_min=1e-2,
                                 O_std_max=1, derived=False, mpi_aware=True):
    """
    Wrapper around ``random_mean`` and ``random_cov`` to generate the likelihood and
    parameter info for a random Gaussian.

    If ``mpi_aware=True``, it draws the random stuff only once, and communicates it
    to the rest of the MPI processes.
    """
    if is_main_process() or not mpi_aware:
        cov = random_cov(ranges, n_modes=n_modes, O_std_min=O_std_min,
                         O_std_max=O_std_max, mpi_warn=False)
        if n_modes == 1:
            cov = [cov]
        # Make sure it stays away from the edges
        mean = [[]] * n_modes
        for i in range(n_modes):
            std = np.sqrt(cov[i].diagonal())
            factor = 3
            ranges_mean = [[l[0] + factor * s, l[1] - factor * s]
                           for l, s in zip(ranges, std)]
            # If this implies min>max, take the centre
            ranges_mean = [(l if l[0] <= l[1] else 2 * [(l[0] + l[1]) / 2])
                           for l in ranges_mean]
            mean[i] = random_mean(ranges_mean, n_modes=1, mpi_warn=False)
    if mpi_aware:
        mean, cov = share_mpi((mean, cov) if is_main_process() else None)
    dimension = len(ranges)
    info = {kinds.likelihood: {"gaussian_mixture": {
        "means": mean, "covs": cov,
        _input_params_prefix: input_params_prefix,
        _output_params_prefix: output_params_prefix,
        "derived": derived}}}
    info[_params] = dict(
        # sampled
        [(input_params_prefix + "_%d" % i,
          {"prior": {"min": ranges[i][0], "max": ranges[i][1]},
           "latex": r"\alpha_{%i}" % i})
         for i in range(dimension)] +
        # derived
        ([[output_params_prefix + "_%d" % i,
           {"min": -3, "max": 3, "latex": r"\beta_{%i}" % i}]
          for i in range(dimension * n_modes)] if derived else []))
    return info
def reload_updated_info(self, *args, **kwargs):
    if is_main_process():
        return Output.reload_updated_info(self, *args, **kwargs)
    else:
        # Only the cached version is available in non-main processes
        if not kwargs.get("use_cache"):
            raise ValueError(
                "Cannot call `reload_updated_info` from non-main process "
                "unless cached version (`use_cache=True`) requested.")
        return self._old_updated_info
def products(self):
    """
    Auxiliary function to define what should be returned in a scripted call.

    Returns:
       The sample ``SampleCollection`` containing the accepted steps.
    """
    products = {"sample": self.collection}
    if is_main_process():
        products["progress"] = self.progress
    return products
def check_force_resume(cls, output, info=None):
    """
    Performs the necessary checks on existing files if resuming or forcing
    (including deleting some output files when forcing).
    """
    if output.is_resuming():
        if mpi.is_main_process():
            raise LoggedError(
                output.log, "Minimizer does not support resuming. "
                            "If you want to start over, force "
                            "('-f', '--force', 'force: True')")
    super().check_force_resume(output, info=info)
def delete_output_files(cls, output, info=None):
    if output and is_main_process():
        for (regexp, root) in cls.output_files_regexps(output, info=info):
            # Special case: CovmatSampler's may have been given a covmat with the
            # same name as the output one. In that case, don't delete it!
            if issubclass(cls, CovmatSampler) and info:
                if regexp.pattern.rstrip("$").endswith(_covmat_extension):
                    covmat_file = info.get("covmat", "")
                    if (isinstance(covmat_file, str) and covmat_file ==
                            getattr(regexp.match(covmat_file), "group",
                                    lambda: None)()):
                        continue
            output.delete_with_regexp(regexp, root)
def check_all_ready(self):
    """
    Checks if the chain(s) is(/are) ready to check convergence and, if requested,
    learn a new covariance matrix for the proposal distribution.
    """
    msg_ready = ("Ready to check convergence" +
                 (" and learn a new proposal covmat" if self.learn_proposal else ""))
    n = len(self.collection)
    # If *just* (weight==1) got ready to check+learn
    if not (n % self.learn_every.value) and n > 0:
        self.log.info("Learn + convergence test @ %d samples accepted.", n)
        if more_than_one_process():
            self.been_waiting += 1
            if self.been_waiting > self.max_waiting:
                self.send_error_signal()
                raise LoggedError(
                    self.log, "Waiting for too long for all chains to be ready. "
                              "Maybe one of them is stuck or died unexpectedly?")
        self.model.dump_timing()
        # If not MPI size > 1, we are ready
        if not more_than_one_process():
            self.log.debug(msg_ready)
            return True
        # Error check in case any process already sent an error signal
        self.check_error_signal()
        # If MPI, tell the rest that we are ready -- we use a "gather"
        # ("reduce" was problematic), but we are in practice just pinging
        if not hasattr(self, "req"):  # just once!
            self.all_ready = np.empty(get_mpi_size())
            self.req = get_mpi_comm().Iallgather(np.array([1.]), self.all_ready)
            self.log.info(msg_ready + " (waiting for the rest...)")
    # If all processes are ready to learn (= communication finished)
    if self.req.Test() if hasattr(self, "req") else False:
        # Sanity check: actually all processes have finished
        assert np.all(self.all_ready == 1), (
            "This should not happen! Notify the developers. (Got %r)", self.all_ready)
        if more_than_one_process() and is_main_process():
            self.log.info("All chains are r" + msg_ready[1:])
        delattr(self, "req")
        self.been_waiting = 0
        # Another error check, in case the error occurred after sending "ready" signal
        self.check_error_signal()
        # Just in case, a barrier here
        sync_processes()
        return True
    return False
def info_random_gaussian_mixture(ranges, n_modes=1, input_params_prefix="",
                                 output_params_prefix="", O_std_min=1e-2,
                                 O_std_max=1, derived=False, mpi_aware=True,
                                 random_state=None):
    """
    Wrapper around ``random_mean`` and ``random_cov`` to generate the likelihood and
    parameter info for a random Gaussian.

    If ``mpi_aware=True``, it draws the random stuff only once, and communicates it
    to the rest of the MPI processes.
    """
    cov: Any
    mean: Any
    if is_main_process() or not mpi_aware:
        cov = random_cov(ranges, n_modes=n_modes, O_std_min=O_std_min,
                         O_std_max=O_std_max, mpi_warn=False,
                         random_state=random_state)
        if n_modes == 1:
            cov = [cov]
        # Make sure it stays away from the edges
        mean = [[]] * n_modes
        for i in range(n_modes):
            std = np.sqrt(cov[i].diagonal())
            factor = 3
            ranges_mean = [[r[0] + factor * s, r[1] - factor * s]
                           for r, s in zip(ranges, std)]
            # If this implies min>max, take the centre
            ranges_mean = [(r if r[0] <= r[1] else 2 * [(r[0] + r[1]) / 2])
                           for r in ranges_mean]
            mean[i] = random_mean(ranges_mean, n_modes=1, mpi_warn=False,
                                  random_state=random_state)
    else:
        mean, cov = None, None
    if mpi_aware:
        mean, cov = share_mpi((mean, cov))
    dimension = len(ranges)
    info: InputDict = {
        "likelihood": {"gaussian_mixture": {
            "means": mean, "covs": cov,
            "input_params_prefix": input_params_prefix,
            "output_params_prefix": output_params_prefix,
            "derived": derived}},
        "params": dict(
            # sampled
            tuple((input_params_prefix + "_%d" % i,
                   {"prior": {"min": ranges[i][0], "max": ranges[i][1]},
                    "latex": r"\alpha_{%i}" % i})
                  for i in range(dimension)) +
            # derived
            (tuple((output_params_prefix + "_%d" % i,
                    {"latex": r"\beta_{%i}" % i})
                   for i in range(dimension * n_modes)) if derived else ()))}
    return info
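# Illustrative usage sketch (not part of the module): generate a random 2d Gaussian
# mixture likelihood and sample it with Cobaya's ``run``. The import path, the "a"
# prefix and the mcmc settings are assumptions made for the example only.
from cobaya.run import run  # assumed import path

example_info = info_random_gaussian_mixture(
    ranges=[[0., 1.], [0., 1.]], n_modes=1,
    input_params_prefix="a", mpi_aware=False)
example_info["sampler"] = {"mcmc": {"max_samples": 100}}
updated_info, sampler = run(example_info)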
def products(self):
    """
    Auxiliary function to define what should be returned in a scripted call.

    Returns:
       The sample ``SampleCollection`` containing the sequentially discarded live
       points.
    """
    if is_main_process():
        products = {"sample": self.collection,
                    "logZ": self.logZ, "logZstd": self.logZstd}
        if self.pc_settings.do_clustering:
            products.update({"clusters": self.clusters})
        return products
    else:
        return {}
def _set_rng(self):
    """
    Initialize random generator stream. For seeded runs, sets the state reproducibly.
    """
    # TODO: checkpointing save of self._rng.bit_generator.state per process
    if mpi.is_main_process():
        seed = getattr(self, "seed", None)
        if seed is not None:
            self.mpi_warning("This run has been SEEDED with seed %s", seed)
        ss = SeedSequence(seed)
        child_seeds = ss.spawn(mpi.size())
    else:
        child_seeds = None
    ss = mpi.scatter(child_seeds)
    self._entropy = ss.entropy  # stored for debugging / reproducibility
    self._rng = default_rng(ss)
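# A minimal standalone sketch (no MPI; the seed and the 4 hypothetical processes are
# made up) of the numpy ``SeedSequence`` pattern used in ``_set_rng`` above: one
# parent sequence is spawned into independent child sequences, one generator per
# process, so seeded runs stay reproducible while the streams remain independent.
from numpy.random import SeedSequence, default_rng

parent = SeedSequence(12345)                  # the user-supplied seed (or None)
children = parent.spawn(4)                    # one child sequence per process
rngs = [default_rng(ss) for ss in children]   # statistically independent streams
print([rng.standard_normal() for rng in rngs])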
def set_difflogmax():
    nonlocal difflogmax
    difflog = (collection_in[OutPar.minuslogpost].to_numpy(
        dtype=np.float64)[:len(collection_out)] -
               collection_out[OutPar.minuslogpost].to_numpy(dtype=np.float64))
    difflogmax = np.max(difflog)
    if abs(difflogmax) < 1:
        difflogmax = 0  # keep simple when e.g. very similar
    log.debug("difflogmax: %g", difflogmax)
    if mpi.more_than_one_process():
        difflogmax = max(mpi.allgather(difflogmax))
    if mpi.is_main_process():
        log.debug("Set difflogmax: %g", difflogmax)
    _weights = np.exp(difflog - difflogmax)
    importance_weights.extend(_weights)
    collection_out.reweight(_weights)
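# Minimal sketch, with made-up numbers, of the stabilized reweighting performed
# above: subtracting the maximum log-posterior difference before exponentiating keeps
# the importance weights finite (only their relative size matters for reweighting).
import numpy as np

difflog = np.array([700.2, 699.8, 695.0])  # hypothetical -logpost differences
difflogmax = np.max(difflog)
weights = np.exp(difflog - difflogmax)      # finite, instead of np.exp(700) -> inf
print(weights)                              # ~ [1., 0.67, 0.0055]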
def write_checkpoint(self):
    if is_main_process() and self.output:
        checkpoint_filename = self.checkpoint_filename()
        self.dump_covmat(self.proposer.get_covariance())
        checkpoint_info = {kinds.sampler: {self.get_name(): dict([
            ("converged", bool(self.converged)),
            ("Rminus1_last", self.Rminus1_last),
            ("burn_in", (self.burn_in.value  # initial: repeat burn-in if not finished
                         if not self.n() and self.burn_in_left else
                         0)),  # to avoid overweighting last point of prev. run
            ("mpi_size", get_mpi_size())])}}
        yaml_dump_file(checkpoint_filename, checkpoint_info, error_if_exists=False)
        if not self.progress.empty:
            with open(self.progress_filename(), "a",
                      encoding="utf-8") as progress_file:
                progress_file.write(self.progress.tail(1).to_string(
                    header=False, index=False) + "\n")
        self.log.debug("Dumped checkpoint and progress info, and current covmat.")
def random_mean(ranges, n_modes=1, mpi_warn=True):
    """
    Returns a uniformly sampled point (as an array) within a list of bounds
    ``ranges``.

    The output of this function can be used directly as the value of the option
    ``mean`` of the :class:`likelihoods.gaussian`.

    If ``n_modes>1``, returns an array of such points.
    """
    if not is_main_process() and mpi_warn:
        print("WARNING! Using with MPI: "
              "different processes will produce different random results.")
    mean = np.array([uniform.rvs(loc=r[0], scale=r[1] - r[0], size=n_modes)
                     for r in ranges])
    mean = mean.T
    if n_modes == 1:
        mean = mean[0]
    return mean
def initialize(self):
    """Imports the PolyChord sampler and prepares its arguments."""
    # Allow global import if no direct path specification
    allow_global = not self.path
    if not self.path and self.packages_path:
        self.path = self.get_path(self.packages_path)
    self.pc = self.is_installed(path=self.path, allow_global=allow_global)
    if not self.pc:
        raise NotInstalledError(
            self.log, "Could not find PolyChord. Check error message above. "
                      "To install it, run 'cobaya-install polychord --%s "
                      "[packages_path]'", _packages_path_arg)
    # Prepare arguments and settings
    from pypolychord.settings import PolyChordSettings
    self.n_sampled = len(self.model.parameterization.sampled_params())
    self.n_derived = len(self.model.parameterization.derived_params())
    self.n_priors = len(self.model.prior)
    self.n_likes = len(self.model.likelihood)
    self.nDims = self.model.prior.d()
    self.nDerived = (self.n_derived + self.n_priors + self.n_likes)
    if self.logzero is None:
        self.logzero = np.nan_to_num(-np.inf)
    if self.max_ndead == np.inf:
        self.max_ndead = -1
    self._quants_d_units = ["nlive", "max_ndead"]
    for p in self._quants_d_units:
        if getattr(self, p) is not None:
            setattr(self, p, NumberWithUnits(
                getattr(self, p), "d", scale=self.nDims, dtype=int).value)
    self._quants_nlive_units = ["nprior"]
    for p in self._quants_nlive_units:
        if getattr(self, p) is not None:
            setattr(self, p, NumberWithUnits(
                getattr(self, p), "nlive", scale=self.nlive, dtype=int).value)
    # Fill the automatic ones
    if getattr(self, "feedback", None) is None:
        values = {logging.CRITICAL: 0, logging.ERROR: 0, logging.WARNING: 0,
                  logging.INFO: 1, logging.DEBUG: 2}
        self.feedback = values[self.log.getEffectiveLevel()]
    # Prepare output folders and prefixes
    if self.output:
        self.file_root = self.output.prefix
        self.read_resume = self.output.is_resuming()
    else:
        output_prefix = share_mpi(
            hex(int(random() * 16 ** 6))[2:] if is_main_process() else None)
        self.file_root = output_prefix
        # dummy output -- no resume!
        self.read_resume = False
    self.base_dir = self.get_base_dir(self.output)
    self.raw_clusters_dir = os.path.join(self.base_dir, self._clusters_dir)
    self.output.create_folder(self.base_dir)
    if self.do_clustering:
        self.clusters_folder = self.get_clusters_dir(self.output)
        self.output.create_folder(self.clusters_folder)
    self.mpi_info("Storing raw PolyChord output in '%s'.", self.base_dir)
    # Exploiting the speed hierarchy
    if self.blocking:
        blocks, oversampling_factors = self.model.check_blocking(self.blocking)
    else:
        if self.measure_speeds:
            self.model.measure_and_set_speeds(n=self.measure_speeds)
        blocks, oversampling_factors = self.model.get_param_blocking_for_sampler(
            oversample_power=self.oversample_power)
    self.mpi_info("Parameter blocks and their oversampling factors:")
    max_width = len(str(max(oversampling_factors)))
    for f, b in zip(oversampling_factors, blocks):
        self.mpi_info("* %" + "%d" % max_width + "d : %r", f, b)
    # Save blocking in updated info, in case we want to resume
    self._updated_info["blocking"] = list(zip(oversampling_factors, blocks))
    blocks_flat = list(chain(*blocks))
    self.ordering = [blocks_flat.index(p)
                     for p in self.model.parameterization.sampled_params()]
    self.grade_dims = [len(block) for block in blocks]
    # Steps per block
    # NB: num_repeats is ignored by PolyChord when int "grade_frac" given,
    # so it needs to be applied by hand.
    # In num_repeats, `d` is interpreted as the dimension of each block
    self.grade_frac = [int(o * read_dnumber(self.num_repeats, dim_block))
                       for o, dim_block in zip(oversampling_factors, self.grade_dims)]
    # Assign settings
    pc_args = ["nlive", "num_repeats", "nprior", "do_clustering",
               "precision_criterion", "max_ndead", "boost_posterior", "feedback",
               "logzero", "posteriors", "equals", "compression_factor",
               "cluster_posteriors", "write_resume", "read_resume", "write_stats",
               "write_live", "write_dead", "base_dir", "file_root",
               "grade_frac", "grade_dims"]
    # As stated above, num_repeats is ignored, so let's not pass it
    pc_args.pop(pc_args.index("num_repeats"))
    self.pc_settings = PolyChordSettings(
        self.nDims, self.nDerived,
        seed=(self.seed if self.seed is not None else -1),
        **{p: getattr(self, p) for p in pc_args if getattr(self, p) is not None})
    # prior conversion from the hypercube
    bounds = self.model.prior.bounds(
        confidence_for_unbounded=self.confidence_for_unbounded)
    # Check if priors are bounded (nan's to inf)
    inf = np.where(np.isinf(bounds))
    if len(inf[0]):
        params_names = self.model.parameterization.sampled_params()
        params = [params_names[i] for i in sorted(list(set(inf[0])))]
        raise LoggedError(
            self.log, "PolyChord needs bounded priors, but the parameter(s) '%s' "
                      "is(are) unbounded.", "', '".join(params))
    locs = bounds[:, 0]
    scales = bounds[:, 1] - bounds[:, 0]
    # This function re-scales the parameters AND puts them in the right order
    self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales).tolist()
    # We will need the volume of the prior domain, since PolyChord divides by it
    self.logvolume = np.log(np.prod(scales))
    # Prepare callback function
    if self.callback_function is not None:
        self.callback_function_callable = get_external_function(
            self.callback_function)
        self.last_point_callback = 0
    # Prepare runtime live and dead points collections
    self.live = Collection(self.model, None, name="live",
                           initial_size=self.pc_settings.nlive)
    self.dead = Collection(self.model, self.output, name="dead")
    # Done!
    if is_main_process():
        self.log.debug("Calling PolyChord with arguments:")
        for p, v in inspect.getmembers(self.pc_settings, lambda a: not callable(a)):
            if not p.startswith("_"):
                self.log.debug("  %s: %s", p, v)
    self.mpi_info("Initialized!")
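# Minimal sketch, with made-up bounds and ordering, of the unit-hypercube mapping
# built above as ``self.pc_prior``: each PolyChord coordinate in [0, 1] is first
# re-ordered (to undo the speed-hierarchy blocking) and then linearly rescaled onto
# the corresponding prior interval.
import numpy as np

bounds = np.array([[0.0, 2.0], [-1.0, 1.0]])  # hypothetical prior bounds per parameter
ordering = [1, 0]                             # hypothetical block-induced re-ordering
locs, scales = bounds[:, 0], bounds[:, 1] - bounds[:, 0]
pc_prior = lambda x: (locs + np.array(x)[ordering] * scales).tolist()
print(pc_prior([0.25, 0.5]))                  # -> [1.0, -0.5]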
def process_raw_output(self):
    """
    Loads the sample of live points from ``PolyChord``'s raw output and writes it
    (if ``txt`` output requested).
    """
    if is_main_process():
        self.log.info("Loading PolyChord's results: samples and evidences.")
        self.dump_paramnames(self.raw_prefix)
        self.collection = self.save_sample(self.raw_prefix + ".txt", "1")
        # Load clusters, and save if output
        if self.pc_settings.do_clustering:
            self.clusters = {}
            clusters_raw_regexp = re.compile(
                re.escape(self.pc_settings.file_root + "_") + r"\d+\.txt")
            cluster_raw_files = sorted(find_with_regexp(
                clusters_raw_regexp,
                os.path.join(self.pc_settings.base_dir, self._clusters_dir),
                walk_tree=True))
            for f in cluster_raw_files:
                i = int(f[f.rfind("_") + 1:-len(".txt")])
                if self.output:
                    old_folder = self.output.folder
                    self.output.folder = self.clusters_folder
                sample = self.save_sample(f, str(i))
                if self.output:
                    self.output.folder = old_folder
                self.clusters[i] = {"sample": sample}
        # Prepare the evidence(s) and write to file
        pre = "log(Z"
        active = "(Still active)"
        with open(self.raw_prefix + ".stats", "r",
                  encoding="utf-8-sig") as statsfile:
            lines = [line for line in statsfile.readlines() if line.startswith(pre)]
        for line in lines:
            logZ, logZstd = [float(n.replace(active, ""))
                             for n in line.split("=")[-1].split("+/-")]
            component = line.split("=")[0].lstrip(pre + "_").rstrip(") ")
            if not component:
                self.logZ, self.logZstd = logZ, logZstd
            elif self.pc_settings.do_clustering:
                i = int(component)
                self.clusters[i]["logZ"], self.clusters[i]["logZstd"] = logZ, logZstd
        self.log.debug(
            "RAW log(Z) = %g +/- %g ; RAW Z in [%.8g, %.8g] (68%% C.L. log-gaussian)",
            self.logZ, self.logZstd,
            *[np.exp(self.logZ + n * self.logZstd) for n in [-1, 1]])
        self._correct_unphysical_fraction()
        if self.output:
            out_evidences = dict(logZ=self.logZ, logZstd=self.logZstd)
            if getattr(self, "clusters", None):
                out_evidences["clusters"] = {}
                for i in sorted(list(self.clusters)):
                    out_evidences["clusters"][i] = dict(
                        logZ=self.clusters[i]["logZ"],
                        logZstd=self.clusters[i]["logZstd"])
            fname = os.path.join(self.output.folder,
                                 self.output.prefix + _evidence_extension)
            yaml_dump_file(fname, out_evidences, comment="log-evidence",
                           error_if_exists=False)
    # TODO: try to broadcast the collections
    # if get_mpi():
    #     bcast_from_0 = lambda attrname: setattr(
    #         self, attrname,
    #         get_mpi_comm().bcast(getattr(self, attrname, None), root=0))
    #     map(bcast_from_0, ["collection", "logZ", "logZstd", "clusters"])
    if is_main_process():
        self.log.info("Finished! Raw PolyChord output stored in '%s', "
                      "with prefix '%s'",
                      self.pc_settings.base_dir, self.pc_settings.file_root)
        self.log.info(
            "log(Z) = %g +/- %g ; Z in [%.8g, %.8g] (68%% C.L. log-gaussian)",
            self.logZ, self.logZstd,
            *[np.exp(self.logZ + n * self.logZstd) for n in [-1, 1]])