def __init__(self, info_params, info_likelihood, info_prior=None, info_theory=None,
             modules=None, timing=None, allow_renames=True):
    """
    Stores deep copies of the given info blocks and initialises the model
    components: parameterization, prior and likelihood (+theory).

    :param info_params: parameters block of the input info.
    :param info_likelihood: likelihoods block (must be non-empty).
    :param info_prior: optional external priors block.
    :param info_theory: optional theory-code block.
    :param modules: path to installed external modules, if needed.
    :param timing: whether to track evaluation times.
    :param allow_renames: passed through to Parameterization.
    :raises HandledException: if no likelihood was requested.
    """
    self.log = logging.getLogger(self.__class__.__name__)
    # Keep our own deep copies, so later mutation by components does not
    # affect what we report back as the full input info
    self._full_info = {
        _params: deepcopy(info_params),
        _likelihood: deepcopy(info_likelihood)}
    if not self._full_info[_likelihood]:
        self.log.error("No likelihood requested!")
        raise HandledException
    # Strip the per-likelihood params blocks from the stored copy.
    # FIX: a likelihood spec may be None (e.g. {"one": None}) or may lack a
    # params block, so guard the pop instead of assuming both are present.
    for like in self._full_info[_likelihood].values():
        if like:
            like.pop(_params, None)
    for k, v in ((_prior, info_prior), (_theory, info_theory),
                 (_path_install, modules), (_timing, timing)):
        if v not in (None, {}):
            self._full_info[k] = deepcopy(v)
    self.parameterization = Parameterization(info_params, allow_renames=allow_renames)
    self.prior = Prior(self.parameterization, info_prior)
    self.likelihood = Likelihood(info_likelihood, self.parameterization, info_theory,
                                 modules=modules, timing=timing)
def __init__(self, info_params, info_likelihood, info_prior=None, info_theory=None,
             modules=None, timing=None, allow_renames=True):
    """
    Keeps (deep, where possible) copies of the input info blocks and builds
    the model components: parameterization, prior and likelihood (+theory).

    :raises LoggedError: if the likelihood block is empty.
    """
    self.set_logger(lowercase=True)
    # Mandatory blocks first: parameters and likelihoods
    self._updated_info = {
        _params: deepcopy_where_possible(info_params),
        _likelihood: deepcopy_where_possible(info_likelihood)}
    if not self._updated_info[_likelihood]:
        raise LoggedError(self.log, "No likelihood requested!")
    # Optional blocks: store a copy only when actually given
    optional_blocks = ((_prior, info_prior), (_theory, info_theory),
                       (_path_install, modules), (_timing, timing))
    for block_name, block_info in optional_blocks:
        if block_info not in (None, {}):
            self._updated_info[block_name] = deepcopy_where_possible(block_info)
    # Build components from the stored copies (not the caller's dicts)
    self.parameterization = Parameterization(
        self._updated_info[_params], allow_renames=allow_renames)
    self.prior = Prior(self.parameterization, self._updated_info.get(_prior, None))
    self.likelihood = Likelihood(
        self._updated_info[_likelihood], self.parameterization,
        self._updated_info.get(_theory), modules=modules, timing=timing)
def post(info, sample=None):
    """
    Post-processes a Monte Carlo sample: reweights it by adding/removing
    priors and likelihoods, and adds/removes derived parameters, as specified
    in the ``post`` block of ``info``.

    :param info: input info dict; must contain a ``post`` block with a
        "suffix" and optional "add"/"remove"/"skip"/"thin" entries.
    :param sample: optional Collection (or list of Collections) to process,
        used when no output prefix to load from is given.
    :return: tuple ``(info_out, {"sample": collection_out})``.
    :raises HandledException: on any invalid or inconsistent request.
    """
    logger_setup(info.get(_debug), info.get(_debug_file))
    log = logging.getLogger(__name__.split(".")[-1])
    try:
        info_post = info[_post]
    except KeyError:
        log.error("No 'post' block given. Nothing to do!")
        raise HandledException
    if get_mpi_rank():
        log.warning(
            "Post-processing is not yet MPI-able. Doing nothing for rank > 1 processes.")
        return
    # 1. Load existing sample
    output_in = Output(output_prefix=info.get(_output_prefix), resume=True)
    info_in = load_input(output_in.file_full) if output_in else deepcopy(info)
    dummy_model_in = DummyModel(info_in[_params], info_in[_likelihood],
                                info_in.get(_prior, None), info_in.get(_theory, None))
    if output_in:
        # Load and concatenate numbered chains until one is missing
        i = 0
        while True:
            try:
                collection = Collection(
                    dummy_model_in, output_in, name="%d" % (1 + i), load=True,
                    onload_skip=info_post.get("skip", 0),
                    onload_thin=info_post.get("thin", 1))
                if i == 0:
                    collection_in = collection
                else:
                    collection_in._append(collection)
                i += 1
            except IOError:
                break
    elif sample:
        if isinstance(sample, Collection):
            sample = [sample]
        collection_in = deepcopy(sample[0])
        for s in sample[1:]:
            try:
                collection_in._append(s)
            # FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit
            except Exception:
                log.error("Failed to load some of the input samples.")
                raise HandledException
        i = len(sample)
    else:
        log.error(
            "Not output from where to load from or input collections given.")
        raise HandledException
    log.info("Loaded %d chain%s. Will process %d samples.",
             i, "s" if i - 1 else "", collection_in.n())
    if collection_in.n() <= 1:
        log.error(
            "Not enough samples for post-processing. Try using a larger sample, "
            "or skipping or thinning less.")
        raise HandledException
    # 2. Compare old and new info: determine what to do
    add = info_post.get("add", {})
    remove = info_post.get("remove", {})
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get(_likelihood):
        add[_likelihood] = odict()
    add[_likelihood].update({"one": None})
    # Expand the "add" info
    add = get_full_info(add)
    # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters
    out = {_params: deepcopy(info_in[_params])}
    for p in remove.get(_params, {}):
        pinfo = info_in[_params].get(p)
        if pinfo is None or not is_derived_param(pinfo):
            log.error(
                "You tried to remove parameter '%s', which is not a derived paramter. "
                "Only derived parameters can be removed during post-processing.", p)
            raise HandledException
        out[_params].pop(p)
    mlprior_names_add = []
    for p, pinfo in add.get(_params, {}).items():
        pinfo_in = info_in[_params].get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    log.error(
                        "You added a new sampled parameter %r (maybe accidentaly "
                        "by adding a new likelihood that depends on it). "
                        "Adding new sampled parameters is not possible. Try fixing "
                        "it to some value.", p)
                    raise HandledException
                else:
                    log.error(
                        "You tried to change the prior of parameter '%s', "
                        "but it was not a sampled parameter. "
                        "To change that prior, you need to define as an external one.", p)
                    raise HandledException
            # A changed prior means the 1d separable prior must be recomputed.
            # FIX: the original compared a list slice against the bare string
            # `_prior_1d_name` (always unequal, so the name was prepended once
            # per re-prior'd parameter). Compare against the prefixed name, as
            # the `prior_recompute_1d` check below does.
            if mlprior_names_add[:1] != [_minuslogprior + _separator + _prior_1d_name]:
                mlprior_names_add = (
                    [_minuslogprior + _separator + _prior_1d_name] + mlprior_names_add)
        elif is_derived_param(pinfo):
            if p in out[_params]:
                log.error(
                    "You tried to add derived parameter '%s', which is already "
                    "present. To force its recomputation, 'remove' it too.", p)
                raise HandledException
        elif is_fixed_param(pinfo):
            # Only one possibility left "fixed" parameter that was not present before:
            # input of new likelihood, or just an argument for dynamical derived (dropped)
            if ((p in info_in[_params] and
                 pinfo[_p_value] != (pinfo_in or {}).get(_p_value, None))):
                log.error(
                    "You tried to add a fixed parameter '%s: %r' that was already present"
                    " but had a different value or was not fixed. This is not allowed. "
                    "The old info of the parameter was '%s: %r'",
                    p, dict(pinfo), p, dict(pinfo_in))
                raise HandledException
        else:
            log.error("This should not happen. Contact the developers.")
            raise HandledException
        out[_params][p] = pinfo
    # For the likelihood only, turn the rest of *derived* parameters into constants,
    # so that the likelihoods do not try to compute them)
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "full info" turns into "derived: 'lambda [x]: [x]'")
    out_params_like = deepcopy(out[_params])
    for p, pinfo in out_params_like.items():
        if ((is_derived_param(pinfo) and not (_p_value in pinfo) and
             p not in add.get(_params, {}))):
            out_params_like[p] = {_p_value: np.nan, _p_drop: True}
    parameterization_like = Parameterization(out_params_like, ignore_unused_sampled=True)
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    for level in [_prior, _likelihood]:
        out[level] = getattr(dummy_model_in, level)
        if level == _prior:
            out[level].remove(_prior_1d_name)
        for pdf in info_post.get("remove", {}).get(level, []) or []:
            try:
                out[level].remove(pdf)
                warn_remove = True
            except ValueError:
                log.error(
                    "Trying to remove %s '%s', but it is not present. "
                    "Existing ones: %r", level, pdf, out[level])
                raise HandledException
    if warn_remove:
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")
    if _prior in add:
        mlprior_names_add += [
            _minuslogprior + _separator + name for name in add[_prior]]
        out[_prior] += list(add[_prior])
    prior_recompute_1d = (mlprior_names_add[:1] == [
        _minuslogprior + _separator + _prior_1d_name])
    # Don't initialise the theory code if not adding/recomputing theory,
    # theory-derived params or likelihoods
    recompute_theory = info_in.get(_theory) and not (
        list(add[_likelihood]) == ["one"] and
        not any([is_derived_param(pinfo) for pinfo in add.get(_params, {}).values()]))
    if recompute_theory:
        # Inherit from the original chain (needs input|output_params, renames, etc
        theory = list(info_in[_theory].keys())[0]
        info_theory_out = odict([[
            theory, recursive_update(deepcopy(info_in[_theory][theory]),
                                     add.get(_theory, {theory: {}})[theory])]])
    else:
        info_theory_out = None
    # FIX (x2): `is not "one"` compared identity with a string literal
    # (implementation-dependent, SyntaxWarning on modern Python); use `!=`.
    chi2_names_add = [_chi2 + _separator + name for name in add[_likelihood]
                      if name != "one"]
    out[_likelihood] += [l for l in add[_likelihood] if l != "one"]
    if recompute_theory:
        # FIX: Logger.warn is a deprecated alias of Logger.warning
        log.warning(
            "You are recomputing the theory, but in the current version this does "
            "not force recomputation of any likelihood or derived parameter, "
            "unless explicitly removed+added.")
    # Reject duplicates among the resulting priors/likelihoods
    for level in [_prior, _likelihood]:
        for i, x_i in enumerate(out[level]):
            if x_i in list(out[level])[i + 1:]:
                log.error(
                    "You have added %s '%s', which was already present. If you "
                    "want to force its recomputation, you must also 'remove' it.",
                    level, x_i)
                raise HandledException
    # 3. Create output collection
    if "suffix" not in info_post:
        log.error("You need to provide a 'suffix' for your chains.")
        raise HandledException
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get(_output_prefix, info.get(_output_prefix))
    if out_prefix not in [None, False]:
        out_prefix += "_" + _post + "_" + info_post["suffix"]
    output_out = Output(output_prefix=out_prefix, force_output=info.get(_force))
    info_out = deepcopy(info)
    info_out[_post] = info_post
    # Updated with input info and extended (full) add info
    info_out.update(info_in)
    info_out[_post]["add"] = add
    dummy_model_out = DummyModel(out[_params], out[_likelihood],
                                 info_prior=out[_prior])
    if recompute_theory:
        theory = list(info_theory_out.keys())[0]
        if _input_params not in info_theory_out[theory]:
            log.error(
                "You appear to be post-processing a chain generated with an older "
                "version of Cobaya. For post-processing to work, please edit the "
                "'[root]__full.info' file of the original chain to add, inside the "
                "theory code block, the list of its input parameters. E.g.\n----\n"
                "theory:\n %s:\n input_params: [param1, param2, ...]\n"
                "----\nIf you get strange errors later, it is likely that you did not "
                "specify the correct set of theory parameters.\n"
                "The full set of input parameters are %s.",
                theory, list(dummy_model_out.parameterization.input_params()))
            raise HandledException
    prior_add = Prior(dummy_model_out.parameterization, add.get(_prior))
    likelihood_add = Likelihood(add[_likelihood], parameterization_like,
                                info_theory=info_theory_out,
                                modules=info.get(_path_install))
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post]["add"]
    add[_likelihood].pop("one")
    if likelihood_add.theory:
        # Make sure that theory.needs is called at least once, for adjustments
        likelihood_add.theory.needs()
    collection_out = Collection(dummy_model_out, output_out, name="1")
    output_out.dump_info({}, info_out)
    # 4. Main loop!
    log.info("Running post-processing...")
    last_percent = 0
    for i, point in enumerate(collection_in.data.itertuples()):
        log.debug("Point: %r", point)
        sampled = [getattr(point, param)
                   for param in dummy_model_in.parameterization.sampled_params()]
        derived = odict(
            [[param, getattr(point, param, None)]
             for param in dummy_model_out.parameterization.derived_params()])
        inputs = odict([[
            param, getattr(
                point, param,
                dummy_model_in.parameterization.constant_params().get(
                    param,
                    dummy_model_out.parameterization.constant_params().get(
                        param, None)))]
            for param in dummy_model_out.parameterization.input_params()])
        # Solve inputs that depend on a function and were not saved
        # (we don't use the Parameterization_to_input method in case there are references
        # to functions that cannot be loaded at the moment)
        for p, value in inputs.items():
            if value is None:
                func = dummy_model_out.parameterization._input_funcs[p]
                args = dummy_model_out.parameterization._input_args[p]
                inputs[p] = func(*[getattr(point, arg) for arg in args])
        # Add/remove priors
        priors_add = prior_add.logps(sampled)
        if not prior_recompute_1d:
            priors_add = priors_add[1:]
        logpriors_add = odict(zip(mlprior_names_add, priors_add))
        logpriors_new = [logpriors_add.get(name, -getattr(point, name, 0))
                         for name in collection_out.minuslogprior_names]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New set of priors: %r",
                      dict(zip(dummy_model_out.prior, logpriors_new)))
        if -np.inf in logpriors_new:
            continue
        # Add/remove likelihoods
        output_like = []
        if likelihood_add:
            # Notice "one" (last in likelihood_add) is ignored: not in chi2_names
            loglikes_add = odict(
                zip(chi2_names_add,
                    likelihood_add.logps(inputs, _derived=output_like)))
            output_like = dict(zip(likelihood_add.output_params, output_like))
        else:
            loglikes_add = dict()
        loglikes_new = [loglikes_add.get(name, -0.5 * getattr(point, name, 0))
                        for name in collection_out.chi2_names]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New set of likelihoods: %r",
                      dict(zip(dummy_model_out.likelihood, loglikes_new)))
            if output_like:
                log.debug("New set of likelihood-derived parameters: %r", output_like)
        if -np.inf in loglikes_new:
            continue
        # Add/remove derived parameters and change priors of sampled parameters
        for p in add[_params]:
            if p in dummy_model_out.parameterization._directly_output:
                derived[p] = output_like[p]
            elif p in dummy_model_out.parameterization._derived_funcs:
                func = dummy_model_out.parameterization._derived_funcs[p]
                args = dummy_model_out.parameterization._derived_args[p]
                derived[p] = func(*[getattr(point, arg, output_like.get(arg, None))
                                    for arg in args])
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug(
                "New derived parameters: %r",
                dict([[p, derived[p]]
                      for p in dummy_model_out.parameterization.derived_params()
                      if p in add[_params]]))
        # Save to the collection (keep old weight for now)
        collection_out.add(sampled, derived=derived.values(),
                           weight=getattr(point, _weight),
                           logpriors=logpriors_new, loglikes=loglikes_new)
        # Display progress
        percent = np.round(i / collection_in.n() * 100)
        if percent != last_percent and not percent % 5:
            last_percent = percent
            progress_bar(log, percent, " (%d/%d)" % (i, collection_in.n()))
    if not collection_out.data.last_valid_index():
        log.error(
            "No elements in the final sample. Possible causes: "
            "added a prior or likelihood valued zero over the full sampled domain, "
            "or the computation of the theory failed everywhere, etc.")
        raise HandledException
    # Reweight -- account for large dynamic range!
    # Prefer to rescale +inf to finite, and ignore final points with -inf.
    # Remove -inf's (0-weight), and correct indices
    difflogmax = max(collection_in[_minuslogpost] - collection_out[_minuslogpost])
    collection_out.data[_weight] *= np.exp(
        collection_in[_minuslogpost] - collection_out[_minuslogpost] - difflogmax)
    collection_out.data = (
        collection_out.data[collection_out.data.weight > 0].reset_index(drop=True))
    collection_out._n = collection_out.data.last_valid_index() + 1
    # Write!
    collection_out._out_update()
    log.info("Finished! Final number of samples: %d", collection_out.n())
    return info_out, {"sample": collection_out}
def body_of_test(info_logpdf, kind, tmpdir, derived=False, manual=False):
    """
    Runs a short MCMC over a 2d box prior on (x, y), with the given external
    pdfs attached either as external priors or as likelihoods (per ``kind``),
    and checks that the recorded log-prior/likelihood/posterior values, the
    derived parameters, and the updated info (both scripted and YAML-dumped)
    are reproduced correctly.

    ``info_logpdf`` maps pdf names to callables or to string (lambda) pdfs;
    ``kind`` is either the prior or the likelihood block name;
    ``derived=True`` also checks the (r, theta) derived parameters;
    ``manual=True`` runs a longer chain (5000 vs 10 samples).
    """
    # For pytest's handling of tmp dirs
    if hasattr(tmpdir, "dirpath"):
        tmpdir = tmpdir.dirname
    # Random per-run output prefix, so parallel/repeated runs don't collide
    prefix = os.path.join(tmpdir, "%d" % round(1e8 * random())) + os.sep
    if os.path.exists(prefix):
        shutil.rmtree(prefix)
    # build updated info
    info = {
        _output_prefix: prefix,
        _params: {
            "x": {_prior: {"min": 0, "max": 1}, "proposal": 0.05},
            "y": {_prior: {"min": -1, "max": 1}, "proposal": 0.05}},
        kinds.sampler: {
            "mcmc": {"max_samples": (10 if not manual else 5000),
                     "learn_proposal": False}}}
    if derived:
        # (r, theta) are derived from (x, y); limits only, no prior block
        info[_params].update({
            "r": {"min": 0, "max": 1},
            "theta": {"min": -0.5, "max": 0.5}})
    # Complete according to kind
    if kind == _prior:
        info.update({_prior: info_logpdf, kinds.likelihood: {"one": None}})
    elif kind == kinds.likelihood:
        info.update({kinds.likelihood: info_logpdf})
    else:
        raise ValueError("Kind of test not known.")
    # If there is an ext function that is not a string, don't write output!
    stringy = {k: v for k, v in info_logpdf.items() if isinstance(v, str)}
    if stringy != info_logpdf:
        info.pop(_output_prefix)
    # Run
    updated_info, sampler = run(info)
    products = sampler.products()
    # Test values
    # Log-density of the flat box prior on (x, y)
    logprior_base = -np.log(
        (info[_params]["x"][_prior]["max"] - info[_params]["x"][_prior]["min"]) *
        (info[_params]["y"][_prior]["max"] - info[_params]["y"][_prior]["min"]))
    # Recompute each external pdf over the sampled points, feeding it only
    # the columns its signature asks for
    logps = {
        name: logpdf(**{arg: products["sample"][arg].values
                        for arg in getfullargspec(logpdf)[0]})
        for name, logpdf in {"half_ring": half_ring_func,
                             "gaussian_y": gaussian_func}.items()}
    # Test #1: values of logpdf's
    if kind == _prior:
        columns_priors = [c for c in products["sample"].data.columns
                          if c.startswith("minuslogprior")]
        # First column is the total prior; the rest are its components
        assert np.allclose(
            products["sample"][columns_priors[0]].values,
            np.sum(products["sample"][columns_priors[1:]].values, axis=-1)), (
            "The single prior values do not add up to the total one.")
        assert np.allclose(
            logprior_base + sum(logps[p] for p in info_logpdf),
            -products["sample"]["minuslogprior"].values), (
            "The value of the total prior is not reproduced correctly.")
    elif kind == kinds.likelihood:
        for lik in info[kinds.likelihood]:
            assert np.allclose(
                -2 * logps[lik],
                products["sample"][_get_chi2_name(lik)].values), (
                "The value of the likelihood '%s' is not reproduced correctly." % lik)
    # In both cases the posterior is box prior + external pdfs
    # (for kind == prior the likelihood is the unit "one")
    assert np.allclose(
        logprior_base + sum(logps[p] for p in info_logpdf),
        -products["sample"]["minuslogpost"].values), (
        "The value of the posterior is not reproduced correctly.")
    # Test derived parameters, if present -- for now just for "r"
    if derived:
        derived_values = {
            param: func(**{arg: products["sample"][arg].values
                           for arg in ["x", "y"]})
            for param, func in derived_funcs.items()}
        assert all(
            np.allclose(v, products["sample"][p].values)
            for p, v in derived_values.items()), (
            "The value of the derived parameters is not reproduced correctly.")
    # Test updated info -- scripted
    if kind == _prior:
        assert info[_prior] == updated_info[_prior], (
            "The prior information has not been updated correctly.")
    elif kind == kinds.likelihood:
        # Transform the likelihood info to the "external" convention and add defaults
        info_likelihood = deepcopy(info[kinds.likelihood])
        for lik, value in list(info_likelihood.items()):
            if not hasattr(value, "get"):
                info_likelihood[lik] = {_external: value}
            info_likelihood[lik].update(
                {k: v for k, v in Likelihood.get_defaults().items()
                 if k not in info_likelihood[lik]})
            # Input/output params are not compared
            # NOTE(review): the second pop has no default — presumably the
            # updated info always carries these keys; confirm if it ever fails
            for k in [_input_params, _output_params]:
                info_likelihood[lik].pop(k, None)
                updated_info[kinds.likelihood][lik].pop(k)
        assert info_likelihood == updated_info[kinds.likelihood], (
            "The likelihood information has not been updated correctly\n %r vs %r"
            % (info_likelihood, updated_info[kinds.likelihood]))
    # Test updated info -- yaml
    # For now, only if ALL external pdfs are given as strings,
    # since the YAML load fails otherwise
    if stringy == info_logpdf:
        updated_output_file = os.path.join(prefix, _updated_suffix + ".yaml")
        with open(updated_output_file) as updated:
            updated_yaml = yaml_load("".join(updated.readlines()))
        for k, v in stringy.items():
            to_test = updated_yaml[kind][k]
            if kind == kinds.likelihood:
                to_test = to_test[_external]
            assert to_test == info_logpdf[k], (
                "The updated external pdf info has not been written correctly.")
def run(info):
    """
    Executes a full Cobaya run from an input info dictionary: configures
    logging, prepares the output handler, expands the info with module
    defaults, builds parametrization, prior, likelihoods and sampler, and
    runs the sampler.

    Returns a copy of the full (updated) info and the sampler's products.
    """
    assert hasattr(info, "items"), (
        "The agument of `run` must be a dictionary with the info needed for the run. "
        "If you were trying to pass an input file instead, load it first with "
        "`cobaya.input.load_input`.")
    # Names of the input blocks
    from cobaya.conventions import _likelihood, _prior, _params
    from cobaya.conventions import _theory, _sampler, _path_install
    from cobaya.conventions import _debug, _debug_file, _output_prefix
    # The logger must be configured before anything else logs
    from cobaya.log import logger_setup
    logger_setup(info.get(_debug), info.get(_debug_file))
    import logging
    # Dump the raw input at debug level; only when producing output, since
    # external functions in the info may not be YAML-serializable
    if logging.root.getEffectiveLevel() <= logging.DEBUG and info.get(_output_prefix):
        from cobaya.yaml import yaml_dump
        logging.getLogger(__name__.split(".")[-1]).debug(
            "Input info (dumped to YAML):\n%s", yaml_dump(info))
    # General classes, plus the MPI-wrapped output factory
    from cobaya.prior import Prior
    from cobaya.sampler import get_Sampler as Sampler
    from cobaya.mpi import import_MPI
    from cobaya.likelihood import LikelihoodCollection as Likelihood
    # Output handler: a real one when an output prefix was given, a dummy otherwise
    if info.get(_output_prefix):
        output = import_MPI(".output", "Output")(info)
    else:
        from cobaya.output import Output_dummy
        output = Output_dummy(info)
    # Expand the input with the defaults of every requested module
    from cobaya.input import get_full_info
    full_info = get_full_info(info)
    if logging.root.getEffectiveLevel() <= logging.DEBUG and info.get(_output_prefix):
        logging.getLogger(__name__.split(".")[-1]).debug(
            "Updated info (dumped to YAML):\n%s", yaml_dump(full_info))
    # Dump the info now, before module initialization can accidentally modify it
    output.dump_info(info, full_info)
    # Point to the installed external modules, if given
    from cobaya.tools import set_path_to_installation
    set_path_to_installation(info.get(_path_install))
    # Chain the context managers so everything is torn down in reverse order
    from cobaya.parametrization import Parametrization
    with Parametrization(full_info[_params]) as par, \
            Prior(par, full_info.get(_prior)) as prior, \
            Likelihood(full_info[_likelihood], par, full_info.get(_theory)) as lik, \
            Sampler(full_info[_sampler], par, prior, lik, output) as sampler:
        sampler.run()
    # Return a copy, so scripted callers cannot mutate our internal info
    return deepcopy(full_info), sampler.products()