Example #1
 def __init__(self,
              info_params,
              info_likelihood,
              info_prior=None,
              info_theory=None,
              modules=None,
              timing=None,
              allow_renames=True):
     self.log = logging.getLogger(self.__class__.__name__)
     self._full_info = {
         _params: deepcopy(info_params),
         _likelihood: deepcopy(info_likelihood)
     }
     if not self._full_info[_likelihood]:
         self.log.error("No likelihood requested!")
         raise HandledException
     for like in self._full_info[_likelihood].values():
         like.pop(_params, None)
     for k, v in ((_prior, info_prior), (_theory, info_theory),
                  (_path_install, modules), (_timing, timing)):
         if v not in (None, {}):
             self._full_info[k] = deepcopy(v)
     self.parameterization = Parameterization(info_params,
                                              allow_renames=allow_renames)
     self.prior = Prior(self.parameterization, info_prior)
     self.likelihood = Likelihood(info_likelihood,
                                  self.parameterization,
                                  info_theory,
                                  modules=modules,
                                  timing=timing)
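
# Hypothetical instantiation of the constructor above. The enclosing class name
# (`Model`) is an assumption for illustration only; the parameter and likelihood
# blocks follow the same dict conventions used in the other examples below.
info_params = {"x": {"prior": {"min": 0, "max": 1}, "proposal": 0.05}}
info_likelihood = {"gauss": {"external": "lambda x: -0.5 * ((x - 0.5) / 0.1) ** 2"}}

model = Model(info_params, info_likelihood, allow_renames=True)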
Example #2
 def __init__(self,
              info_params,
              info_likelihood,
              info_prior=None,
              info_theory=None,
              modules=None,
              timing=None,
              allow_renames=True):
     self.set_logger(lowercase=True)
     self._updated_info = {
         _params: deepcopy_where_possible(info_params),
         _likelihood: deepcopy_where_possible(info_likelihood)
     }
     if not self._updated_info[_likelihood]:
         raise LoggedError(self.log, "No likelihood requested!")
     for k, v in ((_prior, info_prior), (_theory, info_theory),
                  (_path_install, modules), (_timing, timing)):
         if v not in (None, {}):
             self._updated_info[k] = deepcopy_where_possible(v)
     self.parameterization = Parameterization(self._updated_info[_params],
                                              allow_renames=allow_renames)
     self.prior = Prior(self.parameterization,
                        self._updated_info.get(_prior, None))
     self.likelihood = Likelihood(self._updated_info[_likelihood],
                                  self.parameterization,
                                  self._updated_info.get(_theory),
                                  modules=modules,
                                  timing=timing)
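
# A sketch of the intent behind the `deepcopy_where_possible` helper used above
# (the real implementation in cobaya may differ): deep-copy whatever supports it,
# and keep a plain reference otherwise (e.g. external functions that cannot be
# copied or pickled).
from copy import deepcopy

def deepcopy_where_possible_sketch(value):
    if isinstance(value, dict):
        return {k: deepcopy_where_possible_sketch(v) for k, v in value.items()}
    try:
        return deepcopy(value)
    except Exception:
        return value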
Example #3
def post(info, sample=None):
    logger_setup(info.get(_debug), info.get(_debug_file))
    log = logging.getLogger(__name__.split(".")[-1])
    try:
        info_post = info[_post]
    except KeyError:
        log.error("No 'post' block given. Nothing to do!")
        raise HandledException
    if get_mpi_rank():
        log.warning(
            "Post-processing is not yet MPI-able. Doing nothing for rank > 1 processes."
        )
        return
    # 1. Load existing sample
    output_in = Output(output_prefix=info.get(_output_prefix), resume=True)
    info_in = load_input(output_in.file_full) if output_in else deepcopy(info)
    dummy_model_in = DummyModel(info_in[_params], info_in[_likelihood],
                                info_in.get(_prior, None),
                                info_in.get(_theory, None))
    if output_in:
        i = 0
        while True:
            try:
                collection = Collection(dummy_model_in,
                                        output_in,
                                        name="%d" % (1 + i),
                                        load=True,
                                        onload_skip=info_post.get("skip", 0),
                                        onload_thin=info_post.get("thin", 1))
                if i == 0:
                    collection_in = collection
                else:
                    collection_in._append(collection)
                i += 1
            except IOError:
                break
    elif sample:
        if isinstance(sample, Collection):
            sample = [sample]
        collection_in = deepcopy(sample[0])
        for s in sample[1:]:
            try:
                collection_in._append(s)
            except Exception:
                log.error("Failed to load some of the input samples.")
                raise HandledException
        i = len(sample)
    else:
        log.error(
            "Not output from where to load from or input collections given.")
        raise HandledException
    log.info("Loaded %d chain%s. Will process %d samples.", i,
             "s" if i - 1 else "", collection_in.n())
    if collection_in.n() <= 1:
        log.error(
            "Not enough samples for post-processing. Try using a larger sample, "
            "or skipping or thinning less.")
        raise HandledException
    # 2. Compare old and new info: determine what to do
    add = info_post.get("add", {})
    remove = info_post.get("remove", {})
    # Add a dummy 'one' likelihood, to absorb unused parameters
    if not add.get(_likelihood):
        add[_likelihood] = odict()
    add[_likelihood].update({"one": None})
    # Expand the "add" info
    add = get_full_info(add)
    # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters
    out = {_params: deepcopy(info_in[_params])}
    for p in remove.get(_params, {}):
        pinfo = info_in[_params].get(p)
        if pinfo is None or not is_derived_param(pinfo):
            log.error(
                "You tried to remove parameter '%s', which is not a derived paramter. "
                "Only derived parameters can be removed during post-processing.",
                p)
            raise HandledException
        out[_params].pop(p)
    mlprior_names_add = []
    for p, pinfo in add.get(_params, {}).items():
        pinfo_in = info_in[_params].get(p)
        if is_sampled_param(pinfo):
            if not is_sampled_param(pinfo_in):
                # No added sampled parameters (de-marginalisation not implemented)
                if pinfo_in is None:
                    log.error(
                        "You added a new sampled parameter %r (maybe accidentaly "
                        "by adding a new likelihood that depends on it). "
                        "Adding new sampled parameters is not possible. Try fixing "
                        "it to some value.", p)
                    raise HandledException
                else:
                    log.error(
                        "You tried to change the prior of parameter '%s', "
                        "but it was not a sampled parameter. "
                        "To change that prior, you need to define as an external one.",
                        p)
                    raise HandledException
            # Recompute the separable 1d prior only once, if the prior of a
            # sampled parameter may have changed
            if mlprior_names_add[:1] != [
                    _minuslogprior + _separator + _prior_1d_name]:
                mlprior_names_add = (
                    [_minuslogprior + _separator + _prior_1d_name] +
                    mlprior_names_add)
        elif is_derived_param(pinfo):
            if p in out[_params]:
                log.error(
                    "You tried to add derived parameter '%s', which is already "
                    "present. To force its recomputation, 'remove' it too.", p)
                raise HandledException
        elif is_fixed_param(pinfo):
            # Only one possibility left: a "fixed" parameter that was not present before,
            # i.e. input of a new likelihood, or just an argument for a dynamical
            # derived parameter (dropped)
            if ((p in info_in[_params] and pinfo[_p_value] !=
                 (pinfo_in or {}).get(_p_value, None))):
                log.error(
                    "You tried to add a fixed parameter '%s: %r' that was already present"
                    " but had a different value or was not fixed. This is not allowed. "
                    "The old info of the parameter was '%s: %r'", p,
                    dict(pinfo), p, dict(pinfo_in))
                raise HandledException
        else:
            log.error("This should not happen. Contact the developers.")
            raise HandledException
        out[_params][p] = pinfo
    # For the likelihood only, turn the rest of the *derived* parameters into constants,
    # so that the likelihoods do not try to compute them.
    # But be careful to exclude *input* params that have a "derived: True" value
    # (which in "full info" turns into "derived: 'lambda [x]: [x]'")
    out_params_like = deepcopy(out[_params])
    for p, pinfo in out_params_like.items():
        if ((is_derived_param(pinfo) and not (_p_value in pinfo)
             and p not in add.get(_params, {}))):
            out_params_like[p] = {_p_value: np.nan, _p_drop: True}
    parameterization_like = Parameterization(out_params_like,
                                             ignore_unused_sampled=True)
    # 2.2 Manage adding/removing priors and likelihoods
    warn_remove = False
    for level in [_prior, _likelihood]:
        out[level] = getattr(dummy_model_in, level)
        if level == _prior:
            out[level].remove(_prior_1d_name)
        for pdf in info_post.get("remove", {}).get(level, []) or []:
            try:
                out[level].remove(pdf)
                warn_remove = True
            except ValueError:
                log.error(
                    "Trying to remove %s '%s', but it is not present. "
                    "Existing ones: %r", level, pdf, out[level])
                raise HandledException
    if warn_remove:
        log.warning("You are removing a prior or likelihood pdf. "
                    "Notice that if the resulting posterior is much wider "
                    "than the original one, or displaced enough, "
                    "it is probably safer to explore it directly.")
    if _prior in add:
        mlprior_names_add += [
            _minuslogprior + _separator + name for name in add[_prior]
        ]
        out[_prior] += list(add[_prior])
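    # The full 1d prior is recomputed only if its name was prepended above, i.e.
    # only if the "add" block redefined the prior of some sampled parameter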
    prior_recompute_1d = (mlprior_names_add[:1] == [
        _minuslogprior + _separator + _prior_1d_name
    ])
    # Don't initialise the theory code if not adding/recomputing theory,
    # theory-derived params or likelihoods
    recompute_theory = info_in.get(_theory) and not (list(
        add[_likelihood]) == ["one"] and not any([
            is_derived_param(pinfo) for pinfo in add.get(_params, {}).values()
        ]))
    if recompute_theory:
        # Inherit from the original chain (needs input|output_params, renames, etc.)
        theory = list(info_in[_theory].keys())[0]
        info_theory_out = odict([[
            theory,
            recursive_update(deepcopy(info_in[_theory][theory]),
                             add.get(_theory, {theory: {}})[theory])
        ]])
    else:
        info_theory_out = None
    chi2_names_add = [
        _chi2 + _separator + name for name in add[_likelihood]
        if name != "one"
    ]
    out[_likelihood] += [l for l in add[_likelihood] if l != "one"]
    if recompute_theory:
        log.warning(
            "You are recomputing the theory, but in the current version this does "
            "not force recomputation of any likelihood or derived parameter, "
            "unless explicitly removed+added.")
    for level in [_prior, _likelihood]:
        for i, x_i in enumerate(out[level]):
            if x_i in list(out[level])[i + 1:]:
                log.error(
                    "You have added %s '%s', which was already present. If you "
                    "want to force its recomputation, you must also 'remove' it.",
                    level, x_i)
                raise HandledException
    # 3. Create output collection
    if "suffix" not in info_post:
        log.error("You need to provide a 'suffix' for your chains.")
        raise HandledException
    # Use default prefix if it exists. If it does not, produce no output by default.
    # {post: {output: None}} suppresses output, and if it's a string, updates it.
    out_prefix = info_post.get(_output_prefix, info.get(_output_prefix))
    if out_prefix not in [None, False]:
        out_prefix += "_" + _post + "_" + info_post["suffix"]
    output_out = Output(output_prefix=out_prefix,
                        force_output=info.get(_force))
    info_out = deepcopy(info)
    info_out[_post] = info_post
    # Updated with input info and extended (full) add info
    info_out.update(info_in)
    info_out[_post]["add"] = add
    dummy_model_out = DummyModel(out[_params],
                                 out[_likelihood],
                                 info_prior=out[_prior])
    if recompute_theory:
        theory = list(info_theory_out.keys())[0]
        if _input_params not in info_theory_out[theory]:
            log.error(
                "You appear to be post-processing a chain generated with an older "
                "version of Cobaya. For post-processing to work, please edit the "
                "'[root]__full.info' file of the original chain to add, inside the "
                "theory code block, the list of its input parameters. E.g.\n----\n"
                "theory:\n  %s:\n    input_params: [param1, param2, ...]\n"
                "----\nIf you get strange errors later, it is likely that you did not "
                "specify the correct set of theory parameters.\n"
                "The full set of input parameters are %s.", theory,
                list(dummy_model_out.parameterization.input_params()))
            raise HandledException
    prior_add = Prior(dummy_model_out.parameterization, add.get(_prior))
    likelihood_add = Likelihood(add[_likelihood],
                                parameterization_like,
                                info_theory=info_theory_out,
                                modules=info.get(_path_install))
    # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post]["add"]
    add[_likelihood].pop("one")
    if likelihood_add.theory:
        # Make sure that theory.needs is called at least once, for adjustments
        likelihood_add.theory.needs()
    collection_out = Collection(dummy_model_out, output_out, name="1")
    output_out.dump_info({}, info_out)
    # 4. Main loop!
    log.info("Running post-processing...")
    last_percent = 0
    for i, point in enumerate(collection_in.data.itertuples()):
        log.debug("Point: %r", point)
        sampled = [
            getattr(point, param)
            for param in dummy_model_in.parameterization.sampled_params()
        ]
        derived = odict(
            [[param, getattr(point, param, None)]
             for param in dummy_model_out.parameterization.derived_params()])
        inputs = odict([[
            param,
            getattr(
                point, param,
                dummy_model_in.parameterization.constant_params().get(
                    param,
                    dummy_model_out.parameterization.constant_params().get(
                        param, None)))
        ] for param in dummy_model_out.parameterization.input_params()])
        # Solve inputs that depend on a function and were not saved
        # (we don't use the Parameterization.to_input method in case there are references
        #  to functions that cannot be loaded at the moment)
        for p, value in inputs.items():
            if value is None:
                func = dummy_model_out.parameterization._input_funcs[p]
                args = dummy_model_out.parameterization._input_args[p]
                inputs[p] = func(*[getattr(point, arg) for arg in args])
        # Add/remove priors
        priors_add = prior_add.logps(sampled)
        if not prior_recompute_1d:
            priors_add = priors_add[1:]
        logpriors_add = odict(zip(mlprior_names_add, priors_add))
        logpriors_new = [
            logpriors_add.get(name, -getattr(point, name, 0))
            for name in collection_out.minuslogprior_names
        ]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New set of priors: %r",
                      dict(zip(dummy_model_out.prior, logpriors_new)))
        if -np.inf in logpriors_new:
            continue
        # Add/remove likelihoods
        output_like = []
        if likelihood_add:
            # Notice "one" (last in likelihood_add) is ignored: not in chi2_names
            loglikes_add = odict(
                zip(chi2_names_add,
                    likelihood_add.logps(inputs, _derived=output_like)))
            output_like = dict(zip(likelihood_add.output_params, output_like))
        else:
            loglikes_add = dict()
        loglikes_new = [
            loglikes_add.get(name, -0.5 * getattr(point, name, 0))
            for name in collection_out.chi2_names
        ]
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug("New set of likelihoods: %r",
                      dict(zip(dummy_model_out.likelihood, loglikes_new)))
            if output_like:
                log.debug("New set of likelihood-derived parameters: %r",
                          output_like)
        if -np.inf in loglikes_new:
            continue
        # Add/remove derived parameters and change priors of sampled parameters
        for p in add[_params]:
            if p in dummy_model_out.parameterization._directly_output:
                derived[p] = output_like[p]
            elif p in dummy_model_out.parameterization._derived_funcs:
                func = dummy_model_out.parameterization._derived_funcs[p]
                args = dummy_model_out.parameterization._derived_args[p]
                derived[p] = func(*[
                    getattr(point, arg, output_like.get(arg, None))
                    for arg in args
                ])
        if log.getEffectiveLevel() <= logging.DEBUG:
            log.debug(
                "New derived parameters: %r",
                dict([[
                    p, derived[p]
                ] for p in dummy_model_out.parameterization.derived_params()
                      if p in add[_params]]))
        # Save to the collection (keep old weight for now)
        collection_out.add(sampled,
                           derived=derived.values(),
                           weight=getattr(point, _weight),
                           logpriors=logpriors_new,
                           loglikes=loglikes_new)
        # Display progress
        percent = np.round(i / collection_in.n() * 100)
        if percent != last_percent and not percent % 5:
            last_percent = percent
            progress_bar(log, percent, " (%d/%d)" % (i, collection_in.n()))
    if not collection_out.data.last_valid_index():
        log.error(
            "No elements in the final sample. Possible causes: "
            "added a prior or likelihood valued zero over the full sampled domain, "
            "or the computation of the theory failed everywhere, etc.")
        raise HandledException
    # Reweight -- account for large dynamic range!
    #   Prefer to rescale +inf to finite, and ignore final points with -inf.
    #   Remove -inf's (0-weight), and correct indices
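    #   In log space this is w_new = w_old * exp(logpost_new - logpost_old - difflogmax),
    #   with difflogmax chosen so the exponent is always <= 0 and cannot overflow.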
    difflogmax = max(collection_in[_minuslogpost] -
                     collection_out[_minuslogpost])
    collection_out.data[_weight] *= np.exp(collection_in[_minuslogpost] -
                                           collection_out[_minuslogpost] -
                                           difflogmax)
    collection_out.data = (
        collection_out.data[collection_out.data.weight > 0].reset_index(
            drop=True))
    collection_out._n = collection_out.data.last_valid_index() + 1
    # Write!
    collection_out._out_update()
    log.info("Finished! Final number of samples: %d", collection_out.n())
    return info_out, {"sample": collection_out}
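
# Illustrative input for post() above, assuming the conventions constants
# (_post, _params, _likelihood, _output_prefix, ...) map to the corresponding
# lowercase block names. The keys mirror what the function reads: "skip"/"thin"
# to subsample the input chain, "add"/"remove" to change pdfs or parameters,
# and a mandatory "suffix" for the reprocessed chains. The pdf names and values
# are placeholders.
info = {
    "output": "chains/run1",  # prefix of the original chain to load
    "post": {
        "suffix": "importance",
        "skip": 100,
        "thin": 1,
        "remove": {"likelihood": ["old_like"]},
        "add": {
            "likelihood": {"new_like": "lambda x: -0.5 * ((x - 0.3) / 0.05) ** 2"},
            "params": {"x2": {"derived": "lambda x: x ** 2"}},
        },
    },
}
# updated_info, products = post(info)   # products["sample"] is the new Collection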
Example #4
def body_of_test(info_logpdf, kind, tmpdir, derived=False, manual=False):
    # For pytest's handling of tmp dirs
    if hasattr(tmpdir, "dirpath"):
        tmpdir = tmpdir.dirname
    prefix = os.path.join(tmpdir, "%d" % round(1e8 * random())) + os.sep
    if os.path.exists(prefix):
        shutil.rmtree(prefix)
    # build updated info
    info = {
        _output_prefix: prefix,
        _params: {
            "x": {
                _prior: {
                    "min": 0,
                    "max": 1
                },
                "proposal": 0.05
            },
            "y": {
                _prior: {
                    "min": -1,
                    "max": 1
                },
                "proposal": 0.05
            }
        },
        kinds.sampler: {
            "mcmc": {
                "max_samples": (10 if not manual else 5000),
                "learn_proposal": False
            }
        }
    }
    if derived:
        info[_params].update({
            "r": {
                "min": 0,
                "max": 1
            },
            "theta": {
                "min": -0.5,
                "max": 0.5
            }
        })
    # Complete according to kind
    if kind == _prior:
        info.update({_prior: info_logpdf, kinds.likelihood: {"one": None}})
    elif kind == kinds.likelihood:
        info.update({kinds.likelihood: info_logpdf})
    else:
        raise ValueError("Kind of test not known.")
    # If there is an ext function that is not a string, don't write output!
    stringy = {k: v for k, v in info_logpdf.items() if isinstance(v, str)}
    if stringy != info_logpdf:
        info.pop(_output_prefix)
    # Run
    updated_info, sampler = run(info)
    products = sampler.products()
    # Test values
    logprior_base = -np.log(
        (info[_params]["x"][_prior]["max"] - info[_params]["x"][_prior]["min"])
        * (info[_params]["y"][_prior]["max"] -
           info[_params]["y"][_prior]["min"]))
    logps = {
        name: logpdf(
            **{
                arg: products["sample"][arg].values
                for arg in getfullargspec(logpdf)[0]
            })
        for name, logpdf in {
            "half_ring": half_ring_func,
            "gaussian_y": gaussian_func
        }.items()
    }
    # Test #1: values of logpdf's
    if kind == _prior:
        columns_priors = [
            c for c in products["sample"].data.columns
            if c.startswith("minuslogprior")
        ]
        assert np.allclose(
            products["sample"][columns_priors[0]].values,
            np.sum(products["sample"][columns_priors[1:]].values, axis=-1)), (
                "The single prior values do not add up to the total one.")
        assert np.allclose(
            logprior_base + sum(logps[p] for p in info_logpdf),
            -products["sample"]["minuslogprior"].values), (
                "The value of the total prior is not reproduced correctly.")
    elif kind == kinds.likelihood:
        for lik in info[kinds.likelihood]:
            assert np.allclose(
                -2 * logps[lik], products["sample"][_get_chi2_name(lik)].values
            ), ("The value of the likelihood '%s' is not reproduced correctly."
                % lik)
    assert np.allclose(
        logprior_base + sum(logps[p] for p in info_logpdf),
        -products["sample"]["minuslogpost"].values), (
            "The value of the posterior is not reproduced correctly.")
    # Test derived parameters, if present -- for now just for "r"
    if derived:
        derived_values = {
            param:
            func(**{arg: products["sample"][arg].values
                    for arg in ["x", "y"]})
            for param, func in derived_funcs.items()
        }
        assert all(
            np.allclose(v, products["sample"][p].values)
            for p, v in derived_values.items()
        ), ("The value of the derived parameters is not reproduced correctly.")
    # Test updated info -- scripted
    if kind == _prior:
        assert info[_prior] == updated_info[_prior], (
            "The prior information has not been updated correctly.")
    elif kind == kinds.likelihood:
        # Transform the likelihood info to the "external" convention and add defaults
        info_likelihood = deepcopy(info[kinds.likelihood])
        for lik, value in list(info_likelihood.items()):
            if not hasattr(value, "get"):
                info_likelihood[lik] = {_external: value}
            info_likelihood[lik].update({
                k: v
                for k, v in Likelihood.get_defaults().items()
                if k not in info_likelihood[lik]
            })
            for k in [_input_params, _output_params]:
                info_likelihood[lik].pop(k, None)
                updated_info[kinds.likelihood][lik].pop(k)
        assert info_likelihood == updated_info[kinds.likelihood], (
            "The likelihood information has not been updated correctly\n %r vs %r"
            % (info_likelihood, updated_info[kinds.likelihood]))
    # Test updated info -- yaml
    # For now, only if ALL external pdfs are given as strings,
    # since the YAML load fails otherwise
    if stringy == info_logpdf:
        updated_output_file = os.path.join(prefix, _updated_suffix + ".yaml")
        with open(updated_output_file) as updated:
            updated_yaml = yaml_load("".join(updated.readlines()))
        for k, v in stringy.items():
            to_test = updated_yaml[kind][k]
            if kind == kinds.likelihood:
                to_test = to_test[_external]
            assert to_test == info_logpdf[k], (
                "The updated external pdf info has not been written correctly."
            )
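
# Hypothetical pytest entry point for the helper above. The lambda bodies are
# placeholders: in the real test they must reproduce half_ring_func and
# gaussian_func, which the assertions use as reference values, and _prior is
# assumed to map to the string "prior".
def test_external_priors(tmpdir):
    info_logpdf = {
        "half_ring": "lambda x, y: stats.norm.logpdf("
                     "np.sqrt(x ** 2 + y ** 2), loc=0.5, scale=0.1)",
        "gaussian_y": "lambda y: stats.norm.logpdf(y, loc=0, scale=0.2)",
    }
    body_of_test(info_logpdf, kind="prior", tmpdir=tmpdir)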
Example #5
def run(info):

    assert hasattr(info, "items"), (
        "The agument of `run` must be a dictionary with the info needed for the run. "
        "If you were trying to pass an input file instead, load it first with "
        "`cobaya.input.load_input`.")

    # Import names
    from copy import deepcopy
    from cobaya.conventions import _likelihood, _prior, _params
    from cobaya.conventions import _theory, _sampler, _path_install
    from cobaya.conventions import _debug, _debug_file, _output_prefix

    # Configure the logger ASAP
    from cobaya.log import logger_setup
    logger_setup(info.get(_debug), info.get(_debug_file))

    # Debug (lazy call)
    import logging
    if logging.root.getEffectiveLevel() <= logging.DEBUG:
        # Don't dump unless we are doing output, just in case something is not serializable
        # May be fixed in the future if we find a way to serialize external functions
        if info.get(_output_prefix):
            from cobaya.yaml import yaml_dump
            logging.getLogger(__name__.split(".")[-1]).debug(
                "Input info (dumped to YAML):\n%s", yaml_dump(info))

    # Import general classes
    from cobaya.prior import Prior
    from cobaya.sampler import get_Sampler as Sampler

    # Import the functions and classes that need MPI wrapping
    from cobaya.mpi import import_MPI
    #    Likelihood = import_MPI(".likelihood", "LikelihoodCollection")
    from cobaya.likelihood import LikelihoodCollection as Likelihood

    # Initialise output, if required
    do_output = info.get(_output_prefix)
    if do_output:
        Output = import_MPI(".output", "Output")
        output = Output(info)
    else:
        from cobaya.output import Output_dummy
        output = Output_dummy(info)

    # Create the full input information, including defaults for each module.
    from cobaya.input import get_full_info
    full_info = get_full_info(info)
    if logging.root.getEffectiveLevel() <= logging.DEBUG:
        # Don't dump unless we are doing output, just in case something is not serializable
        # May be fixed in the future if we find a way to serialize external functions
        if info.get(_output_prefix):
            logging.getLogger(__name__.split(".")[-1]).debug(
                "Updated info (dumped to YAML):\n%s", yaml_dump(full_info))
    # We dump the info now, before modules' initialization, lest it be accidentally modified
    output.dump_info(info, full_info)

    # Set the path of the installed modules, if given
    from cobaya.tools import set_path_to_installation
    set_path_to_installation(info.get(_path_install))

    # Initialise parametrization, likelihoods and prior
    from cobaya.parametrization import Parametrization
    with Parametrization(full_info[_params]) as par:
        with Prior(par, full_info.get(_prior)) as prior:
            with Likelihood(full_info[_likelihood], par,
                            full_info.get(_theory)) as lik:
                with Sampler(full_info[_sampler], par, prior, lik,
                             output) as sampler:
                    sampler.run()

    # For scripted calls
    return deepcopy(full_info), sampler.products()
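
# Minimal scripted call of run() above, assuming the conventions constants map to
# the usual lowercase block names ("params", "likelihood", "sampler", "output", ...).
# The external likelihood given as a string lambda is illustrative.
info = {
    "params": {
        "x": {"prior": {"min": 0, "max": 1}, "proposal": 0.05},
    },
    "likelihood": {"gauss": "lambda x: -0.5 * ((x - 0.5) / 0.1) ** 2"},
    "sampler": {"mcmc": {"max_samples": 100}},
    # "output": "chains/test",   # uncomment to also write the chain to disk
}
full_info, products = run(info)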