def load_collections(self, model, skip=0, thin=1, concatenate=False,
                     name=None, extension=None):
    """
    Loads all collection files found which are compatible with this `Output`
    instance, including their path in their name.

    Use `name` for particular types of collections (default: any number).
    Pass `False` to mean there is nothing between the output prefix and the
    extension.
    """
    filenames = self.find_collections(name=name, extension=extension)
    collections = [
        Collection(model, self, name="%d" % (1 + i), file_name=filename,
                   load=True, onload_skip=skip, onload_thin=thin)
        for i, filename in enumerate(filenames)]
    if concatenate and collections:
        collection = collections[0]
        for collection_i in collections[1:]:
            collection.append(collection_i)
        collections = collection
    return collections
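For context, a minimal usage sketch (not part of the source; `get_output` is the helper used later in `post`, and the prefix and `model` are assumed to come from an existing run and its updated info):

# Hypothetical usage: load every chain written under "chains/run" into a single
# Collection, dropping the first 100 rows of each and keeping every 5th one.
output = get_output(output_prefix="chains/run")
full_chain = output.load_collections(model, skip=100, thin=5, concatenate=True)
print("Total number of (weighted) samples:", len(full_chain))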
def initialise(self):
    """
    Creates a 1-point collection to store the point at which the posterior
    is evaluated.
    """
    self.one_point = Collection(
        self.parametrization, self.likelihood, self.output,
        initial_size=1, name="1")
    self.log.info("Initialised!")
def initialize(self):
    """
    Creates a 1-point collection to store the point at which the posterior
    is evaluated.
    """
    self.one_point = Collection(self.model, self.output, initial_size=1, name="1")
    self.log.info("Initialized!")
def initialize(self):
    self.log.info("Initializing.")
    # Example for setting default options
    print("+++ For option '%s' got value '%s'. Add more options in fisher.yaml." % (
        "example_option", self.example_option))
    # Prepare the list of "samples" (posterior evaluations)
    self.collection = Collection(self.model, self.output)
    # Prepare vectors to store derivatives
    # ... up to you
    # Prepare the matrix
    self.fisher_matrix = np.eye(self.model.prior.d())
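The derivative bookkeeping above is left "up to you"; as one possible way to fill `self.fisher_matrix`, here is a hedged sketch that estimates it as minus the Hessian of the log-posterior by central finite differences. It assumes `model.logpost(x)` accepts an array of sampled-parameter values (as used by the samplers below); `numerical_fisher` is an illustrative helper, not part of the source.

import numpy as np

def numerical_fisher(model, x0, eps=1e-3):
    """Minus the numerical Hessian of the log-posterior at x0 (central differences)."""
    d = len(x0)
    hess = np.empty((d, d))
    for i in range(d):
        for j in range(i, d):
            def logp_shifted(si, sj):
                # Evaluate the log-posterior at x0 shifted by (si*eps, sj*eps)
                # along directions i and j.
                x = np.array(x0, dtype=float)
                x[i] += si * eps
                x[j] += sj * eps
                return model.logpost(x)
            hess[i, j] = hess[j, i] = (
                logp_shifted(+1, +1) - logp_shifted(+1, -1)
                - logp_shifted(-1, +1) + logp_shifted(-1, -1)) / (4 * eps ** 2)
    return -hess

With such a helper, the placeholder `np.eye(...)` above could instead be seeded with something like `numerical_fisher(self.model, self.model.prior.reference())`.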
def load_collections(self, model, skip=0, thin=1, concatenate=False):
    filenames = self.find_collections()
    collections = [
        Collection(model, self, name="%d" % (1 + i), file_name=filename,
                   load=True, onload_skip=skip, onload_thin=thin)
        for i, filename in enumerate(filenames)]
    if concatenate and collections:
        collection = collections[0]
        for collection_i in collections[1:]:
            collection._append(collection_i)
        collections = collection
    return collections
def initialize(self):
    """
    Creates a 1-point collection to store the point at which the posterior
    is evaluated.
    """
    try:
        self.N = int(self.N)
    except (ValueError, TypeError):
        raise LoggedError(
            self.log, "Could not convert the number of samples to an integer: %r",
            self.N)
    self.one_point = Collection(
        self.model, self.output, initial_size=self.N, name="1")
    self.log.info("Initialized!")
def save_sample(self, fname, name):
    sample = np.atleast_2d(np.loadtxt(fname))
    if not sample.size:
        return None
    collection = Collection(self.model, self.output, name=str(name))
    for row in sample:
        collection.add(
            row[2:2 + self.n_sampled],
            derived=row[2 + self.n_sampled:2 + self.n_sampled + self.n_derived + 1],
            weight=row[0], logpost=-row[1],
            logpriors=row[-(self.n_priors + self.n_likes):-self.n_likes],
            loglikes=row[-self.n_likes:])
    # make sure that the points are written
    collection._out_update()
    return collection
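As a reading aid, the column order of the text file is inferred from the slicing above; the toy row below (made-up values, not from the source) just makes that assumed layout explicit.

# Assumed layout: weight, -logpost, sampled..., derived..., log-priors..., log-likelihoods...
import numpy as np
n_sampled, n_derived, n_priors, n_likes = 2, 1, 1, 2
row = np.array([1.0, 12.3,      # weight, -log(posterior)
                0.1, 0.2,       # sampled parameter values
                0.5,            # derived parameter values
                -1.0,           # log-prior block
                -5.0, -6.3])    # per-likelihood block
assert row[2:2 + n_sampled].tolist() == [0.1, 0.2]
assert row[-n_likes:].tolist() == [-5.0, -6.3]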
def save_sample(self, fname, name):
    sample = np.atleast_2d(np.loadtxt(fname))
    collection = Collection(self.parametrization, self.likelihood, self.output,
                            name=str(name))
    for row in sample:
        collection.add(
            row[2:2 + self.n_sampled],
            derived=row[2 + self.n_sampled:2 + self.n_sampled + self.n_derived + 1],
            weight=row[0], logpost=-row[1],
            logprior=row[-(1 + self.n_liks)], logliks=row[-self.n_liks:])
    # make sure that the points are written
    collection.out_update()
    return collection
def initialize(self): """Imports the PolyChord sampler and prepares its arguments.""" # Allow global import if no direct path specification allow_global = not self.path if not self.path and self.packages_path: self.path = self.get_path(self.packages_path) self.pc = self.is_installed(path=self.path, allow_global=allow_global) if not self.pc: raise NotInstalledError( self.log, "Could not find PolyChord. Check error message above. " "To install it, run 'cobaya-install polychord --%s " "[packages_path]'", _packages_path_arg) # Prepare arguments and settings from pypolychord.settings import PolyChordSettings self.n_sampled = len(self.model.parameterization.sampled_params()) self.n_derived = len(self.model.parameterization.derived_params()) self.n_priors = len(self.model.prior) self.n_likes = len(self.model.likelihood) self.nDims = self.model.prior.d() self.nDerived = (self.n_derived + self.n_priors + self.n_likes) if self.logzero is None: self.logzero = np.nan_to_num(-np.inf) if self.max_ndead == np.inf: self.max_ndead = -1 self._quants_d_units = ["nlive", "max_ndead"] for p in self._quants_d_units: if getattr(self, p) is not None: setattr( self, p, NumberWithUnits(getattr(self, p), "d", scale=self.nDims, dtype=int).value) self._quants_nlive_units = ["nprior"] for p in self._quants_nlive_units: if getattr(self, p) is not None: setattr( self, p, NumberWithUnits(getattr(self, p), "nlive", scale=self.nlive, dtype=int).value) # Fill the automatic ones if getattr(self, "feedback", None) is None: values = { logging.CRITICAL: 0, logging.ERROR: 0, logging.WARNING: 0, logging.INFO: 1, logging.DEBUG: 2 } self.feedback = values[self.log.getEffectiveLevel()] # Prepare output folders and prefixes if self.output: self.file_root = self.output.prefix self.read_resume = self.output.is_resuming() else: output_prefix = share_mpi( hex(int(random() * 16**6))[2:] if is_main_process() else None) self.file_root = output_prefix # dummy output -- no resume! self.read_resume = False self.base_dir = self.get_base_dir(self.output) self.raw_clusters_dir = os.path.join(self.base_dir, self._clusters_dir) self.output.create_folder(self.base_dir) if self.do_clustering: self.clusters_folder = self.get_clusters_dir(self.output) self.output.create_folder(self.clusters_folder) self.mpi_info("Storing raw PolyChord output in '%s'.", self.base_dir) # Exploiting the speed hierarchy if self.blocking: blocks, oversampling_factors = self.model.check_blocking( self.blocking) else: if self.measure_speeds: self.model.measure_and_set_speeds(n=self.measure_speeds) blocks, oversampling_factors = self.model.get_param_blocking_for_sampler( oversample_power=self.oversample_power) self.mpi_info("Parameter blocks and their oversampling factors:") max_width = len(str(max(oversampling_factors))) for f, b in zip(oversampling_factors, blocks): self.mpi_info("* %" + "%d" % max_width + "d : %r", f, b) # Save blocking in updated info, in case we want to resume self._updated_info["blocking"] = list(zip(oversampling_factors, blocks)) blocks_flat = list(chain(*blocks)) self.ordering = [ blocks_flat.index(p) for p in self.model.parameterization.sampled_params() ] self.grade_dims = [len(block) for block in blocks] # Steps per block # NB: num_repeats is ignored by PolyChord when int "grade_frac" given, # so needs to be applied by hand. 
# In num_repeats, `d` is interpreted as dimension of each block self.grade_frac = [ int(o * read_dnumber(self.num_repeats, dim_block)) for o, dim_block in zip(oversampling_factors, self.grade_dims) ] # Assign settings pc_args = [ "nlive", "num_repeats", "nprior", "do_clustering", "precision_criterion", "max_ndead", "boost_posterior", "feedback", "logzero", "posteriors", "equals", "compression_factor", "cluster_posteriors", "write_resume", "read_resume", "write_stats", "write_live", "write_dead", "base_dir", "grade_frac", "grade_dims", "feedback", "read_resume", "base_dir", "file_root", "grade_frac", "grade_dims" ] # As stated above, num_repeats is ignored, so let's not pass it pc_args.pop(pc_args.index("num_repeats")) self.pc_settings = PolyChordSettings( self.nDims, self.nDerived, seed=(self.seed if self.seed is not None else -1), **{ p: getattr(self, p) for p in pc_args if getattr(self, p) is not None }) # prior conversion from the hypercube bounds = self.model.prior.bounds( confidence_for_unbounded=self.confidence_for_unbounded) # Check if priors are bounded (nan's to inf) inf = np.where(np.isinf(bounds)) if len(inf[0]): params_names = self.model.parameterization.sampled_params() params = [params_names[i] for i in sorted(list(set(inf[0])))] raise LoggedError( self.log, "PolyChord needs bounded priors, but the parameter(s) '" "', '".join(params) + "' is(are) unbounded.") locs = bounds[:, 0] scales = bounds[:, 1] - bounds[:, 0] # This function re-scales the parameters AND puts them in the right order self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales ).tolist() # We will need the volume of the prior domain, since PolyChord divides by it self.logvolume = np.log(np.prod(scales)) # Prepare callback function if self.callback_function is not None: self.callback_function_callable = (get_external_function( self.callback_function)) self.last_point_callback = 0 # Prepare runtime live and dead points collections self.live = Collection(self.model, None, name="live", initial_size=self.pc_settings.nlive) self.dead = Collection(self.model, self.output, name="dead") # Done! if is_main_process(): self.log.debug("Calling PolyChord with arguments:") for p, v in inspect.getmembers(self.pc_settings, lambda a: not (callable(a))): if not p.startswith("_"): self.log.debug(" %s: %s", p, v) self.mpi_info("Initialized!")
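To illustrate the prior conversion set up above (a standalone sketch with made-up bounds and ordering, not code from the source): PolyChord proposes points in the unit hypercube in the speed-blocked parameter order, and `pc_prior` maps them onto the physical flat box in the sampled-parameter order.

import numpy as np

bounds = np.array([[0.0, 1.0],     # parameter a: [lower, upper]
                   [-5.0, 5.0]])   # parameter b
ordering = [1, 0]                  # blocked order is (b, a); sampled order is (a, b)
locs, scales = bounds[:, 0], bounds[:, 1] - bounds[:, 0]
x_hypercube = [0.25, 0.5]          # point proposed by PolyChord in [0, 1]^2: (x_b, x_a)
physical = (locs + np.array(x_hypercube)[ordering] * scales).tolist()
# -> [0.5, -2.5]:  a = 0 + 0.5 * 1,  b = -5 + 0.25 * 10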
def post(info, sample=None): logger_setup(info.get(_debug), info.get(_debug_file)) log = logging.getLogger(__name__.split(".")[-1]) # MARKED FOR DEPRECATION IN v3.0 # BEHAVIOUR TO BE REPLACED BY ERROR: check_deprecated_modules_path(info) # END OF DEPRECATION BLOCK try: info_post = info[_post] except KeyError: raise LoggedError(log, "No 'post' block given. Nothing to do!") if get_mpi_rank(): log.warning( "Post-processing is not yet MPI-aware. Doing nothing for rank > 1 processes.") return if info.get(_resume): log.warning("Resuming not implemented for post-processing. Re-starting.") # 1. Load existing sample output_in = get_output(output_prefix=info.get(_output_prefix)) if output_in: try: info_in = output_in.reload_updated_info() except FileNotFoundError: raise LoggedError(log, "Error loading input model: " "could not find input info at %s", output_in.file_updated) else: info_in = deepcopy_where_possible(info) dummy_model_in = DummyModel(info_in[_params], info_in[kinds.likelihood], info_in.get(_prior, None)) if output_in: if not output_in.find_collections(): raise LoggedError(log, "No samples found for the input model with prefix %s", os.path.join(output_in.folder, output_in.prefix)) collection_in = output_in.load_collections( dummy_model_in, skip=info_post.get("skip", 0), thin=info_post.get("thin", 1), concatenate=True) elif sample: if isinstance(sample, Collection): sample = [sample] collection_in = deepcopy(sample[0]) for s in sample[1:]: try: collection_in.append(s) except: raise LoggedError(log, "Failed to load some of the input samples.") else: raise LoggedError(log, "Not output from where to load from or input collections given.") log.info("Will process %d samples.", len(collection_in)) if len(collection_in) <= 1: raise LoggedError( log, "Not enough samples for post-processing. Try using a larger sample, " "or skipping or thinning less.") # 2. Compare old and new info: determine what to do add = info_post.get(_post_add, {}) or {} remove = info_post.get(_post_remove, {}) # Add a dummy 'one' likelihood, to absorb unused parameters if not add.get(kinds.likelihood): add[kinds.likelihood] = {} add[kinds.likelihood]["one"] = None # Expand the "add" info add = update_info(add) # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters out = {_params: deepcopy_where_possible(info_in[_params])} for p in remove.get(_params, {}): pinfo = info_in[_params].get(p) if pinfo is None or not is_derived_param(pinfo): raise LoggedError( log, "You tried to remove parameter '%s', which is not a derived parameter. " "Only derived parameters can be removed during post-processing.", p) out[_params].pop(p) # Force recomputation of aggregated chi2 for p in list(out[_params]): if p.startswith(_get_chi2_name("")): out[_params].pop(p) mlprior_names_add = [] for p, pinfo in add.get(_params, {}).items(): pinfo_in = info_in[_params].get(p) if is_sampled_param(pinfo): if not is_sampled_param(pinfo_in): # No added sampled parameters (de-marginalisation not implemented) if pinfo_in is None: raise LoggedError( log, "You added a new sampled parameter %r (maybe accidentally " "by adding a new likelihood that depends on it). " "Adding new sampled parameters is not possible. Try fixing " "it to some value.", p) else: raise LoggedError( log, "You tried to change the prior of parameter '%s', " "but it was not a sampled parameter. 
" "To change that prior, you need to define as an external one.", p) if mlprior_names_add[:1] != _prior_1d_name: mlprior_names_add = ([_minuslogprior + _separator + _prior_1d_name] + mlprior_names_add) elif is_derived_param(pinfo): if p in out[_params]: raise LoggedError( log, "You tried to add derived parameter '%s', which is already " "present. To force its recomputation, 'remove' it too.", p) elif is_fixed_param(pinfo): # Only one possibility left "fixed" parameter that was not present before: # input of new likelihood, or just an argument for dynamical derived (dropped) if ((p in info_in[_params] and pinfo[partag.value] != (pinfo_in or {}).get(partag.value, None))): raise LoggedError( log, "You tried to add a fixed parameter '%s: %r' that was already present" " but had a different value or was not fixed. This is not allowed. " "The old info of the parameter was '%s: %r'", p, dict(pinfo), p, dict(pinfo_in)) else: raise LoggedError(log, "This should not happen. Contact the developers.") out[_params][p] = pinfo # For the likelihood only, turn the rest of *derived* parameters into constants, # so that the likelihoods do not try to compute them) # But be careful to exclude *input* params that have a "derived: True" value # (which in "updated info" turns into "derived: 'lambda [x]: [x]'") out_params_like = deepcopy_where_possible(out[_params]) for p, pinfo in out_params_like.items(): if ((is_derived_param(pinfo) and not (partag.value in pinfo) and p not in add.get(_params, {}))): out_params_like[p] = {partag.value: np.nan, partag.drop: True} # 2.2 Manage adding/removing priors and likelihoods warn_remove = False for level in [_prior, kinds.likelihood]: out[level] = getattr(dummy_model_in, level) if level == _prior: out[level].remove(_prior_1d_name) for pdf in info_post.get(_post_remove, {}).get(level, []) or []: try: out[level].remove(pdf) warn_remove = True except ValueError: raise LoggedError( log, "Trying to remove %s '%s', but it is not present. " "Existing ones: %r", level, pdf, out[level]) if warn_remove: log.warning("You are removing a prior or likelihood pdf. 
" "Notice that if the resulting posterior is much wider " "than the original one, or displaced enough, " "it is probably safer to explore it directly.") if _prior in add: mlprior_names_add += [_minuslogprior + _separator + name for name in add[_prior]] out[_prior] += list(add[_prior]) prior_recompute_1d = ( mlprior_names_add[:1] == [_minuslogprior + _separator + _prior_1d_name]) # Don't initialise the theory code if not adding/recomputing theory, # theory-derived params or likelihoods recompute_theory = info_in.get(kinds.theory) and not ( list(add[kinds.likelihood]) == ["one"] and not any(is_derived_param(pinfo) for pinfo in add.get(_params, {}).values())) if recompute_theory: # Inherit from the original chain (needs input|output_params, renames, etc add_theory = add.get(kinds.theory) if add_theory: info_theory_out = {} if len(add_theory) > 1: log.warning('Importance sampling with more than one theory is ' 'not really tested') add_theory = add_theory.copy() for theory, theory_info in info_in[kinds.theory].items(): theory_copy = deepcopy_where_possible(theory_info) if theory in add_theory: info_theory_out[theory] = \ recursive_update(theory_copy, add_theory.pop(theory)) else: info_theory_out[theory] = theory_copy info_theory_out.update(add_theory) else: info_theory_out = deepcopy_where_possible(info_in[kinds.theory]) else: info_theory_out = None chi2_names_add = [ _get_chi2_name(name) for name in add[kinds.likelihood] if name != "one"] out[kinds.likelihood] += [l for l in add[kinds.likelihood] if l != "one"] if recompute_theory: log.warning("You are recomputing the theory, but in the current version this does" " not force recomputation of any likelihood or derived parameter, " "unless explicitly removed+added.") for level in [_prior, kinds.likelihood]: for i, x_i in enumerate(out[level]): if x_i in list(out[level])[i + 1:]: raise LoggedError( log, "You have added %s '%s', which was already present. If you " "want to force its recomputation, you must also 'remove' it.", level, x_i) # 3. Create output collection if _post_suffix not in info_post: raise LoggedError(log, "You need to provide a '%s' for your chains.", _post_suffix) # Use default prefix if it exists. If it does not, produce no output by default. # {post: {output: None}} suppresses output, and if it's a string, updates it. out_prefix = info_post.get(_output_prefix, info.get(_output_prefix)) if out_prefix not in [None, False]: out_prefix += _separator_files + _post + _separator_files + info_post[ _post_suffix] output_out = get_output(output_prefix=out_prefix, force=info.get(_force)) if output_out and not output_out.force and output_out.find_collections(): raise LoggedError(log, "Found existing post-processing output with prefix %r. 
" "Delete it manually or re-run with `force: True` " "(or `-f`, `--force` from the shell).", out_prefix) elif output_out and output_out.force: output_out.delete_infos() for regexp in output_out.find_collections(): output_out.delete_with_regexp(re.compile(regexp)) info_out = deepcopy_where_possible(info) info_out[_post] = info_post # Updated with input info and extended (updated) add info info_out.update(info_in) info_out[_post][_post_add] = add dummy_model_out = DummyModel(out[_params], out[kinds.likelihood], info_prior=out[_prior]) if recompute_theory: # TODO: May need updating for more than one, or maybe can be removed theory = list(info_theory_out)[0] if _input_params not in info_theory_out[theory]: raise LoggedError( log, "You appear to be post-processing a chain generated with an older " "version of Cobaya. For post-processing to work, please edit the " "'[root].updated.yaml' file of the original chain to add, inside the " "theory code block, the list of its input parameters. E.g.\n----\n" "theory:\n %s:\n input_params: [param1, param2, ...]\n" "----\nIf you get strange errors later, it is likely that you did not " "specify the correct set of theory parameters.\n" "The full set of input parameters are %s.", theory, list(dummy_model_out.parameterization.input_params())) # TODO: check allow_renames=False? # TODO: May well be simplifications here, this is v close to pre-refactor logic # Have not gone through or understood all the parameterization stuff model_add = Model(out_params_like, add[kinds.likelihood], info_prior=add.get(_prior), info_theory=info_theory_out, packages_path=info.get(_packages_path), allow_renames=False, post=True, prior_parameterization=dummy_model_out.parameterization) # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post][_post_add] add[kinds.likelihood].pop("one") collection_out = Collection(dummy_model_out, output_out, name="1") output_out.check_and_dump_info(None, info_out, check_compatible=False) # Prepare recomputation of aggregated chi2 # (they need to be recomputed by hand, because its autocomputation won't pick up # old likelihoods for a given type) all_types = { like: str_to_list(add[kinds.likelihood].get( like, info_in[kinds.likelihood].get(like)).get("type", []) or []) for like in out[kinds.likelihood]} types = set(chain(*list(all_types.values()))) inv_types = {t: [like for like, like_types in all_types.items() if t in like_types] for t in types} # 4. Main loop! 
log.info("Running post-processing...") last_percent = 0 for i, point in collection_in.data.iterrows(): log.debug("Point: %r", point) sampled = [point[param] for param in dummy_model_in.parameterization.sampled_params()] derived = {param: point.get(param, None) for param in dummy_model_out.parameterization.derived_params()} inputs = {param: point.get( param, dummy_model_in.parameterization.constant_params().get( param, dummy_model_out.parameterization.constant_params().get( param, None))) for param in dummy_model_out.parameterization.input_params()} # Solve inputs that depend on a function and were not saved # (we don't use the Parameterization_to_input method in case there are references # to functions that cannot be loaded at the moment) for p, value in inputs.items(): if value is None: func = dummy_model_out.parameterization._input_funcs[p] args = dummy_model_out.parameterization._input_args[p] inputs[p] = func(*[point.get(arg) for arg in args]) # Add/remove priors priors_add = model_add.prior.logps(sampled) if not prior_recompute_1d: priors_add = priors_add[1:] logpriors_add = dict(zip(mlprior_names_add, priors_add)) logpriors_new = [logpriors_add.get(name, - point.get(name, 0)) for name in collection_out.minuslogprior_names] if log.getEffectiveLevel() <= logging.DEBUG: log.debug( "New set of priors: %r", dict(zip(dummy_model_out.prior, logpriors_new))) if -np.inf in logpriors_new: continue # Add/remove likelihoods output_like = [] if add[kinds.likelihood]: # Notice "one" (last in likelihood_add) is ignored: not in chi2_names loglikes_add, output_like = model_add.logps(inputs, return_derived=True) loglikes_add = dict(zip(chi2_names_add, loglikes_add)) output_like = dict(zip(model_add.output_params, output_like)) else: loglikes_add = dict() loglikes_new = [loglikes_add.get(name, -0.5 * point.get(name, 0)) for name in collection_out.chi2_names] if log.getEffectiveLevel() <= logging.DEBUG: log.debug( "New set of likelihoods: %r", dict(zip(dummy_model_out.likelihood, loglikes_new))) if output_like: log.debug("New set of likelihood-derived parameters: %r", output_like) if -np.inf in loglikes_new: continue # Add/remove derived parameters and change priors of sampled parameters for p in add[_params]: if p in dummy_model_out.parameterization._directly_output: derived[p] = output_like[p] elif p in dummy_model_out.parameterization._derived_funcs: func = dummy_model_out.parameterization._derived_funcs[p] args = dummy_model_out.parameterization._derived_args[p] derived[p] = func( *[point.get(arg, output_like.get(arg, None)) for arg in args]) # We need to recompute the aggregated chi2 by hand for type_, likes in inv_types.items(): derived[_get_chi2_name(type_)] = sum( [-2 * lvalue for lname, lvalue in zip(collection_out.chi2_names, loglikes_new) if _undo_chi2_name(lname) in likes]) if log.getEffectiveLevel() <= logging.DEBUG: log.debug("New derived parameters: %r", dict([(p, derived[p]) for p in dummy_model_out.parameterization.derived_params() if p in add[_params]])) # Save to the collection (keep old weight for now) collection_out.add( sampled, derived=derived.values(), weight=point.get(_weight), logpriors=logpriors_new, loglikes=loglikes_new) # Display progress percent = np.round(i / len(collection_in) * 100) if percent != last_percent and not percent % 5: last_percent = percent progress_bar(log, percent, " (%d/%d)" % (i, len(collection_in))) if not collection_out.data.last_valid_index(): raise LoggedError( log, "No elements in the final sample. 
Possible causes: " "added a prior or likelihood valued zero over the full sampled domain, " "or the computation of the theory failed everywhere, etc.") # Reweight -- account for large dynamic range! # Prefer to rescale +inf to finite, and ignore final points with -inf. # Remove -inf's (0-weight), and correct indices difflogmax = max(collection_in[_minuslogpost] - collection_out[_minuslogpost]) collection_out.data[_weight] *= np.exp( collection_in[_minuslogpost] - collection_out[_minuslogpost] - difflogmax) collection_out.data = ( collection_out.data[collection_out.data.weight > 0].reset_index(drop=True)) collection_out._n = collection_out.data.last_valid_index() + 1 # Write! collection_out.out_update() log.info("Finished! Final number of samples: %d", len(collection_out)) return info_out, {"sample": collection_out}
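The reweighting step at the end can be summarised in standalone form (a sketch with assumed array names, not the source's own helper): the new weight of each point is the old weight times exp(logpost_new - logpost_old), with the largest exponent factored out so the result stays finite.

import numpy as np

def importance_reweight(weights, minuslogpost_old, minuslogpost_new):
    # logpost_new - logpost_old, written in terms of the stored -logpost columns
    diff = np.asarray(minuslogpost_old) - np.asarray(minuslogpost_new)
    return np.asarray(weights) * np.exp(diff - np.max(diff))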
def initialize(self): self.mpi_info("Initializing") self.max_evals = read_dnumber(self.max_evals, self.model.prior.d()) # Configure target method = self.model.loglike if self.ignore_prior else self.model.logpost kwargs = {"make_finite": True} if self.ignore_prior: kwargs["return_derived"] = False self.logp = lambda x: method(x, **kwargs) # Try to load info from previous samples. # If none, sample from reference (make sure that it has finite like/post) initial_point = None if self.output: files = self.output.find_collections() collection_in = None if files: if more_than_one_process(): if 1 + get_mpi_rank() <= len(files): collection_in = Collection(self.model, self.output, name=str(1 + get_mpi_rank()), resuming=True) else: collection_in = self.output.load_collections( self.model, concatenate=True) if collection_in: initial_point = (collection_in.bestfit() if self.ignore_prior else collection_in.MAP()) initial_point = initial_point[list( self.model.parameterization.sampled_params())].values self.log.info("Starting from %s of previous chain:", "best fit" if self.ignore_prior else "MAP") if initial_point is None: this_logp = -np.inf while not np.isfinite(this_logp): initial_point = self.model.prior.reference() this_logp = self.logp(initial_point) self.log.info("Starting from random initial point:") self.log.info( dict( zip(self.model.parameterization.sampled_params(), initial_point))) self._bounds = self.model.prior.bounds( confidence_for_unbounded=self.confidence_for_unbounded) # TODO: if ignore_prior, one should use *like* covariance (this is *post*) covmat = self._load_covmat(self.output)[0] # scale by conditional parameter widths (since not using correlation structure) scales = np.minimum(1 / np.sqrt(np.diag(np.linalg.inv(covmat))), (self._bounds[:, 1] - self._bounds[:, 0]) / 3) # Cov and affine transformation # Transform to space where initial point is at centre, and cov is normalised # Cannot do rotation, as supported minimization routines assume bounds aligned # with the parameter axes. self._affine_transform_matrix = np.diag(1 / scales) self._inv_affine_transform_matrix = np.diag(scales) self._scales = scales self._affine_transform_baseline = initial_point initial_point = self.affine_transform(initial_point) np.testing.assert_allclose(initial_point, np.zeros(initial_point.shape)) bounds = np.array( [self.affine_transform(self._bounds[:, i]) for i in range(2)]).T # Configure method if self.method.lower() == "bobyqa": self.minimizer = pybobyqa.solve self.kwargs = { "objfun": (lambda x: -self.logp_transf(x)), "x0": initial_point, "bounds": np.array(list(zip(*bounds))), "seek_global_minimum": (True if get_mpi_size() in [0, 1] else False), "maxfun": int(self.max_evals) } self.kwargs = recursive_update(deepcopy(self.kwargs), self.override_bobyqa or {}) self.log.debug( "Arguments for pybobyqa.solve:\n%r", {k: v for k, v in self.kwargs.items() if k != "objfun"}) elif self.method.lower() == "scipy": self.minimizer = scpminimize self.kwargs = { "fun": (lambda x: -self.logp_transf(x)), "x0": initial_point, "bounds": bounds, "options": { "maxiter": self.max_evals, "disp": (self.log.getEffectiveLevel() == logging.DEBUG) } } self.kwargs = recursive_update(deepcopy(self.kwargs), self.override_scipy or {}) self.log.debug( "Arguments for scipy.optimize.minimize:\n%r", {k: v for k, v in self.kwargs.items() if k != "fun"}) else: methods = ["bobyqa", "scipy"] raise LoggedError(self.log, "Method '%s' not recognized. Try one of %r.", self.method, methods)
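For reference, a hedged sketch of the scaling transform that this initializer sets up: the minimizer works in coordinates where the starting point sits at the origin and each direction has roughly unit conditional width. The helpers below mirror `_affine_transform_baseline` and `_scales`, but their names are illustrative, not the class's own methods.

import numpy as np

def to_normalised(x, baseline, scales):
    """Physical -> normalised coordinates (starting point maps to the origin)."""
    return (np.asarray(x) - baseline) / scales

def from_normalised(x_norm, baseline, scales):
    """Normalised -> physical coordinates."""
    return np.asarray(x_norm) * scales + baseline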
def initialize(self):
    """Initializes the sampler: creates the proposal distribution and draws
    the initial sample."""
    self.log.debug("Initializing")
    for p in ["burn_in", "max_tries", "output_every", "check_every", "callback_every"]:
        setattr(self, p,
                read_dnumber(getattr(self, p), self.model.prior.d(), dtype=int))
    if self.callback_every is None:
        self.callback_every = self.check_every
    # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
    self.burn_in_left = self.burn_in + 1
    # Max # checkpoints to wait, in case one process dies without sending MPI_ABORT
    self.been_waiting = 0
    self.max_waiting = max(50, self.max_tries / self.model.prior.d())
    if self.resuming and (max(self.mpi_size or 0, 1) != max(get_mpi_size(), 1)):
        self.log.error(
            "Cannot resume a sample with a different number of chains: "
            "was %d and now is %d.", max(self.mpi_size, 1), max(get_mpi_size(), 1))
        raise HandledException
    if not self.resuming and self.output:
        # Delete previous files (if not "forced", the run would have already failed)
        if (os.path.abspath(self.covmat_filename()) !=
                os.path.abspath(str(self.covmat))):
            try:
                os.remove(self.covmat_filename())
            except OSError:
                pass
        # There may be more chains than expected,
        # if #ranks was bigger in a previous run
        i = 0
        while True:
            i += 1
            collection_filename, _ = self.output.prepare_collection(str(i))
            try:
                os.remove(collection_filename)
            except OSError:
                break
    # One collection per MPI process: `name` is the MPI rank + 1
    name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
    self.collection = Collection(self.model, self.output, name=name,
                                 resuming=self.resuming)
    self.current_point = OnePoint(self.model, OutputDummy({}), name=name)
    # Use standard MH steps by default
    self.get_new_sample = self.get_new_sample_metropolis
    # Prepare oversampling / dragging if applicable
    self.effective_max_samples = self.max_samples
    if self.oversample and self.drag:
        self.log.error("Choose either oversampling or dragging, not both.")
        raise HandledException
    if self.oversample:
        factors, blocks = self.model.likelihood._speeds_of_params(int_speeds=True)
        self.oversampling_factors = factors
        self.log.info("Oversampling with factors:\n" + "\n".join(
            [" %d : %r" % (f, b)
             for f, b in zip(self.oversampling_factors, blocks)]))
        self.i_last_slow_block = None
        # No way right now to separate slow and fast
        slow_params = list(self.model.parameterization.sampled_params())
    elif self.drag:
        speeds, blocks = self.model.likelihood._speeds_of_params(
            fast_slow=True, int_speeds=True)
        # For now, no blocking inside either fast or slow: just 2 blocks
        self.i_last_slow_block = 0
        if np.all(speeds == speeds[0]):
            self.log.error(
                "All speeds are equal or too similar: cannot drag! "
                "Make sure to define accurate likelihoods' speeds.")
            raise HandledException
        # Make the 1st factor 1:
        speeds = [1, speeds[1] / speeds[0]]
        # Target: dragging step taking as long as slow step
        self.drag_interp_steps = self.drag * speeds[1]
        # Per dragging step, the (fast) posterior is evaluated *twice*,
        self.drag_interp_steps /= 2
        self.drag_interp_steps = int(np.round(self.drag_interp_steps))
        fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
        # Not too much or too little dragging
        drag_limits = [(int(l) * len(fast_params) if l is not None else l)
                       for l in self.drag_limits]
        if drag_limits[0] is not None and self.drag_interp_steps < drag_limits[0]:
            self.log.warning(
                "Number of dragging steps clipped from below: was not "
                "enough to efficiently explore the fast directions -- "
                "avoid this limit by decreasing 'drag_limits[0]'.")
            self.drag_interp_steps = drag_limits[0]
        if drag_limits[1] is not None and self.drag_interp_steps > drag_limits[1]:
            self.log.warning(
                "Number of dragging steps clipped from above: "
                "excessive, probably inefficient, exploration of the "
                "fast directions -- "
                "avoid this limit by increasing 'drag_limits[1]'.")
            self.drag_interp_steps = drag_limits[1]
        # Re-scale steps between checkpoint and callback to the slow dimensions only
        slow_params = list(chain(*blocks[:1 + self.i_last_slow_block]))
        self.n_slow = len(slow_params)
        for p in ["check_every", "callback_every"]:
            setattr(self, p,
                    int(getattr(self, p) * self.n_slow / self.model.prior.d()))
        self.log.info("Dragging with oversampling per step:\n" + "\n".join(
            [" %d : %r" % (f, b)
             for f, b in zip([1, self.drag_interp_steps],
                             [blocks[0], fast_params])]))
        self.get_new_sample = self.get_new_sample_dragging
    else:
        _, blocks = self.model.likelihood._speeds_of_params()
        self.oversampling_factors = [1 for b in blocks]
        slow_params = list(self.model.parameterization.sampled_params())
        self.n_slow = len(slow_params)
    # Turn parameter names into indices
    self.blocks = [
        [list(self.model.parameterization.sampled_params()).index(p) for p in b]
        for b in blocks]
    self.proposer = BlockedProposer(
        self.blocks, oversampling_factors=self.oversampling_factors,
        i_last_slow_block=self.i_last_slow_block,
        proposal_scale=self.proposal_scale)
    # Build the initial covariance matrix of the proposal, or load from checkpoint
    if self.resuming:
        covmat = np.loadtxt(self.covmat_filename())
        self.log.info("Covariance matrix from checkpoint.")
    else:
        covmat = self.initial_proposal_covmat(slow_params=slow_params)
        self.log.info("Initial covariance matrix.")
    self.log.debug(
        "Sampling with covmat:\n%s",
        DataFrame(covmat,
                  columns=self.model.parameterization.sampled_params(),
                  index=self.model.parameterization.sampled_params()).to_string(
                      line_width=_line_width))
    self.proposer.set_covariance(covmat)
    # Prepare callback function
    if self.callback_function is not None:
        self.callback_function_callable = (
            get_external_function(self.callback_function))
def initialize(self): """Initializes the sampler: creates the proposal distribution and draws the initial sample.""" if not self.model.prior.d(): raise LoggedError(self.log, "No parameters being varied for sampler") self.log.debug("Initializing") # MARKED FOR DEPRECATION IN v3.0 if getattr(self, "oversample", None) is not None: self.log.warning( "*DEPRECATION*: `oversample` will be deprecated in the " "next version. Oversampling is now requested by setting " "`oversample_power` > 0.") # END OF DEPRECATION BLOCK # MARKED FOR DEPRECATION IN v3.0 if getattr(self, "check_every", None) is not None: self.log.warning( "*DEPRECATION*: `check_every` will be deprecated in the " "next version. Please use `learn_every` instead.") # BEHAVIOUR TO BE REPLACED BY ERROR: self.learn_every = getattr(self, "check_every") # END OF DEPRECATION BLOCK if self.callback_every is None: self.callback_every = self.learn_every self._quants_d_units = [] for q in ["max_tries", "learn_every", "callback_every", "burn_in"]: number = NumberWithUnits(getattr(self, q), "d", dtype=int) self._quants_d_units.append(number) setattr(self, q, number) self.output_every = NumberWithUnits(self.output_every, "s", dtype=int) if is_main_process(): if self.output.is_resuming() and (max(self.mpi_size or 0, 1) != max(get_mpi_size(), 1)): raise LoggedError( self.log, "Cannot resume a run with a different number of chains: " "was %d and now is %d.", max(self.mpi_size, 1), max(get_mpi_size(), 1)) if more_than_one_process(): if get_mpi().Get_version()[0] < 3: raise LoggedError( self.log, "MPI use requires MPI version 3.0 or " "higher to support IALLGATHER.") sync_processes() # One collection per MPI process: `name` is the MPI rank + 1 name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank())) self.collection = Collection(self.model, self.output, name=name, resuming=self.output.is_resuming()) self.current_point = OneSamplePoint(self.model) # Use standard MH steps by default self.get_new_sample = self.get_new_sample_metropolis # Prepare callback function if self.callback_function is not None: self.callback_function_callable = (get_external_function( self.callback_function)) # Useful for getting last points added inside callback function self.last_point_callback = 0 # Monitoring/restore progress if is_main_process(): cols = [ "N", "timestamp", "acceptance_rate", "Rminus1", "Rminus1_cl" ] self.progress = DataFrame(columns=cols) self.i_learn = 1 if self.output and not self.output.is_resuming(): with open(self.progress_filename(), "w", encoding="utf-8") as progress_file: progress_file.write("# " + " ".join(self.progress.columns) + "\n") # Get first point, to be discarded -- not possible to determine its weight # Still, we need to compute derived parameters, since, as the proposal "blocked", # we may be saving the initial state of some block. # NB: if resuming but nothing was written (burn-in not finished): re-start if self.output.is_resuming() and len(self.collection): initial_point = (self.collection[ self.collection.sampled_params].iloc[len(self.collection) - 1]).values.copy() logpost = -(self.collection[_minuslogpost].iloc[ len(self.collection) - 1].copy()) logpriors = -(self.collection[self.collection.minuslogprior_names]. iloc[len(self.collection) - 1].copy()) loglikes = -0.5 * (self.collection[self.collection.chi2_names]. 
iloc[len(self.collection) - 1].copy()) derived = (self.collection[self.collection.derived_params].iloc[ len(self.collection) - 1].values.copy()) else: # NB: max_tries adjusted to dim instead of #cycles (blocking not computed yet) self.max_tries.set_scale(self.model.prior.d()) self.log.info( "Getting initial point... (this may take a few seconds)") initial_point, logpost, logpriors, loglikes, derived = \ self.model.get_valid_point(max_tries=self.max_tries.value) # If resuming but no existing chain, assume failed run and ignore blocking # if speeds measurement requested if self.output.is_resuming() and not len(self.collection) \ and self.measure_speeds: self.blocking = None if self.measure_speeds and self.blocking: self.log.warning( "Parameter blocking manually fixed: speeds will not be measured." ) elif self.measure_speeds: n = None if self.measure_speeds is True else int( self.measure_speeds) self.model.measure_and_set_speeds(n=n, discard=0) self.set_proposer_blocking() self.set_proposer_covmat(load=True) self.current_point.add(initial_point, derived=derived, logpost=logpost, logpriors=logpriors, loglikes=loglikes) self.log.info("Initial point: %s", self.current_point) # Max #(learn+convergence checks) to wait, # in case one process dies without sending MPI_ABORT self.been_waiting = 0 self.max_waiting = max(50, self.max_tries.unit_value) # Burning-in countdown -- the +1 accounts for the initial point (always accepted) self.burn_in_left = self.burn_in.value * self.current_point.output_thin + 1 # Initial dummy checkpoint # (needed when 1st "learn point" not reached in prev. run) self.write_checkpoint()
def initialize(self): """Imports the PolyChord sampler and prepares its arguments.""" if am_single_or_primary_process( ): # rank = 0 (MPI master) or None (no MPI) self.log.info("Initializing") # If path not given, try using general path to modules if not self.path and self.path_install: self.path = get_path(self.path_install) if self.path: if am_single_or_primary_process(): self.log.info("Importing *local* PolyChord from " + self.path) if not os.path.exists(os.path.realpath(self.path)): raise LoggedError( self.log, "The given path does not exist. " "Try installing PolyChord with " "'cobaya-install polychord -m [modules_path]") pc_build_path = get_build_path(self.path) if not pc_build_path: raise LoggedError( self.log, "Either PolyChord is not in the given folder, " "'%s', or you have not compiled it.", self.path) # Inserting the previously found path into the list of import folders sys.path.insert(0, pc_build_path) else: self.log.info("Importing *global* PolyChord.") try: import pypolychord from pypolychord.settings import PolyChordSettings self.pc = pypolychord except ImportError: raise LoggedError( self.log, "Couldn't find the PolyChord python interface. " "Make sure that you have compiled it, and that you either\n" " (a) specify a path (you didn't) or\n" " (b) install the Python interface globally with\n" " '/path/to/PolyChord/python setup.py install --user'") # Prepare arguments and settings self.nDims = self.model.prior.d() self.nDerived = (len(self.model.parameterization.derived_params()) + len(self.model.prior) + len(self.model.likelihood._likelihoods)) if self.logzero is None: self.logzero = np.nan_to_num(-np.inf) if self.max_ndead == np.inf: self.max_ndead = -1 for p in ["nlive", "nprior", "max_ndead"]: setattr(self, p, read_dnumber(getattr(self, p), self.nDims, dtype=int)) # Fill the automatic ones if getattr(self, "feedback", None) is None: values = { logging.CRITICAL: 0, logging.ERROR: 0, logging.WARNING: 0, logging.INFO: 1, logging.DEBUG: 2 } self.feedback = values[self.log.getEffectiveLevel()] try: output_folder = getattr(self.output, "folder") output_prefix = getattr(self.output, "prefix") or "" self.read_resume = self.resuming except AttributeError: # dummy output -- no resume! self.read_resume = False from tempfile import gettempdir output_folder = gettempdir() if am_single_or_primary_process(): from random import random output_prefix = hex(int(random() * 16**6))[2:] else: output_prefix = None if more_than_one_process(): output_prefix = get_mpi_comm().bcast(output_prefix, root=0) self.base_dir = os.path.join(output_folder, self.base_dir) self.file_root = output_prefix if am_single_or_primary_process(): # Creating output folder, if it does not exist (just one process) if not os.path.exists(self.base_dir): os.makedirs(self.base_dir) # Idem, a clusters folder if needed -- notice that PolyChord's default # is "True", here "None", hence the funny condition below if self.do_clustering is not False: # None here means "default" try: os.makedirs(os.path.join(self.base_dir, clusters)) except OSError: # exists! 
pass self.log.info("Storing raw PolyChord output in '%s'.", self.base_dir) # Exploiting the speed hierarchy if self.blocking: speeds, blocks = self.model.likelihood._check_speeds_of_params( self.blocking) else: speeds, blocks = self.model.likelihood._speeds_of_params( int_speeds=True) blocks_flat = list(chain(*blocks)) self.ordering = [ blocks_flat.index(p) for p in self.model.parameterization.sampled_params() ] self.grade_dims = np.array([len(block) for block in blocks]) # bugfix: pypolychord's C interface for Fortran does not like int numpy types self.grade_dims = [int(x) for x in self.grade_dims] # Steps per block # NB: num_repeats is ignored by PolyChord when int "grade_frac" given, # so needs to be applied by hand. # Make sure that speeds are integer, and that the slowest is 1, # for a straightforward application of num_repeats speeds = relative_to_int(speeds, 1) # In num_repeats, `d` is interpreted as dimension of each block self.grade_frac = [ int(speed * read_dnumber(self.num_repeats, dim_block)) for speed, dim_block in zip(speeds, self.grade_dims) ] # Assign settings pc_args = [ "nlive", "num_repeats", "nprior", "do_clustering", "precision_criterion", "max_ndead", "boost_posterior", "feedback", "logzero", "posteriors", "equals", "compression_factor", "cluster_posteriors", "write_resume", "read_resume", "write_stats", "write_live", "write_dead", "base_dir", "grade_frac", "grade_dims", "feedback", "read_resume", "base_dir", "file_root", "grade_frac", "grade_dims" ] # As stated above, num_repeats is ignored, so let's not pass it pc_args.pop(pc_args.index("num_repeats")) self.pc_settings = PolyChordSettings( self.nDims, self.nDerived, seed=(self.seed if self.seed is not None else -1), **{ p: getattr(self, p) for p in pc_args if getattr(self, p) is not None }) # prior conversion from the hypercube bounds = self.model.prior.bounds( confidence_for_unbounded=self.confidence_for_unbounded) # Check if priors are bounded (nan's to inf) inf = np.where(np.isinf(bounds)) if len(inf[0]): params_names = self.model.parameterization.sampled_params() params = [params_names[i] for i in sorted(list(set(inf[0])))] raise LoggedError( self.log, "PolyChord needs bounded priors, but the parameter(s) '" "', '".join(params) + "' is(are) unbounded.") locs = bounds[:, 0] scales = bounds[:, 1] - bounds[:, 0] # This function re-scales the parameters AND puts them in the right order self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales ).tolist() # We will need the volume of the prior domain, since PolyChord divides by it self.logvolume = np.log(np.prod(scales)) # Prepare callback function if self.callback_function is not None: self.callback_function_callable = (get_external_function( self.callback_function)) self.last_point_callback = 0 # Prepare runtime live and dead points collections self.live = Collection(self.model, None, name="live", initial_size=self.pc_settings.nlive) self.dead = Collection(self.model, self.output, name="dead") self.n_sampled = len(self.model.parameterization.sampled_params()) self.n_derived = len(self.model.parameterization.derived_params()) self.n_priors = len(self.model.prior) self.n_likes = len(self.model.likelihood._likelihoods) # Done! if am_single_or_primary_process(): self.log.info("Calling PolyChord with arguments:") for p, v in inspect.getmembers(self.pc_settings, lambda a: not (callable(a))): if not p.startswith("_"): self.log.info(" %s: %s", p, v)
def initialise(self):
    """Initialises the sampler: creates the proposal distribution and draws
    the initial sample."""
    self.log.info("Initializing")
    # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
    self.burn_in_left = self.burn_in + 1
    # One collection per MPI process: `name` is the MPI rank + 1
    name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
    self.collection = Collection(self.parametrization, self.likelihood,
                                 self.output, name=name)
    self.current_point = OnePoint(self.parametrization, self.likelihood,
                                  self.output, name=name)
    # Use the standard steps by default
    self.get_new_sample = self.get_new_sample_metropolis
    # Prepare oversampling / fast-dragging if applicable
    self.effective_max_samples = self.max_samples
    if self.oversample and self.drag:
        self.log.error("Choose either oversampling or fast-dragging, not both.")
        raise HandledException
    # if (self.oversample or self.drag) and len(set(factors)) == 1:
    #     self.log.error("All block speeds are similar: "
    #                    "no dragging or oversampling possible.")
    #     raise HandledException
    if self.oversample:
        factors, blocks = self.likelihood.speeds_of_params(
            oversampling_factors=True)
        self.oversampling_factors = factors
        # WIP: actually, we would have to re-normalise to the dimension of the blocks.
        self.log.info("Oversampling with factors:\n" + "\n".join(
            [" %d : %r" % (f, b)
             for f, b in zip(self.oversampling_factors, blocks)]))
        # WIP: useless until likelihoods have STATES!
        self.log.error("Sorry, oversampling is WIP")
        raise HandledException
    elif self.drag:
        # WIP: for now, can only separate between theory and likelihoods
        # until likelihoods have states
        if not self.likelihood.theory:
            self.log.error(
                "WIP: dragging disabled for now when no theory code present.")
            raise HandledException
        # if self.max_speed_slow < min(speeds) or self.max_speed_slow >= max(speeds):
        #     self.log.error("The maximum speed considered slow, `max_speed_slow`, "
        #                    "must be %g <= `max_speed_slow < %g, and is %g",
        #                    min(speeds), max(speeds), self.max_speed_slow)
        #     raise HandledException
        speeds, blocks = self.likelihood.speeds_of_params(int_speeds=True,
                                                          fast_slow=True)
        if np.all(speeds == speeds[0]):
            self.log.error(
                "All speeds are equal: cannot drag! Make sure to define, "
                "especially, the speed of the fastest likelihoods.")
            raise HandledException
        self.i_last_slow_block = 0  # just theory can be slow for now
        fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
        self.n_slow = sum(
            len(blocks[i]) for i in range(1 + self.i_last_slow_block))
        self.drag_interp_steps = int(
            self.drag * np.round(min(speeds[1:]) / speeds[0]))
        self.log.info("Dragging with oversampling per step:\n" + "\n".join(
            [" %d : %r" % (f, b)
             for f, b in zip([1, self.drag_interp_steps],
                             [blocks[0], fast_params])]))
        self.get_new_sample = self.get_new_sample_dragging
    else:
        _, blocks = self.likelihood.speeds_of_params()
        self.oversampling_factors = [1 for b in blocks]
        self.n_slow = len(self.parametrization.sampled_params())
    # Turn parameter names into indices
    blocks = [
        [list(self.parametrization.sampled_params().keys()).index(p) for p in b]
        for b in blocks]
    self.proposer = BlockedProposer(
        blocks, oversampling_factors=getattr(self, "oversampling_factors", None),
        i_last_slow_block=getattr(self, "i_last_slow_block", None),
        propose_scale=self.propose_scale)
    # Build the initial covariance matrix of the proposal
    covmat = self.initial_proposal_covmat()
    self.log.info("Sampling with covariance matrix:")
    self.log.info("%r", covmat)
    self.proposer.set_covariance(covmat)
    # Prepare callback function
    if self.callback_function is not None:
        self.callback_function_callable = (
            get_external_function(self.callback_function))
def post(info, sample=None): logger_setup(info.get(_debug), info.get(_debug_file)) log = logging.getLogger(__name__.split(".")[-1]) try: info_post = info[_post] except KeyError: log.error("No 'post' block given. Nothing to do!") raise HandledException if get_mpi_rank(): log.warning( "Post-processing is not yet MPI-able. Doing nothing for rank > 1 processes." ) return # 1. Load existing sample output_in = Output(output_prefix=info.get(_output_prefix), resume=True) info_in = load_input(output_in.file_full) if output_in else deepcopy(info) dummy_model_in = DummyModel(info_in[_params], info_in[_likelihood], info_in.get(_prior, None), info_in.get(_theory, None)) if output_in: i = 0 while True: try: collection = Collection(dummy_model_in, output_in, name="%d" % (1 + i), load=True, onload_skip=info_post.get("skip", 0), onload_thin=info_post.get("thin", 1)) if i == 0: collection_in = collection else: collection_in._append(collection) i += 1 except IOError: break elif sample: if isinstance(sample, Collection): sample = [sample] collection_in = deepcopy(sample[0]) for s in sample[1:]: try: collection_in._append(s) except: log.error("Failed to load some of the input samples.") raise HandledException i = len(sample) else: log.error( "Not output from where to load from or input collections given.") raise HandledException log.info("Loaded %d chain%s. Will process %d samples.", i, "s" if i - 1 else "", collection_in.n()) if collection_in.n() <= 1: log.error( "Not enough samples for post-processing. Try using a larger sample, " "or skipping or thinning less.") raise HandledException # 2. Compare old and new info: determine what to do add = info_post.get("add", {}) remove = info_post.get("remove", {}) # Add a dummy 'one' likelihood, to absorb unused parameters if not add.get(_likelihood): add[_likelihood] = odict() add[_likelihood].update({"one": None}) # Expand the "add" info add = get_full_info(add) # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters out = {_params: deepcopy(info_in[_params])} for p in remove.get(_params, {}): pinfo = info_in[_params].get(p) if pinfo is None or not is_derived_param(pinfo): log.error( "You tried to remove parameter '%s', which is not a derived paramter. " "Only derived parameters can be removed during post-processing.", p) raise HandledException out[_params].pop(p) mlprior_names_add = [] for p, pinfo in add.get(_params, {}).items(): pinfo_in = info_in[_params].get(p) if is_sampled_param(pinfo): if not is_sampled_param(pinfo_in): # No added sampled parameters (de-marginalisation not implemented) if pinfo_in is None: log.error( "You added a new sampled parameter %r (maybe accidentaly " "by adding a new likelihood that depends on it). " "Adding new sampled parameters is not possible. Try fixing " "it to some value.", p) raise HandledException else: log.error( "You tried to change the prior of parameter '%s', " "but it was not a sampled parameter. " "To change that prior, you need to define as an external one.", p) raise HandledException if mlprior_names_add[:1] != _prior_1d_name: mlprior_names_add = ( [_minuslogprior + _separator + _prior_1d_name] + mlprior_names_add) elif is_derived_param(pinfo): if p in out[_params]: log.error( "You tried to add derived parameter '%s', which is already " "present. 
To force its recomputation, 'remove' it too.", p) raise HandledException elif is_fixed_param(pinfo): # Only one possibility left "fixed" parameter that was not present before: # input of new likelihood, or just an argument for dynamical derived (dropped) if ((p in info_in[_params] and pinfo[_p_value] != (pinfo_in or {}).get(_p_value, None))): log.error( "You tried to add a fixed parameter '%s: %r' that was already present" " but had a different value or was not fixed. This is not allowed. " "The old info of the parameter was '%s: %r'", p, dict(pinfo), p, dict(pinfo_in)) raise HandledException else: log.error("This should not happen. Contact the developers.") raise HandledException out[_params][p] = pinfo # For the likelihood only, turn the rest of *derived* parameters into constants, # so that the likelihoods do not try to compute them) # But be careful to exclude *input* params that have a "derived: True" value # (which in "full info" turns into "derived: 'lambda [x]: [x]'") out_params_like = deepcopy(out[_params]) for p, pinfo in out_params_like.items(): if ((is_derived_param(pinfo) and not (_p_value in pinfo) and p not in add.get(_params, {}))): out_params_like[p] = {_p_value: np.nan, _p_drop: True} parameterization_like = Parameterization(out_params_like, ignore_unused_sampled=True) # 2.2 Manage adding/removing priors and likelihoods warn_remove = False for level in [_prior, _likelihood]: out[level] = getattr(dummy_model_in, level) if level == _prior: out[level].remove(_prior_1d_name) for pdf in info_post.get("remove", {}).get(level, []) or []: try: out[level].remove(pdf) warn_remove = True except ValueError: log.error( "Trying to remove %s '%s', but it is not present. " "Existing ones: %r", level, pdf, out[level]) raise HandledException if warn_remove: log.warning("You are removing a prior or likelihood pdf. " "Notice that if the resulting posterior is much wider " "than the original one, or displaced enough, " "it is probably safer to explore it directly.") if _prior in add: mlprior_names_add += [ _minuslogprior + _separator + name for name in add[_prior] ] out[_prior] += list(add[_prior]) prior_recompute_1d = (mlprior_names_add[:1] == [ _minuslogprior + _separator + _prior_1d_name ]) # Don't initialise the theory code if not adding/recomputing theory, # theory-derived params or likelihoods recompute_theory = info_in.get(_theory) and not (list( add[_likelihood]) == ["one"] and not any([ is_derived_param(pinfo) for pinfo in add.get(_params, {}).values() ])) if recompute_theory: # Inherit from the original chain (needs input|output_params, renames, etc theory = list(info_in[_theory].keys())[0] info_theory_out = odict([[ theory, recursive_update(deepcopy(info_in[_theory][theory]), add.get(_theory, {theory: {}})[theory]) ]]) else: info_theory_out = None chi2_names_add = [ _chi2 + _separator + name for name in add[_likelihood] if name is not "one" ] out[_likelihood] += [l for l in add[_likelihood] if l is not "one"] if recompute_theory: log.warn( "You are recomputing the theory, but in the current version this does " "not force recomputation of any likelihood or derived parameter, " "unless explicitly removed+added.") for level in [_prior, _likelihood]: for i, x_i in enumerate(out[level]): if x_i in list(out[level])[i + 1:]: log.error( "You have added %s '%s', which was already present. If you " "want to force its recomputation, you must also 'remove' it.", level, x_i) raise HandledException # 3. 
Create output collection if "suffix" not in info_post: log.error("You need to provide a 'suffix' for your chains.") raise HandledException # Use default prefix if it exists. If it does not, produce no output by default. # {post: {output: None}} suppresses output, and if it's a string, updates it. out_prefix = info_post.get(_output_prefix, info.get(_output_prefix)) if out_prefix not in [None, False]: out_prefix += "_" + _post + "_" + info_post["suffix"] output_out = Output(output_prefix=out_prefix, force_output=info.get(_force)) info_out = deepcopy(info) info_out[_post] = info_post # Updated with input info and extended (full) add info info_out.update(info_in) info_out[_post]["add"] = add dummy_model_out = DummyModel(out[_params], out[_likelihood], info_prior=out[_prior]) if recompute_theory: theory = list(info_theory_out.keys())[0] if _input_params not in info_theory_out[theory]: log.error( "You appear to be post-processing a chain generated with an older " "version of Cobaya. For post-processing to work, please edit the " "'[root]__full.info' file of the original chain to add, inside the " "theory code block, the list of its input parameters. E.g.\n----\n" "theory:\n %s:\n input_params: [param1, param2, ...]\n" "----\nIf you get strange errors later, it is likely that you did not " "specify the correct set of theory parameters.\n" "The full set of input parameters are %s.", theory, list(dummy_model_out.parameterization.input_params())) raise HandledException prior_add = Prior(dummy_model_out.parameterization, add.get(_prior)) likelihood_add = Likelihood(add[_likelihood], parameterization_like, info_theory=info_theory_out, modules=info.get(_path_install)) # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post]["add"] add[_likelihood].pop("one") if likelihood_add.theory: # Make sure that theory.needs is called at least once, for adjustments likelihood_add.theory.needs() collection_out = Collection(dummy_model_out, output_out, name="1") output_out.dump_info({}, info_out) # 4. Main loop! 
log.info("Running post-processing...") last_percent = 0 for i, point in enumerate(collection_in.data.itertuples()): log.debug("Point: %r", point) sampled = [ getattr(point, param) for param in dummy_model_in.parameterization.sampled_params() ] derived = odict( [[param, getattr(point, param, None)] for param in dummy_model_out.parameterization.derived_params()]) inputs = odict([[ param, getattr( point, param, dummy_model_in.parameterization.constant_params().get( param, dummy_model_out.parameterization.constant_params().get( param, None))) ] for param in dummy_model_out.parameterization.input_params()]) # Solve inputs that depend on a function and were not saved # (we don't use the Parameterization_to_input method in case there are references # to functions that cannot be loaded at the moment) for p, value in inputs.items(): if value is None: func = dummy_model_out.parameterization._input_funcs[p] args = dummy_model_out.parameterization._input_args[p] inputs[p] = func(*[getattr(point, arg) for arg in args]) # Add/remove priors priors_add = prior_add.logps(sampled) if not prior_recompute_1d: priors_add = priors_add[1:] logpriors_add = odict(zip(mlprior_names_add, priors_add)) logpriors_new = [ logpriors_add.get(name, -getattr(point, name, 0)) for name in collection_out.minuslogprior_names ] if log.getEffectiveLevel() <= logging.DEBUG: log.debug("New set of priors: %r", dict(zip(dummy_model_out.prior, logpriors_new))) if -np.inf in logpriors_new: continue # Add/remove likelihoods output_like = [] if likelihood_add: # Notice "one" (last in likelihood_add) is ignored: not in chi2_names loglikes_add = odict( zip(chi2_names_add, likelihood_add.logps(inputs, _derived=output_like))) output_like = dict(zip(likelihood_add.output_params, output_like)) else: loglikes_add = dict() loglikes_new = [ loglikes_add.get(name, -0.5 * getattr(point, name, 0)) for name in collection_out.chi2_names ] if log.getEffectiveLevel() <= logging.DEBUG: log.debug("New set of likelihoods: %r", dict(zip(dummy_model_out.likelihood, loglikes_new))) if output_like: log.debug("New set of likelihood-derived parameters: %r", output_like) if -np.inf in loglikes_new: continue # Add/remove derived parameters and change priors of sampled parameters for p in add[_params]: if p in dummy_model_out.parameterization._directly_output: derived[p] = output_like[p] elif p in dummy_model_out.parameterization._derived_funcs: func = dummy_model_out.parameterization._derived_funcs[p] args = dummy_model_out.parameterization._derived_args[p] derived[p] = func(*[ getattr(point, arg, output_like.get(arg, None)) for arg in args ]) if log.getEffectiveLevel() <= logging.DEBUG: log.debug( "New derived parameters: %r", dict([[ p, derived[p] ] for p in dummy_model_out.parameterization.derived_params() if p in add[_params]])) # Save to the collection (keep old weight for now) collection_out.add(sampled, derived=derived.values(), weight=getattr(point, _weight), logpriors=logpriors_new, loglikes=loglikes_new) # Display progress percent = np.round(i / collection_in.n() * 100) if percent != last_percent and not percent % 5: last_percent = percent progress_bar(log, percent, " (%d/%d)" % (i, collection_in.n())) if not collection_out.data.last_valid_index(): log.error( "No elements in the final sample. Possible causes: " "added a prior or likelihood valued zero over the full sampled domain, " "or the computation of the theory failed everywhere, etc.") raise HandledException # Reweight -- account for large dynamic range! 
# Prefer to rescale +inf to finite, and ignore final points with -inf. # Remove -inf's (0-weight), and correct indices difflogmax = max(collection_in[_minuslogpost] - collection_out[_minuslogpost]) collection_out.data[_weight] *= np.exp(collection_in[_minuslogpost] - collection_out[_minuslogpost] - difflogmax) collection_out.data = ( collection_out.data[collection_out.data.weight > 0].reset_index( drop=True)) collection_out._n = collection_out.data.last_valid_index() + 1 # Write! collection_out._out_update() log.info("Finished! Final number of samples: %d", collection_out.n()) return info_out, {"sample": collection_out}