def load_collections(self, model, skip=0, thin=1, concatenate=False,
                     name=None, extension=None):
    """
    Loads all collection files found which are compatible with this `Output`
    instance, including their path in their name.

    Use `name` for particular types of collections (default: any number).
    Pass `False` to mean there is nothing between the output prefix and the
    extension.
    """
    filenames = self.find_collections(name=name, extension=extension)
    collections = [
        Collection(model, self, name="%d" % (1 + i), file_name=filename,
                   load=True, onload_skip=skip, onload_thin=thin)
        for i, filename in enumerate(filenames)]
    if concatenate and collections:
        collection = collections[0]
        for collection_i in collections[1:]:
            collection.append(collection_i)
        collections = collection
    return collections
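For context, a minimal usage sketch (not part of the source; `get_output` is the helper used later in `post`, and the prefix and `model` are assumed to come from an existing run and its updated info):

# Hypothetical usage: load every chain written under "chains/run" into a single
# Collection, dropping the first 100 rows of each and keeping every 5th one.
output = get_output(output_prefix="chains/run")
full_chain = output.load_collections(model, skip=100, thin=5, concatenate=True)
print("Total number of (weighted) samples:", len(full_chain))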
def initialise(self):
    """
    Creates a 1-point collection to store the point at which the posterior
    is evaluated.
    """
    self.one_point = Collection(
        self.parametrization, self.likelihood, self.output,
        initial_size=1, name="1")
    self.log.info("Initialised!")
def initialize(self):
    """
    Creates a 1-point collection to store the point at which the posterior
    is evaluated.
    """
    self.one_point = Collection(self.model, self.output, initial_size=1, name="1")
    self.log.info("Initialized!")
def initialize(self):
    self.log.info("Initializing.")
    # Example for setting default options
    print("+++ For option '%s' got value '%s'. Add more options in fisher.yaml." % (
        "example_option", self.example_option))
    # Prepare the list of "samples" (posterior evaluations)
    self.collection = Collection(self.model, self.output)
    # Prepare vectors to store derivatives
    # ... up to you
    # Prepare the matrix
    self.fisher_matrix = np.eye(self.model.prior.d())
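The derivative bookkeeping above is left "up to you"; as one possible way to fill `self.fisher_matrix`, here is a hedged sketch that estimates it as minus the Hessian of the log-posterior by central finite differences. It assumes `model.logpost(x)` accepts an array of sampled-parameter values (as used by the samplers below); `numerical_fisher` is an illustrative helper, not part of the source.

import numpy as np

def numerical_fisher(model, x0, eps=1e-3):
    """Minus the numerical Hessian of the log-posterior at x0 (central differences)."""
    d = len(x0)
    hess = np.empty((d, d))
    for i in range(d):
        for j in range(i, d):
            def logp_shifted(si, sj):
                # Evaluate the log-posterior at x0 shifted by (si*eps, sj*eps)
                # along directions i and j.
                x = np.array(x0, dtype=float)
                x[i] += si * eps
                x[j] += sj * eps
                return model.logpost(x)
            hess[i, j] = hess[j, i] = (
                logp_shifted(+1, +1) - logp_shifted(+1, -1)
                - logp_shifted(-1, +1) + logp_shifted(-1, -1)) / (4 * eps ** 2)
    return -hess

With such a helper, the placeholder `np.eye(...)` above could instead be seeded with something like `numerical_fisher(self.model, self.model.prior.reference())`.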
def load_collections(self, model, skip=0, thin=1, concatenate=False):
    filenames = self.find_collections()
    collections = [
        Collection(model, self, name="%d" % (1 + i), file_name=filename,
                   load=True, onload_skip=skip, onload_thin=thin)
        for i, filename in enumerate(filenames)]
    if concatenate and collections:
        collection = collections[0]
        for collection_i in collections[1:]:
            collection._append(collection_i)
        collections = collection
    return collections
def initialize(self):
    """
    Creates a 1-point collection to store the point at which the posterior
    is evaluated.
    """
    try:
        self.N = int(self.N)
    except (ValueError, TypeError):
        raise LoggedError(
            self.log, "Could not convert the number of samples to an integer: %r",
            self.N)
    self.one_point = Collection(
        self.model, self.output, initial_size=self.N, name="1")
    self.log.info("Initialized!")
def save_sample(self, fname, name):
    sample = np.atleast_2d(np.loadtxt(fname))
    if not sample.size:
        return None
    collection = Collection(self.model, self.output, name=str(name))
    for row in sample:
        collection.add(
            row[2:2 + self.n_sampled],
            derived=row[2 + self.n_sampled:2 + self.n_sampled + self.n_derived + 1],
            weight=row[0], logpost=-row[1],
            logpriors=row[-(self.n_priors + self.n_likes):-self.n_likes],
            loglikes=row[-self.n_likes:])
    # make sure that the points are written
    collection._out_update()
    return collection
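As a reading aid, the column order of the text file is inferred from the slicing above; the toy row below (made-up values, not from the source) just makes that assumed layout explicit.

# Assumed layout: weight, -logpost, sampled..., derived..., log-priors..., log-likelihoods...
import numpy as np
n_sampled, n_derived, n_priors, n_likes = 2, 1, 1, 2
row = np.array([1.0, 12.3,      # weight, -log(posterior)
                0.1, 0.2,       # sampled parameter values
                0.5,            # derived parameter values
                -1.0,           # log-prior block
                -5.0, -6.3])    # per-likelihood block
assert row[2:2 + n_sampled].tolist() == [0.1, 0.2]
assert row[-n_likes:].tolist() == [-5.0, -6.3]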
def save_sample(self, fname, name):
    sample = np.atleast_2d(np.loadtxt(fname))
    collection = Collection(self.parametrization, self.likelihood, self.output,
                            name=str(name))
    for row in sample:
        collection.add(
            row[2:2 + self.n_sampled],
            derived=row[2 + self.n_sampled:2 + self.n_sampled + self.n_derived + 1],
            weight=row[0], logpost=-row[1],
            logprior=row[-(1 + self.n_liks)], logliks=row[-self.n_liks:])
    # make sure that the points are written
    collection.out_update()
    return collection
def initialize(self): """Imports the PolyChord sampler and prepares its arguments.""" # Allow global import if no direct path specification allow_global = not self.path if not self.path and self.packages_path: self.path = self.get_path(self.packages_path) self.pc = self.is_installed(path=self.path, allow_global=allow_global) if not self.pc: raise NotInstalledError( self.log, "Could not find PolyChord. Check error message above. " "To install it, run 'cobaya-install polychord --%s " "[packages_path]'", _packages_path_arg) # Prepare arguments and settings from pypolychord.settings import PolyChordSettings self.n_sampled = len(self.model.parameterization.sampled_params()) self.n_derived = len(self.model.parameterization.derived_params()) self.n_priors = len(self.model.prior) self.n_likes = len(self.model.likelihood) self.nDims = self.model.prior.d() self.nDerived = (self.n_derived + self.n_priors + self.n_likes) if self.logzero is None: self.logzero = np.nan_to_num(-np.inf) if self.max_ndead == np.inf: self.max_ndead = -1 self._quants_d_units = ["nlive", "max_ndead"] for p in self._quants_d_units: if getattr(self, p) is not None: setattr( self, p, NumberWithUnits(getattr(self, p), "d", scale=self.nDims, dtype=int).value) self._quants_nlive_units = ["nprior"] for p in self._quants_nlive_units: if getattr(self, p) is not None: setattr( self, p, NumberWithUnits(getattr(self, p), "nlive", scale=self.nlive, dtype=int).value) # Fill the automatic ones if getattr(self, "feedback", None) is None: values = { logging.CRITICAL: 0, logging.ERROR: 0, logging.WARNING: 0, logging.INFO: 1, logging.DEBUG: 2 } self.feedback = values[self.log.getEffectiveLevel()] # Prepare output folders and prefixes if self.output: self.file_root = self.output.prefix self.read_resume = self.output.is_resuming() else: output_prefix = share_mpi( hex(int(random() * 16**6))[2:] if is_main_process() else None) self.file_root = output_prefix # dummy output -- no resume! self.read_resume = False self.base_dir = self.get_base_dir(self.output) self.raw_clusters_dir = os.path.join(self.base_dir, self._clusters_dir) self.output.create_folder(self.base_dir) if self.do_clustering: self.clusters_folder = self.get_clusters_dir(self.output) self.output.create_folder(self.clusters_folder) self.mpi_info("Storing raw PolyChord output in '%s'.", self.base_dir) # Exploiting the speed hierarchy if self.blocking: blocks, oversampling_factors = self.model.check_blocking( self.blocking) else: if self.measure_speeds: self.model.measure_and_set_speeds(n=self.measure_speeds) blocks, oversampling_factors = self.model.get_param_blocking_for_sampler( oversample_power=self.oversample_power) self.mpi_info("Parameter blocks and their oversampling factors:") max_width = len(str(max(oversampling_factors))) for f, b in zip(oversampling_factors, blocks): self.mpi_info("* %" + "%d" % max_width + "d : %r", f, b) # Save blocking in updated info, in case we want to resume self._updated_info["blocking"] = list(zip(oversampling_factors, blocks)) blocks_flat = list(chain(*blocks)) self.ordering = [ blocks_flat.index(p) for p in self.model.parameterization.sampled_params() ] self.grade_dims = [len(block) for block in blocks] # Steps per block # NB: num_repeats is ignored by PolyChord when int "grade_frac" given, # so needs to be applied by hand. 
# In num_repeats, `d` is interpreted as dimension of each block self.grade_frac = [ int(o * read_dnumber(self.num_repeats, dim_block)) for o, dim_block in zip(oversampling_factors, self.grade_dims) ] # Assign settings pc_args = [ "nlive", "num_repeats", "nprior", "do_clustering", "precision_criterion", "max_ndead", "boost_posterior", "feedback", "logzero", "posteriors", "equals", "compression_factor", "cluster_posteriors", "write_resume", "read_resume", "write_stats", "write_live", "write_dead", "base_dir", "grade_frac", "grade_dims", "feedback", "read_resume", "base_dir", "file_root", "grade_frac", "grade_dims" ] # As stated above, num_repeats is ignored, so let's not pass it pc_args.pop(pc_args.index("num_repeats")) self.pc_settings = PolyChordSettings( self.nDims, self.nDerived, seed=(self.seed if self.seed is not None else -1), **{ p: getattr(self, p) for p in pc_args if getattr(self, p) is not None }) # prior conversion from the hypercube bounds = self.model.prior.bounds( confidence_for_unbounded=self.confidence_for_unbounded) # Check if priors are bounded (nan's to inf) inf = np.where(np.isinf(bounds)) if len(inf[0]): params_names = self.model.parameterization.sampled_params() params = [params_names[i] for i in sorted(list(set(inf[0])))] raise LoggedError( self.log, "PolyChord needs bounded priors, but the parameter(s) '" "', '".join(params) + "' is(are) unbounded.") locs = bounds[:, 0] scales = bounds[:, 1] - bounds[:, 0] # This function re-scales the parameters AND puts them in the right order self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales ).tolist() # We will need the volume of the prior domain, since PolyChord divides by it self.logvolume = np.log(np.prod(scales)) # Prepare callback function if self.callback_function is not None: self.callback_function_callable = (get_external_function( self.callback_function)) self.last_point_callback = 0 # Prepare runtime live and dead points collections self.live = Collection(self.model, None, name="live", initial_size=self.pc_settings.nlive) self.dead = Collection(self.model, self.output, name="dead") # Done! if is_main_process(): self.log.debug("Calling PolyChord with arguments:") for p, v in inspect.getmembers(self.pc_settings, lambda a: not (callable(a))): if not p.startswith("_"): self.log.debug(" %s: %s", p, v) self.mpi_info("Initialized!")
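To illustrate the prior conversion set up above (a standalone sketch with made-up bounds and ordering, not code from the source): PolyChord proposes points in the unit hypercube in the speed-blocked parameter order, and `pc_prior` maps them onto the physical flat box in the sampled-parameter order.

import numpy as np

bounds = np.array([[0.0, 1.0],     # parameter a: [lower, upper]
                   [-5.0, 5.0]])   # parameter b
ordering = [1, 0]                  # blocked order is (b, a); sampled order is (a, b)
locs, scales = bounds[:, 0], bounds[:, 1] - bounds[:, 0]
x_hypercube = [0.25, 0.5]          # point proposed by PolyChord in [0, 1]^2: (x_b, x_a)
physical = (locs + np.array(x_hypercube)[ordering] * scales).tolist()
# -> [0.5, -2.5]:  a = 0 + 0.5 * 1,  b = -5 + 0.25 * 10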
def post(info, sample=None): logger_setup(info.get(_debug), info.get(_debug_file)) log = logging.getLogger(__name__.split(".")[-1]) # MARKED FOR DEPRECATION IN v3.0 # BEHAVIOUR TO BE REPLACED BY ERROR: check_deprecated_modules_path(info) # END OF DEPRECATION BLOCK try: info_post = info[_post] except KeyError: raise LoggedError(log, "No 'post' block given. Nothing to do!") if get_mpi_rank(): log.warning( "Post-processing is not yet MPI-aware. Doing nothing for rank > 1 processes.") return if info.get(_resume): log.warning("Resuming not implemented for post-processing. Re-starting.") # 1. Load existing sample output_in = get_output(output_prefix=info.get(_output_prefix)) if output_in: try: info_in = output_in.reload_updated_info() except FileNotFoundError: raise LoggedError(log, "Error loading input model: " "could not find input info at %s", output_in.file_updated) else: info_in = deepcopy_where_possible(info) dummy_model_in = DummyModel(info_in[_params], info_in[kinds.likelihood], info_in.get(_prior, None)) if output_in: if not output_in.find_collections(): raise LoggedError(log, "No samples found for the input model with prefix %s", os.path.join(output_in.folder, output_in.prefix)) collection_in = output_in.load_collections( dummy_model_in, skip=info_post.get("skip", 0), thin=info_post.get("thin", 1), concatenate=True) elif sample: if isinstance(sample, Collection): sample = [sample] collection_in = deepcopy(sample[0]) for s in sample[1:]: try: collection_in.append(s) except: raise LoggedError(log, "Failed to load some of the input samples.") else: raise LoggedError(log, "Not output from where to load from or input collections given.") log.info("Will process %d samples.", len(collection_in)) if len(collection_in) <= 1: raise LoggedError( log, "Not enough samples for post-processing. Try using a larger sample, " "or skipping or thinning less.") # 2. Compare old and new info: determine what to do add = info_post.get(_post_add, {}) or {} remove = info_post.get(_post_remove, {}) # Add a dummy 'one' likelihood, to absorb unused parameters if not add.get(kinds.likelihood): add[kinds.likelihood] = {} add[kinds.likelihood]["one"] = None # Expand the "add" info add = update_info(add) # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters out = {_params: deepcopy_where_possible(info_in[_params])} for p in remove.get(_params, {}): pinfo = info_in[_params].get(p) if pinfo is None or not is_derived_param(pinfo): raise LoggedError( log, "You tried to remove parameter '%s', which is not a derived parameter. " "Only derived parameters can be removed during post-processing.", p) out[_params].pop(p) # Force recomputation of aggregated chi2 for p in list(out[_params]): if p.startswith(_get_chi2_name("")): out[_params].pop(p) mlprior_names_add = [] for p, pinfo in add.get(_params, {}).items(): pinfo_in = info_in[_params].get(p) if is_sampled_param(pinfo): if not is_sampled_param(pinfo_in): # No added sampled parameters (de-marginalisation not implemented) if pinfo_in is None: raise LoggedError( log, "You added a new sampled parameter %r (maybe accidentally " "by adding a new likelihood that depends on it). " "Adding new sampled parameters is not possible. Try fixing " "it to some value.", p) else: raise LoggedError( log, "You tried to change the prior of parameter '%s', " "but it was not a sampled parameter. 
" "To change that prior, you need to define as an external one.", p) if mlprior_names_add[:1] != _prior_1d_name: mlprior_names_add = ([_minuslogprior + _separator + _prior_1d_name] + mlprior_names_add) elif is_derived_param(pinfo): if p in out[_params]: raise LoggedError( log, "You tried to add derived parameter '%s', which is already " "present. To force its recomputation, 'remove' it too.", p) elif is_fixed_param(pinfo): # Only one possibility left "fixed" parameter that was not present before: # input of new likelihood, or just an argument for dynamical derived (dropped) if ((p in info_in[_params] and pinfo[partag.value] != (pinfo_in or {}).get(partag.value, None))): raise LoggedError( log, "You tried to add a fixed parameter '%s: %r' that was already present" " but had a different value or was not fixed. This is not allowed. " "The old info of the parameter was '%s: %r'", p, dict(pinfo), p, dict(pinfo_in)) else: raise LoggedError(log, "This should not happen. Contact the developers.") out[_params][p] = pinfo # For the likelihood only, turn the rest of *derived* parameters into constants, # so that the likelihoods do not try to compute them) # But be careful to exclude *input* params that have a "derived: True" value # (which in "updated info" turns into "derived: 'lambda [x]: [x]'") out_params_like = deepcopy_where_possible(out[_params]) for p, pinfo in out_params_like.items(): if ((is_derived_param(pinfo) and not (partag.value in pinfo) and p not in add.get(_params, {}))): out_params_like[p] = {partag.value: np.nan, partag.drop: True} # 2.2 Manage adding/removing priors and likelihoods warn_remove = False for level in [_prior, kinds.likelihood]: out[level] = getattr(dummy_model_in, level) if level == _prior: out[level].remove(_prior_1d_name) for pdf in info_post.get(_post_remove, {}).get(level, []) or []: try: out[level].remove(pdf) warn_remove = True except ValueError: raise LoggedError( log, "Trying to remove %s '%s', but it is not present. " "Existing ones: %r", level, pdf, out[level]) if warn_remove: log.warning("You are removing a prior or likelihood pdf. 
" "Notice that if the resulting posterior is much wider " "than the original one, or displaced enough, " "it is probably safer to explore it directly.") if _prior in add: mlprior_names_add += [_minuslogprior + _separator + name for name in add[_prior]] out[_prior] += list(add[_prior]) prior_recompute_1d = ( mlprior_names_add[:1] == [_minuslogprior + _separator + _prior_1d_name]) # Don't initialise the theory code if not adding/recomputing theory, # theory-derived params or likelihoods recompute_theory = info_in.get(kinds.theory) and not ( list(add[kinds.likelihood]) == ["one"] and not any(is_derived_param(pinfo) for pinfo in add.get(_params, {}).values())) if recompute_theory: # Inherit from the original chain (needs input|output_params, renames, etc add_theory = add.get(kinds.theory) if add_theory: info_theory_out = {} if len(add_theory) > 1: log.warning('Importance sampling with more than one theory is ' 'not really tested') add_theory = add_theory.copy() for theory, theory_info in info_in[kinds.theory].items(): theory_copy = deepcopy_where_possible(theory_info) if theory in add_theory: info_theory_out[theory] = \ recursive_update(theory_copy, add_theory.pop(theory)) else: info_theory_out[theory] = theory_copy info_theory_out.update(add_theory) else: info_theory_out = deepcopy_where_possible(info_in[kinds.theory]) else: info_theory_out = None chi2_names_add = [ _get_chi2_name(name) for name in add[kinds.likelihood] if name != "one"] out[kinds.likelihood] += [l for l in add[kinds.likelihood] if l != "one"] if recompute_theory: log.warning("You are recomputing the theory, but in the current version this does" " not force recomputation of any likelihood or derived parameter, " "unless explicitly removed+added.") for level in [_prior, kinds.likelihood]: for i, x_i in enumerate(out[level]): if x_i in list(out[level])[i + 1:]: raise LoggedError( log, "You have added %s '%s', which was already present. If you " "want to force its recomputation, you must also 'remove' it.", level, x_i) # 3. Create output collection if _post_suffix not in info_post: raise LoggedError(log, "You need to provide a '%s' for your chains.", _post_suffix) # Use default prefix if it exists. If it does not, produce no output by default. # {post: {output: None}} suppresses output, and if it's a string, updates it. out_prefix = info_post.get(_output_prefix, info.get(_output_prefix)) if out_prefix not in [None, False]: out_prefix += _separator_files + _post + _separator_files + info_post[ _post_suffix] output_out = get_output(output_prefix=out_prefix, force=info.get(_force)) if output_out and not output_out.force and output_out.find_collections(): raise LoggedError(log, "Found existing post-processing output with prefix %r. 
" "Delete it manually or re-run with `force: True` " "(or `-f`, `--force` from the shell).", out_prefix) elif output_out and output_out.force: output_out.delete_infos() for regexp in output_out.find_collections(): output_out.delete_with_regexp(re.compile(regexp)) info_out = deepcopy_where_possible(info) info_out[_post] = info_post # Updated with input info and extended (updated) add info info_out.update(info_in) info_out[_post][_post_add] = add dummy_model_out = DummyModel(out[_params], out[kinds.likelihood], info_prior=out[_prior]) if recompute_theory: # TODO: May need updating for more than one, or maybe can be removed theory = list(info_theory_out)[0] if _input_params not in info_theory_out[theory]: raise LoggedError( log, "You appear to be post-processing a chain generated with an older " "version of Cobaya. For post-processing to work, please edit the " "'[root].updated.yaml' file of the original chain to add, inside the " "theory code block, the list of its input parameters. E.g.\n----\n" "theory:\n %s:\n input_params: [param1, param2, ...]\n" "----\nIf you get strange errors later, it is likely that you did not " "specify the correct set of theory parameters.\n" "The full set of input parameters are %s.", theory, list(dummy_model_out.parameterization.input_params())) # TODO: check allow_renames=False? # TODO: May well be simplifications here, this is v close to pre-refactor logic # Have not gone through or understood all the parameterization stuff model_add = Model(out_params_like, add[kinds.likelihood], info_prior=add.get(_prior), info_theory=info_theory_out, packages_path=info.get(_packages_path), allow_renames=False, post=True, prior_parameterization=dummy_model_out.parameterization) # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post][_post_add] add[kinds.likelihood].pop("one") collection_out = Collection(dummy_model_out, output_out, name="1") output_out.check_and_dump_info(None, info_out, check_compatible=False) # Prepare recomputation of aggregated chi2 # (they need to be recomputed by hand, because its autocomputation won't pick up # old likelihoods for a given type) all_types = { like: str_to_list(add[kinds.likelihood].get( like, info_in[kinds.likelihood].get(like)).get("type", []) or []) for like in out[kinds.likelihood]} types = set(chain(*list(all_types.values()))) inv_types = {t: [like for like, like_types in all_types.items() if t in like_types] for t in types} # 4. Main loop! 
log.info("Running post-processing...") last_percent = 0 for i, point in collection_in.data.iterrows(): log.debug("Point: %r", point) sampled = [point[param] for param in dummy_model_in.parameterization.sampled_params()] derived = {param: point.get(param, None) for param in dummy_model_out.parameterization.derived_params()} inputs = {param: point.get( param, dummy_model_in.parameterization.constant_params().get( param, dummy_model_out.parameterization.constant_params().get( param, None))) for param in dummy_model_out.parameterization.input_params()} # Solve inputs that depend on a function and were not saved # (we don't use the Parameterization_to_input method in case there are references # to functions that cannot be loaded at the moment) for p, value in inputs.items(): if value is None: func = dummy_model_out.parameterization._input_funcs[p] args = dummy_model_out.parameterization._input_args[p] inputs[p] = func(*[point.get(arg) for arg in args]) # Add/remove priors priors_add = model_add.prior.logps(sampled) if not prior_recompute_1d: priors_add = priors_add[1:] logpriors_add = dict(zip(mlprior_names_add, priors_add)) logpriors_new = [logpriors_add.get(name, - point.get(name, 0)) for name in collection_out.minuslogprior_names] if log.getEffectiveLevel() <= logging.DEBUG: log.debug( "New set of priors: %r", dict(zip(dummy_model_out.prior, logpriors_new))) if -np.inf in logpriors_new: continue # Add/remove likelihoods output_like = [] if add[kinds.likelihood]: # Notice "one" (last in likelihood_add) is ignored: not in chi2_names loglikes_add, output_like = model_add.logps(inputs, return_derived=True) loglikes_add = dict(zip(chi2_names_add, loglikes_add)) output_like = dict(zip(model_add.output_params, output_like)) else: loglikes_add = dict() loglikes_new = [loglikes_add.get(name, -0.5 * point.get(name, 0)) for name in collection_out.chi2_names] if log.getEffectiveLevel() <= logging.DEBUG: log.debug( "New set of likelihoods: %r", dict(zip(dummy_model_out.likelihood, loglikes_new))) if output_like: log.debug("New set of likelihood-derived parameters: %r", output_like) if -np.inf in loglikes_new: continue # Add/remove derived parameters and change priors of sampled parameters for p in add[_params]: if p in dummy_model_out.parameterization._directly_output: derived[p] = output_like[p] elif p in dummy_model_out.parameterization._derived_funcs: func = dummy_model_out.parameterization._derived_funcs[p] args = dummy_model_out.parameterization._derived_args[p] derived[p] = func( *[point.get(arg, output_like.get(arg, None)) for arg in args]) # We need to recompute the aggregated chi2 by hand for type_, likes in inv_types.items(): derived[_get_chi2_name(type_)] = sum( [-2 * lvalue for lname, lvalue in zip(collection_out.chi2_names, loglikes_new) if _undo_chi2_name(lname) in likes]) if log.getEffectiveLevel() <= logging.DEBUG: log.debug("New derived parameters: %r", dict([(p, derived[p]) for p in dummy_model_out.parameterization.derived_params() if p in add[_params]])) # Save to the collection (keep old weight for now) collection_out.add( sampled, derived=derived.values(), weight=point.get(_weight), logpriors=logpriors_new, loglikes=loglikes_new) # Display progress percent = np.round(i / len(collection_in) * 100) if percent != last_percent and not percent % 5: last_percent = percent progress_bar(log, percent, " (%d/%d)" % (i, len(collection_in))) if not collection_out.data.last_valid_index(): raise LoggedError( log, "No elements in the final sample. 
Possible causes: " "added a prior or likelihood valued zero over the full sampled domain, " "or the computation of the theory failed everywhere, etc.") # Reweight -- account for large dynamic range! # Prefer to rescale +inf to finite, and ignore final points with -inf. # Remove -inf's (0-weight), and correct indices difflogmax = max(collection_in[_minuslogpost] - collection_out[_minuslogpost]) collection_out.data[_weight] *= np.exp( collection_in[_minuslogpost] - collection_out[_minuslogpost] - difflogmax) collection_out.data = ( collection_out.data[collection_out.data.weight > 0].reset_index(drop=True)) collection_out._n = collection_out.data.last_valid_index() + 1 # Write! collection_out.out_update() log.info("Finished! Final number of samples: %d", len(collection_out)) return info_out, {"sample": collection_out}
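The reweighting step at the end can be summarised in standalone form (a sketch with assumed array names, not the source's own helper): the new weight of each point is the old weight times exp(logpost_new - logpost_old), with the largest exponent factored out so the result stays finite.

import numpy as np

def importance_reweight(weights, minuslogpost_old, minuslogpost_new):
    # logpost_new - logpost_old, written in terms of the stored -logpost columns
    diff = np.asarray(minuslogpost_old) - np.asarray(minuslogpost_new)
    return np.asarray(weights) * np.exp(diff - np.max(diff))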
def initialize(self): self.mpi_info("Initializing") self.max_evals = read_dnumber(self.max_evals, self.model.prior.d()) # Configure target method = self.model.loglike if self.ignore_prior else self.model.logpost kwargs = {"make_finite": True} if self.ignore_prior: kwargs["return_derived"] = False self.logp = lambda x: method(x, **kwargs) # Try to load info from previous samples. # If none, sample from reference (make sure that it has finite like/post) initial_point = None if self.output: files = self.output.find_collections() collection_in = None if files: if more_than_one_process(): if 1 + get_mpi_rank() <= len(files): collection_in = Collection(self.model, self.output, name=str(1 + get_mpi_rank()), resuming=True) else: collection_in = self.output.load_collections( self.model, concatenate=True) if collection_in: initial_point = (collection_in.bestfit() if self.ignore_prior else collection_in.MAP()) initial_point = initial_point[list( self.model.parameterization.sampled_params())].values self.log.info("Starting from %s of previous chain:", "best fit" if self.ignore_prior else "MAP") if initial_point is None: this_logp = -np.inf while not np.isfinite(this_logp): initial_point = self.model.prior.reference() this_logp = self.logp(initial_point) self.log.info("Starting from random initial point:") self.log.info( dict( zip(self.model.parameterization.sampled_params(), initial_point))) self._bounds = self.model.prior.bounds( confidence_for_unbounded=self.confidence_for_unbounded) # TODO: if ignore_prior, one should use *like* covariance (this is *post*) covmat = self._load_covmat(self.output)[0] # scale by conditional parameter widths (since not using correlation structure) scales = np.minimum(1 / np.sqrt(np.diag(np.linalg.inv(covmat))), (self._bounds[:, 1] - self._bounds[:, 0]) / 3) # Cov and affine transformation # Transform to space where initial point is at centre, and cov is normalised # Cannot do rotation, as supported minimization routines assume bounds aligned # with the parameter axes. self._affine_transform_matrix = np.diag(1 / scales) self._inv_affine_transform_matrix = np.diag(scales) self._scales = scales self._affine_transform_baseline = initial_point initial_point = self.affine_transform(initial_point) np.testing.assert_allclose(initial_point, np.zeros(initial_point.shape)) bounds = np.array( [self.affine_transform(self._bounds[:, i]) for i in range(2)]).T # Configure method if self.method.lower() == "bobyqa": self.minimizer = pybobyqa.solve self.kwargs = { "objfun": (lambda x: -self.logp_transf(x)), "x0": initial_point, "bounds": np.array(list(zip(*bounds))), "seek_global_minimum": (True if get_mpi_size() in [0, 1] else False), "maxfun": int(self.max_evals) } self.kwargs = recursive_update(deepcopy(self.kwargs), self.override_bobyqa or {}) self.log.debug( "Arguments for pybobyqa.solve:\n%r", {k: v for k, v in self.kwargs.items() if k != "objfun"}) elif self.method.lower() == "scipy": self.minimizer = scpminimize self.kwargs = { "fun": (lambda x: -self.logp_transf(x)), "x0": initial_point, "bounds": bounds, "options": { "maxiter": self.max_evals, "disp": (self.log.getEffectiveLevel() == logging.DEBUG) } } self.kwargs = recursive_update(deepcopy(self.kwargs), self.override_scipy or {}) self.log.debug( "Arguments for scipy.optimize.minimize:\n%r", {k: v for k, v in self.kwargs.items() if k != "fun"}) else: methods = ["bobyqa", "scipy"] raise LoggedError(self.log, "Method '%s' not recognized. Try one of %r.", self.method, methods)
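For reference, a hedged sketch of the scaling transform that this initializer sets up: the minimizer works in coordinates where the starting point sits at the origin and each direction has roughly unit conditional width. The helpers below mirror `_affine_transform_baseline` and `_scales`, but their names are illustrative, not the class's own methods.

import numpy as np

def to_normalised(x, baseline, scales):
    """Physical -> normalised coordinates (starting point maps to the origin)."""
    return (np.asarray(x) - baseline) / scales

def from_normalised(x_norm, baseline, scales):
    """Normalised -> physical coordinates."""
    return np.asarray(x_norm) * scales + baseline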
def initialize(self):
    """Initializes the sampler: creates the proposal distribution and draws
    the initial sample."""
    self.log.debug("Initializing")
    for p in ["burn_in", "max_tries", "output_every", "check_every", "callback_every"]:
        setattr(self, p,
                read_dnumber(getattr(self, p), self.model.prior.d(), dtype=int))
    if self.callback_every is None:
        self.callback_every = self.check_every
    # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
    self.burn_in_left = self.burn_in + 1
    # Max # checkpoints to wait, in case one process dies without sending MPI_ABORT
    self.been_waiting = 0
    self.max_waiting = max(50, self.max_tries / self.model.prior.d())
    if self.resuming and (max(self.mpi_size or 0, 1) != max(get_mpi_size(), 1)):
        self.log.error(
            "Cannot resume a sample with a different number of chains: "
            "was %d and now is %d.", max(self.mpi_size, 1), max(get_mpi_size(), 1))
        raise HandledException
    if not self.resuming and self.output:
        # Delete previous files (if not "forced", the run would have already failed)
        if (os.path.abspath(self.covmat_filename()) !=
                os.path.abspath(str(self.covmat))):
            try:
                os.remove(self.covmat_filename())
            except OSError:
                pass
        # There may be more chains than expected,
        # if #ranks was bigger in a previous run
        i = 0
        while True:
            i += 1
            collection_filename, _ = self.output.prepare_collection(str(i))
            try:
                os.remove(collection_filename)
            except OSError:
                break
    # One collection per MPI process: `name` is the MPI rank + 1
    name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
    self.collection = Collection(self.model, self.output, name=name,
                                 resuming=self.resuming)
    self.current_point = OnePoint(self.model, OutputDummy({}), name=name)
    # Use standard MH steps by default
    self.get_new_sample = self.get_new_sample_metropolis
    # Prepare oversampling / dragging if applicable
    self.effective_max_samples = self.max_samples
    if self.oversample and self.drag:
        self.log.error("Choose either oversampling or dragging, not both.")
        raise HandledException
    if self.oversample:
        factors, blocks = self.model.likelihood._speeds_of_params(int_speeds=True)
        self.oversampling_factors = factors
        self.log.info("Oversampling with factors:\n" + "\n".join(
            [" %d : %r" % (f, b)
             for f, b in zip(self.oversampling_factors, blocks)]))
        self.i_last_slow_block = None
        # No way right now to separate slow and fast
        slow_params = list(self.model.parameterization.sampled_params())
    elif self.drag:
        speeds, blocks = self.model.likelihood._speeds_of_params(
            fast_slow=True, int_speeds=True)
        # For now, no blocking inside either fast or slow: just 2 blocks
        self.i_last_slow_block = 0
        if np.all(speeds == speeds[0]):
            self.log.error(
                "All speeds are equal or too similar: cannot drag! "
                "Make sure to define accurate likelihoods' speeds.")
            raise HandledException
        # Make the 1st factor 1:
        speeds = [1, speeds[1] / speeds[0]]
        # Target: dragging step taking as long as slow step
        self.drag_interp_steps = self.drag * speeds[1]
        # Per dragging step, the (fast) posterior is evaluated *twice*,
        self.drag_interp_steps /= 2
        self.drag_interp_steps = int(np.round(self.drag_interp_steps))
        fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
        # Not too much or too little dragging
        drag_limits = [(int(l) * len(fast_params) if l is not None else l)
                       for l in self.drag_limits]
        if drag_limits[0] is not None and self.drag_interp_steps < drag_limits[0]:
            self.log.warning(
                "Number of dragging steps clipped from below: was not "
                "enough to efficiently explore the fast directions -- "
                "avoid this limit by decreasing 'drag_limits[0]'.")
            self.drag_interp_steps = drag_limits[0]
        if drag_limits[1] is not None and self.drag_interp_steps > drag_limits[1]:
            self.log.warning(
                "Number of dragging steps clipped from above: "
                "excessive, probably inefficient, exploration of the "
                "fast directions -- "
                "avoid this limit by increasing 'drag_limits[1]'.")
            self.drag_interp_steps = drag_limits[1]
        # Re-scale steps between checkpoint and callback to the slow dimensions only
        slow_params = list(chain(*blocks[:1 + self.i_last_slow_block]))
        self.n_slow = len(slow_params)
        for p in ["check_every", "callback_every"]:
            setattr(self, p,
                    int(getattr(self, p) * self.n_slow / self.model.prior.d()))
        self.log.info("Dragging with oversampling per step:\n" + "\n".join(
            [" %d : %r" % (f, b)
             for f, b in zip([1, self.drag_interp_steps],
                             [blocks[0], fast_params])]))
        self.get_new_sample = self.get_new_sample_dragging
    else:
        _, blocks = self.model.likelihood._speeds_of_params()
        self.oversampling_factors = [1 for b in blocks]
        slow_params = list(self.model.parameterization.sampled_params())
        self.n_slow = len(slow_params)
    # Turn parameter names into indices
    self.blocks = [
        [list(self.model.parameterization.sampled_params()).index(p) for p in b]
        for b in blocks]
    self.proposer = BlockedProposer(
        self.blocks, oversampling_factors=self.oversampling_factors,
        i_last_slow_block=self.i_last_slow_block,
        proposal_scale=self.proposal_scale)
    # Build the initial covariance matrix of the proposal, or load from checkpoint
    if self.resuming:
        covmat = np.loadtxt(self.covmat_filename())
        self.log.info("Covariance matrix from checkpoint.")
    else:
        covmat = self.initial_proposal_covmat(slow_params=slow_params)
        self.log.info("Initial covariance matrix.")
    self.log.debug(
        "Sampling with covmat:\n%s",
        DataFrame(covmat,
                  columns=self.model.parameterization.sampled_params(),
                  index=self.model.parameterization.sampled_params()).to_string(
                      line_width=_line_width))
    self.proposer.set_covariance(covmat)
    # Prepare callback function
    if self.callback_function is not None:
        self.callback_function_callable = (
            get_external_function(self.callback_function))
def initialize(self): """Initializes the sampler: creates the proposal distribution and draws the initial sample.""" if not self.model.prior.d(): raise LoggedError(self.log, "No parameters being varied for sampler") self.log.debug("Initializing") # MARKED FOR DEPRECATION IN v3.0 if getattr(self, "oversample", None) is not None: self.log.warning( "*DEPRECATION*: `oversample` will be deprecated in the " "next version. Oversampling is now requested by setting " "`oversample_power` > 0.") # END OF DEPRECATION BLOCK # MARKED FOR DEPRECATION IN v3.0 if getattr(self, "check_every", None) is not None: self.log.warning( "*DEPRECATION*: `check_every` will be deprecated in the " "next version. Please use `learn_every` instead.") # BEHAVIOUR TO BE REPLACED BY ERROR: self.learn_every = getattr(self, "check_every") # END OF DEPRECATION BLOCK if self.callback_every is None: self.callback_every = self.learn_every self._quants_d_units = [] for q in ["max_tries", "learn_every", "callback_every", "burn_in"]: number = NumberWithUnits(getattr(self, q), "d", dtype=int) self._quants_d_units.append(number) setattr(self, q, number) self.output_every = NumberWithUnits(self.output_every, "s", dtype=int) if is_main_process(): if self.output.is_resuming() and (max(self.mpi_size or 0, 1) != max(get_mpi_size(), 1)): raise LoggedError( self.log, "Cannot resume a run with a different number of chains: " "was %d and now is %d.", max(self.mpi_size, 1), max(get_mpi_size(), 1)) if more_than_one_process(): if get_mpi().Get_version()[0] < 3: raise LoggedError( self.log, "MPI use requires MPI version 3.0 or " "higher to support IALLGATHER.") sync_processes() # One collection per MPI process: `name` is the MPI rank + 1 name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank())) self.collection = Collection(self.model, self.output, name=name, resuming=self.output.is_resuming()) self.current_point = OneSamplePoint(self.model) # Use standard MH steps by default self.get_new_sample = self.get_new_sample_metropolis # Prepare callback function if self.callback_function is not None: self.callback_function_callable = (get_external_function( self.callback_function)) # Useful for getting last points added inside callback function self.last_point_callback = 0 # Monitoring/restore progress if is_main_process(): cols = [ "N", "timestamp", "acceptance_rate", "Rminus1", "Rminus1_cl" ] self.progress = DataFrame(columns=cols) self.i_learn = 1 if self.output and not self.output.is_resuming(): with open(self.progress_filename(), "w", encoding="utf-8") as progress_file: progress_file.write("# " + " ".join(self.progress.columns) + "\n") # Get first point, to be discarded -- not possible to determine its weight # Still, we need to compute derived parameters, since, as the proposal "blocked", # we may be saving the initial state of some block. # NB: if resuming but nothing was written (burn-in not finished): re-start if self.output.is_resuming() and len(self.collection): initial_point = (self.collection[ self.collection.sampled_params].iloc[len(self.collection) - 1]).values.copy() logpost = -(self.collection[_minuslogpost].iloc[ len(self.collection) - 1].copy()) logpriors = -(self.collection[self.collection.minuslogprior_names]. iloc[len(self.collection) - 1].copy()) loglikes = -0.5 * (self.collection[self.collection.chi2_names]. 
iloc[len(self.collection) - 1].copy()) derived = (self.collection[self.collection.derived_params].iloc[ len(self.collection) - 1].values.copy()) else: # NB: max_tries adjusted to dim instead of #cycles (blocking not computed yet) self.max_tries.set_scale(self.model.prior.d()) self.log.info( "Getting initial point... (this may take a few seconds)") initial_point, logpost, logpriors, loglikes, derived = \ self.model.get_valid_point(max_tries=self.max_tries.value) # If resuming but no existing chain, assume failed run and ignore blocking # if speeds measurement requested if self.output.is_resuming() and not len(self.collection) \ and self.measure_speeds: self.blocking = None if self.measure_speeds and self.blocking: self.log.warning( "Parameter blocking manually fixed: speeds will not be measured." ) elif self.measure_speeds: n = None if self.measure_speeds is True else int( self.measure_speeds) self.model.measure_and_set_speeds(n=n, discard=0) self.set_proposer_blocking() self.set_proposer_covmat(load=True) self.current_point.add(initial_point, derived=derived, logpost=logpost, logpriors=logpriors, loglikes=loglikes) self.log.info("Initial point: %s", self.current_point) # Max #(learn+convergence checks) to wait, # in case one process dies without sending MPI_ABORT self.been_waiting = 0 self.max_waiting = max(50, self.max_tries.unit_value) # Burning-in countdown -- the +1 accounts for the initial point (always accepted) self.burn_in_left = self.burn_in.value * self.current_point.output_thin + 1 # Initial dummy checkpoint # (needed when 1st "learn point" not reached in prev. run) self.write_checkpoint()
def initialize(self): """Imports the PolyChord sampler and prepares its arguments.""" if am_single_or_primary_process( ): # rank = 0 (MPI master) or None (no MPI) self.log.info("Initializing") # If path not given, try using general path to modules if not self.path and self.path_install: self.path = get_path(self.path_install) if self.path: if am_single_or_primary_process(): self.log.info("Importing *local* PolyChord from " + self.path) if not os.path.exists(os.path.realpath(self.path)): raise LoggedError( self.log, "The given path does not exist. " "Try installing PolyChord with " "'cobaya-install polychord -m [modules_path]") pc_build_path = get_build_path(self.path) if not pc_build_path: raise LoggedError( self.log, "Either PolyChord is not in the given folder, " "'%s', or you have not compiled it.", self.path) # Inserting the previously found path into the list of import folders sys.path.insert(0, pc_build_path) else: self.log.info("Importing *global* PolyChord.") try: import pypolychord from pypolychord.settings import PolyChordSettings self.pc = pypolychord except ImportError: raise LoggedError( self.log, "Couldn't find the PolyChord python interface. " "Make sure that you have compiled it, and that you either\n" " (a) specify a path (you didn't) or\n" " (b) install the Python interface globally with\n" " '/path/to/PolyChord/python setup.py install --user'") # Prepare arguments and settings self.nDims = self.model.prior.d() self.nDerived = (len(self.model.parameterization.derived_params()) + len(self.model.prior) + len(self.model.likelihood._likelihoods)) if self.logzero is None: self.logzero = np.nan_to_num(-np.inf) if self.max_ndead == np.inf: self.max_ndead = -1 for p in ["nlive", "nprior", "max_ndead"]: setattr(self, p, read_dnumber(getattr(self, p), self.nDims, dtype=int)) # Fill the automatic ones if getattr(self, "feedback", None) is None: values = { logging.CRITICAL: 0, logging.ERROR: 0, logging.WARNING: 0, logging.INFO: 1, logging.DEBUG: 2 } self.feedback = values[self.log.getEffectiveLevel()] try: output_folder = getattr(self.output, "folder") output_prefix = getattr(self.output, "prefix") or "" self.read_resume = self.resuming except AttributeError: # dummy output -- no resume! self.read_resume = False from tempfile import gettempdir output_folder = gettempdir() if am_single_or_primary_process(): from random import random output_prefix = hex(int(random() * 16**6))[2:] else: output_prefix = None if more_than_one_process(): output_prefix = get_mpi_comm().bcast(output_prefix, root=0) self.base_dir = os.path.join(output_folder, self.base_dir) self.file_root = output_prefix if am_single_or_primary_process(): # Creating output folder, if it does not exist (just one process) if not os.path.exists(self.base_dir): os.makedirs(self.base_dir) # Idem, a clusters folder if needed -- notice that PolyChord's default # is "True", here "None", hence the funny condition below if self.do_clustering is not False: # None here means "default" try: os.makedirs(os.path.join(self.base_dir, clusters)) except OSError: # exists! 
pass self.log.info("Storing raw PolyChord output in '%s'.", self.base_dir) # Exploiting the speed hierarchy if self.blocking: speeds, blocks = self.model.likelihood._check_speeds_of_params( self.blocking) else: speeds, blocks = self.model.likelihood._speeds_of_params( int_speeds=True) blocks_flat = list(chain(*blocks)) self.ordering = [ blocks_flat.index(p) for p in self.model.parameterization.sampled_params() ] self.grade_dims = np.array([len(block) for block in blocks]) # bugfix: pypolychord's C interface for Fortran does not like int numpy types self.grade_dims = [int(x) for x in self.grade_dims] # Steps per block # NB: num_repeats is ignored by PolyChord when int "grade_frac" given, # so needs to be applied by hand. # Make sure that speeds are integer, and that the slowest is 1, # for a straightforward application of num_repeats speeds = relative_to_int(speeds, 1) # In num_repeats, `d` is interpreted as dimension of each block self.grade_frac = [ int(speed * read_dnumber(self.num_repeats, dim_block)) for speed, dim_block in zip(speeds, self.grade_dims) ] # Assign settings pc_args = [ "nlive", "num_repeats", "nprior", "do_clustering", "precision_criterion", "max_ndead", "boost_posterior", "feedback", "logzero", "posteriors", "equals", "compression_factor", "cluster_posteriors", "write_resume", "read_resume", "write_stats", "write_live", "write_dead", "base_dir", "grade_frac", "grade_dims", "feedback", "read_resume", "base_dir", "file_root", "grade_frac", "grade_dims" ] # As stated above, num_repeats is ignored, so let's not pass it pc_args.pop(pc_args.index("num_repeats")) self.pc_settings = PolyChordSettings( self.nDims, self.nDerived, seed=(self.seed if self.seed is not None else -1), **{ p: getattr(self, p) for p in pc_args if getattr(self, p) is not None }) # prior conversion from the hypercube bounds = self.model.prior.bounds( confidence_for_unbounded=self.confidence_for_unbounded) # Check if priors are bounded (nan's to inf) inf = np.where(np.isinf(bounds)) if len(inf[0]): params_names = self.model.parameterization.sampled_params() params = [params_names[i] for i in sorted(list(set(inf[0])))] raise LoggedError( self.log, "PolyChord needs bounded priors, but the parameter(s) '" "', '".join(params) + "' is(are) unbounded.") locs = bounds[:, 0] scales = bounds[:, 1] - bounds[:, 0] # This function re-scales the parameters AND puts them in the right order self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales ).tolist() # We will need the volume of the prior domain, since PolyChord divides by it self.logvolume = np.log(np.prod(scales)) # Prepare callback function if self.callback_function is not None: self.callback_function_callable = (get_external_function( self.callback_function)) self.last_point_callback = 0 # Prepare runtime live and dead points collections self.live = Collection(self.model, None, name="live", initial_size=self.pc_settings.nlive) self.dead = Collection(self.model, self.output, name="dead") self.n_sampled = len(self.model.parameterization.sampled_params()) self.n_derived = len(self.model.parameterization.derived_params()) self.n_priors = len(self.model.prior) self.n_likes = len(self.model.likelihood._likelihoods) # Done! if am_single_or_primary_process(): self.log.info("Calling PolyChord with arguments:") for p, v in inspect.getmembers(self.pc_settings, lambda a: not (callable(a))): if not p.startswith("_"): self.log.info(" %s: %s", p, v)
def initialise(self):
    """Initialises the sampler: creates the proposal distribution and draws
    the initial sample."""
    self.log.info("Initializing")
    # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
    self.burn_in_left = self.burn_in + 1
    # One collection per MPI process: `name` is the MPI rank + 1
    name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
    self.collection = Collection(self.parametrization, self.likelihood,
                                 self.output, name=name)
    self.current_point = OnePoint(self.parametrization, self.likelihood,
                                  self.output, name=name)
    # Use the standard steps by default
    self.get_new_sample = self.get_new_sample_metropolis
    # Prepare oversampling / fast-dragging if applicable
    self.effective_max_samples = self.max_samples
    if self.oversample and self.drag:
        self.log.error("Choose either oversampling or fast-dragging, not both.")
        raise HandledException
    # if (self.oversample or self.drag) and len(set(factors)) == 1:
    #     self.log.error("All block speeds are similar: "
    #                    "no dragging or oversampling possible.")
    #     raise HandledException
    if self.oversample:
        factors, blocks = self.likelihood.speeds_of_params(
            oversampling_factors=True)
        self.oversampling_factors = factors
        # WIP: actually, we would have to re-normalise to the dimension of the blocks.
        self.log.info("Oversampling with factors:\n" + "\n".join(
            [" %d : %r" % (f, b)
             for f, b in zip(self.oversampling_factors, blocks)]))
        # WIP: useless until likelihoods have STATES!
        self.log.error("Sorry, oversampling is WIP")
        raise HandledException
    elif self.drag:
        # WIP: for now, can only separate between theory and likelihoods
        # until likelihoods have states
        if not self.likelihood.theory:
            self.log.error(
                "WIP: dragging disabled for now when no theory code present.")
            raise HandledException
        # if self.max_speed_slow < min(speeds) or self.max_speed_slow >= max(speeds):
        #     self.log.error("The maximum speed considered slow, `max_speed_slow`, "
        #                    "must be %g <= `max_speed_slow < %g, and is %g",
        #                    min(speeds), max(speeds), self.max_speed_slow)
        #     raise HandledException
        speeds, blocks = self.likelihood.speeds_of_params(int_speeds=True,
                                                          fast_slow=True)
        if np.all(speeds == speeds[0]):
            self.log.error(
                "All speeds are equal: cannot drag! Make sure to define, "
                "especially, the speed of the fastest likelihoods.")
            raise HandledException
        self.i_last_slow_block = 0  # just theory can be slow for now
        fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
        self.n_slow = sum(
            len(blocks[i]) for i in range(1 + self.i_last_slow_block))
        self.drag_interp_steps = int(
            self.drag * np.round(min(speeds[1:]) / speeds[0]))
        self.log.info("Dragging with oversampling per step:\n" + "\n".join(
            [" %d : %r" % (f, b)
             for f, b in zip([1, self.drag_interp_steps],
                             [blocks[0], fast_params])]))
        self.get_new_sample = self.get_new_sample_dragging
    else:
        _, blocks = self.likelihood.speeds_of_params()
        self.oversampling_factors = [1 for b in blocks]
        self.n_slow = len(self.parametrization.sampled_params())
    # Turn parameter names into indices
    blocks = [
        [list(self.parametrization.sampled_params().keys()).index(p) for p in b]
        for b in blocks]
    self.proposer = BlockedProposer(
        blocks, oversampling_factors=getattr(self, "oversampling_factors", None),
        i_last_slow_block=getattr(self, "i_last_slow_block", None),
        propose_scale=self.propose_scale)
    # Build the initial covariance matrix of the proposal
    covmat = self.initial_proposal_covmat()
    self.log.info("Sampling with covariance matrix:")
    self.log.info("%r", covmat)
    self.proposer.set_covariance(covmat)
    # Prepare callback function
    if self.callback_function is not None:
        self.callback_function_callable = (
            get_external_function(self.callback_function))
def post(info, sample=None): logger_setup(info.get(_debug), info.get(_debug_file)) log = logging.getLogger(__name__.split(".")[-1]) try: info_post = info[_post] except KeyError: log.error("No 'post' block given. Nothing to do!") raise HandledException if get_mpi_rank(): log.warning( "Post-processing is not yet MPI-able. Doing nothing for rank > 1 processes." ) return # 1. Load existing sample output_in = Output(output_prefix=info.get(_output_prefix), resume=True) info_in = load_input(output_in.file_full) if output_in else deepcopy(info) dummy_model_in = DummyModel(info_in[_params], info_in[_likelihood], info_in.get(_prior, None), info_in.get(_theory, None)) if output_in: i = 0 while True: try: collection = Collection(dummy_model_in, output_in, name="%d" % (1 + i), load=True, onload_skip=info_post.get("skip", 0), onload_thin=info_post.get("thin", 1)) if i == 0: collection_in = collection else: collection_in._append(collection) i += 1 except IOError: break elif sample: if isinstance(sample, Collection): sample = [sample] collection_in = deepcopy(sample[0]) for s in sample[1:]: try: collection_in._append(s) except: log.error("Failed to load some of the input samples.") raise HandledException i = len(sample) else: log.error( "Not output from where to load from or input collections given.") raise HandledException log.info("Loaded %d chain%s. Will process %d samples.", i, "s" if i - 1 else "", collection_in.n()) if collection_in.n() <= 1: log.error( "Not enough samples for post-processing. Try using a larger sample, " "or skipping or thinning less.") raise HandledException # 2. Compare old and new info: determine what to do add = info_post.get("add", {}) remove = info_post.get("remove", {}) # Add a dummy 'one' likelihood, to absorb unused parameters if not add.get(_likelihood): add[_likelihood] = odict() add[_likelihood].update({"one": None}) # Expand the "add" info add = get_full_info(add) # 2.1 Adding/removing derived parameters and changes in priors of sampled parameters out = {_params: deepcopy(info_in[_params])} for p in remove.get(_params, {}): pinfo = info_in[_params].get(p) if pinfo is None or not is_derived_param(pinfo): log.error( "You tried to remove parameter '%s', which is not a derived paramter. " "Only derived parameters can be removed during post-processing.", p) raise HandledException out[_params].pop(p) mlprior_names_add = [] for p, pinfo in add.get(_params, {}).items(): pinfo_in = info_in[_params].get(p) if is_sampled_param(pinfo): if not is_sampled_param(pinfo_in): # No added sampled parameters (de-marginalisation not implemented) if pinfo_in is None: log.error( "You added a new sampled parameter %r (maybe accidentaly " "by adding a new likelihood that depends on it). " "Adding new sampled parameters is not possible. Try fixing " "it to some value.", p) raise HandledException else: log.error( "You tried to change the prior of parameter '%s', " "but it was not a sampled parameter. " "To change that prior, you need to define as an external one.", p) raise HandledException if mlprior_names_add[:1] != _prior_1d_name: mlprior_names_add = ( [_minuslogprior + _separator + _prior_1d_name] + mlprior_names_add) elif is_derived_param(pinfo): if p in out[_params]: log.error( "You tried to add derived parameter '%s', which is already " "present. 
To force its recomputation, 'remove' it too.", p) raise HandledException elif is_fixed_param(pinfo): # Only one possibility left "fixed" parameter that was not present before: # input of new likelihood, or just an argument for dynamical derived (dropped) if ((p in info_in[_params] and pinfo[_p_value] != (pinfo_in or {}).get(_p_value, None))): log.error( "You tried to add a fixed parameter '%s: %r' that was already present" " but had a different value or was not fixed. This is not allowed. " "The old info of the parameter was '%s: %r'", p, dict(pinfo), p, dict(pinfo_in)) raise HandledException else: log.error("This should not happen. Contact the developers.") raise HandledException out[_params][p] = pinfo # For the likelihood only, turn the rest of *derived* parameters into constants, # so that the likelihoods do not try to compute them) # But be careful to exclude *input* params that have a "derived: True" value # (which in "full info" turns into "derived: 'lambda [x]: [x]'") out_params_like = deepcopy(out[_params]) for p, pinfo in out_params_like.items(): if ((is_derived_param(pinfo) and not (_p_value in pinfo) and p not in add.get(_params, {}))): out_params_like[p] = {_p_value: np.nan, _p_drop: True} parameterization_like = Parameterization(out_params_like, ignore_unused_sampled=True) # 2.2 Manage adding/removing priors and likelihoods warn_remove = False for level in [_prior, _likelihood]: out[level] = getattr(dummy_model_in, level) if level == _prior: out[level].remove(_prior_1d_name) for pdf in info_post.get("remove", {}).get(level, []) or []: try: out[level].remove(pdf) warn_remove = True except ValueError: log.error( "Trying to remove %s '%s', but it is not present. " "Existing ones: %r", level, pdf, out[level]) raise HandledException if warn_remove: log.warning("You are removing a prior or likelihood pdf. " "Notice that if the resulting posterior is much wider " "than the original one, or displaced enough, " "it is probably safer to explore it directly.") if _prior in add: mlprior_names_add += [ _minuslogprior + _separator + name for name in add[_prior] ] out[_prior] += list(add[_prior]) prior_recompute_1d = (mlprior_names_add[:1] == [ _minuslogprior + _separator + _prior_1d_name ]) # Don't initialise the theory code if not adding/recomputing theory, # theory-derived params or likelihoods recompute_theory = info_in.get(_theory) and not (list( add[_likelihood]) == ["one"] and not any([ is_derived_param(pinfo) for pinfo in add.get(_params, {}).values() ])) if recompute_theory: # Inherit from the original chain (needs input|output_params, renames, etc theory = list(info_in[_theory].keys())[0] info_theory_out = odict([[ theory, recursive_update(deepcopy(info_in[_theory][theory]), add.get(_theory, {theory: {}})[theory]) ]]) else: info_theory_out = None chi2_names_add = [ _chi2 + _separator + name for name in add[_likelihood] if name is not "one" ] out[_likelihood] += [l for l in add[_likelihood] if l is not "one"] if recompute_theory: log.warn( "You are recomputing the theory, but in the current version this does " "not force recomputation of any likelihood or derived parameter, " "unless explicitly removed+added.") for level in [_prior, _likelihood]: for i, x_i in enumerate(out[level]): if x_i in list(out[level])[i + 1:]: log.error( "You have added %s '%s', which was already present. If you " "want to force its recomputation, you must also 'remove' it.", level, x_i) raise HandledException # 3. 
Create output collection if "suffix" not in info_post: log.error("You need to provide a 'suffix' for your chains.") raise HandledException # Use default prefix if it exists. If it does not, produce no output by default. # {post: {output: None}} suppresses output, and if it's a string, updates it. out_prefix = info_post.get(_output_prefix, info.get(_output_prefix)) if out_prefix not in [None, False]: out_prefix += "_" + _post + "_" + info_post["suffix"] output_out = Output(output_prefix=out_prefix, force_output=info.get(_force)) info_out = deepcopy(info) info_out[_post] = info_post # Updated with input info and extended (full) add info info_out.update(info_in) info_out[_post]["add"] = add dummy_model_out = DummyModel(out[_params], out[_likelihood], info_prior=out[_prior]) if recompute_theory: theory = list(info_theory_out.keys())[0] if _input_params not in info_theory_out[theory]: log.error( "You appear to be post-processing a chain generated with an older " "version of Cobaya. For post-processing to work, please edit the " "'[root]__full.info' file of the original chain to add, inside the " "theory code block, the list of its input parameters. E.g.\n----\n" "theory:\n %s:\n input_params: [param1, param2, ...]\n" "----\nIf you get strange errors later, it is likely that you did not " "specify the correct set of theory parameters.\n" "The full set of input parameters are %s.", theory, list(dummy_model_out.parameterization.input_params())) raise HandledException prior_add = Prior(dummy_model_out.parameterization, add.get(_prior)) likelihood_add = Likelihood(add[_likelihood], parameterization_like, info_theory=info_theory_out, modules=info.get(_path_install)) # Remove auxiliary "one" before dumping -- 'add' *is* info_out[_post]["add"] add[_likelihood].pop("one") if likelihood_add.theory: # Make sure that theory.needs is called at least once, for adjustments likelihood_add.theory.needs() collection_out = Collection(dummy_model_out, output_out, name="1") output_out.dump_info({}, info_out) # 4. Main loop! 
log.info("Running post-processing...") last_percent = 0 for i, point in enumerate(collection_in.data.itertuples()): log.debug("Point: %r", point) sampled = [ getattr(point, param) for param in dummy_model_in.parameterization.sampled_params() ] derived = odict( [[param, getattr(point, param, None)] for param in dummy_model_out.parameterization.derived_params()]) inputs = odict([[ param, getattr( point, param, dummy_model_in.parameterization.constant_params().get( param, dummy_model_out.parameterization.constant_params().get( param, None))) ] for param in dummy_model_out.parameterization.input_params()]) # Solve inputs that depend on a function and were not saved # (we don't use the Parameterization_to_input method in case there are references # to functions that cannot be loaded at the moment) for p, value in inputs.items(): if value is None: func = dummy_model_out.parameterization._input_funcs[p] args = dummy_model_out.parameterization._input_args[p] inputs[p] = func(*[getattr(point, arg) for arg in args]) # Add/remove priors priors_add = prior_add.logps(sampled) if not prior_recompute_1d: priors_add = priors_add[1:] logpriors_add = odict(zip(mlprior_names_add, priors_add)) logpriors_new = [ logpriors_add.get(name, -getattr(point, name, 0)) for name in collection_out.minuslogprior_names ] if log.getEffectiveLevel() <= logging.DEBUG: log.debug("New set of priors: %r", dict(zip(dummy_model_out.prior, logpriors_new))) if -np.inf in logpriors_new: continue # Add/remove likelihoods output_like = [] if likelihood_add: # Notice "one" (last in likelihood_add) is ignored: not in chi2_names loglikes_add = odict( zip(chi2_names_add, likelihood_add.logps(inputs, _derived=output_like))) output_like = dict(zip(likelihood_add.output_params, output_like)) else: loglikes_add = dict() loglikes_new = [ loglikes_add.get(name, -0.5 * getattr(point, name, 0)) for name in collection_out.chi2_names ] if log.getEffectiveLevel() <= logging.DEBUG: log.debug("New set of likelihoods: %r", dict(zip(dummy_model_out.likelihood, loglikes_new))) if output_like: log.debug("New set of likelihood-derived parameters: %r", output_like) if -np.inf in loglikes_new: continue # Add/remove derived parameters and change priors of sampled parameters for p in add[_params]: if p in dummy_model_out.parameterization._directly_output: derived[p] = output_like[p] elif p in dummy_model_out.parameterization._derived_funcs: func = dummy_model_out.parameterization._derived_funcs[p] args = dummy_model_out.parameterization._derived_args[p] derived[p] = func(*[ getattr(point, arg, output_like.get(arg, None)) for arg in args ]) if log.getEffectiveLevel() <= logging.DEBUG: log.debug( "New derived parameters: %r", dict([[ p, derived[p] ] for p in dummy_model_out.parameterization.derived_params() if p in add[_params]])) # Save to the collection (keep old weight for now) collection_out.add(sampled, derived=derived.values(), weight=getattr(point, _weight), logpriors=logpriors_new, loglikes=loglikes_new) # Display progress percent = np.round(i / collection_in.n() * 100) if percent != last_percent and not percent % 5: last_percent = percent progress_bar(log, percent, " (%d/%d)" % (i, collection_in.n())) if not collection_out.data.last_valid_index(): log.error( "No elements in the final sample. Possible causes: " "added a prior or likelihood valued zero over the full sampled domain, " "or the computation of the theory failed everywhere, etc.") raise HandledException # Reweight -- account for large dynamic range! 
# Prefer to rescale +inf to finite, and ignore final points with -inf. # Remove -inf's (0-weight), and correct indices difflogmax = max(collection_in[_minuslogpost] - collection_out[_minuslogpost]) collection_out.data[_weight] *= np.exp(collection_in[_minuslogpost] - collection_out[_minuslogpost] - difflogmax) collection_out.data = ( collection_out.data[collection_out.data.weight > 0].reset_index( drop=True)) collection_out._n = collection_out.data.last_valid_index() + 1 # Write! collection_out._out_update() log.info("Finished! Final number of samples: %d", collection_out.n()) return info_out, {"sample": collection_out}