Example #1
0
def _create_sampler(model,
                    nprocs,
                    nchains=None,
                    betas=None,
                    swap_interval=1,
                    seed=None,
                    proposals=None,
                    set_start=True):
    """Builds a parallel-tempered sampler for the given model.

    Falls back to the module-level ``NCHAINS`` and ``BETAS`` defaults when
    ``nchains``/``betas`` are not supplied. A multiprocessing pool is only
    created when more than one process is requested. When ``set_start`` is
    True, the chains' starting positions are drawn from the model's prior.
    """
    # fall back to module defaults where arguments were omitted
    nchains = NCHAINS if nchains is None else nchains
    betas = BETAS if betas is None else betas
    ntemps = len(betas)
    # only spin up worker processes when actually parallelizing
    pool = None if nprocs == 1 else multiprocessing.Pool(nprocs)
    sampler = ParallelTemperedSampler(model.params,
                                      model,
                                      nchains,
                                      betas=betas,
                                      seed=seed,
                                      swap_interval=swap_interval,
                                      proposals=proposals,
                                      pool=pool)
    if set_start:
        # one prior draw per (temperature, chain) slot
        sampler.start_position = model.prior_rvs(size=nchains * ntemps,
                                                 shape=(ntemps, nchains))
    return sampler
Example #2
0
    def __init__(self, model, nchains, ntemps=None, betas=None,
                 proposals=None, default_proposal=None,
                 default_proposal_args=None, seed=None,
                 swap_interval=1,
                 checkpoint_interval=None, checkpoint_signal=None,
                 loglikelihood_function=None,
                 nprocesses=1, use_mpi=False):
        """Sets up the underlying epsie sampler and its worker pool."""
        if betas is None:
            # no explicit ladder supplied: construct a default one sized
            # by the number of variable parameters, with ``ntemps`` levels
            betas = default_beta_ladder(len(model.variable_params),
                                        ntemps=ntemps)
        self.model = model
        # wrap the model so that epsie can invoke it as a callable
        model_call = _EpsieCallModel(model, loglikelihood_function)
        if nprocesses > 1:
            # parallel run: publish the wrapper at module level so worker
            # processes (multiprocessing / MPI) can locate and call it
            models._global_instance = model_call
            model_call = models._call_global_model
        pool = choose_pool(processes=nprocesses, mpi=use_mpi)
        if pool is not None:
            pool.count = nprocesses
        # construct the parallel-tempered sampler itself
        self._sampler = ParallelTemperedSampler(
            model.sampling_params, model_call, nchains, betas=betas,
            swap_interval=swap_interval,
            proposals=proposals, default_proposal=default_proposal,
            default_proposal_args=default_proposal_args,
            seed=seed, pool=pool)
        # stash the checkpoint settings and the chain/temperature counts
        self._checkpoint_interval = checkpoint_interval
        self._checkpoint_signal = checkpoint_signal
        self._nwalkers = nchains
        self._ntemps = ntemps
Example #3
0
class EpsieSampler(MultiTemperedSupport, BaseMCMC, BaseSampler):
    """Constructs an MCMC sampler using epsie's parallel-tempered sampler.

    Parameters
    ----------
    model : model
        A model from ``pycbc.inference.models``.
    nchains : int
        Number of chains to use in the sampler.
    ntemps : int, optional
        Number of temperatures to use in the sampler. A geometrically-spaced
        temperature ladder with the given number of levels will be constructed
        based on the number of parameters. If not provided, must provide
        ``betas``.
    betas : array, optional
        An array of inverse temperature values to be used for the
        temperature ladder. If not provided, must provide ``ntemps``.
    proposals : list, optional
        List of proposals to use. Any parameters that do not have a proposal
        provided will use the ``default_proposal``. **Note:** proposals should
        be specified for the sampling parameters, not the
        variable parameters.
    default_proposal : an epsie.Proposal class, optional
        The default proposal to use for parameters not in ``proposals``.
        Default is :py:class:`epsie.proposals.Normal`.
    default_proposal_args : dict, optional
        Dictionary of arguments to pass to the default proposal.
    swap_interval : int, optional
        The number of iterations between temperature swaps. Default is 1.
    seed : int, optional
        Seed for epsie's random number generator. If None provided, will create
        one.
    checkpoint_interval : int, optional
        Specify the number of iterations to do between checkpoints. If not
        provided, no checkpointing will be done.
    checkpoint_signal : str, optional
        Set the signal to use when checkpointing. For example, 'USR2'.
    loglikelihood_function : str, optional
        Set the function to call from the model for the ``loglikelihood``.
        Default is ``loglikelihood``.
    nprocesses : int, optional
        The number of parallel processes to use. Default is 1
        (no parallelization).
    use_mpi : bool, optional
        Use MPI for parallelization. Default (False) will use python's
        multiprocessing.
    """
    name = "epsie"
    _io = EpsieFile
    burn_in_class = MultiTemperedMCMCBurnInTests

    def __init__(self,
                 model,
                 nchains,
                 ntemps=None,
                 betas=None,
                 proposals=None,
                 default_proposal=None,
                 default_proposal_args=None,
                 seed=None,
                 swap_interval=1,
                 checkpoint_interval=None,
                 checkpoint_signal=None,
                 loglikelihood_function=None,
                 nprocesses=1,
                 use_mpi=False):
        """Sets up the epsie sampler; see the class docstring for arguments."""
        # create the betas if not provided
        if betas is None:
            betas = default_beta_ladder(len(model.variable_params),
                                        ntemps=ntemps)
        self.model = model
        # create a wrapper for calling the model
        model_call = _EpsieCallModel(model, loglikelihood_function)

        # these are used to help parallelize over multiple cores / MPI;
        # the wrapper is published at module level so worker processes can
        # reach it
        models._global_instance = model_call
        model_call = models._call_global_model

        # Set up the pool
        pool = choose_pool(mpi=use_mpi, processes=nprocesses)

        # initialize the sampler
        self._sampler = ParallelTemperedSampler(
            model.sampling_params,
            model_call,
            nchains,
            betas=betas,
            swap_interval=swap_interval,
            proposals=proposals,
            default_proposal=default_proposal,
            default_proposal_args=default_proposal_args,
            seed=seed,
            pool=pool)
        # set other parameters
        # NOTE(review): presumably ``nchains`` is a settable property provided
        # by a base class (e.g. BaseMCMC) — confirm
        self.nchains = nchains
        self._ntemps = ntemps
        self._checkpoint_interval = checkpoint_interval
        self._checkpoint_signal = checkpoint_signal

    @property
    def io(self):
        """The IO class used by this sampler."""
        return self._io

    @property
    def base_shape(self):
        """The shape of a single iteration's samples: (ntemps, nchains)."""
        return (
            self.ntemps,
            self.nchains,
        )

    @property
    def betas(self):
        """The inverse temperatures being used."""
        return self._sampler.betas

    @property
    def seed(self):
        """The seed used for epsie's random bit generator.

        This is not the same as the seed used for the prior distributions.
        """
        return self._sampler.seed

    @property
    def swap_interval(self):
        """Number of iterations between temperature swaps."""
        return self._sampler.swap_interval

    @staticmethod
    def compute_acf(filename, **kwargs):
        r"""Computes the autocorrelation function.

        Calls :py:func:`base_multitemper.compute_acf`; see that
        function for details.

        Parameters
        ----------
        filename : str
            Name of a samples file to compute ACFs for.
        \**kwargs :
            All other keyword arguments are passed to
            :py:func:`base_multitemper.compute_acf`.

        Returns
        -------
        dict :
            Dictionary of arrays giving the ACFs for each parameter. The arrays
            will have shape ``ntemps x nchains x niterations``.
        """
        return compute_acf(filename, **kwargs)

    @staticmethod
    def compute_acl(filename, **kwargs):
        r"""Computes the autocorrelation length.

        Calls :py:func:`base_multitemper.compute_acl`; see that
        function for details.

        Parameters
        -----------
        filename : str
            Name of a samples file to compute ACLs for.
        \**kwargs :
            All other keyword arguments are passed to
            :py:func:`base_multitemper.compute_acl`.

        Returns
        -------
        dict
            A dictionary of ntemps-long arrays of the ACLs of each parameter.
        """
        return compute_acl(filename, **kwargs)

    @property
    def acl(self):  # pylint: disable=invalid-overridden-method
        """The autocorrelation lengths of the chains.
        """
        return acl_from_raw_acls(self.raw_acls)

    @property
    def effective_nsamples(self):  # pylint: disable=invalid-overridden-method
        """The effective number of samples post burn-in that the sampler has
        acquired so far.
        """
        act = self.act
        # no ACT yet means no effective samples: treat as infinite ACT
        if act is None:
            act = numpy.inf
        if self.burn_in is None:
            start_iter = 0
        else:
            start_iter = self.burn_in.burn_in_iteration
        nperchain = nsamples_in_chain(start_iter, act, self.niterations)
        if self.burn_in is not None:
            # ensure that any chain not burned in has zero samples
            nperchain[~self.burn_in.is_burned_in] = 0
            # and that any chain that is burned in has at least one sample
            nperchain[self.burn_in.is_burned_in & (nperchain < 1)] = 1
        return int(nperchain.sum())

    @property
    def samples(self):
        """A dict mapping ``variable_params`` to arrays of samples currently
        in memory.

        The arrays have shape ``ntemps x nchains x niterations``.

        The dictionary also contains sampling parameters.
        """
        samples = epsie.array2dict(self._sampler.positions)
        # apply boundary conditions
        samples = self.model.prior_distribution.apply_boundary_conditions(
            **samples)
        # apply transforms to go to model's variable params space
        if self.model.sampling_transforms is not None:
            samples = self.model.sampling_transforms.apply(samples,
                                                           inverse=True)
        return samples

    @property
    def model_stats(self):
        """A dict mapping the model's ``default_stats`` to arrays of values.

        The arrays have shape ``ntemps x nchains x niterations``.
        """
        return epsie.array2dict(self._sampler.blobs)

    def clear_samples(self):
        """Clears the chain and blobs from memory.
        """
        # store the iteration that the clear is occurring on
        self._lastclear = self.niterations
        self._itercounter = 0
        # now clear the sampler
        self._sampler.clear()

    def set_state_from_file(self, filename):
        """Sets the state of the sampler back to the instance saved in a file.
        """
        with self.io(filename, 'r') as fp:
            # get the numpy state
            numpy_rstate_group = '/'.join(
                [fp.sampler_group, 'numpy_random_state'])
            rstate = fp.read_random_state(group=numpy_rstate_group)
            # set the sampler state for epsie
            self._sampler.set_state_from_checkpoint(fp, path=fp.sampler_group)
        # set the global numpy random state for pycbc
        numpy.random.set_state(rstate)

    def set_p0(self, samples_file=None, prior=None):
        """Sets the initial position of the chains.

        Delegates to the parent class to draw/load the initial positions,
        then hands the result to the epsie sampler as its start position.
        """
        p0 = super(EpsieSampler, self).set_p0(samples_file=samples_file,
                                              prior=prior)
        self._sampler.start_position = p0

    @property
    def pos(self):
        """A dictionary of the current chain positions."""
        # we override BaseMCMC's pos property because this can be directly
        # retrieved from epsie
        return self._sampler.current_positions

    def run_mcmc(self, niterations):
        """Advance the chains for a number of iterations.

        Parameters
        ----------
        niterations : int
            Number of samples to get from sampler.
        """
        self._sampler.run(niterations)

    def write_results(self, filename):
        """Writes samples, model stats, acceptance ratios, and random state
        to the given file.

        Parameters
        -----------
        filename : str
            The file to write to. The file is opened using the ``io`` class
            in an append state.
        """
        with self.io(filename, 'a') as fp:
            # write samples
            fp.write_samples(self.samples,
                             parameters=self.model.variable_params,
                             last_iteration=self.niterations)
            # write stats
            fp.write_samples(self.model_stats, last_iteration=self.niterations)
            # write acceptance ratio
            acceptance = self._sampler.acceptance
            fp.write_acceptance_ratio(acceptance['acceptance_ratio'],
                                      last_iteration=self.niterations)
            # write temperature data (only meaningful when swaps can occur)
            if self.ntemps > 1:
                temp_ar = self._sampler.temperature_acceptance
                temp_swaps = self._sampler.temperature_swaps
                fp.write_temperature_data(temp_swaps,
                                          temp_ar,
                                          self.swap_interval,
                                          last_iteration=self.niterations)
            # write numpy's global state (for the distributions)
            numpy_rstate_group = '/'.join(
                [fp.sampler_group, 'numpy_random_state'])
            fp.write_random_state(group=numpy_rstate_group)
            # write the sampler's state
            self._sampler.checkpoint(fp, path=fp.sampler_group)

    def finalize(self):
        """Do nothing; everything needed is written by ``write_results``."""
        pass

    @classmethod
    def from_config(cls,
                    cp,
                    model,
                    output_file=None,
                    nprocesses=1,
                    use_mpi=False):
        """Loads the sampler from the given config file.

        The following options are retrieved in the ``[sampler]`` section:

        * ``name`` :
            (required) must match the sampler's name
        * ``nchains`` :
            (required) the number of chains to use
        * ``ntemps`` :
            The number of temperatures to use. Either this, or
            ``inverse-temperatures-file`` must be provided (but not both).
        * ``inverse-temperatures-file`` :
            Path to an hdf file containing the inverse temperatures ("betas")
            to use. The betas will be retrieved from the file's
            ``.attrs['betas']``. Either this or ``ntemps`` must be provided
            (but not both).
        * ``niterations`` :
            The number of iterations to run the sampler for. Either this or
            ``effective-nsamples`` must be provided (but not both).
        * ``effective-nsamples`` :
            Run the sampler until the given number of effective samples are
            obtained. A ``checkpoint-interval`` must also be provided in this
            case. Either this or ``niterations`` must be provided (but not
            both).
        * ``thin-interval`` :
            Thin the samples by the given value before saving to disk. May
            provide this, or ``max-samples-per-chain``, but not both. If
            neither options are provided, will save all samples.
        * ``max-samples-per-chain`` :
            Thin the samples such that the number of samples per chain per
            temperature that are saved to disk never exceeds the given value.
            May provide this, or ``thin-interval``, but not both. If neither
            options are provided, will save all samples.
        * ``checkpoint-interval`` :
            Sets the checkpoint interval to use. Must be provided if using
            ``effective-nsamples``.
        * ``checkpoint-signal`` :
            Set the checkpoint signal, e.g., "USR2". Optional.
        * ``seed`` :
            The seed to use for epsie's random number generator. If not
            provided, epsie will create one.
        * ``logl-function`` :
            The attribute of the model to use for the loglikelihood. If
            not provided, will default to ``loglikelihood``.
        * ``swap-interval`` :
            The number of iterations between temperature swaps. Default is 1.

        Jump proposals must be provided for every sampling
        parameter. These are retrieved from subsections
        ``[jump_proposal-{params}]``, where params is a
        :py:const:`pycbc.VARARGS_DELIM` separated list of parameters the
        proposal should be used for. See
        :py:func:`inference.jump.epsie_proposals_from_config` for
        details.

        .. note::
            Jump proposals should be specified for **sampling parameters**,
            not **variable parameters**.

        Settings for burn-in tests are read from ``[sampler-burn_in]``. In
        particular, the ``burn-in-test`` option is used to set the burn in
        tests to perform. See
        :py:func:`MultiTemperedMCMCBurnInTests.from_config` for details. If no
        ``burn-in-test`` is provided, no burn in tests will be carried out.


        Parameters
        ----------
        cp : WorkflowConfigParser instance
            Config file object to parse.
        model : pycbc.inference.model.BaseModel instance
            The model to use.
        output_file : str, optional
            The name of the output file to checkpoint and write results to.
        nprocesses : int, optional
            The number of parallel processes to use. Default is 1.
        use_mpi : bool, optional
            Use MPI for parallelization. Default is False.

        Returns
        -------
        EpsiePTSampler :
            The sampler instance.
        """
        section = "sampler"
        # check name
        assert cp.get(
            section,
            "name") == cls.name, ("name in section [sampler] must match mine")
        nchains = int(cp.get(section, "nchains"))
        seed = get_optional_arg_from_config(cp, section, 'seed', dtype=int)
        ntemps, betas = cls.betas_from_config(cp, section)
        # get the swap interval
        swap_interval = get_optional_arg_from_config(cp,
                                                     section,
                                                     'swap-interval',
                                                     dtype=int)
        if swap_interval is None:
            swap_interval = 1
        # get the checkpoint interval, if it's specified
        checkpoint_interval = cls.checkpoint_from_config(cp, section)
        checkpoint_signal = cls.ckpt_signal_from_config(cp, section)
        # get the loglikelihood function
        logl = get_optional_arg_from_config(cp, section, 'logl-function')
        # get the proposals
        proposals = epsie_proposals_from_config(cp)
        # check that all of the sampling parameters have a specified
        # proposal
        sampling_params = set(model.sampling_params)
        proposal_params = set(param for prop in proposals
                              for param in prop.parameters)
        missing = sampling_params - proposal_params
        if missing:
            raise ValueError("Missing jump proposals for sampling parameters "
                             "{}".format(', '.join(missing)))
        # initialize
        obj = cls(model,
                  nchains,
                  ntemps=ntemps,
                  betas=betas,
                  proposals=proposals,
                  swap_interval=swap_interval,
                  seed=seed,
                  checkpoint_interval=checkpoint_interval,
                  checkpoint_signal=checkpoint_signal,
                  loglikelihood_function=logl,
                  nprocesses=nprocesses,
                  use_mpi=use_mpi)
        # set target
        obj.set_target_from_config(cp, section)
        # add burn-in if it's specified
        obj.set_burn_in_from_config(cp)
        # set prethin options
        obj.set_thin_interval_from_config(cp, section)
        # Set up the output file
        setup_output(obj, output_file)
        if obj.new_checkpoint:
            obj.set_start_from_config(cp)
        else:
            obj.resume_from_checkpoint()
        return obj