Example #1
    def _check_mod_set(self, mod_set, name):
        """
        Checks validity of provided set of model outputs `mod_set` in this
        :obj:`~ModelLink` instance.

        Parameters
        ----------
        mod_set : 1D or 2D array_like or dict
            Model output (set) to validate in this :obj:`~ModelLink` instance.
        name : str
            The name of the model output (set), which is used in the error
            message if the validation fails.

        Returns
        -------
        mod_set : 1D or 2D :obj:`~numpy.ndarray` object
            The provided `mod_set` if the validation was successful. If
            `mod_set` was a dict, it will be converted to a
            :obj:`~numpy.ndarray` object (sorted on :attr:`~data_idx`).

        """

        # Make logger
        logger = getCLogger('CHECK')
        logger.info("Validating provided set of model outputs %r." % (name))

        # If mod_set is a dict, try to convert it to a NumPy array
        if isinstance(mod_set, dict):
            try:
                mod_set = np_array([mod_set[idx] for idx in self._data_idx]).T
            except KeyError as error:
                err_msg = ("Input argument %r is missing data identifier %r!"
                           % (name, error.args[0]))
                raise_error(err_msg, KeyError, logger)

        # Make sure that mod_set is a NumPy array
        mod_set = np_array(mod_set)

        # Raise error if mod_set is not 1D or 2D
        if not (mod_set.ndim == 1 or mod_set.ndim == 2):
            err_msg = ("Input argument %r is not one-dimensional or "
                       "two-dimensional!" % (name))
            raise_error(err_msg, ShapeError, logger)

        # Raise error if mod_set does not have n_data data values
        if not (mod_set.shape[-1] == self._n_data):
            err_msg = ("Input argument %r has incorrect number of data values "
                       "(%i != %i)!" % (name, mod_set.shape[-1], self._n_data))
            raise_error(err_msg, ShapeError, logger)

        # Check if mod_set solely consists of floats
        mod_set = check_vals(mod_set, name, 'float')

        # Log again and return mod_set
        logger.info("Finished validating provided set of model outputs %r." %
                    (name))
        return (mod_set)
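
# A minimal standalone sketch of the dict-to-array conversion performed above,
# assuming a hypothetical set of data identifiers and plain NumPy:
import numpy as np

data_idx = [(2, 'A'), 3, 'B']                   # hypothetical data identifiers
mod_dict = {3: [1.0, 2.0], 'B': [3.0, 4.0], (2, 'A'): [5.0, 6.0]}

# Stack the outputs in data_idx order and transpose, as done in _check_mod_set
mod_set = np.array([mod_dict[idx] for idx in data_idx]).T
print(mod_set.shape)        # (2, 3): 2 model realizations, 3 data points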
Example #2
    def advance_chain(sam_set):
        # Make sure that sam_set is 2D
        sam_set = np_array(sam_set, ndmin=2)

        # Check if sam_set is within parameter space and reject if not
        par_rng = pipe._modellink._par_rng
        accept = ((par_rng[:, 0] <= sam_set) *
                  (sam_set <= par_rng[:, 1])).all(1)

        # Evaluate all non-rejected samples and accept if plausible
        emul_i = pipe._emulator._emul_i
        accept[accept] = pipe._make_call('_evaluate_sam_set', emul_i,
                                         sam_set[accept], 'project')[0]

        # Return which samples should be accepted or rejected
        return (accept)
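
# A minimal standalone sketch of the bounds check used above, assuming a
# hypothetical parameter range array and sample set (plain NumPy only):
import numpy as np

par_rng = np.array([[0.0, 1.0], [-5.0, 5.0]])   # (lower, upper) per parameter
sam_set = np.array([[0.5, 0.0], [1.5, 0.0]], ndmin=2)

# Broadcasted bounds check; the second sample lies outside parameter space
accept = ((par_rng[:, 0] <= sam_set) * (sam_set <= par_rng[:, 1])).all(1)
print(accept)               # [ True False]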
Example #3
    def _check_md_var(self, md_var, name):
        """
        Checks validity of provided set of model discrepancy variances `md_var`
        in this :obj:`~ModelLink` instance.

        Parameters
        ----------
        md_var : 1D or 2D array_like or dict
            Model discrepancy variance set to validate in this
            :obj:`~ModelLink` instance.
        name : str
            The name of the model discrepancy set, which is used in the error
            message if the validation fails.

        Returns
        -------
        md_var : 2D :obj:`~numpy.ndarray` object
            The (converted) provided `md_var` if the validation was successful.
            If `md_var` was a dict, it will be converted to a
            :obj:`~numpy.ndarray` object.

        """

        # Make logger
        logger = getCLogger('CHECK')
        logger.info("Validating provided set of model discrepancy variances "
                    "%r." % (name))

        # If md_var is a dict, convert it to a NumPy array
        if isinstance(md_var, dict):
            md_var = np_array([md_var[idx] for idx in md_var])

        # Make sure that md_var is a NumPy array
        md_var = np_array(md_var)

        # Raise error if md_var is not 1D or 2D
        if not (md_var.ndim == 1 or md_var.ndim == 2):
            err_msg = ("Input argument %r is not one-dimensional or "
                       "two-dimensional!" % (name))
            raise_error(err_msg, ShapeError, logger)

        # Check if md_var contains n_data values
        if not (md_var.shape[0] == self._n_data):
            err_msg = ("Received array of model discrepancy variances %r has "
                       "incorrect number of data points (%i != %i)!" %
                       (name, md_var.shape[0], self._n_data))
            raise_error(err_msg, ShapeError, logger)

        # Check if single or dual values were given
        if (md_var.ndim == 1):
            md_var = np_array([md_var] * 2).T
        elif (md_var.shape[1] == 2):
            pass
        else:
            err_msg = ("Received array of model discrepancy variances %r has "
                       "incorrect number of values (%i != 2)!" %
                       (name, md_var.shape[1]))
            raise_error(err_msg, ShapeError, logger)

        # Check if all values are non-negative floats
        md_var = check_vals(md_var, name, 'nneg', 'float')

        # Log again and return md_var
        logger.info("Finished validating provided set of model discrepancy "
                    "variances %r." % (name))
        return (md_var)
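
# A minimal standalone sketch of the single-to-dual variance expansion above,
# assuming a hypothetical 1D set of discrepancy variances (plain NumPy only):
import numpy as np

md_var = np.array([0.1, 0.2, 0.3])              # one variance per data point

# Duplicate the column so every data point has a lower and upper variance
md_var = np.array([md_var]*2).T
print(md_var.shape)         # (3, 2)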
Example #4
    def _check_sam_set(self, sam_set, name):
        """
        Checks validity of provided set of model parameter samples `sam_set` in
        this :obj:`~ModelLink` instance.

        Parameters
        ----------
        sam_set : 1D or 2D array_like or dict
            Parameter/sample set to validate in this :obj:`~ModelLink`
            instance.
        name : str
            The name of the parameter/sample set, which is used in the error
            message if the validation fails.

        Returns
        -------
        sam_set : 1D or 2D :obj:`~numpy.ndarray` object
            The provided `sam_set` if the validation was successful. If
            `sam_set` was a dict, it will be converted to a
            :obj:`~numpy.ndarray` object.

        """

        # Make logger
        logger = getCLogger('CHECK')
        logger.info("Validating provided set of model parameter samples %r." %
                    (name))

        # If sam_set is a dict, convert it to a NumPy array
        if isinstance(sam_set, dict):
            sam_set = np_array(sdict(sam_set).values()).T

        # Make sure that sam_set is a NumPy array
        sam_set = np_array(sam_set)

        # Raise error if sam_set is not 1D or 2D
        if not (sam_set.ndim == 1 or sam_set.ndim == 2):
            err_msg = ("Input argument %r is not one-dimensional or "
                       "two-dimensional!" % (name))
            raise_error(err_msg, ShapeError, logger)

        # Raise error if sam_set does not have n_par parameter values
        if not (sam_set.shape[-1] == self._n_par):
            err_msg = ("Input argument %r has incorrect number of parameters "
                       "(%i != %i)!" % (name, sam_set.shape[-1], self._n_par))
            raise_error(err_msg, ShapeError, logger)

        # Check if sam_set solely consists of floats
        sam_set = check_vals(sam_set, name, 'float')

        # Check if all samples are within parameter space
        sam_set_2D = np_array(sam_set, ndmin=2)
        rng = self._par_rng
        check = np.apply_along_axis(
            lambda x: ((rng[:, 0] <= x) * (x <= rng[:, 1])).all(), 1,
            sam_set_2D)

        # If any sample failed the check (argwhere is non-empty), raise error
        try:
            index = np.argwhere(~check)[0]
        except IndexError:
            pass
        else:
            err_msg = ("Input argument '%s%s' is outside parameter space!" %
                       (name, index if sam_set.ndim != 1 else ''))
            raise_error(err_msg, ValueError, logger)

        # Log again and return sam_set
        logger.info("Finished validating provided set of model parameter "
                    "samples %r." % (name))
        return (sam_set)
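
# A minimal standalone sketch of the per-sample plausibility check above,
# assuming a hypothetical parameter range and sample set (plain NumPy only):
import numpy as np

rng = np.array([[0.0, 1.0], [0.0, 1.0]])
sam_set_2D = np.array([[0.2, 0.8], [1.3, 0.5]], ndmin=2)

# Check every sample against the parameter bounds, as in _check_sam_set
check = np.apply_along_axis(
    lambda x: ((rng[:, 0] <= x) * (x <= rng[:, 1])).all(), 1, sam_set_2D)

# Report the first non-plausible sample, if any
bad = np.argwhere(~check)
print(bad[0] if len(bad) else "all samples within parameter space")    # [1]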
Example #5
def convert_data(model_data):
    """
    Converts the provided `model_data` into a full data dict, taking into
    account all formatting options, and returns it.

    This function can be used externally to check how the provided `model_data`
    would be interpreted when provided to the
    :class:`~prism.modellink.ModelLink` subclass. Its output can be used for
    the 'model_data' input argument.

    Parameters
    ----------
    model_data : array_like, dict or str
        Anything that can be converted to a dict that provides model data
        information.

    Returns
    -------
    data_dict : dict
        Dict with the provided `model_data` converted to its full format.

    """

    # If a data file is given
    if isinstance(model_data, str):
        # Obtain absolute path to given file
        data_file = path.abspath(model_data)

        # Read the data file in as a string
        data_points = np.genfromtxt(data_file, dtype=(str), delimiter=':',
                                    autostrip=True)

        # Make sure that data_points is 2D
        data_points = np_array(data_points, ndmin=2)

        # Convert read-in data to dict
        model_data = dict(data_points)

    # If a data dict is given
    elif isinstance(model_data, dict):
        model_data = dict(model_data)

    # If anything else is given
    else:
        # Check if it can be converted to a dict
        try:
            model_data = dict(model_data)
        except Exception:
            raise TypeError("Input model data cannot be converted to type "
                            "'dict'!")

    # Make empty data_dict
    data_dict = dict()

    # Loop over all items in model_data
    for key, value in model_data.items():
        # Convert key to an actual data_idx
        idx = e13.split_seq(key)

        # Check if idx is not empty
        if not idx:
            raise e13.InputError("Model data contains a data point with no "
                                 "identifier!")

        # Convert value to an actual data point
        data = e13.split_seq(value)

        # Check if provided data value is valid
        val = check_vals(data[0], 'data_val%s' % (idx), 'float')

        # Extract data error and space
        # If length is two, centered error and no data space were given
        if(len(data) == 2):
            err = [check_vals(data[1], 'data_err%s' % (idx), 'float', 'pos')]*2
            spc = 'lin'

        # If length is three, there are two possibilities
        elif(len(data) == 3):
            # If the third column contains a string, it is the data space
            if isinstance(data[2], str):
                err = [check_vals(data[1], 'data_err%s' % (idx),
                                  'float', 'pos')]*2
                spc = data[2]

            # If the third column is not a string, it is the error interval
            else:
                err = check_vals(data[1:3], 'data_err%s' % (idx),
                                 'float', 'pos')
                spc = 'lin'

        # If length is four+, error interval and data space were given
        else:
            err = check_vals(data[1:3], 'data_err%s' % (idx), 'float', 'pos')
            spc = data[3]

        # Check if valid data space has been provided
        spc = str(spc).replace("'", '').replace('"', '')
        if spc.lower() in ('lin', 'linear'):
            spc = 'lin'
        elif spc.lower() in ('log', 'log10', 'log_10'):
            spc = 'log10'
        elif spc.lower() in ('ln', 'loge', 'log_e'):
            spc = 'ln'
        else:
            raise ValueError("Input argument 'data_spc%s' is invalid (%r)!"
                             % (idx, spc))

        # Save data identifier as tuple or single element
        if(len(idx) == 1):
            idx = idx[0]
        else:
            idx = tuple(idx)

        # Add entire data point to data_dict
        data_dict[idx] = [val, *err, spc]

    # Return data_dict
    return(data_dict)
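
# A minimal usage sketch, assuming convert_data is importable from
# prism.modellink; the expected output is inferred from the branches above:
from prism.modellink import convert_data

model_data = {
    1: [2.5, 0.1],                  # centered error, default 'lin' space
    2: [1.0, 0.2, 0.3, 'log10'],    # lower/upper error interval, log10 space
    (3, 'a'): [0.8, 0.05, 'ln'],    # centered error, ln space
}

print(convert_data(model_data))
# {1: [2.5, 0.1, 0.1, 'lin'],
#  2: [1.0, 0.2, 0.3, 'log10'],
#  (3, 'a'): [0.8, 0.05, 0.05, 'ln']}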
Example #6
def convert_parameters(model_parameters):
    """
    Converts the provided `model_parameters` into a full parameters dict,
    taking into account all formatting options, and returns it.

    This function can be used externally to check how the provided
    `model_parameters` would be interpreted when provided to the
    :class:`~prism.modellink.ModelLink` subclass. Its output can be used for
    the 'model_parameters' input argument.

    Parameters
    ----------
    model_parameters : array_like, dict or str
        Anything that can be converted to a dict that provides model parameters
        information.

    Returns
    -------
    par_dict : dict
        Dict with the provided `model_parameters` converted to its full format.

    """

    # If a parameter file is given
    if isinstance(model_parameters, str):
        # Obtain absolute path to given file
        par_file = path.abspath(model_parameters)

        # Read the parameter file in as a string
        pars = np.genfromtxt(par_file, dtype=(str), delimiter=':',
                             autostrip=True)

        # Make sure that pars is 2D
        pars = np_array(pars, ndmin=2)

        # Convert read-in parameters to dict
        model_parameters = sdict(pars)

    # If a parameter dict is given
    elif isinstance(model_parameters, dict):
        model_parameters = sdict(model_parameters)

    # If anything else is given
    else:
        # Check if it can be converted to a dict
        try:
            model_parameters = sdict(model_parameters)
        except Exception:
            raise TypeError("Input model parameters cannot be converted to"
                            " type 'dict'!")

    # Initialize empty par_dict
    par_dict = sdict()

    # Loop over all items in model_parameters
    for name, values_str in model_parameters.items():
        # Convert values_str to values
        values = e13.split_seq(values_str)

        # Check if provided name is a string
        name = check_vals(name, 'par_name[%r]' % (name), 'str')

        # Check if provided range consists of two floats
        par_rng = check_vals(values[:2], 'par_rng[%r]' % (name), 'float')

        # Check if provided lower bound is lower than the upper bound
        if(par_rng[0] >= par_rng[1]):
            raise ValueError("Input argument 'par_rng[%r]' does not define a "
                             "valid parameter range (%f !< %f)!"
                             % (name, par_rng[0], par_rng[1]))

        # Check if a float parameter estimate was provided
        try:
            est = check_vals(values[2], 'par_est[%r]' % (name), 'float')
        # If no estimate was provided, save it as None
        except IndexError:
            est = None
        # If no float was provided, check if it was None
        except TypeError as error:
            # If it is None, save it as such
            if(str(values[2]).lower() == 'none'):
                est = None
            # If it is not None, reraise the previous error
            else:
                raise error
        # If a float was provided, check if it is within parameter range
        else:
            if not(par_rng[0] <= est <= par_rng[1]):
                raise ValueError("Input argument 'par_est[%r]' is outside "
                                 "of defined parameter range!" % (name))

        # Add parameter to par_dict
        par_dict[name] = [*par_rng, est]

    # Return par_dict
    return(par_dict)
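
# A minimal usage sketch, assuming convert_parameters is importable from
# prism.modellink; the expected output is inferred from the checks above:
from prism.modellink import convert_parameters

model_parameters = {
    'A': [1, 5, 3],         # range [1, 5] with estimate 3
    'B': [1, 2],            # range [1, 2] without an estimate
    'C': [0, 10, None],     # range [0, 10] with an explicit None estimate
}

print(convert_parameters(model_parameters))
# {'A': [1.0, 5.0, 3.0], 'B': [1.0, 2.0, None], 'C': [0.0, 10.0, None]}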
Example #7
def get_walkers(pipeline_obj,
                *,
                emul_i=None,
                init_walkers=None,
                req_n_walkers=None,
                unit_space=False,
                lnpost_fn=None,
                **kwargs):
    """
    Analyzes proposed `init_walkers` and returns plausible `p0_walkers`.

    Analyzes sample set `init_walkers` in the provided `pipeline_obj` at
    iteration `emul_i` and returns all samples that are plausible to be used as
    starting positions for MCMC walkers. The provided samples and returned
    walkers should be/are given in unit space if `unit_space` is *True*.

    If `init_walkers` is *None*, returns :attr:`~prism.Pipeline.impl_sam`
    instead if it is available.

    This function needs to be called by all MPI ranks.

    Parameters
    ----------
    pipeline_obj : :obj:`~prism.Pipeline` object
        The instance of the :class:`~prism.Pipeline` class that needs to be
        used for determining the plausibility of the proposed starting
        positions.

    Optional
    --------
    %(emul_i)s
    init_walkers : 2D array_like, dict, int or None. Default: None
        Sample set of proposed initial MCMC walker positions. All plausible
        samples in `init_walkers` will be returned.
        If int, generate an LHD of provided size and return all plausible
        samples.
        If *None*, return :attr:`~prism.Pipeline.impl_sam` corresponding to
        iteration `emul_i` instead.
    req_n_walkers : int or None. Default: None
        The minimum required number of plausible starting positions that should
        be returned. If *None*, all plausible starting positions in
        `init_walkers` are returned instead.

        .. versionadded:: 1.2.0
    unit_space : bool. Default: False
        Bool determining whether or not the provided samples and returned
        walkers are given in unit space.
    lnpost_fn : function or None. Default: None
        If function, call :func:`~get_hybrid_lnpost_fn` using `lnpost_fn` and
        the same values for `pipeline_obj`, `emul_i` and `unit_space`, and
        return the resulting function definition `hybrid_lnpost()`. Any
        additionally provided `kwargs` are also passed to it.

    Returns
    -------
    n_walkers : int
        Number of returned MCMC walkers. Note that this number can be higher
        than `req_n_walkers` if the latter is not *None*.
    p0_walkers : 2D :obj:`~numpy.ndarray` object or dict
        Array containing plausible starting positions of valid MCMC walkers.
        If `init_walkers` was provided as a dict, `p0_walkers` will be a dict.
    hybrid_lnpost : function (if `lnpost_fn` is a function)
        The function returned by :func:`~get_hybrid_lnpost_fn` using
        `lnpost_fn`, `pipeline_obj`, `emul_i`, `unit_space` and `kwargs` as the
        input values.

    See also
    --------
    :func:`~get_hybrid_lnpost_fn`
        Returns a function definition ``hybrid_lnpost(par_set, *args,
        **kwargs)``.

    :attr:`~prism.Pipeline.worker_mode`
        Special context manager within which all code is executed in worker
        mode.

    Notes
    -----
    If `init_walkers` is *None* and emulator iteration `emul_i` has not been
    analyzed yet, a :class:`~prism._internal.RequestError` will be raised.

    If `req_n_walkers` is not *None*, a custom Metropolis-Hastings sampling
    algorithm is used to generate the required number of starting positions.
    All plausible samples in `init_walkers` are used as the start of every MCMC
    chain. Note that if the number of plausible samples in `init_walkers` is
    small, it is possible that the returned `p0_walkers` are not spread out
    properly over parameter space.

    """

    # Make abbreviation for pipeline_obj
    pipe = pipeline_obj

    # Check if provided pipeline_obj is an instance of the Pipeline class
    if not isinstance(pipe, Pipeline):
        raise TypeError("Input argument 'pipeline_obj' must be an instance of "
                        "the Pipeline class!")

    # Check if the provided pipeline_obj uses a default emulator
    if (pipe._emulator._emul_type != 'default'):
        raise e13.InputError("Input argument 'pipeline_obj' does not use a "
                             "default emulator!")

    # Get emulator iteration
    emul_i = pipe._emulator._get_emul_i(emul_i)

    # If req_n_walkers is not None, check if it is an integer
    if req_n_walkers is not None:
        req_n_walkers = check_vals(req_n_walkers, 'req_n_walkers', 'int',
                                   'pos')

    # Check if unit_space is a bool
    unit_space = check_vals(unit_space, 'unit_space', 'bool')

    # Assume that walkers are not to be returned as a dict
    walker_dict = False

    # Check if lnpost_fn is None and try to get hybrid_lnpost function if not
    if lnpost_fn is not None:
        try:
            hybrid_lnpost =\
                get_hybrid_lnpost_fn(lnpost_fn, pipe, emul_i=emul_i,
                                     unit_space=unit_space, **kwargs)
        except e13.InputError:
            raise e13.InputError("Input argument 'lnpost_fn' is invalid!")

    # If init_walkers is None, use impl_sam of emul_i
    if init_walkers is None:
        # Controller checking if emul_i has already been analyzed
        if pipe._is_controller:
            # If iteration has not been analyzed, raise error
            if not pipe._n_eval_sam[emul_i]:
                raise RequestError("Emulator iteration %i has not been "
                                   "analyzed yet!" % (emul_i))
            # If iteration is last iteration, init_walkers is current impl_sam
            elif (emul_i == pipe._emulator._emul_i):
                init_walkers = pipe._impl_sam
            # If iteration is not last, init_walkers is previous impl_sam
            else:
                init_walkers = pipe._emulator._sam_set[emul_i + 1]

            # Make sure to make a copy of init_walkers to avoid modifications
            init_walkers = init_walkers.copy()

        # Broadcast init_walkers to workers as p0_walkers
        p0_walkers = pipe._comm.bcast(init_walkers, 0)

    # If init_walkers is not None, use provided samples or LHD size
    else:
        # Controller checking if init_walkers is valid
        if pipe._is_controller:
            # If init_walkers is an int, create LHD of provided size
            if isinstance(init_walkers, int):
                # Check if provided integer is positive
                n_sam = check_vals(init_walkers, 'init_walkers', 'pos')

                # Obtain the par_space to sample in
                par_space = pipe._get_impl_space(emul_i)

                # If par_space is None, use the corresponding emul_space
                if par_space is None:
                    par_space = pipe._emulator._emul_space[emul_i]

                # Create LHD of provided size
                init_walkers = e13.lhd(n_sam, pipe._modellink._n_par,
                                       par_space, 'center', pipe._criterion,
                                       100)

            # If init_walkers is not an int, it must be array_like or dict
            else:
                # If init_walkers is provided as a dict, convert it
                if isinstance(init_walkers, dict):
                    # Make sure that init_walkers is a SortedDict
                    init_walkers = sdict(init_walkers)

                    # Convert it to normal
                    init_walkers = np_array(init_walkers.values()).T

                    # Return p0_walkers as a dict
                    walker_dict = True

                # Make sure that init_walkers is a NumPy array
                init_walkers = np_array(init_walkers, ndmin=2)

                # If unit_space is True, convert init_walkers to par_space
                if unit_space:
                    init_walkers = pipe._modellink._to_par_space(init_walkers)

                # Check if init_walkers is valid
                init_walkers = pipe._modellink._check_sam_set(
                    init_walkers, 'init_walkers')

        # Broadcast init_walkers to workers
        init_walkers = pipe._comm.bcast(init_walkers, 0)

        # Analyze init_walkers and save them as p0_walkers
        p0_walkers = pipe._evaluate_sam_set(emul_i, init_walkers, 'analyze')

    # Check if p0_walkers is not empty and raise error if it is
    if not p0_walkers.shape[0]:
        raise e13.InputError("Input argument 'init_walkers' contains no "
                             "plausible samples!")

    # If req_n_walkers is not None, use MH MCMC to find all required walkers
    if req_n_walkers is not None:
        n_walkers, p0_walkers = _do_mh_walkers(pipe, p0_walkers, req_n_walkers)
    else:
        p0_walkers = np.unique(p0_walkers, axis=0)
        n_walkers = p0_walkers.shape[0]

    # Check if p0_walkers needs to be converted
    if unit_space:
        p0_walkers = pipe._modellink._to_unit_space(p0_walkers)

    # Check if p0_walkers needs to be returned as a dict
    if walker_dict:
        p0_walkers = pipe._modellink._get_sam_dict(p0_walkers)

    # Check if hybrid_lnpost was requested and return it as well if so
    if lnpost_fn is not None:
        return (n_walkers, p0_walkers, hybrid_lnpost)
    else:
        return (n_walkers, p0_walkers)
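
# A minimal usage sketch, assuming 'pipe' is an already constructed and
# analyzed prism.Pipeline instance and that get_walkers is importable from
# prism.utils:
from prism.utils import get_walkers

# Return all plausible samples of the last analyzed iteration as walkers
n_walkers, p0_walkers = get_walkers(pipe)

# Propose a 1000-sample LHD instead and require at least 100 walkers
n_walkers, p0_walkers = get_walkers(pipe, init_walkers=1000, req_n_walkers=100)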
Example #8
    def hybrid_lnpost(par_set, *args, **kwargs):
        """
        Calculates the natural logarithm of the posterior probability of
        `par_set` using the provided function `lnpost_fn`, in addition to
        constraining it first with the emulator defined in the `pipeline_obj`.

        This function needs to be called by all MPI ranks unless called within
        the :attr:`~prism.Pipeline.worker_mode` context manager.

        Parameters
        ----------
        par_set : 1D array_like or dict
            Sample to calculate the posterior probability for. This sample is
            first analyzed in `pipeline_obj` and only given to `lnpost_fn` if
            it is plausible. If `par_dict` is *True*, this is a dict.
        args : positional arguments
            Positional arguments that need to be passed to `lnpost_fn`.
        kwargs : keyword arguments
            Keyword arguments that need to be passed to `lnpost_fn`.

        Returns
        -------
        lnp : float
            The natural logarithm of the posterior probability of `par_set`, as
            determined by `lnpost_fn` if `par_set` is plausible. If
            `impl_prior` is *True*, `lnp` is calculated as `lnprior` +
            `lnpost_fn()`, with `lnprior` the natural logarithm of the first
            implausibility cut-off value of `par_set` scaled with its maximum.

        """

        # If par_dict is True, convert par_set to a NumPy array
        if par_dict:
            sam = np_array(sdict(par_set).values(), ndmin=2)
        else:
            sam = np_array(par_set, ndmin=2)

        # If unit_space is True, convert par_set to par_space
        if unit_space:
            sam = pipe._modellink._to_par_space(sam)

        # Check if par_set is within parameter space and return -inf if not
        par_rng = pipe._modellink._par_rng
        if not ((par_rng[:, 0] <= sam[0]) * (sam[0] <= par_rng[:, 1])).all():
            return (-np.inf)

        # Check what sampling is requested and analyze par_set
        if impl_prior:
            impl_sam, lnprior = pipe._make_call('_evaluate_sam_set', emul_i,
                                                sam, 'hybrid')
        else:
            impl_sam = pipe._make_call('_evaluate_sam_set', emul_i, sam,
                                       'analyze')
            lnprior = 0

        # If par_set is plausible, call lnpost_fn
        if len(impl_sam):
            return (lnprior + lnpost_fn(par_set, *args, **kwargs))

        # If par_set is not plausible, return -inf
        else:
            return (-np.inf)
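
# A minimal usage sketch, assuming 'pipe' is an analyzed prism.Pipeline
# instance, 'lnpost' is a user-defined log-posterior function, get_walkers is
# importable from prism.utils and the optional emcee package is installed:
import emcee

from prism.utils import get_walkers

n_walkers, p0_walkers, hybrid_lnpost = get_walkers(pipe, req_n_walkers=100,
                                                   lnpost_fn=lnpost)
sampler = emcee.EnsembleSampler(n_walkers, p0_walkers.shape[1], hybrid_lnpost)
sampler.run_mcmc(p0_walkers, 500)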
Example #9
def test_np_array():
    array = np.array([1, 2])
    assert np_array(array) is array