Exemplo n.º 1
0
def uniform_partition_uniform_distribution_rectangle_size(data_set,
                                                          Q_ref=None,
                                                          rect_size=None,
                                                          M=50,
                                                          num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D}}`
    where :math:`\rho_{\mathcal{D}}` is a uniform probability density on
    a generalized rectangle centered at ``Q_ref`` or the ``reference_value``
    of a sample set. If ``Q_ref`` is not given the reference value is used.
    The support of this density is defined by ``rect_size``, which determines
    the size of the generalized rectangle.
    The simple function approximation is then defined by determining ``M``
    Voronoi cells (i.e., "bins") partitioning :math:`\mathcal{D}`. These
    bins are only implicitly defined by ``M`` samples in :math:`\mathcal{D}`.
    Finally, the probabilities of each of these bins is computed by
    sampling from :math:`\rho_{\mathcal{D}}` and using nearest neighbor
    searches to bin these samples in the ``M`` implicitly defined bins.
    The result is the simple function approximation denoted by
    :math:`\rho_{\mathcal{D},M}`.

    .. note::

        ``data_set`` is passed to ``check_inputs`` to obtain the dimension
        and ``Q_ref``. If it is a :class:`~bet.sample.discretization`, its
        ``_output_probability_set`` attribute is also set to the returned
        sample set.

    Note that all computations in the measure-theoretic framework that
    follow from this are for the fixed simple function approximation
    :math:`\rho_{\mathcal{D},M}`.

    :param int M: Defines number M samples in D used to define
        :math:`\rho_{\mathcal{D},M}` The choice of M is something of an "art" -
        play around with it and you can get reasonable results with a
        relatively small number here like 50.
    :param rect_size: Determines the size of the support of the
        uniform distribution on a generalized rectangle. A scalar is
        repeated for every dimension; a list, tuple or array gives one
        size per dimension.
    :type rect_size: double or list
    :param int num_d_emulate: Number of samples used to emulate using an MC
        assumption
    :param data_set: Sample set that the probability measure is defined for.
    :type data_set: :class:`~bet.sample.discretization`
        or :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray`
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)

    :raises wrong_argument_type: if ``rect_size`` is ``None`` or contains a
        value that is not strictly positive.

    :rtype: :class:`~bet.sample.voronoi_sample_set`
    :returns: sample_set object defining simple function approximation
    """

    # Only the dimension and the (possibly inferred) reference value are used.
    (_, dim, _, Q_ref) = check_inputs(data_set, Q_ref)

    if rect_size is None:
        raise wrong_argument_type("Rectangle size required.")
    if np.ndim(rect_size) == 0:
        # Scalar size: use the same edge length in every dimension.
        rect_size = rect_size * np.ones((dim,))
    else:
        # Lists/tuples must become arrays, otherwise ``1.5 * rect_size``
        # below fails for plain Python sequences.
        rect_size = np.asarray(rect_size)
    if np.any(np.less_equal(rect_size, 0)):
        msg = 'rect_size must be greater than 0'
        raise wrong_argument_type(msg)

    # Create M samples defining M Voronoi cells (i.e., "bins") in D used to
    # define the simple function approximation rho_{D,M}.
    #
    # This does not have to be random, but here we assume this to be the
    # case. We can choose these samples deterministically but that fails to
    # scale with dimension efficiently.
    #
    # Note that these M samples are chosen for the sole purpose of
    # determining the bins used to create the approximation to rho_D.
    #
    # We call these M samples "d_distr_samples" because they are samples on
    # the data space and the distr implies these samples are chosen to create
    # the approximation to the probability measure (distribution) on D.
    #
    # Note that we create these samples in a set containing the
    # hyperrectangle (hence the factor 1.5) in order to get output cells with
    # zero probability. If all of the d_distr_samples were taken from within
    # the support of rho_D then each of the M bins would have positive
    # probability. This would in turn imply that the support of rho_Lambda is
    # all of Lambda.
    if comm.rank == 0:
        d_distr_samples = 1.5 * rect_size * (
            np.random.random((M, dim)) - 0.5) + Q_ref
    else:
        d_distr_samples = np.empty((M, dim))
    # Every rank must bin against the same cells, so rank 0's draw is shared.
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    # Initialize sample set object
    s_set = samp.voronoi_sample_set(dim)
    s_set.set_values(d_distr_samples)
    s_set.set_kdtree()

    # Compute probabilities in the M bins used to define rho_{D,M} by Monte
    # Carlo approximations that in this context amount to binning with
    # nearest neighbor approximations the num_d_emulate samples taken from
    # rho_D.

    # Split the emulation samples over the ranks; the remainder of the
    # division is handed out one sample at a time to the lowest ranks.
    num_d_emulate_local = int((num_d_emulate / comm.size) +
                              (comm.rank < num_d_emulate % comm.size))
    # Generate the local samples from rho_D (uniform on the rectangle).
    d_distr_emulate = rect_size * (
        np.random.random((num_d_emulate_local, dim)) - 0.5) + Q_ref

    # Bin these samples using nearest neighbor searches
    (_, k) = s_set.query(d_distr_emulate)

    # ``int`` is the dtype the removed ``np.int`` alias referred to.
    # NOTE(review): the reduction below declares MPI.INT for this buffer;
    # confirm that matches the platform's default integer width.
    count_neighbors = np.zeros((M,), dtype=int)
    for i in range(M):
        count_neighbors[i] = np.sum(np.equal(k, i))

    # Use the binning to define rho_{D,M}: sum the per-rank counts and
    # normalize by the requested number of emulation samples.
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    rho_D_M = count_neighbors.astype(np.float64) / float(num_d_emulate)
    s_set.set_probabilities(rho_D_M)

    # NOTE: The computation of q_distr_prob, q_distr_emulate, q_distr_samples
    # above, while possibly informed by the sampling of the map Q, do not
    # require solving the model EVER! This can be done "offline" so to speak.
    # The results can then be stored and accessed later by the algorithm
    # using a completely different set of parameter samples and model solves.
    if isinstance(data_set, samp.discretization):
        data_set._output_probability_set = s_set
    return s_set
Exemplo n.º 2
0
def uniform_partition_normal_distribution(data_set, Q_ref, std, M,
        num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}`
    where :math:`\rho_{\mathcal{D},M}` is a multivariate normal probability
    density centered at ``Q_ref`` with standard deviation ``std`` using
    ``M`` bins sampled from a uniform distribution with a size 4 standard
    deviations in each direction.

    If ``data_set`` is a :class:`~bet.sample.discretization`, its
    ``_output_probability_set`` attribute is set to the returned sample set.

    :param data_set: Sample set that the probability measure is defined for.
    :type data_set: :class:`~bet.sample.discretization` or
        :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray`
    :param int M: Defines number M samples in D used to define
        :math:`\rho_{\mathcal{D},M}` The choice of M is something of an "art" -
        play around with it and you can get reasonable results with a
        relatively small number here like 50.
    :param int num_d_emulate: Number of samples used to emulate using an MC
        assumption
    :param Q_ref: :math:`Q(\lambda_{reference})`; a scalar is treated as a
        one-dimensional reference value.
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)
    :param std: The standard deviation of each QoI; a scalar is treated as
        a one-dimensional standard deviation.
    :type std: :class:`~numpy.ndarray` of size (mdim,)

    :rtype: :class:`~bet.sample.voronoi_sample_set`
    :returns: sample_set object defining simple function approximation

    """
    # Promote scalars (and plain sequences) to 1-D arrays so that ``len`` and
    # the arithmetic below work uniformly.
    Q_ref = np.atleast_1d(Q_ref)
    std = np.atleast_1d(std)
    mdim = len(Q_ref)

    # Create M samples defining M bins in D used to define rho_{D,M}. rho_D
    # is assumed to be a multi-variate normal distribution with mean Q_ref
    # and standard deviation std. The bin centers are drawn uniformly from a
    # box of edge length 4*std centered at Q_ref.
    bin_size = 4.0 * std
    d_distr_samples = np.zeros((M, mdim))
    if comm.rank == 0:
        d_distr_samples = bin_size * (
            np.random.random((M, mdim)) - 0.5) + Q_ref
    # Every rank must bin against the same cells, so rank 0's draw is shared.
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    # Initialize sample set object
    s_set = samp.voronoi_sample_set(mdim)
    s_set.set_values(d_distr_samples)
    s_set.set_kdtree()

    # Now compute probabilities for rho_{D,M} by sampling from rho_D.
    # First generate samples of rho_D ("emulation"). The samples are split
    # over the ranks; the remainder goes one-by-one to the lowest ranks.
    num_d_emulate_local = int((num_d_emulate / comm.size) +
                              (comm.rank < num_d_emulate % comm.size))
    d_distr_emulate = np.zeros((num_d_emulate_local, mdim))
    for i in range(mdim):
        d_distr_emulate[:, i] = np.random.normal(Q_ref[i], std[i],
                                                 num_d_emulate_local)

    # Now bin samples of rho_D in the M bins of D to compute rho_{D, M}
    (_, k) = s_set.query(d_distr_emulate)
    # ``int`` is the dtype the removed ``np.int`` alias referred to.
    # NOTE(review): the reduction below declares MPI.INT for this buffer;
    # confirm that matches the platform's default integer width.
    count_neighbors = np.zeros((M,), dtype=int)
    for i in range(M):
        count_neighbors[i] = np.sum(np.equal(k, i))

    # Now define probability of the d_distr_samples. This together with
    # d_distr_samples defines rho_{D,M}: sum the per-rank counts and
    # normalize by the requested number of emulation samples.
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    rho_D_M = count_neighbors.astype(np.float64) / float(num_d_emulate)
    s_set.set_probabilities(rho_D_M)
    # NOTE: The computation of q_distr_prob, q_distr_emulate, q_distr_samples
    # above, while informed by the sampling of the map Q, do not require
    # solving the model EVER! This can be done "offline" so to speak.
    if isinstance(data_set, samp.discretization):
        data_set._output_probability_set = s_set
    return s_set
Exemplo n.º 3
0
def unif_normal(Q_ref, M, std, num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}`
    where :math:`\rho_{\mathcal{D},M}` is a multivariate normal probability
    density centered at Q_ref with standard deviation std using M bins sampled
    from a uniform distribution with a size 4 standard deviations in each
    direction.

    :param int M: Defines number M samples in D used to define
        :math:`\rho_{\mathcal{D},M}` The choice of M is something of an "art" -
        play around with it and you can get reasonable results with a
        relatively small number here like 50.
    :param int num_d_emulate: Number of samples used to emulate using an MC
        assumption
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)
    :param std: The standard deviation of each QoI
    :type std: :class:`~numpy.ndarray` of size (mdim,)
    :rtype: tuple
    :returns: (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,) and
        ``d_distr_samples`` are (M, mdim) :class:`~numpy.ndarray` and
        ``d_Tree`` is the :class:`~scipy.spatial.KDTree` for d_distr_samples

    """
    mdim = len(Q_ref)

    # Create M samples defining M bins in D used to define rho_{D,M}. rho_D
    # is assumed to be a multi-variate normal distribution with mean Q_ref
    # and standard deviation std. The bin centers are drawn uniformly from a
    # box of edge length 4*std centered at Q_ref.
    bin_size = 4.0 * std
    d_distr_samples = np.zeros((M, mdim))
    if comm.rank == 0:
        d_distr_samples = bin_size * (
            np.random.random((M, mdim)) - 0.5) + Q_ref
    # Every rank must bin against the same cells, so rank 0's draw is shared.
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    # Now compute probabilities for rho_{D,M} by sampling from rho_D.
    # First generate samples of rho_D ("emulation"). Each rank draws the same
    # number of samples, so the global total is comm.size * num_local, which
    # is the normalization used below.
    num_local = int(num_d_emulate / comm.size) + 1
    d_distr_emulate = np.zeros((num_local, mdim))
    for i in range(mdim):
        d_distr_emulate[:, i] = np.random.normal(Q_ref[i], std[i], num_local)

    # Now bin samples of rho_D in the M bins of D to compute rho_{D, M}
    d_Tree = spatial.KDTree(d_distr_samples)
    (_, k) = d_Tree.query(d_distr_emulate)
    # ``int`` is the dtype the removed ``np.int`` alias referred to.
    # NOTE(review): the reduction below declares MPI.INT for this buffer;
    # confirm that matches the platform's default integer width.
    count_neighbors = np.zeros((M,), dtype=int)
    for i in range(M):
        count_neighbors[i] = np.sum(np.equal(k, i))

    # Now define probability of the d_distr_samples. This together with
    # d_distr_samples defines rho_{D,M}.
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    rho_D_M = count_neighbors.astype(np.float64) / float(comm.size * num_local)

    # NOTE: The computation of q_distr_prob, q_distr_emulate, q_distr_samples
    # above, while informed by the sampling of the map Q, do not require
    # solving the model EVER! This can be done "offline" so to speak.
    return (rho_D_M, d_distr_samples, d_Tree)
Exemplo n.º 4
0
def normal_partition_normal_distribution(data_set, Q_ref=None, std=1, M=1,
                                         num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}`
    where :math:`\rho_{\mathcal{D},M}` is a multivariate normal probability
    density centered at ``Q_ref`` with standard deviation ``std`` using
    ``M`` bins sampled from the given normal distribution.

    If ``Q_ref`` is ``None`` it is obtained from ``infer_Q(data_set)``.
    If ``data_set`` is a :class:`~bet.sample.discretization`, its
    ``_output_probability_set`` attribute is set to the returned sample set
    and ``data_set.set_io_ptr(globalize=False)`` is called.

    :param data_set: Sample set that the probability measure is defined for.
    :type data_set: :class:`~bet.sample.discretization` or
        :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray`
    :param int M: Defines number M samples in D used to define
        :math:`\rho_{\mathcal{D},M}` The choice of M is something of an "art" -
        play around with it and you can get reasonable results with a
        relatively small number here like 50.
    :param int num_d_emulate: Number of samples used to emulate using an MC
        assumption
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)
    :param std: The standard deviation of each QoI
    :type std: :class:`~numpy.ndarray` of size (mdim,)

    :rtype: :class:`~bet.sample.voronoi_sample_set`
    :returns: sample_set object defining simple function approximation

    """
    if Q_ref is None:
        Q_ref = infer_Q(data_set)
    import scipy.stats as stats
    # Create M samples defining M bins in D used to define rho_{D,M}. rho_D
    # is assumed to be a multi-variate normal distribution with mean Q_ref
    # and standard deviation std.
    Q_ref = check_type(Q_ref, data_set)
    std = check_type(std, data_set)

    # Per-component variances; passed to scipy as the covariance argument.
    covariance = std ** 2

    d_distr_samples = np.zeros((M, len(Q_ref)))
    logging.info("d_distr_samples.shape " + str(d_distr_samples.shape))
    logging.info("Q_ref.shape " + str(Q_ref.shape))
    logging.info("std.shape " + str(std.shape))

    # Rank 0 draws the bin centers from the target normal distribution and
    # shares them so every rank bins against the same cells.
    if comm.rank == 0:
        for i in range(len(Q_ref)):
            d_distr_samples[:, i] = np.random.normal(Q_ref[i], std[i], M)
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    # Initialize sample set object
    s_set = samp.voronoi_sample_set(len(Q_ref))
    s_set.set_values(d_distr_samples)
    s_set.set_kdtree()

    # Now compute probabilities for rho_{D,M} by sampling from rho_D.
    # First generate samples of rho_D ("emulation"). The samples are split
    # over the ranks; the remainder goes one-by-one to the lowest ranks.
    num_d_emulate_local = int((num_d_emulate / comm.size) +
                              (comm.rank < num_d_emulate % comm.size))
    d_distr_emulate = np.zeros((num_d_emulate_local, len(Q_ref)))
    for i in range(len(Q_ref)):
        d_distr_emulate[:, i] = np.random.normal(Q_ref[i], std[i],
                                                 num_d_emulate_local)

    # Now bin samples of rho_D in the M bins of D to compute rho_{D, M}
    (_, k) = s_set.query(d_distr_emulate)
    # ``int`` is the dtype the removed ``np.int`` alias referred to.
    # NOTE(review): the reduction below declares MPI.INT for this buffer;
    # confirm that matches the platform's default integer width.
    count_neighbors = np.zeros((M,), dtype=int)
    volumes = np.zeros((M,))
    for i in range(M):
        in_bin = np.equal(k, i)
        count_neighbors[i] = np.sum(in_bin)
        # Sum of reciprocal density values over the local samples in bin i;
        # presumably an (unnormalized) Monte Carlo estimate of the cell
        # volume -- TODO confirm against set_volumes' expectations.
        volumes[i] = np.sum(1.0 / stats.multivariate_normal.pdf(
            d_distr_emulate[in_bin, :], Q_ref, covariance))

    # Now define probability of the d_distr_samples
    # This together with d_distr_samples defines rho_{D,M}
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    cvolumes = np.copy(volumes)
    comm.Allreduce([volumes, MPI.DOUBLE], [cvolumes, MPI.DOUBLE], op=MPI.SUM)
    volumes = cvolumes
    # Weight each bin by (global count) * (global volume) and normalize so
    # the probabilities sum to one.
    rho_D_M = count_neighbors.astype(np.float64) * volumes
    rho_D_M = rho_D_M / np.sum(rho_D_M)
    s_set.set_probabilities(rho_D_M)
    s_set.set_volumes(volumes)

    # NOTE: The computation of q_distr_prob, q_distr_emulate, q_distr_samples
    # above, while informed by the sampling of the map Q, do not require
    # solving the model EVER! This can be done "offline" so to speak.
    if isinstance(data_set, samp.discretization):
        data_set._output_probability_set = s_set
        data_set.set_io_ptr(globalize=False)
    return s_set
Exemplo n.º 5
0
def unif_unif(data, Q_ref, M=50, bin_ratio=0.2, num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D}}`
    where :math:`\rho_{\mathcal{D}}` is a uniform probability density on
    a generalized rectangle centered at Q_ref.
    The support of this density is defined by bin_ratio, which determines
    the size of the generalized rectangle by scaling the circumscribing
    generalized rectangle of :math:`\mathcal{D}`.
    The simple function approximation is then defined by determining M
    Voronoi cells (i.e., "bins") partitioning :math:`\mathcal{D}`. These
    bins are only implicitly defined by M samples in :math:`\mathcal{D}`.
    Finally, the probabilities of each of these bins is computed by
    sampling from :math:`\rho_{\mathcal{D}}` and using nearest neighbor
    searches to bin these samples in the M implicitly defined bins.
    The result is the simple function approximation denoted by
    :math:`\rho_{\mathcal{D},M}`.

    Note that all computations in the measure-theoretic framework that
    follow from this are for the fixed simple function approximation
    :math:`\rho_{\mathcal{D},M}`.

    :param int M: Defines number M samples in D used to define
        :math:`\rho_{\mathcal{D},M}` The choice of M is something of an "art" -
        play around with it and you can get reasonable results with a
        relatively small number here like 50.
    :param bin_ratio: The ratio used to determine the width of the
        uniform distribution as ``bin_size = (data_max-data_min)*bin_ratio``
    :type bin_ratio: double or list()
    :param int num_d_emulate: Number of samples used to emulate using an MC
        assumption
    :param data: Array containing QoI data where the QoI is mdim
        dimensional
    :type data: :class:`~numpy.ndarray` of size (num_samples, mdim)
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)
    :rtype: tuple
    :returns: (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,) and
        ``d_distr_samples`` are (M, mdim) :class:`~numpy.ndarray` and
        ``d_Tree`` is the :class:`~scipy.spatial.KDTree` for d_distr_samples
    """
    data = util.fix_dimensions_data(data)
    mdim = data.shape[1]
    # Edge lengths of the support: the per-column data range scaled by
    # bin_ratio.
    bin_size = (np.max(data, 0) - np.min(data, 0)) * bin_ratio

    # Create M samples defining M Voronoi cells (i.e., "bins") in D used to
    # define the simple function approximation rho_{D,M}.
    #
    # This does not have to be random, but here we assume this to be the
    # case. We can choose these samples deterministically but that fails to
    # scale with dimension efficiently.
    #
    # Note that these M samples are chosen for the sole purpose of
    # determining the bins used to create the approximation to rho_D.
    #
    # We call these M samples "d_distr_samples" because they are samples on
    # the data space and the distr implies these samples are chosen to create
    # the approximation to the probability measure (distribution) on D.
    #
    # Note that we create these samples in a set containing the
    # hyperrectangle (hence the factor 1.5) in order to get output cells with
    # zero probability. If all of the d_distr_samples were taken from within
    # the support of rho_D then each of the M bins would have positive
    # probability. This would in turn imply that the support of rho_Lambda is
    # all of Lambda.
    if comm.rank == 0:
        d_distr_samples = 1.5 * bin_size * (
            np.random.random((M, mdim)) - 0.5) + Q_ref
    else:
        d_distr_samples = np.empty((M, mdim))
    # Every rank must bin against the same cells, so rank 0's draw is shared.
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    # Compute probabilities in the M bins used to define rho_{D,M} by Monte
    # Carlo approximations that in this context amount to binning with
    # nearest neighbor approximations the samples taken from rho_D.

    # Generate the samples from rho_D. Each rank draws the same number of
    # samples, so the global total is comm.size * num_local, which is the
    # normalization used below.
    num_local = int(num_d_emulate / comm.size) + 1
    d_distr_emulate = bin_size * (
        np.random.random((num_local, mdim)) - 0.5) + Q_ref

    # Bin these samples using nearest neighbor searches
    d_Tree = spatial.KDTree(d_distr_samples)
    (_, k) = d_Tree.query(d_distr_emulate)
    # ``int`` is the dtype the removed ``np.int`` alias referred to.
    # NOTE(review): the reduction below declares MPI.INT for this buffer;
    # confirm that matches the platform's default integer width.
    count_neighbors = np.zeros((M,), dtype=int)
    for i in range(M):
        count_neighbors[i] = np.sum(np.equal(k, i))

    # Use the binning to define rho_{D,M}
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    rho_D_M = count_neighbors.astype(np.float64) / float(num_local * comm.size)

    # NOTE: The computation of q_distr_prob, q_distr_emulate, q_distr_samples
    # above, while possibly informed by the sampling of the map Q, do not
    # require solving the model EVER! This can be done "offline" so to speak.
    # The results can then be stored and accessed later by the algorithm
    # using a completely different set of parameter samples and model solves.
    return (rho_D_M, d_distr_samples, d_Tree)