Example #1
def logpdf(sample, mean, cov):
    psd(cov)
    inv_cov = minv(cov)

    log_part_func = (-.5 * T.log(det(cov)) -
                     .5 * sample.shape[0] * T.log(2 * np.pi))

    mean = T.shape_padleft(mean)
    residual = sample - mean
    log_density = -.5 * T.dot(T.dot(residual, inv_cov), residual.T)

    return log_density + log_part_func
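The snippet above is not self-contained: it presumably relies on module-level imports such as numpy as np, theano.tensor as T and linear-algebra helpers like psd, minv and det, and it appears to treat sample as a single d-dimensional vector (otherwise sample.shape[0] would be the number of samples rather than the dimensionality). As a rough sanity check of the underlying formula only, a plain NumPy/SciPy sketch could look like this (the helper name mvn_logpdf_np is made up here):

import numpy as np
from scipy.stats import multivariate_normal


def mvn_logpdf_np(sample, mean, cov):
    # log N(sample | mean, cov) for one d-dimensional sample:
    # -0.5 * r^T cov^-1 r - 0.5 * log det(cov) - 0.5 * d * log(2 pi)
    residual = sample - mean
    inv_cov = np.linalg.inv(cov)
    maha = residual @ inv_cov @ residual
    return -0.5 * (maha + np.log(np.linalg.det(cov))
                   + sample.shape[0] * np.log(2 * np.pi))


mean = np.array([-1., 1.])
cov = np.array([[.9, .4], [.4, .3]])
x = np.array([0., 0.])
print(np.allclose(mvn_logpdf_np(x, mean, cov),
                  multivariate_normal(mean, cov).logpdf(x)))  # True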
Example #2
def pdf(sample, mean, cov):
    dim = sample.shape[0]
    psd(cov)
    inv_cov = minv(cov)

    part_func = (2 * np.pi)**(dim / 2.) * det(cov)**0.5

    mean = T.shape_padleft(mean)
    residual = sample - mean
    density = T.exp(-.5 * T.dot(T.dot(residual, inv_cov), residual.T))

    return density / part_func
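Note that computing the density directly, as above, underflows to zero in double precision for samples far from the mean, while the log-density stays finite; this is the main reason the logpdf variants exist alongside pdf. A small SciPy illustration:

import numpy as np
from scipy.stats import multivariate_normal

dist = multivariate_normal(mean=np.zeros(2), cov=np.eye(2))
far = np.full(2, 40.0)
print(dist.pdf(far))     # 0.0 -- the density underflows
print(dist.logpdf(far))  # about -1601.8 -- still finite and usable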
Example #3
def pdf(sample, mean, cov):
    dim = sample.shape[0]
    psd(cov)
    inv_cov = minv(cov)

    part_func = (2 * np.pi) ** (dim / 2.) * det(cov) ** 0.5

    mean = T.shape_padleft(mean)
    residual = sample - mean
    density = T.exp(-.5 * T.dot(T.dot(residual, inv_cov), residual.T))

    return density / part_func
Example #4
def logpdf(sample, mean, cov):
    psd(cov)
    inv_cov = minv(cov)

    log_part_func = (
        - .5 * T.log(det(cov))
        - .5 * sample.shape[0] * T.log(2 * np.pi))

    mean = T.shape_padleft(mean)
    residual = sample - mean
    log_density = - .5 * T.dot(T.dot(residual, inv_cov), residual.T)

    return log_density + log_part_func
Example #5
def exprs(inpt, test_inpt, target, length_scales, noise, amplitude, kernel):
    exprs = {}
    # To stay compatible with the prediction API, target is a matrix to the
    # outside. In the following it is easier to work with a vector, though,
    # so we keep a reference to the matrix anyway in order to return it.

    target_ = target[:, 0]

    # The kernel parameters are parametrized in the log domain. Here we
    # recover them and make sure that they do not become zero.
    minimal_noise = 1e-4
    minimal_length_scale = 1e-4
    minimal_amplitude = 1e-4

    noise = T.exp(noise) + minimal_noise
    length_scales = T.exp(length_scales) + minimal_length_scale
    amplitude = T.exp(amplitude) + minimal_amplitude

    # In the case of stationary kernels (those which depend only on the
    # distances) we can save some work by caching the distances. Thus we
    # first find out whether the kernel is stationary by checking whether it
    # can be computed from the pairwise diffs only---this is the case if a
    # ``XXX_by_dist`` function is available in the kernel module.
    # If that is the case, we add the diff expression to the exprs dict, so
    # it can be exploited by code on top via a givens dictionary.

    kernel_by_dist_func = lookup('%s_by_dist' % kernel, kernel_, None)
    stationary = kernel_by_dist_func is not None
    kernel_func = lookup(kernel, kernel_)

    if stationary:
        inpt_scaled = inpt * length_scales.dimshuffle('x', 0)
        diff = exprs['diff'] = misc.pairwise_diff(inpt_scaled, inpt_scaled)
        D2 = exprs['sqrd_dist'] = misc.distance_matrix_by_diff(diff, 'l2')
        gram_matrix = amplitude * kernel_by_dist_func(D2)
        exprs['D2'] = D2
    else:
        gram_matrix = kernel_func(inpt, inpt, length_scales, amplitude)

    # TODO clarify nomenclature; the gram matrix is actually the whole thing
    # without noise.

    gram_matrix += T.identity_like(gram_matrix) * noise

    # This is an informed choice. I played around a little with various
    # methods (e.g. using a Cholesky decomposition first) and came to the
    # conclusion that this way of doing it was considerably faster than an
    # explicit Cholesky.

    psd(gram_matrix)
    inv_gram_matrix = minv(gram_matrix)

    n_samples = gram_matrix.shape[0]
    ll = (
        - 0.5 * T.dot(T.dot(target_.T, inv_gram_matrix), target_)
        - 0.5 * T.log(det(gram_matrix))
        - 0.5 * n_samples * T.log(2 * np.pi))
    nll = -ll

    # We are interested in a loss that is invariant to the number of
    # samples.
    nll /= n_samples
    loss = nll

    # Whenever we are working with points not in the training set, the
    # corresponding expressions are prefixed with test_. Thus test_inpt,
    # test_K (the kernel matrix of the test inputs only), and test_kernel
    # (the kernel matrix of the training inputs with the test inputs).
    test_kernel = kernel_func(inpt, test_inpt, length_scales, amplitude)

    kTK = T.dot(test_kernel.T, inv_gram_matrix)
    output = output_mean = T.dot(kTK, target_).dimshuffle(0, 'x')

    kTKk = T.dot(kTK, test_kernel)

    chol_inv_gram_matrix = cholesky(inv_gram_matrix)

    diag_kTKk = (T.dot(chol_inv_gram_matrix.T, test_kernel) ** 2).sum(axis=0)
    test_K = kernel_func(test_inpt, test_inpt, length_scales, amplitude,
                         diag=True)
    output_var = ((test_K - diag_kTKk)).dimshuffle(0, 'x')

    return get_named_variables(locals())
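The diag_kTKk line above avoids building the dense kTKk matrix when only its diagonal (the per-point reduction of the predictive variance) is needed: with a lower-triangular L = cholesky(inv_gram_matrix), so that inv_gram_matrix = L L^T, the diagonal of k^T K^-1 k equals the column-wise sum of squares of L^T k. A NumPy check of that identity (all names below are illustrative, not part of breze):

import numpy as np

rng = np.random.default_rng(0)
K = rng.standard_normal((5, 5))
K = K @ K.T + 5 * np.eye(5)              # positive definite "gram matrix"
k = rng.standard_normal((5, 3))          # train-by-test cross kernel

inv_K = np.linalg.inv(K)
L = np.linalg.cholesky(inv_K)            # inv_K == L @ L.T

dense = k.T @ inv_K @ k                  # full kTKk
cheap = ((L.T @ k) ** 2).sum(axis=0)     # only its diagonal
print(np.allclose(np.diag(dense), cheap))  # True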
Example #6
File: mvn.py Project: osdf/breze
def logpdf(sample, mean, cov):
    """Return a theano expression representing the values of the log probability
    density function of the multivariate normal.

    Parameters
    ----------

    sample : Theano variable
        Array of shape ``(n, d)`` where ``n`` is the number of samples and
        ``d`` the dimensionality of the data.

    mean : Theano variable
        Array of shape ``(d,)`` representing the mean of the distribution.

    cov : Theano variable
        Array of shape ``(d, d)`` representing the covariance of the
        distribution.


    Returns
    -------

    l : Theano variable
        Array of shape ``(n,)`` where each entry represents the log density of
        the corresponding sample.


    Examples
    --------

    >>> import theano
    >>> import theano.tensor as T
    >>> import numpy as np
    >>> from breze.learn.utils import theano_floatx
    >>> sample = T.matrix('sample')
    >>> mean = T.vector('mean')
    >>> cov = T.matrix('cov')
    >>> p = logpdf(sample, mean, cov)
    >>> f_p = theano.function([sample, mean, cov], p)

    >>> mu = np.array([-1, 1])
    >>> sigma = np.array([[.9, .4], [.4, .3]])
    >>> X = np.array([[-1, 1], [1, -1]])
    >>> mu, sigma, X = theano_floatx(mu, sigma, X)
    >>> ps = f_p(X, mu, sigma)
    >>> np.allclose(ps, np.log([4.798702e-01, 7.73744047e-17]))
    True
    """
    psd(cov)
    inv_cov = minv(cov)

    L = chol(inv_cov)

    # sample has shape (n, d), so the dimensionality entering the partition
    # function is sample.shape[1], not the number of samples.
    log_part_func = (
        - .5 * T.log(det(cov))
        - .5 * sample.shape[1] * T.log(2 * np.pi))

    mean = T.shape_padleft(mean)
    residual = sample - mean
    B = T.dot(residual, L)
    A = (B ** 2).sum(axis=1)
    log_density = - .5 * A

    return log_density + log_part_func
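Because the docstring specifies sample as an (n, d) array with one log density per row, the partition term must use the dimensionality d (sample.shape[1]), not the number of samples. A self-contained NumPy check of the batched computation against SciPy, with n != d (the helper name batch_logpdf is only for illustration):

import numpy as np
from scipy.stats import multivariate_normal


def batch_logpdf(sample, mean, cov):
    # Same "whitened residual" trick as above: with inv_cov = L @ L.T, the
    # per-row Mahalanobis term is the row-wise squared norm of residual @ L.
    inv_cov = np.linalg.inv(cov)
    L = np.linalg.cholesky(inv_cov)
    residual = sample - mean
    maha = ((residual @ L) ** 2).sum(axis=1)
    d = sample.shape[1]
    return -0.5 * (maha + np.log(np.linalg.det(cov)) + d * np.log(2 * np.pi))


mean = np.array([-1., 1.])
cov = np.array([[.9, .4], [.4, .3]])
X = np.array([[-1., 1.], [1., -1.], [0., 0.]])   # n = 3, d = 2
print(np.allclose(batch_logpdf(X, mean, cov),
                  multivariate_normal(mean, cov).logpdf(X)))  # True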
Example #7
    def make_exprs(inpt, test_inpt, target,
                   length_scales, noise, amplitude, kernel):
        exprs = {}

        # To stay compatible with the prediction API, target is a matrix to
        # the outside. In the following it is easier to work with a vector,
        # though, so we keep a reference to the matrix anyway in order to
        # return it.

        target_ = target
        target = target[:, 0]
        noise = T.exp(noise) + GaussianProcess.minimal_noise
        length_scales = T.exp(length_scales) + GaussianProcess.minimal_length_scale
        amplitude = T.exp(amplitude) + 1e-4

        # In the case of stationary kernels (those which depend only on the
        # distances) we can save some work by caching the distances. Thus we
        # first find out whether the kernel is stationary by checking whether
        # it can be computed from the pairwise diffs only---this is the case
        # if a ``XXX_by_dist`` function is available in the kernel module.
        # If that is the case, we add the diff expr to the exprs dict, so it
        # can be exploited by code on top via a givens dictionary.

        kernel_by_dist_func = lookup('%s_by_dist' % kernel, kernel_, None)
        stationary = kernel_by_dist_func is not None
        kernel_func = lookup(kernel, kernel_)

        if stationary:
            inpt_scaled = inpt * length_scales.dimshuffle('x', 0)
            diff = exprs['diff'] = misc.pairwise_diff(inpt_scaled, inpt_scaled)
            D2 = exprs['sqrd_dist'] = misc.distance_matrix_by_diff(diff, 'l2')
            K = amplitude * kernel_by_dist_func(D2)
            exprs['D2'] = D2
        else:
            K = kernel_func(inpt, inpt, length_scales, amplitude)

        K += T.identity_like(K) * noise

        # This is an informed choice. I played around a little with various
        # methods (e.g. using a Cholesky decomposition first) and came to the
        # conclusion that this way of doing it was considerably faster than
        # an explicit Cholesky.

        psd(K)
        inv_K = minv(K)

        n_samples = K.shape[0]
        ll = (
            - 0.5 * T.dot(T.dot(target.T, inv_K), target)
            - 0.5 * T.log(det(K))
            - 0.5 * n_samples * T.log(2 * np.pi))
        nll = -ll

        # We are interested in a loss that is invariant to the number of
        # samples.
        nll /= n_samples

        # Whenever we are working with points not in the training set, the
        # corresponding expressions are prefixed with test_. Thus test_inpt,
        # test_K (the kernel matrix of the test inputs only), and test_kernel
        # (the kernel matrix of the training inputs with the test inputs).
        test_kernel = kernel_func(inpt, test_inpt, length_scales, amplitude)

        kTK = T.dot(test_kernel.T, inv_K)
        output_mean = T.dot(kTK, target).dimshuffle(0, 'x')

        kTKk = T.dot(kTK, test_kernel)

        chol_inv_K = cholesky(inv_K)

        diag_kTKk = (T.dot(chol_inv_K.T, test_kernel) ** 2).sum(axis=0)
        test_K = kernel_func(test_inpt, test_inpt, length_scales, amplitude,
                             diag=True)
        output_var = ((test_K - diag_kTKk)).dimshuffle(0, 'x')

        exprs.update({
            'inpt': inpt,
            'test_inpt': test_inpt,
            'target': target_,
            'gram_matrix': K,
            'inv_gram_matrix': inv_K,
            'chol_inv_gram_matrix': chol_inv_K,
            'nll': nll,
            'loss': nll,
            'output': output_mean,
            'output_var': output_var,
            'test_kernel': test_kernel,
            'test_K': test_K,
            'kTKk': kTKk,
        })
        return exprs
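What make_exprs ultimately computes are the standard Gaussian process regression posterior formulas: output_mean = k*^T K^-1 y and output_var = diag(K**) - diag(k*^T K^-1 k*), where K is the noisy training gram matrix, k* the train-by-test kernel and K** the test-input kernel. A minimal NumPy sketch of those formulas, assuming a squared-exponential kernel (the kernel_ module may parametrize its kernels differently, so this is only an illustration):

import numpy as np


def se_kernel(a, b, length_scale=1.0, amplitude=1.0):
    # Squared-exponential kernel, used here only to illustrate the formulas.
    d2 = ((a[:, None, :] - b[None, :, :]) ** 2).sum(-1) / length_scale ** 2
    return amplitude * np.exp(-0.5 * d2)


rng = np.random.default_rng(1)
X = rng.uniform(-3, 3, size=(20, 1))
y = np.sin(X[:, 0]) + 0.1 * rng.standard_normal(20)
X_test = np.linspace(-3, 3, 50)[:, None]
noise = 0.01

K = se_kernel(X, X) + noise * np.eye(len(X))    # gram_matrix including noise
k_star = se_kernel(X, X_test)                   # test_kernel
inv_K = np.linalg.inv(K)

output_mean = k_star.T @ inv_K @ y
output_var = (np.diag(se_kernel(X_test, X_test))
              - np.diag(k_star.T @ inv_K @ k_star))
print(output_mean.shape, output_var.shape)      # (50,) (50,)

Example #7 obtains the same variance diagonal more cheaply through the Cholesky factor of inv_K, as checked in the sketch after Example #5.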