def callback(params, t, g):
    print("Iteration {} lower bound {}".format(t, -objective(params, t)))

    plt.cla()
    # Plot the (unnormalized) target density.
    target_distribution = lambda x: np.exp(log_posterior(x, t))
    plot_isocontours(ax, target_distribution)

    # Plot the current variational approximation.
    mean, log_std = unpack_params(params)
    variational_contour = lambda x: mvn.pdf(x, mean, np.diag(np.exp(2 * log_std)))
    plot_isocontours(ax, variational_contour)
    plt.draw()
    plt.pause(1.0 / 30.0)
def log_posterior(x, t):
    """An example 2D intractable distribution: a Gaussian evaluated at zero
    with a Gaussian prior on its log standard deviation."""
    mu, log_sigma = x[:, 0], x[:, 1]
    prior = norm.logpdf(log_sigma, 0, 1.35)
    likelihood = norm.logpdf(mu, 0, np.exp(log_sigma))
    return prior + likelihood
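A quick evaluation sketch (not from the original script): log_posterior takes an (N, 2) array whose columns are mu and log_sigma, and larger log_sigma widens the Gaussian on mu, which is the shape the iso-contour callback above visualizes. This assumes norm is scipy.stats.norm and np is NumPy, as in the surrounding snippets.

import numpy as np

pts = np.array([[0.0, 0.0],     # mu = 0, log_sigma = 0
                [0.0, 2.0],     # wider likelihood on mu
                [3.0, 0.0]])    # mu far from zero
print(log_posterior(pts, t=0))  # three unnormalized log-density values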
def variational_objective(params, t):
    """Provides a stochastic estimate of the variational lower bound."""
    mean, log_std = unpack_params(params)
    samples = rs.randn(num_samples, D) * np.exp(log_std) + mean
    mean_log_prob = np.mean(logprob(samples, t))
    lower_bound = gaussian_entropy(log_std) + mean_log_prob
    print("mean log-probability is " + str(mean_log_prob))
    return -lower_bound
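For reference, here is a fully self-contained sketch of the same Monte Carlo lower-bound estimate using plain NumPy and a toy standard-normal target. The definitions of D, num_samples, rs, logprob, gaussian_entropy and unpack_params below are stand-ins for the globals the snippet relies on, not the original script's.

import numpy as np

D, num_samples = 2, 100
rs = np.random.RandomState(0)

def logprob(samples, t):
    # Toy target: isotropic standard-normal log-density (normalized).
    return -0.5 * np.sum(samples**2, axis=1) - 0.5 * D * np.log(2 * np.pi)

def gaussian_entropy(log_std):
    return 0.5 * D * (1.0 + np.log(2 * np.pi)) + np.sum(log_std)

def unpack_params(params):
    return params[:D], params[D:]

def variational_objective(params, t):
    mean, log_std = unpack_params(params)
    samples = rs.randn(num_samples, D) * np.exp(log_std) + mean
    return -(gaussian_entropy(log_std) + np.mean(logprob(samples, t)))

# When q matches the target exactly, the negative lower bound is ~0
# (zero KL divergence), up to Monte Carlo noise.
print(variational_objective(np.zeros(2 * D), t=0))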
Example No. 4
def variational_objective(params, t):
    """Provides a stochastic estimate of the variational lower bound."""
    mean, log_std = unpack_params(params)
    # Reparameterization: scale and shift standard-normal noise.
    samples = rs.randn(num_samples, D) * np.exp(log_std) + mean
    # samples: sampled weight vectors; t: targets.
    # logprob evaluates these weights on the inputs generated earlier.
    log_values = logprob(samples, t)
    mean_log_prob = np.mean(log_values)
    lower_bound = gaussian_entropy(log_std) + mean_log_prob
    print("mean log-probability is " + str(mean_log_prob))
    return -lower_bound
Example No. 5
def unpack_params(params):
    """Unpacks parameter vector into the proportions, means and covariances
    of each mixture component.  The covariance matrices are parametrized by
    their Cholesky decompositions."""
    log_proportions = parser.get(params, 'log proportions')
    normalized_log_proportions = log_proportions - logsumexp(log_proportions)
    means = parser.get(params, 'means')
    lower_tris = np.tril(parser.get(params, 'lower triangles'), k=-1)
    diag_chols = np.exp(parser.get(params, 'log diagonals'))
    chols = []
    for lower_tri, diag in zip(lower_tris, diag_chols):
        chols.append(np.expand_dims(lower_tri + np.diag(diag), 0))
    chols = np.concatenate(chols, axis=0)
    return normalized_log_proportions, means, chols
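As an illustration (not part of the original code), the Cholesky factors returned above map back to covariance matrices via Sigma_k = L_k L_k^T. The sketch below builds factors with the same (K, D, D) layout from random strictly lower triangles and positive diagonals.

import numpy as np

K, D = 3, 2
lower = np.tril(np.random.randn(K, D, D), k=-1)        # strictly lower parts
diag = np.exp(np.random.randn(K, D))                   # positive diagonals
chols = lower + np.stack([np.diag(d) for d in diag])   # (K, D, D) Cholesky factors
covs = np.einsum('kij,klj->kil', chols, chols)         # Sigma_k = L_k @ L_k.T
assert np.allclose(covs[0], chols[0] @ chols[0].T)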
    def callback(params, t, g):
        print("Iteration {} lower bound {}".format(t, -objective(params, t)))

        # Sample functions from posterior.
        rs = npr.RandomState(0)
        mean, log_std = unpack_params(params)
        sample_weights = rs.randn(10, num_weights) * np.exp(log_std) + mean
        plot_inputs = np.linspace(-8, 8, num=400)
        outputs = predictions(sample_weights, np.expand_dims(plot_inputs, 1))

        # Plot data and functions.
        plt.cla()
        ax.plot(inputs.ravel(), targets.ravel(), 'bx')
        ax.plot(plot_inputs, outputs[:, :, 0].T)
        ax.set_ylim([-2, 3])
        plt.draw()
        plt.pause(1.0/60.0)
def rbf_covariance(kernel_params, x, xp):
    output_scale = np.exp(kernel_params[0])
    lengthscales = np.exp(kernel_params[1:])
    diffs = np.expand_dims(x /lengthscales, 1)\
          - np.expand_dims(xp/lengthscales, 0)
    return output_scale * np.exp(-0.5 * np.sum(diffs**2, axis=2))
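A usage sketch for rbf_covariance (assuming np is NumPy or autogradwithbay's NumPy wrapper, as in the snippet): build the kernel matrix on a 1-D grid and draw one function from the corresponding zero-mean GP prior.

import numpy as np
import numpy.random as npr

x = np.linspace(-5, 5, 50).reshape(-1, 1)
kernel_params = np.log(np.array([1.0, 1.0]))      # output scale 1.0, lengthscale 1.0
K = rbf_covariance(kernel_params, x, x)           # (50, 50) covariance matrix
prior_draw = npr.RandomState(0).multivariate_normal(
    np.zeros(len(x)), K + 1e-6 * np.eye(len(x)))  # small jitter for stability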
def unpack_params(params):
    mean        = params[0]
    cov_params  = params[2:]
    noise_scale = np.exp(params[1]) + 0.001
    return mean, cov_params, noise_scale
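Tying the two helpers together, a sketch under the assumption that params packs [mean, log noise, log output scale, log lengthscale], which is how unpack_params slices it: the unpacked pieces give a constant GP mean, the kernel hyperparameters, and an observation-noise term added to the diagonal.

import numpy as np

params = np.array([0.0, np.log(0.1), 0.0, 0.0])  # mean, log noise, kernel params
mean, cov_params, noise_scale = unpack_params(params)
x = np.linspace(-3, 3, 20).reshape(-1, 1)
cov_y_y = rbf_covariance(cov_params, x, x) + noise_scale * np.eye(len(x))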
def gradient_product(g):
    # This closure multiplies g with the Jacobian of logsumexp (d_ans/d_x).
    # Because autogradwithbay uses reverse-mode differentiation, g contains
    # the gradient of the objective w.r.t. ans, the output of logsumexp.
    return np.full(x.shape, g) * np.exp(x - np.full(x.shape, ans))
def logsumexp(x):
    """Numerically stable log(sum(exp(x))), also defined in scipy.misc"""
    max_x = np.max(x)
    return max_x + np.log(np.sum(np.exp(x - max_x)))
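A small check of the stability claim (not from the original code): the naive formula overflows for large inputs, while the shifted version above returns the exact value.

import numpy as np

x = np.array([1000.0, 1000.0])
# np.log(np.sum(np.exp(x))) would overflow to inf here.
print(logsumexp(x))          # 1000.0 + log(2) ~= 1000.6931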
Example No. 11
"""Gradients of the normal distribution."""

from __future__ import absolute_import
import scipy.stats
import autogradwithbay.numpy as anp

from autogradwithbay.core import primitive
from autogradwithbay.numpy.numpy_grads import unbroadcast

pdf = primitive(scipy.stats.norm.pdf)
cdf = primitive(scipy.stats.norm.cdf)
logpdf = primitive(scipy.stats.norm.logpdf)
logcdf = primitive(scipy.stats.norm.logcdf)

pdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x,     lambda g: -g * ans * (x - loc) / scale**2))
pdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc,   lambda g:  g * ans * (x - loc) / scale**2), argnum=1)
pdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g:  g * ans * (((x - loc)/scale)**2 - 1.0)/scale), argnum=2)

cdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x,     lambda g:  g * pdf(x, loc, scale)))
cdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc,   lambda g: -g * pdf(x, loc, scale)), argnum=1)
cdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: -g * pdf(x, loc, scale)*(x-loc)/scale), argnum=2)

logpdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x,     lambda g: -g * (x - loc) / scale**2))
logpdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc,   lambda g:  g * (x - loc) / scale**2), argnum=1)
logpdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g:  g * (-1.0/scale + (x - loc)**2/scale**3)), argnum=2)

logcdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x,     lambda g:  g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))))
logcdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc,   lambda g: -g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))), argnum=1)
logcdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: -g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))*(x-loc)/scale), argnum=2)
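As a sanity check on the formulas above (a sketch, not part of the library): the gradient of the Gaussian log-density with respect to x is -(x - loc) / scale**2, which agrees with a central finite difference computed directly from scipy.

import scipy.stats

x, loc, scale, eps = 1.3, 0.5, 2.0, 1e-6
analytic = -(x - loc) / scale**2
numeric = (scipy.stats.norm.logpdf(x + eps, loc, scale)
           - scipy.stats.norm.logpdf(x - eps, loc, scale)) / (2 * eps)
assert abs(analytic - numeric) < 1e-6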
Example No. 12
multigammaln.defgrad(lambda ans, a, d:
    lambda g: g * np.sum(digamma(np.expand_dims(a, -1) - np.arange(d)/2.), -1))
multigammaln.defgrad_is_zero(argnums=(1,))


### Bessel functions ###

j0 = primitive(scipy.special.j0)
y0 = primitive(scipy.special.y0)
j1 = primitive(scipy.special.j1)
y1 = primitive(scipy.special.y1)
jn = primitive(scipy.special.jn)
yn = primitive(scipy.special.yn)

j0.defgrad(lambda ans, x: lambda g: -g * j1(x))
y0.defgrad(lambda ans, x: lambda g: -g * y1(x))
j1.defgrad(lambda ans, x: lambda g: g * (j0(x) - jn(2, x)) / 2.0)
y1.defgrad(lambda ans, x: lambda g: g * (y0(x) - yn(2, x)) / 2.0)
jn.defgrad_is_zero(argnums=(0,))
yn.defgrad_is_zero(argnums=(0,))
jn.defgrad(lambda ans, n, x: lambda g: g * (jn(n - 1, x) - jn(n + 1, x)) / 2.0, argnum=1)
yn.defgrad(lambda ans, n, x: lambda g: g * (yn(n - 1, x) - yn(n + 1, x)) / 2.0, argnum=1)


### Error Function ###
inv_root_pi = 0.56418958354775627928
erf = primitive(scipy.special.erf)
erfc = primitive(scipy.special.erfc)

erf.defgrad(lambda ans, x: lambda g: 2.*g*inv_root_pi*np.exp(-x**2))
erfc.defgrad(lambda ans, x: lambda g: -2.*g*inv_root_pi*np.exp(-x**2))
    return r, p(r)


if __name__ == "__main__":
    # generate data
    npr.seed(0)
    data = negbin_sample(r=5, p=0.5, size=1000)

    # fit likelihood-extremizing parameters
    r, p = fit_maxlike(data, r_guess=1)

    # report fit
    print('Fit parameters:')
    print('r={r}, p={p}'.format(r=r, p=p))

    print('Check that we are at a local stationary point:')
    loglike = lambda r, p: np.sum(negbin_loglike(r, p, data))
    grad_both = multigrad(loglike, argnums=[0,1])
    print(grad_both(r, p))

    import matplotlib.pyplot as plt
    xm = data.max()
    plt.figure()
    plt.hist(data, bins=np.arange(xm+1)-0.5, density=True, label='normalized data counts')
    plt.xlim(0,xm)
    plt.plot(np.arange(xm), np.exp(negbin_loglike(r, p, np.arange(xm))), label='maxlike fit')
    plt.xlabel('k')
    plt.ylabel('p(k)')
    plt.legend(loc='best')
    plt.show()
Example No. 14
def make_grad_logsumexp(ans, x, axis=None, b=1.0, keepdims=False):
    repeater, _ = repeat_to_match_shape(x, axis, keepdims)
    return lambda g: repeater(g) * b * anp.exp(x - repeater(ans))
Example No. 15
def tanh(x):
    # tanh(x) written with a single exponential: (1 - e^{-2x}) / (1 + e^{-2x}).
    return (1.0 - np.exp(-2 * x)) / (1.0 + np.exp(-2 * x))

def build_toy_dataset(n_data=80, noise_std=0.1, D=1):
    rs = npr.RandomState(0)
    inputs = np.concatenate([np.linspace(0, 3, num=n_data // 2), np.linspace(6, 8, num=n_data // 2)])
    targets = np.cos(inputs) + rs.randn(n_data) * noise_std
    inputs = (inputs - 4.0) / 2.0
    inputs = inputs.reshape((len(inputs), D))
    targets = targets.reshape((len(targets), D)) / 2.0
    return inputs, targets
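Usage sketch (assuming npr is numpy.random, as elsewhere in these snippets): the toy set is two disjoint clusters of noisy cosine values, recentred and rescaled, with shapes (n_data, D).

import numpy as np
import numpy.random as npr

inputs, targets = build_toy_dataset(n_data=80, noise_std=0.1, D=1)
print(inputs.shape, targets.shape)   # (80, 1) (80, 1)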


if __name__ == "__main__":

    # Specify inference problem by its unnormalized log-posterior.
    rbf = lambda x: np.exp(-x ** 2)
    relu = lambda x: np.maximum(x, 0.0)

    # Implement a 3-hidden layer neural network.
    num_weights, predictions, logprob = make_nn_funs(layer_sizes=[1, 20, 20, 20, 1], nonlinearity=rbf)

    inputs, targets = build_toy_dataset()
    objective = lambda weights, t: -logprob(weights, inputs, targets)

    # Set up figure.
    fig = plt.figure(figsize=(12, 8), facecolor="white")
    ax = fig.add_subplot(111, frameon=False)
    plt.show(block=False)

    def callback(params, t, g):
        print("Iteration {} log likelihood {}".format(t, -objective(params, t)))
Example No. 17
def logsumexp(X, axis, keepdims=False):
    max_X = np.max(X)
    return max_X + np.log(np.sum(np.exp(X - max_X), axis=axis, keepdims=keepdims))
Example No. 18
def logsumexp(X, axis=1):
    max_X = np.max(X)
    return max_X + np.log(np.sum(np.exp(X - max_X), axis=axis, keepdims=True))
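A typical use of this keepdims variant (a sketch, not from the original file): normalize each row of a matrix of unnormalized log-probabilities; the (N, 1) result broadcasts against the (N, K) input.

import numpy as np

logits = np.random.randn(4, 3)
log_probs = logits - logsumexp(logits)   # axis=1, keepdims=True
assert np.allclose(np.exp(log_probs).sum(axis=1), 1.0)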
Example No. 19
    # Wrap function to only have one argument, for scipy.minimize.
    def training_loss(weights):
        return -loglike_fun(weights, train_inputs, train_inputs)

    def callback(weights):
        print("Train loss:", training_loss(weights))
        print_training_prediction(weights)

    # Build gradient of loss function using autogradwithbay.
    training_loss_and_grad = value_and_grad(training_loss)

    init_weights = npr.randn(num_weights) * param_scale
    # Check the gradients numerically, just to be safe
    quick_grad_check(training_loss, init_weights)

    print("Training LSTM...")
    result = minimize(training_loss_and_grad, init_weights, jac=True, method='CG',
                      options={'maxiter':train_iters}, callback=callback)
    trained_weights = result.x

    print()
    print("Generating text from RNN...")
    num_letters = 30
    for t in range(20):
        text = ""
        for i in range(num_letters):
            seqs = string_to_one_hot(text, output_size)[:, np.newaxis, :]
            logprobs = pred_fun(trained_weights, seqs)[-1].ravel()
            text += chr(npr.choice(len(logprobs), p=np.exp(logprobs)))
        print(text)