def update_nn(init_var_params, batch_data, batch_labels):
    log_posterior = lambda weights, t: logprob(weights, batch_data, batch_labels)

    # Build variational objective.
    objective, gradient, unpack_params = \
        black_box_variational_inference(log_posterior, num_weights, num_samples=20)

    variational_params = adam(gradient, init_var_params, step_size=0.01, num_iters=50)

    return variational_params
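# Hedged usage sketch (not part of the original example): one way update_nn
# could be driven from a plain minibatch loop. `data` and `labels` stand in
# for the full training arrays and are hypothetical names.
def train(variational_params, data, labels, batch_size=32, num_epochs=5):
    for epoch in range(num_epochs):
        for start in range(0, data.shape[0], batch_size):
            batch_data = data[start:start + batch_size]
            batch_labels = labels[start:start + batch_size]
            # Each call runs a short BBVI/adam optimization on one minibatch,
            # warm-starting from the previous variational parameters.
            variational_params = update_nn(variational_params,
                                           batch_data, batch_labels)
    return variational_params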
Example #2
def update_nn(init_var_params, batch_data, batch_labels, iteration):
    log_posterior = lambda weights, t: logprob(weights, batch_data,
                                               batch_labels)

    # Build variational objective.
    objective, gradient, unpack_params = \
        black_box_variational_inference(log_posterior, num_weights, num_samples=20)

    variational_params = adam(gradient,
                              init_var_params,
                              step_size=0.1,
                              num_iters=10)

    return variational_params, objective
Example #3
    coverage_df[input_idx, :] = 1  # indicate the training data index
    for b in range(B):
        print(b)
        # Specify inference problem by its unnormalized log-posterior.
        rbf = lambda x: np.exp(-x**2)
        num_weights, predictions, logprob = \
            make_nn_funs(layer_sizes=[1, 2, 2, 1], L2_reg=0.1,
                         noise_variance=0.01, nonlinearity=rbf)

        inputs, targets, tot_inputs, tot_targets, input_idx = build_toy_dataset(
            n_data=40, noise_std=tau, type=t)
        log_posterior = lambda weights, t: logprob(weights, inputs, targets)

        # Build variational objective.
        objective, gradient, unpack_params = \
            black_box_variational_inference(log_posterior, num_weights,
                                            num_samples=20)

        # Set up figure.
        fig = plt.figure(figsize=(12, 8), facecolor='white')
        ax = fig.add_subplot(111, frameon=False)
        plt.ion()
        plt.show(block=False)

        def callback(params, t, g):
            print("Iteration {} lower bound {}".format(t,
                                                       -objective(params, t)))

            # Sample functions from posterior.
            rs = npr.RandomState(0)
            mean, log_std = unpack_params(params)
            # rs = npr.RandomState(0)

Example #4
if __name__ == '__main__':

    # Specify inference problem by its unnormalized log-posterior.
    rbf = lambda x: norm.pdf(x, 0, 1)
    num_weights, predictions, logprob = \
        make_nn_funs(layer_sizes=[1, 20, 1], L2_reg=0.01,
                     noise_variance=0.01, nonlinearity=rbf)

    inputs, targets = build_toy_dataset()
    log_posterior = lambda weights, t: logprob(weights, inputs, targets)

    # Build variational objective.
    objective, gradient, unpack_params = \
        black_box_variational_inference(log_posterior, num_weights,
                                        num_samples=20)

    # Set up figure.
    fig = plt.figure(figsize=(8, 8), facecolor='white')
    ax = fig.add_subplot(111, frameon=False)
    plt.ion()
    plt.show(block=False)

    def callback(params, t, g):
        print("Iteration {} lower bound {}".format(t, -objective(params, t)))

        # Sample functions from posterior.
        # unpack_params returns the variational mean and log-std vectors.
        mean, log_std = unpack_params(params)
        rs = npr.RandomState(0)
        sample_weights = rs.randn(10, num_weights) * np.exp(log_std) + mean
        plot_inputs = np.linspace(-8, 8, num=200)
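        # Sketch of a plausible continuation (not part of the original
        # snippet), assuming `predictions` from make_nn_funs evaluates a
        # batch of weight samples on a column of inputs.
        outputs = predictions(sample_weights, np.expand_dims(plot_inputs, 1))

        # Plot the training data and the sampled posterior functions.
        plt.cla()
        ax.plot(inputs.ravel(), targets.ravel(), 'bx')
        ax.plot(plot_inputs, outputs[:, :, 0].T)
        plt.draw()
        plt.pause(1.0 / 60.0)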
Example #5
    # Specify an inference problem by its unnormalized log-density
    # (it's difficult to see the benefit of natural gradients in low dimensions).
    # Model parameters are a mean and a log_sigma.
    np.random.seed(42)
    obs_dim = 20
    Y = np.random.randn(obs_dim, obs_dim).dot(np.random.randn(obs_dim))

    def log_density(x, t):
        mu, log_sigma = x[:, :obs_dim], x[:, obs_dim:]
        sigma_density = np.sum(norm.logpdf(log_sigma, 0, 1.35), axis=1)
        mu_density = np.sum(norm.logpdf(Y, mu, np.exp(log_sigma)), axis=1)
        return sigma_density + mu_density

    # Build variational objective.
    D = obs_dim * 2  # dimension of our posterior
    objective, gradient, unpack_params = \
        black_box_variational_inference(log_density, D, num_samples=2000)

    # Define the natural gradient.
    #   The natural gradient of the ELBO is the gradient of the ELBO,
    #   preconditioned by the inverse Fisher information matrix.  For a
    #   diagonal Gaussian, the Fisher is a diagonal matrix that is a simple
    #   function of the variance.  Intuitively, the statistical distance
    #   created by perturbing the mean of an independent Gaussian is
    #   determined by how wide the distribution is along that dimension:
    #   the wider the distribution, the less sensitive the statistical
    #   distance is to perturbations of the mean; the narrower the
    #   distribution, the more the statistical distance changes when you
    #   perturb the mean.  (Imagine an extremely narrow Gaussian, basically
    #   a spike: the KL between this Gaussian and a Gaussian $\epsilon$ away
    #   in location can be large, since moving the Gaussian can eliminate
    #   most of the overlap in support, which corresponds to a greater
    #   statistical distance.)
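    # A sketch of one way to implement this (an assumption, not the original
    # code): the variational parameters are packed as [mean, log_std], as in
    # the examples above, so the Fisher diagonal is 1/sigma^2 for the mean
    # block and a constant 2 for the log_std block.
    def fisher_diag(lam):
        mean, log_std = unpack_params(lam)
        return np.concatenate([np.exp(-2.0 * log_std),
                               2.0 * np.ones(len(log_std))])

    # Precondition the ELBO gradient by the inverse (diagonal) Fisher to get
    # the natural gradient, which can be fed to the same optimizer as before.
    natural_gradient = lambda lam, i: gradient(lam, i) / fisher_diag(lam)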