Example #1
    def df(self, x, mu, sigma):
        r"""

        Density function for the LogNormal Distribution:

        .. math::
            f(x) = \frac{1}{x \sigma \sqrt{2\pi}}e^{-\frac{1}{2}\left ( \frac{\ln x - \mu}{\sigma} \right )^{2}}

        Parameters
        ----------

        x : numpy array or scalar
            The values at which the function will be calculated 
        mu : numpy array or scalar
            The location parameter for the LogNormal distribution
        sigma : numpy array or scalar
            The scale parameter for the LogNormal distribution

        Returns
        -------

        df : scalar or numpy array 
            The value(s) of the density function at x.

        Examples
        --------
        >>> import numpy as np
        >>> from surpyval import LogNormal
        >>> x = np.array([1, 2, 3, 4, 5])
        >>> LogNormal.df(x, 3, 4)
        array([0.07528436, 0.04222769, 0.02969364, 0.02298522, 0.01877747])
        """
        return 1. / x * norm.pdf(np.log(x), mu, sigma)
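A quick check of the change-of-variables identity used in the return line above, assuming scipy.stats is available; scipy's lognorm takes s=sigma and scale=exp(mu), so both routes should agree on the docstring's values:

import numpy as np
from scipy.stats import norm, lognorm

x = np.array([1, 2, 3, 4, 5])
mu, sigma = 3, 4

# f(x) = (1/x) * phi(ln x; mu, sigma), exactly as in the return statement above
manual = 1.0 / x * norm.pdf(np.log(x), mu, sigma)
reference = lognorm.pdf(x, s=sigma, scale=np.exp(mu))

print(np.allclose(manual, reference))  # True
print(manual)  # matches the array shown in the docstring example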
Example #2
    def hf(self, x, mu, sigma):
        r"""

        Instantaneous hazard rate for the Normal Distribution:

        .. math::
            h(x) = \frac{\frac{1}{\sigma \sqrt{2\pi}}e^{-\frac{1}{2}\left ( \frac{x - \mu}{\sigma} \right )^{2}}}{1 - \Phi \left( \frac{x - \mu}{\sigma} \right )}

        Parameters
        ----------

        x : numpy array or scalar
            The values at which the function will be calculated 
        mu : numpy array or scalar
            The location parameter for the Normal distribution
        sigma : numpy array or scalar
            The scale parameter for the Normal distribution

        Returns
        -------

        hf : scalar or numpy array 
            The value(s) of the instantaneous hazard rate function at x.

        Examples
        --------
        >>> import numpy as np
        >>> from surpyval import Normal
        >>> x = np.array([1, 2, 3, 4, 5])
        >>> Normal.hf(x, 3, 4)
        array([0.12729011, 0.16145984, 0.19947114, 0.24088849, 0.28526944])
        """
        return norm.pdf(x, mu, sigma) / self.sf(x, mu, sigma)
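The same values can be reproduced directly from scipy.stats, since the hazard rate is the density divided by the survival function; a minimal sketch assuming scipy is installed:

import numpy as np
from scipy.stats import norm

x = np.array([1, 2, 3, 4, 5])
mu, sigma = 3, 4

# h(x) = f(x) / S(x); norm.sf is the survival function 1 - Phi((x - mu) / sigma)
hazard = norm.pdf(x, mu, sigma) / norm.sf(x, mu, sigma)
print(hazard)  # should match the array in the docstring example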
Example #3
    def df(self, x, mu, sigma):
        r"""

        Density function for the Normal Distribution:

        .. math::
            f(x) = \frac{1}{\sigma \sqrt{2\pi}}e^{-\frac{1}{2}\left ( \frac{x - \mu}{\sigma} \right )^{2}}

        Parameters
        ----------

        x : numpy array or scalar
            The values at which the function will be calculated 
        mu : numpy array or scalar
            The location parameter for the Normal distribution
        sigma : numpy array or scalar
            The scale parameter for the Normal distribution

        Returns
        -------

        df : scalar or numpy array 
            The value(s) of the density function at x.

        Examples
        --------
        >>> import numpy as np
        >>> from surpyval import Normal
        >>> x = np.array([1, 2, 3, 4, 5])
        >>> Normal.df(x, 3, 4)
        array([0.08801633, 0.09666703, 0.09973557, 0.09666703, 0.08801633])
        """
        return norm.pdf(x, mu, sigma)
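For reference, the closed-form expression in the math block can be evaluated directly and compared with norm.pdf; a small sketch assuming numpy and scipy.stats:

import numpy as np
from scipy.stats import norm

x = np.array([1, 2, 3, 4, 5])
mu, sigma = 3, 4

closed_form = np.exp(-0.5 * ((x - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
print(np.allclose(closed_form, norm.pdf(x, mu, sigma)))  # True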
Example #4
def callback(params, t, g):
    print("Iteration {0:} " \
          "lower bound {1:.4f}; " \
          "mean {2:.4f} [{3:.4f}]; " \
          "variance {4:.4f}[{5:.4f}]".format(
        t,
        -elbo(params, t),
        params[0],
        true_mean,
        np.exp(params[1]) ,
        true_std))
    ax1.clear()
    ax1.set_xlim([-10, 10])
    ax1.set_ylim(bottom=0)
    mu, log_std = params[0], params[1]
    xs = np.linspace(-10, 10, 800)
    ys = norm.pdf(xs, mu, np.exp(log_std))
    ax1.plot(xs, ys, color='#f3c273', linewidth=2.0)

    ys = np.exp(logpx(xs))
    ax1.fill_between(xs, 0, ys, color='#aaaaaa')
    gray_patch = Patch(color='#aaaaaa', label='$p(x)$')
    yellow_patch = Patch(color='#f3c273', label='$q(x)$')
    ax1.legend(handles=[gray_patch, yellow_patch])

    ax2.clear()
    ax2.set_xlabel('Mean')
    ax2.set_ylabel('Variance')
    ax2.set_zlabel('Negative ELBO')
    ax2.set_zlim([-100, 150])
    ax2.plot_surface(X,
                     Y,
                     Z,
                     cmap=cm.coolwarm,
                     shade=True,
                     cstride=1,
                     rstride=1,
                     zorder=1)
    ax2.contour(X,
                Y,
                Z,
                zdir='z',
                offset=-100,
                cmap=cm.coolwarm,
                zorder=0,
                levels=np.linspace(0, 30, 30))

    a = Arrow(params[0], params[1], -g[0], -g[1], width=0.5, zorder=2)
    ax2.add_patch(a)
    art3d.pathpatch_2d_to_3d(a, z=-100, zdir="z")
    # ax2.plot([params[0], params[0]],
    #          [params[1], params[1]],
    #          [-50, elbo(params, 0)], '--', linewidth=2.0, zorder=5)
    # ax2.scatter(params[0], params[1], elbo(params, 0), marker='o', s=100)
    plt.draw()
    plt.pause(1.0 / 30.0)
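The callback above only visualises the fit; elbo and logpx are defined elsewhere in the script. A minimal sketch of the kind of single-Gaussian ELBO estimator it presumably plots, using the reparameterisation trick (the function name and sample count here are illustrative assumptions):

import autograd.numpy as np
import autograd.numpy.random as npr
from autograd.scipy.stats import norm

def elbo_estimate(params, logpx, num_samples=100):
    mu, log_std = params[0], params[1]
    rs = npr.RandomState(0)
    # Reparameterisation: x = mu + std * eps with eps ~ N(0, 1)
    samples = rs.randn(num_samples) * np.exp(log_std) + mu
    # Monte Carlo estimate of E_q[log p(x)] - E_q[log q(x)]
    return np.mean(logpx(samples) - norm.logpdf(samples, mu, np.exp(log_std)))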
Example #5
    def callback(hyper_weights, opt_iteration, g):
        """Do whatever work is desired on each optimization iteration.
        Draws graphs, prints information, and stores information.

        :param hyper_weights: The weights ([[float]]) of the hypernetwork.
        :param opt_iteration: The current iteration of optimization.
        :param g: The gradient ([[float]]) of the optimizer.
        :return: None.
        """
        global log_likelihoods, valid_loss, test_loss, grad_norms_hyper, grad_norms_hypernet, global_opt_iteration
        global hyper_cur
        log_likelihood = hyper_train_objective(hyper_weights, hyper_cur)
        log_likelihoods[global_opt_iteration] = log_likelihood  # Store the training loss.
        weights_cur = hypernet(hyper_weights, hyper_cur)
        train_performance[global_opt_iteration] = log_likelihood - hyper_loss(weights_cur, hyper_cur)
        valid_loss[global_opt_iteration] = hyper_valid_objective(hyper_weights, hyper_cur)
        test_loss[global_opt_iteration] = hyper_test_objective(hyper_weights, hyper_cur)
        grad_norm = np.sum([np.sum([np.sum(np.abs(weight_or_bias)) for weight_or_bias in layer]) for layer in g])
        grad_norms_hypernet[global_opt_iteration] = grad_norm
        grad_norms_hyper[global_opt_iteration] = grad_norms_hyper[global_opt_iteration-1]
        global_opt_iteration += 1
        print("Iteration {} Loss {} Grad L1 Norm {}".format(opt_iteration, log_likelihood, grad_norm))

        if global_opt_iteration % graph_mod == 0:  # Only print on every iteration that is a multiple of graph_mod.
            [ax.cla() for ax in axs]  # Clear all of the axes.
            axs[0].set_xlabel(r'Hyperparameter $\lambda$')
            axs[0].set_ylabel(r'Loss $\mathcal{L}$')

            for cur, hyper in enumerate(learned_hyper_range):
                hyper_train_loss[cur] = hyper_train_objective(hyper_weights, hyper)
                weights = hypernet(hyper_weights, hyper)
                hyper_train_performance[cur] = hyper_train_loss[cur] - hyper_loss(weights, hyper)
                hyper_valid_loss[cur] = hyper_valid_objective(hyper_weights, hyper)
                hyper_test_loss[cur] = hyper_test_objective(hyper_weights, hyper)

            axs[0].plot(real_hyper_range, real_train_loss, 'bx', ms=28, label='Train loss of optimized weights')
            axs[0].plot(learned_hyper_range, hyper_train_loss, 'b-', label='Train loss of hypernetwork weights')
            axs[0].set_ylim([-1.5, 3.8])

            axs[0].plot(real_hyper_range, real_valid_loss, 'rx', ms=28, label='Valid. loss of optimized weights')
            axs[0].plot(learned_hyper_range, hyper_valid_loss, 'r-', label='Valid. loss of hypernetwork weights')
            min_hyper_found = 1.836  # Known minimum from doing a search with 1000 points over this range.
            axs[0].axvline(x=min_hyper_found, c='k', linestyle='dashed', label=r'Optimal hyperparameter $\lambda$')

            pdf_range = np.linspace(hyper_cur - 0.5, hyper_cur + 0.5, 100)
            axs[0].plot(pdf_range, norm.pdf(pdf_range, loc=hyper_cur, scale=0.06) / 4.0 + axs[0].get_ylim()[0], c='g',
                        label=r'$p (\lambda | \hat{\lambda})$')

            [ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.45),
                       borderaxespad=0.0, fancybox=True, framealpha=0.0, fontsize=28)
             for ax in axs]  # Create a legend for all the axes.
            setup_ax_and_save(axs, fig, 'hypernets_local_small')
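The L1 gradient norm above walks the nested weight structure by hand; the same number can be obtained with autograd's flatten helper (assuming autograd.misc.flatten is available in the installed autograd version):

from autograd.misc.flatten import flatten
import autograd.numpy as np

def grad_l1_norm(g):
    flat_g, _ = flatten(g)         # concatenates every weight and bias gradient
    return np.sum(np.abs(flat_g))  # same value as the nested-sum expression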
Example #6
def expected_new_max(mean, std, max_so_far):
    return (max_so_far
            - (mean - max_so_far) * norm.cdf(mean, max_so_far, std)
            + std * norm.pdf(mean, max_so_far, std))
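For reference, the closed form E[max(X, m)] = m + (mean - m) * Phi((mean - m) / std) + std * phi((mean - m) / std) for X ~ N(mean, std^2) can be sanity-checked by Monte Carlo; a sketch assuming scipy.stats, with illustrative sample values:

import numpy as np
from scipy.stats import norm

mean, std, max_so_far = 0.3, 1.2, 1.0

closed_form = (max_so_far
               + (mean - max_so_far) * norm.cdf(mean, max_so_far, std)
               + std * norm.pdf(mean, max_so_far, std))

samples = np.random.default_rng(0).normal(mean, std, 1_000_000)
print(closed_form, np.maximum(samples, max_so_far).mean())  # agree to a few decimals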
Example #7
def fit_nn_reg(X,
               y,
               hidden_layer_sizes,
               batch_size,
               epochs,
               X_test,
               y_test,
               no_samples=20,
               mean_y_train=0.0,
               std_y_train=1.0,
               nonln='relu',
               weight_prior_std=1.0,
               noise_var=0.1,
               plot_toy=False,
               init_w=None):

    layer_sizes = np.array([X.shape[1]] + hidden_layer_sizes + [1])
    if nonln == 'tanh':
        nonlinearity = np.tanh
    elif nonln == 'relu':
        nonlinearity = lambda x: np.maximum(x, 0.0)
    elif nonln == 'rbf':
        nonlinearity = lambda x: norm.pdf(x, 0, 1)
    elif nonln == 'sin':
        nonlinearity = lambda x: np.sin(x)
    elif nonln == 'sigmoid':
        nonlinearity = lambda x: 1 / (1 + np.exp(-x))
    else:
        raise ValueError("Unknown nonlinearity: {}".format(nonln))


    num_weights, elbo, predictions, get_error_and_ll, unpack_layers, prediction_test, unpack_params \
        = make_nn_funs(layer_sizes, nonlinearity=nonlinearity, weight_prior_std=weight_prior_std, noise_var=noise_var)
    elbo_grad = grad(elbo)
    prior_var = 1.0
    N_train = X.shape[0]

    print("    Epoch      |   train RMSE   |   test RMSE")

    if plot_toy:
        # Set up figure.
        fig = plt.figure(figsize=(12, 8), facecolor='white')
        ax = fig.add_subplot(111, frameon=True)
        plt.show(block=False)

    def print_perf(epoch, w):
        num_samples_test = 500
        pred_mean, pred_var, rmse_train, ll = get_error_and_ll(
            w, X, y, location=0.0, scale=1.0, num_samples=num_samples_test)
        pred_mean, pred_var, rmse_test, ll = get_error_and_ll(
            w,
            X_test,
            y_test,
            location=0.0,
            scale=1.0,
            num_samples=num_samples_test)
        print("{0:15}|{1:15}|{2:15}|".format(epoch, rmse_train, rmse_test))

        if plot_toy:
            # # Plot data and functions.
            # plt.cla()
            # ax.plot(X.ravel(), y.ravel(), 'bx')
            # plot_inputs = np.reshape(np.linspace(-7, 7, num=300), (300,1))
            # outputs_mean, outputs_var = prediction_test(w, plot_inputs, num_samples_test)
            # ax.plot(plot_inputs, outputs_mean, 'b-')
            # ax.plot(plot_inputs, outputs_mean + 2*np.sqrt(outputs_var), 'b-')
            # ax.plot(plot_inputs, outputs_mean - 2*np.sqrt(outputs_var), 'b-')
            # ax.set_ylim([-1, 1])
            # plt.draw()
            # plt.pause(1.0/60.0)

            # Sample functions from posterior.
            rs = npr.RandomState(0)
            mean, std = unpack_params(w)
            #rs = npr.RandomState(0)
            sample_weights = rs.randn(10, num_weights) * std + mean
            plot_inputs = np.linspace(-7, 7, num=400)
            outputs = predictions(sample_weights,
                                  np.expand_dims(plot_inputs, 1))

            # Plot data and functions.
            plt.cla()
            ax.plot(X.ravel(), y.ravel(), 'bx')
            ax.plot(plot_inputs, outputs[:, :, 0].T)
            ax.set_ylim([-2, 3])
            plt.draw()
            plt.pause(1.0 / 60.0)

    # Train with adam
    batch_idxs = make_batches(X.shape[0], batch_size)

    # Initialize parameters
    rs = npr.RandomState(0)
    if init_w is None:
        init_mean = 0.1 * rs.randn(num_weights)
    else:
        init_mean = init_w

    init_log_std = -2 * np.ones(num_weights)
    init_var_params = np.concatenate([init_mean, init_log_std])
    w = init_var_params

    m1 = 0
    m2 = 0
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-8
    alpha = 5e-3
    t = 0
    elbo_vec = []
    for epoch in range(epochs):
        permutation = np.random.choice(range(X.shape[0]),
                                       X.shape[0],
                                       replace=False)
        # print_perf(epoch, w)
        for idxs in batch_idxs:
            t += 1  # advance Adam's time step once per mini-batch
            eb = elbo(w,
                      weight_prior_std**2,
                      X[permutation[idxs]],
                      y[permutation[idxs]],
                      N_train,
                      num_samples=no_samples)
            elbo_vec.append(eb)
            print(eb)
            grad_w = elbo_grad(w,
                               weight_prior_std**2,
                               X[permutation[idxs]],
                               y[permutation[idxs]],
                               N_train,
                               num_samples=no_samples)
            m1 = beta1 * m1 + (1 - beta1) * grad_w
            m2 = beta2 * m2 + (1 - beta2) * grad_w**2
            m1_hat = m1 / (1 - beta1**t)
            m2_hat = m2 / (1 - beta2**t)
            w -= alpha * m1_hat / (np.sqrt(m2_hat) + epsilon)

    print_perf(epochs - 1, w)
    return w, get_error_and_ll, prediction_test, unpack_params, elbo_vec
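make_batches above is assumed to return a list of index ranges covering the training set; a minimal sketch of such a helper (the name matches the call sites, the body is an assumption):

def make_batches(n_data, batch_size):
    # Consecutive index slices of length batch_size covering range(n_data)
    return [slice(i, min(i + batch_size, n_data))
            for i in range(0, n_data, batch_size)]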
Example #8
def build_toy_dataset(n_data=40, noise_std=0.1):
    D = 1
    rs = npr.RandomState(0)
    inputs  = np.concatenate([np.linspace(0, 2, num=n_data // 2),
                              np.linspace(6, 8, num=n_data // 2)])
    targets = np.cos(inputs) + rs.randn(n_data) * noise_std
    inputs = (inputs - 5.0) / 4.0
    inputs  = inputs.reshape((len(inputs), D))
    targets = targets.reshape((len(targets), D))
    return inputs, targets


if __name__ == '__main__':

    # Specify inference problem by its unnormalized log-posterior.
    rbf = lambda x: norm.pdf(x, 0, 1)
    num_weights, predictions, logprob = \
        make_nn_funs(layer_sizes=[1, 20, 1], L2_reg=0.01,
                     noise_variance = 0.01, nonlinearity=rbf)

    inputs, targets = build_toy_dataset()
    log_posterior = lambda weights, t: logprob(weights, inputs, targets)

    # Build variational objective.
    objective, gradient, unpack_params = \
        black_box_variational_inference(log_posterior, num_weights,
                                        num_samples=20)

    # Set up figure.
    fig = plt.figure(figsize=(8,8), facecolor='white')
    ax = fig.add_subplot(111, frameon=False)
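The snippet stops after setting up the figure; a minimal continuation that would optimise the variational objective with autograd's adam optimiser (the initial values and step size here are illustrative assumptions):

import autograd.numpy as np
import autograd.numpy.random as npr
from autograd.misc.optimizers import adam

rs = npr.RandomState(0)
init_mean = rs.randn(num_weights)           # variational means
init_log_std = -5 * np.ones(num_weights)    # variational log standard deviations
init_var_params = np.concatenate([init_mean, init_log_std])

# adam calls gradient(params, t) once per iteration and returns the final params.
variational_params = adam(gradient, init_var_params,
                          step_size=0.1, num_iters=1000)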
Example #9
    # Network parameters
    layer_sizes = [784, 200, 100, 10]
    L2_reg = .01
    D = 784

    # Training parameters
    param_scale = 0.1
    learning_rate = 1e-2
    momentum = 0.9
    batch_size = 100
    num_epochs = 50

    # Load and process MNIST data (borrowing from Kayak)
    N_data, train_images, train_labels, test_images, test_labels = load_mnist()

    rbf = lambda x: norm.pdf(x, 0, 1)
    relu = lambda x: np.maximum(x, 0.0)

    # Make neural net functions
    num_weights, predictions, logprob, frac_err = \
        make_nn_funs(layer_sizes,
                     L2_reg,
                     noise_variance=0.01,
                     nonlinearity=relu)

    #loss_grad = grad(log_prob)

    # Initialize weights
    rs = npr.RandomState(0)
    num_samples = 20
    init_mean = rs.randn(num_weights)
Example #10
def fit_nn_reg(X,
               y,
               hidden_layer_sizes,
               batch_size,
               epochs,
               X_test,
               y_test,
               mean_y_train=0.0,
               std_y_train=1.0,
               nonln='relu',
               weight_prior_std=1.0,
               noise_var=0.1,
               plot_toy=False):

    layer_sizes = np.array([X.shape[1]] + hidden_layer_sizes + [1])
    if nonln == 'tanh':
        nonlinearity = np.tanh
    elif nonln == 'relu':
        nonlinearity = lambda x: np.maximum(x, 0.0)
    elif nonln == 'rbf':
        nonlinearity = lambda x: norm.pdf(x, 0, 1)
    elif nonln == 'sin':
        nonlinearity = lambda x: np.sin(x)
    elif nonln == 'sigmoid':
        nonlinearity = lambda x: 1 / (1 + np.exp(-x))
    else:
        raise ValueError("Unknown nonlinearity: {}".format(nonln))


    num_weights, predictions, logprob, get_error \
        = make_nn_funs(layer_sizes, nonlinearity=nonlinearity, weight_prior_std=weight_prior_std, noise_var=noise_var)
    logprob_grad = grad(logprob)
    Ntrain = X.shape[0]

    print("    Epoch      |   train RMSE   |   test RMSE")

    if plot_toy:
        # Set up figure.
        fig = plt.figure(figsize=(12, 8), facecolor='white')
        ax = fig.add_subplot(111, frameon=True)
        plt.show(block=False)

    def print_perf(epoch, w):
        rmse_train = get_error(w, X, y, location=0.0, scale=1.0)
        rmse_test = get_error(w, X_test, y_test, location=0.0, scale=1.0)
        print("{0:15}|{1:15}|{2:15}|".format(epoch, rmse_train, rmse_test))

        if plot_toy:
            # Plot data and functions.
            plt.cla()
            ax.plot(X.ravel(), y.ravel(), 'bx')
            plot_inputs = np.reshape(np.linspace(-7, 7, num=300), (300, 1))
            outputs = predictions(w, plot_inputs)
            ax.plot(plot_inputs, outputs)
            ax.set_ylim([-1, 1])
            plt.draw()
            plt.pause(1.0 / 60.0)

    # Train with adam
    batch_idxs = make_batches(X.shape[0], batch_size)

    # Initialize parameters
    rs = npr.RandomState(0)
    init_weights = 0.1 * rs.randn(num_weights)
    w = init_weights
    N_test = X_test.shape[0]

    m1 = 0
    m2 = 0
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-8
    alpha = 1e-2
    t = 0
    log_prob_vec = []
    for epoch in range(epochs):
        permutation = np.random.choice(range(X.shape[0]),
                                       X.shape[0],
                                       replace=False)
        print_perf(epoch, w)
        for idxs in batch_idxs:
            t += 1  # advance Adam's time step once per mini-batch
            lp = logprob(w, X[permutation[idxs]], y[permutation[idxs]],
                         X.shape[0])
            log_prob_vec.append(lp)
            grad_w = logprob_grad(w, X[permutation[idxs]],
                                  y[permutation[idxs]], X.shape[0])
            m1 = beta1 * m1 + (1 - beta1) * grad_w
            m2 = beta2 * m2 + (1 - beta2) * grad_w**2
            m1_hat = m1 / (1 - beta1**t)
            m2_hat = m2 / (1 - beta2**t)
            w += alpha * m1_hat / (np.sqrt(m2_hat) + epsilon)

    return w, np.array(log_prob_vec)
Example #11
import autograd.numpy as np
import autograd.numpy.random as npr
import matplotlib.pyplot as plt
from autograd.misc.optimizers import adam
from autograd import grad, elementwise_grad
from autograd.scipy.stats import multivariate_normal as mvn
import autograd.scipy.stats.norm as norm
rs = npr.RandomState(0)

egrad = elementwise_grad


def objective(p):
    return norm.cdf(p)


x = np.linspace(0, 7, 5)

# d/dp Phi(p) = phi(p): the derivative of the Gaussian CDF is the Gaussian PDF.
g = grad(objective)(1.0)
eg = egrad(objective)
print(eg(x), norm.pdf(x))   # elementwise gradient matches the PDF pointwise

print(g - norm.pdf(1.0))    # scalar check: should print a value very close to 0
Example #12
def diag_gaussian_density(x, mu, log_std):
    # Joint density of a diagonal Gaussian: the product of the per-dimension
    # marginal densities.
    return np.prod(norm.pdf(x, mu, np.exp(log_std)), axis=-1)
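In practice the log-density is usually preferable numerically; a minimal companion sketch, assuming the norm module in scope exposes logpdf (true for both scipy.stats.norm and autograd.scipy.stats.norm):

def diag_gaussian_log_density(x, mu, log_std):
    # Sum of per-dimension log-densities = log of the product of densities.
    return np.sum(norm.logpdf(x, mu, np.exp(log_std)), axis=-1)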