# Example #1
def embedding_lookup_train(variational_params,
                           ids,
                           name=None,
                           clip_alpha=None,
                           eps=common.EPSILON):
    R"""Embedding trained with variational dropout.

    In a standard embedding lookup, `ids` are looked-up in a list of embedding
    tensors. In an embedding trained with variational dropout, we lookup the
    parameters of the fully-factorized Gaussian posterior over the embedding
    tensor for each index in `ids` and draw a sample from this distribution
    that is returned.

    The `ids` argument is analogous to those in the standard
    tf.embedding_lookup.

    Args:
      variational_params: 2-tuple of Tensors, where the first tensor is the
        \theta values and the second contains the log of the \sigma^2 values.
      ids: A Tensor with type int32 or int64 containing the ids to be looked
        up in params.
      name: String. Name of the operator.
      clip_alpha: Int or None. If integer, we clip the log \alpha values
        to [-clip_alpha, clip_alpha]. If None, don't clip the values.
      eps: Small constant value to use in log and sqrt operations to avoid
        NaNs.

    Returns:
      The output Tensor result of the embedding lookup.

    Raises:
      RuntimeError: If the input variational_params is not a 2-tuple of
        Tensors that have the same shape.
    """
    # NOTE(review): the parameter defaults in the signature were restored
    # after a lossy extraction -- confirm them against existing callers.
    theta, log_sigma2 = _verify_variational_params(variational_params)

    # Before we do anything, lookup the mean and log variances of the embedding
    # vectors we are going to output and do all our operations in this lower
    # dimensional space
    embedding_theta = layer_utils.gather(theta, ids)
    embedding_log_sigma2 = layer_utils.gather(log_sigma2, ids)

    # NOTE: this is a truthiness check, so clip_alpha=0 disables clipping just
    # like clip_alpha=None does.
    if clip_alpha:
        # Compute the log_alphas and then compute the
        # log_sigma2 again so that we can clip on the
        # log alpha magnitudes
        embedding_log_alpha = common.compute_log_alpha(
            embedding_log_sigma2, embedding_theta, eps, clip_alpha)
        embedding_log_sigma2 = common.compute_log_sigma2(
            embedding_log_alpha, embedding_theta, eps)

    # Calculate the standard deviation from the log variance
    embedding_std = tf.sqrt(tf.exp(embedding_log_sigma2) + eps)

    # Output samples from the distribution over the embedding vectors:
    # mean + std * unit Gaussian noise, one noise value per output element.
    output_shape = tf.shape(embedding_std)
    embedding = embedding_theta + embedding_std * tf.random_normal(
        output_shape)
    return tf.identity(embedding, name=name)
# Example #2
def matmul_train(x,
                 variational_params,
                 transpose_a=False,
                 transpose_b=False,
                 clip_alpha=None,
                 eps=common.EPSILON):
    R"""Training computation for a variational matmul.

    In variational dropout we train a Bayesian neural network where we assume
    a fully-factorized Gaussian posterior and log uniform prior over the
    weights.

    During training, we need to sample weights from this distribution. Rather
    than sample weights for each sample in the input batch, we can calculate
    the parameters of the distribution over the pre-activations analytically
    (this step is called the local reparameterization trick). This function
    calculates the mean and standard deviation of the distribution over the
    pre-activations, and then draws a single sample for each element in the
    input batch and passes them as output.

    Args:
      x: 2D Tensor representing the input batch.
      variational_params: 2-tuple of Tensors, where the first tensor is the
        \theta values and the second contains the log of the \sigma^2 values.
      transpose_a: If True, a is transposed before multiplication.
      transpose_b: If True, b is transposed before multiplication.
      clip_alpha: Int or None. If integer, we clip the log \alpha values to
        [-clip_alpha, clip_alpha]. If None, don't clip the values.
      eps: Small constant value to use in log and sqrt operations to avoid
        NaNs.

    Returns:
      Output Tensor of the matmul operation.

    Raises:
      RuntimeError: If the variational_params argument is not a 2-tuple.
    """
    # NOTE(review): the parameter defaults in the signature were restored
    # after a lossy extraction -- confirm them against existing callers.

    # We expect a 2D input tensor, as in standard in fully-connected layers
    x.get_shape().assert_has_rank(2)

    theta, log_sigma2 = _verify_variational_params(variational_params)

    if clip_alpha is not None:
        # Compute the log_alphas and then compute the
        # log_sigma2 again so that we can clip on the
        # log alpha magnitudes
        log_alpha = common.compute_log_alpha(log_sigma2, theta, eps,
                                             clip_alpha)
        log_sigma2 = common.compute_log_sigma2(log_alpha, theta, eps)

    # Compute the mean and standard deviation of the distributions over the
    # activations. The variance of each pre-activation is the product of the
    # squared inputs with the weight variances exp(log_sigma2).
    mu_activation = tf.matmul(x,
                              theta,
                              transpose_a=transpose_a,
                              transpose_b=transpose_b)
    std_activation = tf.sqrt(
        tf.matmul(tf.square(x),
                  tf.exp(log_sigma2),
                  transpose_a=transpose_a,
                  transpose_b=transpose_b) + eps)

    output_shape = tf.shape(std_activation)
    return mu_activation + std_activation * tf.random_normal(output_shape)
# Example #3
def conv2d_train(x,
                 variational_params,
                 strides,
                 padding,
                 data_format="NHWC",
                 clip_alpha=None,
                 eps=common.EPSILON):
    R"""Training computation for a variational conv2d.

    In variational dropout we train a Bayesian neural network where we assume
    a fully-factorized Gaussian posterior and log uniform prior over the
    weights.

    During training, we need to sample weights from this distribution. Rather
    than sample weights for each sample in the input batch, we can calculate
    the parameters of the distribution over the pre-activations analytically
    (this step is called the local reparameterization trick). This function
    calculates the mean and standard deviation of the distribution over the
    pre-activations, and then draws a single sample for each element in the
    input batch and passes them as output.

    Args:
      x: NHWC tf.Tensor representing the input batch of features.
      variational_params: 2-tuple of Tensors, where the first tensor is the
        \theta values and the second contains the log of the \sigma^2 values.
      strides: The stride of the sliding window for each dimension of `x`.
        Identical to standard strides argument for tf.conv2d.
      padding: String. One of "SAME", or "VALID". Identical to standard
        padding argument for tf.conv2d.
      data_format: 'NHWC' or 'NCHW' ordering of 4-D input Tensor.
      clip_alpha: Int or None. If integer, we clip the log \alpha values to
        [-clip_alpha, clip_alpha]. If None, don't clip the values.
      eps: Small constant value to use in log and sqrt operations to avoid
        NaNs.

    Returns:
      Output Tensor of the conv2d operation.

    Raises:
      RuntimeError: If the variational_params argument
        is not a 2-tuple.
    """
    # NOTE(review): the parameter defaults in the signature were restored
    # after a lossy extraction -- confirm them against existing callers.
    theta, log_sigma2 = _verify_variational_params(variational_params)

    # NOTE: this is a truthiness check, so clip_alpha=0 disables clipping just
    # like clip_alpha=None does.
    if clip_alpha:
        # Compute the log_alphas and then compute the
        # log_sigma2 again so that we can clip on the
        # log alpha magnitudes
        log_alpha = common.compute_log_alpha(log_sigma2, theta, eps,
                                             clip_alpha)
        log_sigma2 = common.compute_log_sigma2(log_alpha, theta, eps)

    # Compute the mean and standard deviation of the distribution over the
    # convolution outputs. The variance is the convolution of the squared
    # inputs with the weight variances exp(log_sigma2).
    mu_activation = tf.nn.conv2d(x,
                                 theta,
                                 strides,
                                 padding,
                                 data_format=data_format)
    std_activation = tf.sqrt(
        tf.nn.conv2d(tf.square(x),
                     tf.exp(log_sigma2),
                     strides,
                     padding,
                     data_format=data_format) + eps)

    output_shape = tf.shape(std_activation)
    return mu_activation + std_activation * tf.random_normal(output_shape)
# Example #4
def broadcast_matmul_train(x,
                           variational_params,
                           clip_alpha=None,
                           eps=common.EPSILON):
    R"""Training computation for VD matrix multiplication with N input matrices.

    Multiplies a 3D tensor `x` with a set of 2D parameters. Each 2D matrix
    `x[i, :, :]` in the input tensor is multiplied independently with the
    parameters, resulting in an output tensor with shape
    `x.shape[:-1] + [variational_params[0].shape[1]]`.

    Args:
      x: Tensor representing the input batch. Typically 3D; the code only
        requires rank >= 2 and contracts over the last dimension.
      variational_params: 2-tuple of Tensors, where the first tensor is the
        \theta values and the second contains the log of the \sigma^2 values.
      clip_alpha: Int or None. If integer, we clip the log \alpha values to
        [-clip_alpha, clip_alpha]. If None, don't clip the values.
      eps: Small constant value to use in log and sqrt operations to avoid
        NaNs.

    Returns:
      Output Tensor of the batched matmul operation.

    Raises:
      RuntimeError: If the variational_params argument is not a 2-tuple.
    """
    # NOTE(review): the parameter defaults in the signature were restored
    # after a lossy extraction -- confirm them against existing callers.
    theta, log_sigma2 = _verify_variational_params(variational_params)

    # The input data must be rank 2 or greater
    input_rank = x.get_shape().ndims
    assert input_rank >= 2

    if clip_alpha is not None:
        # Compute the log_alphas and then compute the
        # log_sigma2 again so that we can clip on the
        # log alpha magnitudes
        log_alpha = common.compute_log_alpha(log_sigma2, theta, eps,
                                             clip_alpha)
        log_sigma2 = common.compute_log_sigma2(log_alpha, theta, eps)

    # Compute the mean and standard deviation of the distributions over the
    # activations by contracting the last axis of `x` with the first axis of
    # the parameters.
    contraction_axes = [[input_rank - 1], [0]]
    mu_activation = tf.tensordot(x, theta, contraction_axes)

    var_activation = tf.tensordot(tf.square(x), tf.exp(log_sigma2),
                                  contraction_axes)
    std_activation = tf.sqrt(var_activation + eps)

    # NOTE: The noise has shape [batch, 1, ..., 1, output_dim], so a single
    # noise value is shared across the middle dimensions of the output. The
    # element-wise multiply below broadcasts.
    # The output has the same rank as the input, so the number of singleton
    # dimensions between the batch and feature axes is input_rank - 2.
    num_pad_dims = input_rank - 2
    padding = [tf.constant(1, dtype=tf.int32) for _ in range(num_pad_dims)]

    # NOTE: On GPU, the first dim may not be defined w/ the Transformer. Create
    # a tf.Tensor from the list shape and TF should match the first dim
    # appropriately
    batch_size = tf.shape(x)[0]
    data_dim = tf.shape(theta)[-1]
    noise_shape = tf.stack([batch_size] + padding + [data_dim], axis=0)

    output = mu_activation + std_activation * tf.random_normal(noise_shape)
    return output