Example #1
def convolutional(input_data, filters_shape, trainable, name, downsample=False, activate=True, bn=True):

    with tf.variable_scope(name):
        if downsample:
            pad_h, pad_w = (filters_shape[0] - 2) // 2 + 1, (filters_shape[1] - 2) // 2 + 1
            paddings = tf.constant([[0, 0], [pad_h, pad_h], [pad_w, pad_w], [0, 0]])
            input_data = tf.pad(input_data, paddings, 'CONSTANT')
            strides = (1, 2, 2, 1)
            padding = 'VALID'
        else:
            strides = (1, 1, 1, 1)
            padding = "SAME"

        weight = tf.get_variable(name='weight', dtype=tf.float32, trainable=True,
                                 shape=filters_shape, initializer=tf.random_normal_initializer(stddev=0.01))
        conv = tf.nn.conv2d(input=input_data, filter=weight, strides=strides, padding=padding)

        if bn:
            conv = tf.layers.batch_normalization(conv, beta_initializer=tf.zeros_initializer(),
                                                 gamma_initializer=tf.ones_initializer(),
                                                 moving_mean_initializer=tf.zeros_initializer(),
                                                 moving_variance_initializer=tf.ones_initializer(), training=trainable)
        else:
            bias = tf.get_variable(name='bias', shape=filters_shape[-1], trainable=True,
                                   dtype=tf.float32, initializer=tf.constant_initializer(0.0))
            conv = tf.nn.bias_add(conv, bias)

        if activate:
            conv = tf.nn.leaky_relu(conv, alpha=0.1)

    return conv
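A minimal usage sketch for the block above (the input size and filter shape are assumed, TF 1.x graph mode): with downsample=True and a 3x3 kernel, pad_h = pad_w = 1, so a 416x416 input is padded to 418x418 and the stride-2 'VALID' convolution returns 208x208.

import tensorflow as tf

# Hypothetical call, not from the original project: 3x3/stride-2 conv on a
# 416x416x3 input; filters_shape is (k_h, k_w, in_channels, out_channels).
images = tf.placeholder(tf.float32, [None, 416, 416, 3])
out = convolutional(images, filters_shape=(3, 3, 3, 32),
                    trainable=True, name='conv0', downsample=True)
print(out.shape)  # (?, 208, 208, 32)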
Example #2
 def _batch_norm_without_layers(self, input_layer, decay, use_scale,
                                epsilon):
     """Batch normalization on `input_layer` without tf.layers."""
     # We make this function as similar as possible to the
     # tf.contrib.layers.batch_norm, to minimize the differences between using
     # layers and not using layers.
     shape = input_layer.shape
     num_channels = shape[3] if self.data_format == 'NHWC' else shape[1]
     beta = self.get_variable('beta', [num_channels],
                              tf.float32,
                              tf.float32,
                              initializer=tf.zeros_initializer())
     if use_scale:
         gamma = self.get_variable('gamma', [num_channels],
                                   tf.float32,
                                   tf.float32,
                                   initializer=tf.ones_initializer())
     else:
         gamma = tf.constant(1.0, tf.float32, [num_channels])
     # For moving variables, we use tf.get_variable instead of self.get_variable,
     # since self.get_variable returns the result of tf.cast which we cannot
     # assign to.
     moving_mean = tf.get_variable('moving_mean', [num_channels],
                                   tf.float32,
                                   initializer=tf.zeros_initializer(),
                                   trainable=False)
     moving_variance = tf.get_variable('moving_variance', [num_channels],
                                       tf.float32,
                                       initializer=tf.ones_initializer(),
                                       trainable=False)
     if self.phase_train:
         bn, batch_mean, batch_variance = tf.nn.fused_batch_norm(
             input_layer,
             gamma,
             beta,
             epsilon=epsilon,
             data_format=self.data_format,
             is_training=True)
         mean_update = moving_averages.assign_moving_average(
             moving_mean, batch_mean, decay=decay, zero_debias=False)
         variance_update = moving_averages.assign_moving_average(
             moving_variance,
             batch_variance,
             decay=decay,
             zero_debias=False)
         tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, mean_update)
         tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, variance_update)
     else:
         bn, _, _ = tf.nn.fused_batch_norm(input_layer,
                                           gamma,
                                           beta,
                                           mean=moving_mean,
                                           variance=moving_variance,
                                           epsilon=epsilon,
                                           data_format=self.data_format,
                                           is_training=False)
     return bn
Example #3
    def separable_conv_block(self, input, dw_filter, output_channel, strides,
                             name):
        """
        Params:
        input: a 4-D input tensor (NHWC)
        dw_filter:  a 4-D tuple: [filter_height, filter_width, in_channels, channel_multiplier]
        output_channel: output channel of the separable_conv_block
        strides: a 4-D list: [1,strides,strides,1]
        """
        with tf.variable_scope(name):

            dw_weight = tf.get_variable(
                name='dw_filter',
                dtype=tf.float32,
                trainable=True,
                shape=dw_filter,
                initializer=tf.random_normal_initializer(stddev=0.01))

            dw = tf.nn.depthwise_conv2d(input=input,
                                        filter=dw_weight,
                                        strides=strides,
                                        padding="SAME",
                                        name='Conv/dw')

            bn_dw = tf.layers.batch_normalization(
                dw,
                beta_initializer=tf.zeros_initializer(),
                gamma_initializer=tf.ones_initializer(),
                moving_mean_initializer=tf.zeros_initializer(),
                moving_variance_initializer=tf.ones_initializer(),
                training=self.trainable,
                name='dw/bn')
            relu = tf.nn.leaky_relu(bn_dw, 0.1)
            weight = tf.get_variable(
                name='weight',
                dtype=tf.float32,
                trainable=True,
                shape=(1, 1, dw_filter[2] * dw_filter[3], output_channel),
                initializer=tf.random_normal_initializer(stddev=0.01))

            conv = tf.nn.conv2d(input=relu,
                                filter=weight,
                                strides=[1, 1, 1, 1],
                                padding="SAME",
                                name="conv/s1")
            bn_pt = tf.layers.batch_normalization(
                conv,
                beta_initializer=tf.zeros_initializer(),
                gamma_initializer=tf.ones_initializer(),
                moving_mean_initializer=tf.zeros_initializer(),
                moving_variance_initializer=tf.ones_initializer(),
                training=self.trainable,
                name='pt/bn')
            return tf.nn.leaky_relu(bn_pt, 0.1)
Example #4
def _bn(x, is_train, global_step=None, name='bn'):
    moving_average_decay = 0.9
    # moving_average_decay = 0.99
    # moving_average_decay_init = 0.99
    with tf.variable_scope(name):
        decay = moving_average_decay
        # if global_step is None:
        # decay = moving_average_decay
        # else:
        # decay = tf.cond(tf.greater(global_step, 100)
        # , lambda: tf.constant(moving_average_decay, tf.float32)
        # , lambda: tf.constant(moving_average_decay_init, tf.float32))
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2])
        with tf.device('/CPU:0'):
            mu = tf.get_variable('mu',
                                 batch_mean.get_shape(),
                                 tf.float32,
                                 initializer=tf.zeros_initializer(),
                                 trainable=False)
            sigma = tf.get_variable('sigma',
                                    batch_var.get_shape(),
                                    tf.float32,
                                    initializer=tf.ones_initializer(),
                                    trainable=False)
            beta = tf.get_variable('beta',
                                   batch_mean.get_shape(),
                                   tf.float32,
                                   initializer=tf.zeros_initializer())
            gamma = tf.get_variable('gamma',
                                    batch_var.get_shape(),
                                    tf.float32,
                                    initializer=tf.ones_initializer())
        # BN when training
        update = 1.0 - decay
        # with tf.control_dependencies([tf.Print(decay, [decay])]):
        # update_mu = mu.assign_sub(update*(mu - batch_mean))
        update_mu = mu.assign_sub(update * (mu - batch_mean))
        update_sigma = sigma.assign_sub(update * (sigma - batch_var))
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_mu)
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_sigma)

        mean, var = tf.cond(is_train, lambda: (batch_mean, batch_var), lambda:
                            (mu, sigma))
        bn = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-5)

        # bn = tf.nn.batch_normalization(x, batch_mean, batch_var, beta, gamma, 1e-5)

        # bn = tf.contrib.layers.batch_norm(inputs=x, decay=decay,
        # updates_collections=[tf.GraphKeys.UPDATE_OPS], center=True,
        # scale=True, epsilon=1e-5, is_training=is_train,
        # trainable=True)
    return bn
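Both `_bn` above and the tf.layers-based blocks earlier register their moving-average assignments in tf.GraphKeys.UPDATE_OPS instead of executing them automatically. A hedged sketch of the usual wiring (the `loss` tensor and learning rate are assumptions, not from the snippet):

import tensorflow as tf

def make_train_op(loss, learning_rate=1e-3):
    # Run the collected moving-mean / moving-variance updates with every
    # training step; otherwise the statistics used at inference never change.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        return tf.train.AdamOptimizer(learning_rate).minimize(loss)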
Example #5
def batch_norm(inputs,
               bn_param,
               scale=True,
               momentum=0.99,
               epsilon=1e-5,
               name='batch_norm'):
    with tf.variable_scope(name):
        beta = _variable('beta', [inputs.get_shape()[-1]],
                         initializer=tf.zeros_initializer(),
                         trainable=True)

        if scale:
            gamma = _variable('gamma', [inputs.get_shape()[-1]],
                              initializer=tf.ones_initializer(),
                              trainable=True)
        else:
            gamma = None

        reduced_dim = [i for i in range(len(inputs.get_shape()) - 1)]
        batch_mean, batch_var = tf.nn.moments(inputs,
                                              reduced_dim,
                                              keep_dims=False)

        # moving average of the populations
        pop_mean = _variable('pop_mean',
                             shape=[inputs.get_shape()[-1]],
                             initializer=tf.zeros_initializer(),
                             trainable=False)
        pop_var = _variable('pop_var',
                            shape=[inputs.get_shape()[-1]],
                            initializer=tf.ones_initializer(),
                            trainable=False)

        pop_mean_op = tf.assign(
            pop_mean, pop_mean * momentum + batch_mean * (1 - momentum))
        pop_var_op = tf.assign(pop_var,
                               pop_var * momentum + batch_var * (1 - momentum))

        tf.add_to_collection('batch_norm_update', pop_mean_op)
        tf.add_to_collection('batch_norm_update', pop_var_op)

        # for training, bn_param[0]=0
        # for evaluation, bn_param[0]=1
        mean = bn_param[0] * pop_mean + (1 - bn_param[0]) * batch_mean
        var = bn_param[0] * pop_var + (1 - bn_param[0]) * batch_var

        return tf.nn.batch_normalization(inputs, mean, var, beta, gamma,
                                         epsilon)
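This variant keeps its population updates in a custom 'batch_norm_update' collection and blends batch vs. population statistics through the fed-in `bn_param` value, so the caller must run those update ops explicitly during training. A hedged usage sketch, assuming the project's `_variable` helper is importable and using an illustrative 8x64 input:

import numpy as np
import tensorflow as tf

inputs = tf.constant(np.random.randn(8, 64).astype(np.float32))
bn_param = tf.placeholder(tf.float32, [1], name='bn_param')
normed = batch_norm(inputs, bn_param)
bn_updates = tf.get_collection('batch_norm_update')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # training: bn_param[0] = 0 -> batch statistics, refresh moving averages
    sess.run([normed] + bn_updates, feed_dict={bn_param: [0.0]})
    # evaluation: bn_param[0] = 1 -> stored population statistics
    sess.run(normed, feed_dict={bn_param: [1.0]})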
Example #6
 def decoder(self, z):
     nl = tf.nn.leaky_relu
     z_has_timesteps = (z.get_shape().ndims == 3)
     if z_has_timesteps:
         sh = tf.shape(z)
         z = flatten_two_dims(z)
     with tf.variable_scope(self.scope + "decoder"):
         z = small_deconvnet(z,
                             nl=nl,
                             ch=4 if self.spherical_obs else 8,
                             positional_bias=True)
         if z_has_timesteps:
             z = unflatten_first_dim(z, sh)
         if self.spherical_obs:
             scale = tf.get_variable(name="scale",
                                     shape=(),
                                     dtype=tf.float32,
                                     initializer=tf.ones_initializer())
             scale = tf.maximum(scale, -4.)
             scale = tf.nn.softplus(scale)
             scale = scale * tf.ones_like(z)
         else:
             z, scale = tf.split(z, 2, -1)
             scale = tf.nn.softplus(scale)
         # scale = tf.Print(scale, [scale])
         return tf.distributions.Normal(loc=z, scale=scale)
Example #7
def primer_norm(x, dim, epsilon=1e-6, name="layer_prepostprocess"):
    """Primer normalization over dimension `dim`.

  Args:
    x: a mtf.Tensor whose shape contains `dim`.
    dim: a mtf.Dimension.
    epsilon: a floating point number.
    name: a string used for tf.variable_scope.

  Returns:
    a mtf.Tensor with same shape as x.
  """
    with tf.variable_scope(name + "/primer_norm"):
        scale = mtf.get_variable(x.mesh,
                                 "primer_norm_scale",
                                 mtf.Shape([dim]),
                                 initializer=tf.ones_initializer(),
                                 activation_dtype=x.dtype)
        bias = mtf.get_variable(x.mesh,
                                "primer_norm_bias",
                                mtf.Shape([dim]),
                                initializer=tf.zeros_initializer(),
                                activation_dtype=x.dtype)
        reduced_shape = x.shape - dim
        mean = mtf.reduce_mean(x, output_shape=reduced_shape)
        mean_centered_x = x - mean
        pseudo_variance = mtf.reduce_mean(x * mean_centered_x,
                                          output_shape=reduced_shape)
        norm_x = mean_centered_x * mtf.rsqrt(pseudo_variance + epsilon)
        return norm_x * scale + bias
Example #8
def batch_norm_relu(inputs,
                    is_training,
                    relu=True,
                    init_zero=False,
                    data_format='channels_last'):
  """Performs a batch normalization followed by a ReLU.

  Args:
    inputs: `Tensor` of shape `[batch, channels, ...]`.
    is_training: `bool` for whether the model is training.
    relu: `bool` if False, omits the ReLU operation.
    init_zero: `bool` if True, initializes scale parameter of batch
      normalization with 0 instead of 1 (default).
    data_format: `str` either "channels_first" for `[batch, channels, height,
      width]` or "channels_last" for `[batch, height, width, channels]`.

  Returns:
    A normalized `Tensor` with the same `data_format`.
  """
  del data_format
  if init_zero:
    gamma_initializer = tf.zeros_initializer()
  else:
    gamma_initializer = tf.ones_initializer()

  var = {
      'beta': None,
      'gamma': None,
      'moving_mean': ['moving_vars'],
      'moving_variance': ['moving_vars'],
  }
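The snippet above is cut off after building the variables-collections dict and never applies it. As a hedged sketch only (this is not the original continuation), such a dict is typically handed to tf.contrib.layers.batch_norm so the moving statistics land in the 'moving_vars' collection; the momentum and epsilon values below are assumptions.

import tensorflow as tf  # TF 1.x with tf.contrib available

def batch_norm_relu_sketch(inputs, is_training, relu=True, init_zero=False):
    # Only the variables_collections wiring is the point of this sketch.
    gamma_initializer = tf.zeros_initializer() if init_zero else tf.ones_initializer()
    var = {
        'beta': None,
        'gamma': None,
        'moving_mean': ['moving_vars'],
        'moving_variance': ['moving_vars'],
    }
    inputs = tf.contrib.layers.batch_norm(
        inputs,
        decay=0.9,                      # assumed momentum
        epsilon=1e-5,                   # assumed epsilon
        center=True,
        scale=True,
        is_training=is_training,
        fused=True,
        variables_collections=var,      # route moving stats into 'moving_vars'
        param_initializers={'gamma': gamma_initializer})
    if relu:
        inputs = tf.nn.relu(inputs)
    return inputs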
Example #9
def batch_norm_relu(inputs,
                    is_training,
                    relu=True,
                    init_zero=False,
                    data_format='channels_first'):
    """Performs a batch normalization followed by a ReLU."""
    if init_zero:
        gamma_initializer = tf.zeros_initializer()
    else:
        gamma_initializer = tf.ones_initializer()

    if data_format == 'channels_first':
        axis = 1
    else:
        axis = 3

    inputs = tf.layers.batch_normalization(inputs=inputs,
                                           axis=axis,
                                           momentum=0.9,
                                           epsilon=1e-5,
                                           center=True,
                                           scale=True,
                                           training=is_training,
                                           fused=True,
                                           gamma_initializer=gamma_initializer)

    if relu:
        inputs = tf.nn.relu(inputs)
    return inputs
Example #10
def apply_norm(x, epsilon=1e-6):
    """Applies layer normalization to x.

  Based on "Layer Normalization":
  https://arxiv.org/abs/1607.06450

  Args:
    x: <float>[..., input_size]
    epsilon: Used to avoid division by 0.

  Returns:
    <float>[..., input_size]
  """
    input_size = x.get_shape()[-1]
    with tf.variable_scope("layer_norm", values=[x]):
        scale = tf.get_variable("layer_norm_scale", [input_size],
                                initializer=tf.ones_initializer())
        bias = tf.get_variable("layer_norm_bias", [input_size],
                               initializer=tf.zeros_initializer())
        mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
        variance = tf.reduce_mean(tf.square(x - mean),
                                  axis=[-1],
                                  keepdims=True)
        norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
        result = norm_x * scale + bias
        return result
Example #11
def batch_norm(x, is_training, bn_decay):
    input_dims = x.get_shape()[-1].value
    moment_dims = list(range(len(x.get_shape()) - 1))
    beta = tf.Variable(tf.zeros_initializer()(shape=[input_dims]),
                       dtype=tf.float32,
                       trainable=True,
                       name='beta')
    gamma = tf.Variable(tf.ones_initializer()(shape=[input_dims]),
                        dtype=tf.float32,
                        trainable=True,
                        name='gamma')
    batch_mean, batch_var = tf.nn.moments(x, moment_dims, name='moments')

    decay = bn_decay if bn_decay is not None else 0.9
    ema = tf.train.ExponentialMovingAverage(decay=decay)
    # Operator that maintains moving averages of variables.
    ema_apply_op = tf.cond(is_training,
                           lambda: ema.apply([batch_mean, batch_var]),
                           lambda: tf.no_op())

    # Update moving average and return current batch's avg and var.
    def mean_var_with_update():
        with tf.control_dependencies([ema_apply_op]):
            return tf.identity(batch_mean), tf.identity(batch_var)

    # ema.average returns the Variable holding the average of var.
    mean, var = tf.cond(
        is_training, mean_var_with_update, lambda:
        (ema.average(batch_mean), ema.average(batch_var)))
    x = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3)
    return x
Example #12
def _CreateLSTMPruneVariables(lstm_obj, input_depth, h_depth):
    """Function to create additional variables for pruning."""

    mask = lstm_obj.add_variable(name="mask",
                                 shape=[input_depth + h_depth, 4 * h_depth],
                                 initializer=tf.ones_initializer(),
                                 trainable=False,
                                 dtype=lstm_obj.dtype)
    threshold = lstm_obj.add_variable(name="threshold",
                                      shape=[],
                                      initializer=tf.zeros_initializer(),
                                      trainable=False,
                                      dtype=lstm_obj.dtype)
    # Add old_weights, old_old_weights, gradient for gradient
    # based pruning.
    old_weight = lstm_obj.add_variable(
        name="old_weight",
        shape=[input_depth + h_depth, 4 * h_depth],
        initializer=tf.zeros_initializer(),
        trainable=False,
        dtype=lstm_obj.dtype)
    old_old_weight = lstm_obj.add_variable(
        name="old_old_weight",
        shape=[input_depth + h_depth, 4 * h_depth],
        initializer=tf.zeros_initializer(),
        trainable=False,
        dtype=lstm_obj.dtype)
    gradient = lstm_obj.add_variable(
        name="gradient",
        shape=[input_depth + h_depth, 4 * h_depth],
        initializer=tf.zeros_initializer(),
        trainable=False,
        dtype=lstm_obj.dtype)

    return mask, threshold, old_weight, old_old_weight, gradient
Example #13
    def testMinimalRun(self):
        x = basic.TrainableVariable(shape=(),
                                    initializers={'w':
                                                  tf.ones_initializer()})()
        x2 = x**2.0
        min_value = 0.5
        constr = optimization_constraints.OptimizationConstraints().add(
            x > min_value)

        self.assertFalse(constr._is_connected)
        loss = moving_average.MovingAverage()(x2 + tf.random.normal(
            (), stddev=1.0)) + constr()

        self.assertTrue(constr._is_connected)
        with self.assertRaisesRegexp(ValueError,
                                     'Cannot add further constraints'):
            constr.add(x > min_value)
        with self.assertRaisesRegexp(ValueError,
                                     'Cannot add further constraints'):
            constr.add_geq(x, min_value)
        with self.assertRaisesRegexp(ValueError,
                                     'Cannot add further constraints'):
            constr.add_leq(min_value < x)

        opt = tf.train.AdamOptimizer(1e-2, beta1=0.0)
        update = opt.minimize(loss)
        with tf.control_dependencies([update]):
            x2 = tf.identity(x2)

        with tf.train.MonitoredSession() as sess:
            for _ in range(500):
                v, _ = sess.run([x2, update])
        self.assertAllClose(v, min_value**2)
Example #14
def batch_norm_relu(inputs,
                    is_training,
                    relu=True,
                    init_zero=False,
                    center=True,
                    scale=True,
                    data_format='channels_first'):
    """Performs a batch normalization followed by a ReLU.

  Args:
    inputs: `Tensor` of shape `[batch, channels, ...]`.
    is_training: `bool` for whether the model is training.
    relu: `bool` if False, omits the ReLU operation.
    init_zero: `bool` if True, initializes scale parameter of batch
        normalization with 0 instead of 1 (default).
    center: `bool` whether to add learnable bias factor.
    scale: `bool` whether to add learnable scaling factor.
    data_format: `str` either "channels_first" for `[batch, channels, height,
        width]` or "channels_last" for `[batch, height, width, channels]`.

  Returns:
    A normalized `Tensor` with the same `data_format`.
  """
    if init_zero:
        gamma_initializer = tf.zeros_initializer()
    else:
        gamma_initializer = tf.ones_initializer()

    if data_format == 'channels_first':
        axis = 1
    else:
        axis = 3

    if FLAGS.global_bn:
        bn_foo = BatchNormalization(axis=axis,
                                    momentum=FLAGS.batch_norm_decay,
                                    epsilon=BATCH_NORM_EPSILON,
                                    center=center,
                                    scale=scale,
                                    fused=False,
                                    gamma_initializer=gamma_initializer)
        inputs = bn_foo(inputs, training=is_training)
    else:
        print("usng the default batch norm, not global normalized one")
        # Done: pass in the batch_norm_decay
        inputs = tf.layers.batch_normalization(
            inputs=inputs,
            axis=axis,
            momentum=FLAGS.batch_norm_decay,
            epsilon=BATCH_NORM_EPSILON,
            center=center,
            scale=scale,
            training=is_training,
            fused=True,
            gamma_initializer=gamma_initializer)

    if relu:
        inputs = tf.nn.relu(inputs)
    return inputs
Example #15
def layer_norm_vars(filters, layer_idx, total_layers):
    """Create Variables for layer norm."""
    if total_layers == 0:
        scale = tf.get_variable("gamma",
                                filters,
                                initializer=tf.ones_initializer())
        bias = tf.get_variable("beta",
                               filters,
                               initializer=tf.zeros_initializer())
    else:
        scale = tf.get_variable("gamma", [total_layers, filters],
                                initializer=tf.ones_initializer())
        bias = tf.get_variable("beta", [total_layers, filters],
                               initializer=tf.zeros_initializer())
        scale = tf.gather(scale, layer_idx)
        bias = tf.gather(bias, layer_idx)
    return scale, bias
Example #16
def layer_norm_op(inputs,
                  norm_shape=None,
                  begin_norm_axis=-1,
                  center=True,
                  scale=True,
                  activation_fn=None,
                  reuse=None,
                  trainable=True,
                  name=None):
  """Custom Layer Normalization layer."""

  if norm_shape is None:
    # If `norm_shape` is not provided, use `begin_norm_axis` to infer
    norm_shape = inputs.shape[begin_norm_axis:]
  elif isinstance(norm_shape, int):
    # If `norm_shape` is provided as int, convert it to list
    norm_shape = [norm_shape]

  with tf.variable_scope(name, "layer_norm", [inputs], reuse=reuse):
    inputs_rank = inputs.shape.ndims
    if inputs_rank is None:
      raise ValueError("Inputs %s has undefined rank." % inputs.name)
    dtype = inputs.dtype.base_dtype
    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    if center:
      beta = tf.get_variable(
          "beta",
          shape=norm_shape,
          dtype=dtype,
          initializer=tf.zeros_initializer(),
          trainable=trainable)
    if scale:
      gamma = tf.get_variable(
          "gamma",
          shape=norm_shape,
          dtype=dtype,
          initializer=tf.ones_initializer(),
          trainable=trainable)
    # By default, compute the moments across all the dimensions except the one
    # with index 0.
    norm_axes = list(range(inputs_rank - len(norm_shape), inputs_rank))
    mean, variance = tf.nn.moments(inputs, norm_axes, keep_dims=True)
    # Compute layer normalization using the batch_normalization function.
    # Note that epsilon must be increased for float16 due to the limited
    # representable range.
    variance_epsilon = 1e-8 if dtype != tf.float16 else 1e-3
    outputs = tf.nn.batch_normalization(
        inputs,
        mean,
        variance,
        offset=beta,
        scale=gamma,
        variance_epsilon=variance_epsilon)
    outputs.set_shape(inputs.shape)
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return outputs
Example #17
File: tf_util.py Project: xuyongzhi/ASIS
def batch_norm_dist_template(inputs, is_training, scope, moments_dims,
                             bn_decay):
    """ The batch normalization for distributed training.
  Args:
      inputs:        Tensor, k-D input ... x C could be BC or BHWC or BDHWC
      is_training:   boolean tf.Variable, true indicates training phase
      scope:         string, variable scope
      moments_dims:  a list of ints, indicating dimensions for moments calculation
      bn_decay:      float or float tensor variable, controlling moving average weight
  Return:
      normed:        batch-normalized maps
  """
    with tf.variable_scope(scope) as sc:
        num_channels = inputs.get_shape()[-1].value
        beta = _variable_on_cpu('beta', [num_channels],
                                initializer=tf.zeros_initializer())
        gamma = _variable_on_cpu('gamma', [num_channels],
                                 initializer=tf.ones_initializer())

        pop_mean = _variable_on_cpu('pop_mean', [num_channels],
                                    initializer=tf.zeros_initializer(),
                                    trainable=False)
        pop_var = _variable_on_cpu('pop_var', [num_channels],
                                   initializer=tf.ones_initializer(),
                                   trainable=False)

        def train_bn_op():
            batch_mean, batch_var = tf.nn.moments(inputs,
                                                  moments_dims,
                                                  name='moments')
            decay = bn_decay if bn_decay is not None else 0.9
            train_mean = tf.assign(pop_mean,
                                   pop_mean * decay + batch_mean * (1 - decay))
            train_var = tf.assign(pop_var,
                                  pop_var * decay + batch_var * (1 - decay))
            with tf.control_dependencies([train_mean, train_var]):
                return tf.nn.batch_normalization(inputs, batch_mean, batch_var,
                                                 beta, gamma, 1e-3)

        def test_bn_op():
            return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta,
                                             gamma, 1e-3)

        normed = tf.cond(is_training, train_bn_op, test_bn_op)
        return normed
Example #18
 def __init__(self, num_actions, quantile_embedding_dim, **kwargs):
     # This weights_initializer gives action 0 a higher weight, ensuring
     # that it gets picked by the argmax.
     super(MockImplicitQuantileNetwork, self).__init__(**kwargs)
     self.num_actions = num_actions
     self.layer = tf.keras.layers.Dense(
         self.num_actions,
         kernel_initializer=tf.ones_initializer(),
         bias_initializer=tf.zeros_initializer())
Example #19
def layer_norm(inputs, epsilon=1e-8):
    mean, variance = tf.nn.moments(inputs, [-1], keep_dims=True)
    normalized = (inputs - mean) / (tf.sqrt(variance + epsilon))
    params_shape = inputs.get_shape()[-1:]
    gamma = tf.get_variable('gamma', params_shape, tf.float32,
                            tf.ones_initializer())
    beta = tf.get_variable('beta', params_shape, tf.float32,
                           tf.zeros_initializer())
    return gamma * normalized + beta
Example #20
    def testInitializers(self):
        inputs = tf.ones(dtype=tf.float32,
                         shape=[self.batch_size, self.in_size])
        prev_state = tf.ones(dtype=tf.float32,
                             shape=[self.batch_size, self.hidden_size])

        with self.assertRaisesRegexp(KeyError, "Invalid initializer keys.*"):
            snt.VanillaRNN(name="rnn",
                           hidden_size=self.hidden_size,
                           initializers={"invalid": None})

        err = "Initializer for 'w' is not a callable function"
        with self.assertRaisesRegexp(TypeError, err):
            snt.VanillaRNN(
                name="rnn",
                hidden_size=self.hidden_size,
                initializers={"in_to_hidden": {
                    "w": tf.zeros([10, 10])
                }})

        # Nested initializer.
        valid_initializers = {
            "in_to_hidden": {
                "w": tf.ones_initializer(),
            },
            "hidden_to_hidden": {
                "b": tf.ones_initializer(),
            }
        }

        vanilla_rnn = snt.VanillaRNN(name="rnn",
                                     hidden_size=self.hidden_size,
                                     initializers=valid_initializers)

        vanilla_rnn(inputs, prev_state)
        init = tf.global_variables_initializer()

        self.evaluate(init)
        w_v, b_v = self.evaluate([
            vanilla_rnn.in_to_hidden_linear.w,
            vanilla_rnn.hidden_to_hidden_linear.b,
        ])
        self.assertAllClose(w_v, np.ones([self.in_size, self.hidden_size]))
        self.assertAllClose(b_v, np.ones([self.hidden_size]))
Example #21
 def __init__(self, num_actions, **kwargs):
   # This weights_initializer gives action 0 a higher weight, ensuring
   # that it gets picked by the argmax.
   super(MockDQNNetwork, self).__init__(**kwargs)
   weights_initializer = np.tile(
       np.arange(num_actions, 0, -1), (stack_size, 1))
   self.layer = tf.keras.layers.Dense(
       num_actions,
       kernel_initializer=tf.constant_initializer(weights_initializer),
       bias_initializer=tf.ones_initializer())
Example #22
 def __init__(self, dim, eps=1e-5, name=None):
     super(LayerNorm, self).__init__(name=name)
     self.eps = eps
     with self.variable_scope:
         self.g = tf.get_variable('g',
                                  shape=[dim],
                                  initializer=tf.ones_initializer())
         self.b = tf.get_variable('b',
                                  shape=[dim],
                                  initializer=tf.zeros_initializer())
Example #23
def batch_norm_act(inputs,
                   is_training_bn: bool,
                   act_type: Union[Text, None],
                   init_zero: bool = False,
                   data_format: Text = 'channels_last',
                   momentum: float = 0.99,
                   epsilon: float = 1e-3,
                   strategy: Text = None,
                   name: Text = None,
                   batch_norm_trainable: bool = True):
    """Performs a batch normalization followed by a non-linear activation.

  Args:
    inputs: `Tensor` of shape `[batch, channels, ...]`.
    is_training_bn: `bool` for whether the model is training.
    act_type: non-linear relu function type. If None, omits the relu operation.
    init_zero: `bool` if True, initializes scale parameter of batch
      normalization with 0 instead of 1 (default).
    data_format: `str` either "channels_first" for `[batch, channels, height,
      width]` or "channels_last" for `[batch, height, width, channels]`.
    momentum: `float`, momentum of batch norm.
    epsilon: `float`, small value for numerical stability.
    strategy: string to specify training strategy for TPU/GPU/CPU.
    name: the name of the batch normalization layer
    batch_norm_trainable: 'bool' if False, the batch statistics will not be
      updated.

  Returns:
    A normalized `Tensor` with the same `data_format`.
  """
    if init_zero:
        gamma_initializer = tf.zeros_initializer()
    else:
        gamma_initializer = tf.ones_initializer()

    if data_format == 'channels_first':
        axis = 1
    else:
        axis = 3

    inputs = batch_normalization(inputs=inputs,
                                 axis=axis,
                                 momentum=momentum,
                                 epsilon=epsilon,
                                 center=True,
                                 scale=True,
                                 trainable=batch_norm_trainable,
                                 training=is_training_bn,
                                 strategy=strategy,
                                 gamma_initializer=gamma_initializer,
                                 name=name)

    if act_type:
        inputs = activation_fn(inputs, act_type)
    return inputs
Example #24
def batch_norm_relu(inputs,
                    is_training,
                    relu=True,
                    swish=False,
                    init_zero=False,
                    bn_decay=BATCH_NORM_DECAY,
                    bn_epsilon=BATCH_NORM_EPSILON,
                    data_format='channels_first'):
  """Performs a batch normalization followed by a ReLU.

  Args:
    inputs: `Tensor` of shape `[batch, channels, ...]`.
    is_training: `bool` for whether the model is training.
    relu: `bool` if False, omits the ReLU operation.
    swish: `bool`. True to use swish activation function, False for ReLU.
    init_zero: `bool` if True, initializes scale parameter of batch
      normalization with 0 instead of 1 (default).
    bn_decay: `float` batch norm decay parameter to use.
    bn_epsilon: `float` batch norm epsilon parameter to use.
    data_format: `str` either "channels_first" for `[batch, channels, height,
      width]` or "channels_last" for `[batch, height, width, channels]`.

  Returns:
    A normalized `Tensor` with the same `data_format`.
  """

  if init_zero:
    gamma_initializer = tf.zeros_initializer()
  else:
    gamma_initializer = tf.ones_initializer()

  assert data_format == 'channels_last'

  if data_format == 'channels_first':
    axis = 1
  else:
    axis = -1

  inputs = tf.layers.batch_normalization(
      inputs=inputs,
      axis=axis,
      momentum=bn_decay,
      epsilon=bn_epsilon,
      center=True,
      scale=True,
      training=is_training,
      fused=True,
      gamma_initializer=gamma_initializer)

  if swish:
    inputs = tf.keras.activations.swish(inputs)
  elif relu:
    inputs = tf.nn.relu(inputs)
  return inputs
Example #25
    def __call__(self,
                 inputs,
                 relu=True,
                 init_zero=False,
                 is_training=False,
                 name=None):
        """Builds layers for a batch normalization followed by a ReLU.

    Args:
      inputs: `Tensor` of shape `[batch, channels, ...]`.
      relu: `bool` if False, omits the ReLU operation.
      init_zero: `bool` if True, initializes scale parameter of batch
        normalization with 0. If False, initialize it with 1.
      is_training: `boolean`, if True if model is in training mode.
      name: `str` name for the operation.

    Returns:
      A normalized `Tensor` with the same `data_format`.
    """
        if init_zero:
            gamma_initializer = tf.zeros_initializer()
        else:
            gamma_initializer = tf.ones_initializer()

        if self._use_sync_bn:
            inputs = cross_replica_batch_normalization(
                inputs=inputs,
                momentum=self._momentum,
                epsilon=self._epsilon,
                center=True,
                scale=True,
                training=(is_training and self._trainable),
                trainable=self._trainable,
                fused=True,
                gamma_initializer=gamma_initializer,
                num_distributed_groups=1,
                name=name)
        else:
            inputs = tf.layers.batch_normalization(
                inputs=inputs,
                momentum=self._momentum,
                epsilon=self._epsilon,
                center=True,
                scale=True,
                training=(is_training and self._trainable),
                trainable=self._trainable,
                fused=True,
                gamma_initializer=gamma_initializer,
                name=name)

        if relu:
            inputs = tf.nn.relu(inputs)
        return inputs
Example #26
    def addBiases(self, inp1, inp2, dim, bInitial = 0, name = ""):
        with tf.variable_scope("additiveBiases" + name):
            b = tf.get_variable("biases", shape = (dim,), 
                initializer = tf.zeros_initializer()) + bInitial
        with tf.variable_scope("multiplicativeBias" + name):
            beta = tf.get_variable("biases", shape = (3 * dim,), 
                initializer = tf.ones_initializer())

        Wx, Uh, inter = tf.split(beta * tf.concat([inp1, inp2, inp1 * inp2], axis = 1), 
            num_or_size_splits = 3, axis = 1)
        output = Wx + Uh + inter + b        
        return output
Example #27
def batch_norm_relu(inputs,
                    is_training_bn,
                    relu=True,
                    init_zero=False,
                    data_format='channels_last',
                    momentum=0.99,
                    epsilon=1e-3,
                    use_tpu=False,
                    name=None):
  """Performs a batch normalization followed by a ReLU.

  Args:
    inputs: `Tensor` of shape `[batch, channels, ...]`.
    is_training_bn: `bool` for whether the model is training.
    relu: `bool` if False, omits the ReLU operation.
    init_zero: `bool` if True, initializes scale parameter of batch
      normalization with 0 instead of 1 (default).
    data_format: `str` either "channels_first" for `[batch, channels, height,
      width]` or "channels_last" for `[batch, height, width, channels]`.
    momentum: `float`, momentum of batch norm.
    epsilon: `float`, small value for numerical stability.
    use_tpu: `bool`, whether to use tpu version of batch norm.
    name: the name of the batch normalization layer

  Returns:
    A normalized `Tensor` with the same `data_format`.
  """
  if init_zero:
    gamma_initializer = tf.zeros_initializer()
  else:
    gamma_initializer = tf.ones_initializer()

  if data_format == 'channels_first':
    axis = 1
  else:
    axis = 3

  inputs = tpu_batch_normalization(
      inputs=inputs,
      axis=axis,
      momentum=momentum,
      epsilon=epsilon,
      center=True,
      scale=True,
      training=is_training_bn,
      use_tpu=use_tpu,
      gamma_initializer=gamma_initializer,
      name=name)

  if relu:
    inputs = relu_fn(inputs)
  return inputs
Example #28
def get_initializer(initializer, dtype):
    if initializer == 'zeros':
        return tf.zeros_initializer(dtype=dtype)
    elif initializer == 'ones':
        return tf.ones_initializer(dtype=dtype)
    elif initializer == 'vs':
        return tf.variance_scaling_initializer(dtype=dtype)
    elif initializer == 'xavier':
        return tf.glorot_normal_initializer(dtype=dtype)
    elif initializer == 'he':
        return tf.variance_scaling_initializer(dtype=dtype)
    else:
        raise NotImplementedError
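A short hedged usage sketch for the helper above (the variable names and shapes are illustrative only):

import tensorflow as tf

gamma = tf.get_variable('gamma', shape=[64], dtype=tf.float32,
                        initializer=get_initializer('ones', tf.float32))
beta = tf.get_variable('beta', shape=[64], dtype=tf.float32,
                       initializer=get_initializer('zeros', tf.float32))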
Example #29
def Conv2D(inputs, filters, name, strides=[1, 1, 1, 1], padding="SAME", pooling=None, activation="leaky", trainable=True, bn=False):
    with tf.variable_scope(name):
        weight = tf.get_variable(name='weight', dtype=tf.float32, trainable=True, shape=filters, initializer=tf.random_normal_initializer(stddev=0.01))

        output = tf.nn.conv2d(inputs, weight, strides=strides, padding=padding)
        if pooling == "max":
            output = tf.nn.max_pool(output, ksize=[1], strides=[1, 2, 2, 1], padding="SAME") 
        elif pooling == "avg":
            output = tf.nn.avg_pool(output, ksize=[1], strides=[1, 2, 2, 1], padding="SAME") 
 
        if bn:
            output = tf.layers.batch_normalization(output, beta_initializer=tf.zeros_initializer(),
                gamma_initializer=tf.ones_initializer(),
                moving_mean_initializer=tf.zeros_initializer(),
                moving_variance_initializer=tf.ones_initializer(),
                training=trainable)

        if activation == "leaky":
            output = tf.nn.leaky_relu(output)
        elif activation == "relu":
            output = tf.nn.relu(output)
    return output
Example #30
    def _create_continuous_state_encoder(self, s_size, h_size, num_streams, activation, num_layers):
        """
        Builds a set of hidden state encoders.
        :param s_size: state input size.
        :param h_size: Hidden layer size.
        :param num_streams: Number of state streams to construct.
        :param activation: What type of activation function to use for layers.
        :param num_layers: Number of hidden layers per stream.
        :return: List of hidden layer tensors.
        """
        self.state_in = tf.placeholder(shape=[None, s_size[0], s_size[1], s_size[2]],
                                       dtype=tf.float32, name='state')

        if self.normalize > 0:
            self.running_mean = tf.get_variable("running_mean", [s_size], trainable=False, dtype=tf.float32,
                                                initializer=tf.zeros_initializer())
            self.running_variance = tf.get_variable("running_variance", [s_size], trainable=False, dtype=tf.float32,
                                                    initializer=tf.ones_initializer())
            self.norm_running_variance = tf.get_variable("norm_running_variance", [s_size], trainable=False,
                                                         dtype=tf.float32,
                                                         initializer=tf.ones_initializer())

            self.normalized_state = tf.clip_by_value(
                (self.state_in - self.running_mean) / tf.sqrt(self.norm_running_variance), -5, 5, name="normalized_state")

            self.new_mean = tf.placeholder(shape=[s_size], dtype=tf.float32, name='new_mean')
            self.new_variance = tf.placeholder(shape=[s_size], dtype=tf.float32, name='new_variance')
            self.update_mean = tf.assign(self.running_mean, self.new_mean)
            self.update_variance = tf.assign(self.running_variance, self.new_variance)
            self.update_norm_variance = tf.assign(self.norm_running_variance,
                                                  self.running_variance / (tf.cast(self.global_step, tf.float32) + 1))
        else:
            self.normalized_state = self.state_in
        streams = []
        for i in range(num_streams):
            hidden = self.normalized_state
            for j in range(num_layers):
                hidden = tf.layers.dense(hidden, h_size, use_bias=False, activation=activation)
            streams.append(hidden)
        return streams