Example #1
    def get_real_model(self):
        """Get real model of initializer."""
        if self.model:
            return self.model
        else:
            if self.type == 'truncated_normal_initializer':
                self.model = tf.truncated_normal_initializer(
                    mean=self.mean, stddev=self.stddev)
            elif self.type == 'random_normal_initializer':
                self.model = tf.random_normal_initializer(mean=self.mean,
                                                          stddev=self.stddev)
            elif self.type == 'variance_scaling_initializer':
                enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer.
                                   DESCRIPTOR.enum_types_by_name['Mode'])
                mode = self.mode
                if mode == 'FAN_IN':
                    mode = 0
                elif mode == 'FAN_OUT':
                    mode = 1
                elif mode == 'FAN_AVG':
                    mode = 2
                mode = enum_descriptor.values_by_number[mode].name
                self.model = slim.variance_scaling_initializer(
                    factor=self.factor, mode=mode, uniform=self.uniform)
            else:
                raise ValueError('Unknown initializer type: {}'.format(
                    self.type))

            return self.model
Example #2
def get_initializer(desc):
    """Get initializer function."""
    mean = desc.mean if 'mean' in desc else 0.0
    stddev = desc.stddev if 'stddev' in desc else 0.01
    if desc.type == 'truncated_normal_initializer':
        return tf.truncated_normal_initializer(mean=mean, stddev=stddev)
    elif desc.type == 'random_normal_initializer':
        return tf.random_normal_initializer(mean=mean, stddev=stddev)
    elif desc.type == 'variance_scaling_initializer':
        enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer.
                           DESCRIPTOR.enum_types_by_name['Mode'])
        mode = desc.mode
        if mode == 'FAN_IN':
            mode = 0
        elif mode == 'FAN_OUT':
            mode = 1
        elif mode == 'FAN_AVG':
            mode = 2

        mode = enum_descriptor.values_by_number[mode].name
        return slim.variance_scaling_initializer(factor=desc.factor,
                                                 mode=mode,
                                                 uniform=desc.uniform)
    else:
        raise ValueError('Unknown initializer type: {}'.format(desc.type))
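
A minimal usage sketch for get_initializer above. It assumes the config object
behaves like an attribute-dict (e.g. easydict.EasyDict), so that both
desc.stddev and 'stddev' in desc work; EasyDict itself is an assumption, not
part of the source:

import tensorflow as tf  # TF 1.x assumed, matching the examples
from easydict import EasyDict  # hypothetical config container

desc = EasyDict(type='truncated_normal_initializer', mean=0.0, stddev=0.02)
init = get_initializer(desc)
weights = tf.get_variable('w', shape=[3, 3, 16, 32], initializer=init)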
Example #3
def nasnet_large_arg_scope(weight_decay=5e-5,
                           batch_norm_decay=0.9997,
                           batch_norm_epsilon=1e-3):
    """Defines the default arg scope for the NASNet-A Large ImageNet model.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: Decay for batch norm moving average.
    batch_norm_epsilon: Small float added to variance to avoid dividing by zero
      in batch norm.

  Returns:
    An `arg_scope` to use for the NASNet Large Model.
  """
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': batch_norm_decay,
        # epsilon to prevent 0s in variance.
        'epsilon': batch_norm_epsilon,
        'scale': True,
        'fused': True,
    }
    weights_regularizer = slim.l2_regularizer(weight_decay)
    weights_initializer = slim.variance_scaling_initializer(mode='FAN_OUT')
    with arg_scope([slim.fully_connected, slim.conv2d, slim.separable_conv2d],
                   weights_regularizer=weights_regularizer,
                   weights_initializer=weights_initializer):
        with arg_scope([slim.fully_connected], activation_fn=None, scope='FC'):
            with arg_scope([slim.conv2d, slim.separable_conv2d],
                           activation_fn=None,
                           biases_initializer=None):
                with arg_scope([slim.batch_norm], **batch_norm_params) as sc:
                    return sc
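
A hedged usage sketch for the scope above: re-enter the returned arg_scope at
model build time so the defaults apply to the layers created inside it
(tf_slim imported as slim; the input shape is illustrative only):

import tensorflow as tf  # TF 1.x assumed
import tf_slim as slim

images = tf.placeholder(tf.float32, [None, 331, 331, 3])
with slim.arg_scope(nasnet_large_arg_scope(weight_decay=5e-5)):
    net = slim.conv2d(images, 96, [3, 3], stride=2, scope='conv0')
    net = slim.batch_norm(net, scope='conv0_bn')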
Example #4
def resnet_arg_scope(
    weight_decay=0.0001,
    batch_norm_decay=0.997,
    batch_norm_epsilon=1e-5,
    batch_norm_scale=True,
    activation_fn=tf.nn.relu,
    use_batch_norm=True,
    batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS):
  """Defines the default ResNet arg scope.

  TODO(gpapan): The batch-normalization related default values above are
    appropriate for use in conjunction with the reference ResNet models
    released at https://github.com/KaimingHe/deep-residual-networks. When
    training ResNets from scratch, they might need to be tuned.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: The moving average decay when estimating layer activation
      statistics in batch normalization.
    batch_norm_epsilon: Small constant to prevent division by zero when
      normalizing activations by their variance in batch normalization.
    batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
      activations in the batch normalization layer.
    activation_fn: The activation function which is used in ResNet.
    use_batch_norm: Whether or not to use batch normalization.
    batch_norm_updates_collections: Collection for the update ops for
      batch norm.

  Returns:
    An `arg_scope` to use for the resnet152 models.
  """
  batch_norm_params = {
      'decay': batch_norm_decay,
      'epsilon': batch_norm_epsilon,
      'scale': batch_norm_scale,
      'updates_collections': batch_norm_updates_collections,
      'fused': None,  # Use fused batch norm if possible.
  }

  with slim.arg_scope(
      [slim.conv2d],
      weights_regularizer=slim.l2_regularizer(weight_decay),
      weights_initializer=slim.variance_scaling_initializer(),
      activation_fn=activation_fn,
      normalizer_fn=slim.batch_norm if use_batch_norm else None,
      normalizer_params=batch_norm_params):
    with slim.arg_scope([slim.batch_norm], **batch_norm_params):
      # The following implies padding='SAME' for pool1, which makes feature
      # alignment easier for dense prediction tasks. This is also used in
      # https://github.com/facebook/fb.resnet.torch. However the accompanying
      # code of 'Deep Residual Learning for Image Recognition' uses
      # padding='VALID' for pool1. You can switch to that choice by setting
      # slim.arg_scope([slim.max_pool2d], padding='VALID').
      with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
        return arg_sc
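
Because this scope routes batch-norm moving-average updates into
tf.GraphKeys.UPDATE_OPS, a training loop built under it must run those update
ops together with the train op. A sketch under that assumption (the loss is a
stand-in for illustration):

import tensorflow as tf  # TF 1.x assumed
import tf_slim as slim

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
with slim.arg_scope(resnet_arg_scope(weight_decay=1e-4)):
    net = slim.conv2d(images, 64, [7, 7], stride=2, scope='conv1')
loss = tf.reduce_mean(net)  # stand-in loss
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)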
Example #5
def attention_inception_v3_arg_scope(
        weight_decay=0.00004,
        use_batch_norm=True,
        batch_norm_decay=0.9997,
        batch_norm_epsilon=0.001,
        activation_fn=tf.nn.relu,
        batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS,
        batch_norm_scale=False):
    """Defines the default arg scope for inception models.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    use_batch_norm: If `True`, batch_norm is applied after each convolution.
    batch_norm_decay: Decay for batch norm moving average.
    batch_norm_epsilon: Small float added to variance to avoid dividing by zero
      in batch norm.
    activation_fn: Activation function for conv2d.
    batch_norm_updates_collections: Collection for the update ops for batch
      norm.
    batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
      activations in the batch normalization layer.

  Returns:
    An `arg_scope` to use for the inception models.
  """
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': batch_norm_decay,
        # epsilon to prevent 0s in variance.
        'epsilon': batch_norm_epsilon,
        # collection containing update_ops.
        'updates_collections': batch_norm_updates_collections,
        # use fused batch norm if possible.
        'fused': None,
        'scale': batch_norm_scale,
    }
    if use_batch_norm:
        normalizer_fn = slim.batch_norm
        normalizer_params = batch_norm_params
    else:
        normalizer_fn = None
        normalizer_params = {}
    # Set weight_decay for weights in Conv and FC layers.
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_regularizer=slim.l2_regularizer(weight_decay)):
        with slim.arg_scope(
            [slim.conv2d],
                weights_initializer=slim.variance_scaling_initializer(),
                activation_fn=activation_fn,
                normalizer_fn=normalizer_fn,
                normalizer_params=normalizer_params) as sc:
            return sc
Example #6
def resnet_arg_scope():
    batch_norm_params = dict(decay=0.997,
                             epsilon=1e-5,
                             scale=True,
                             is_training=tfu.is_training(),
                             fused=True,
                             data_format=tfu.data_format())

    with slim.arg_scope(
        [slim.conv2d, slim.conv3d],
            weights_regularizer=slim.l2_regularizer(1e-4),
            weights_initializer=slim.variance_scaling_initializer(),
            activation_fn=tf.nn.relu,
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
Example #7
def conv_net(inputs, hparams):
  """Builds the ConvNet from Kelz 2016."""
  with slim.arg_scope(
      [slim.conv2d, slim.fully_connected],
      activation_fn=tf.nn.relu,
      weights_initializer=slim.variance_scaling_initializer(
          factor=2.0, mode='FAN_AVG', uniform=True)):

    net = inputs
    i = 0
    for (conv_temporal_size, conv_freq_size,
         num_filters, freq_pool_size, dropout_amt) in zip(
             hparams.temporal_sizes, hparams.freq_sizes, hparams.num_filters,
             hparams.pool_sizes, hparams.dropout_keep_amts):
      net = slim.conv2d(
          net,
          num_filters, [conv_temporal_size, conv_freq_size],
          scope='conv' + str(i),
          normalizer_fn=slim.batch_norm)
      if freq_pool_size > 1:
        net = slim.max_pool2d(
            net, [1, freq_pool_size],
            stride=[1, freq_pool_size],
            scope='pool' + str(i))
      if dropout_amt < 1:
        net = slim.dropout(net, dropout_amt, scope='dropout' + str(i))
      i += 1

    # Flatten while preserving batch and time dimensions.
    dims = tf.shape(net)
    net = tf.reshape(
        net, (dims[0], dims[1], net.shape[2] * net.shape[3]),
        'flatten_end')

    net = slim.fully_connected(net, hparams.fc_size, scope='fc_end')
    net = slim.dropout(net, hparams.fc_dropout_keep_amt, scope='dropout_end')

    return net
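
The hparams object above only needs the fields conv_net reads. A hypothetical
configuration using TF 1.x HParams (the values are illustrative, loosely
following the Kelz 2016 setup; none of them come from the source):

import tensorflow as tf  # TF 1.x assumed

hparams = tf.contrib.training.HParams(
    temporal_sizes=[3, 3, 3],      # conv_temporal_size per conv layer
    freq_sizes=[3, 3, 3],          # conv_freq_size per conv layer
    num_filters=[48, 48, 96],      # filters per conv layer
    pool_sizes=[1, 2, 2],          # freq_pool_size per layer (1 = no pool)
    dropout_keep_amts=[1.0, 0.75, 0.75],  # keep prob (1 = no dropout)
    fc_size=768,
    fc_dropout_keep_amt=0.5)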
Example #8
def _build_initializer(initializer):
    """Build a tf initializer from config.

  Args:
    initializer: hyperparams_pb2.Hyperparams.initializer proto.

  Returns:
    tf initializer.

  Raises:
    ValueError: On unknown initializer.
  """
    initializer_oneof = initializer.WhichOneof('initializer_oneof')
    if initializer_oneof == 'truncated_normal_initializer':
        return tf.truncated_normal_initializer(
            mean=initializer.truncated_normal_initializer.mean,
            stddev=initializer.truncated_normal_initializer.stddev)
    if initializer_oneof == 'random_normal_initializer':
        return tf.random_normal_initializer(
            mean=initializer.random_normal_initializer.mean,
            stddev=initializer.random_normal_initializer.stddev)
    if initializer_oneof == 'variance_scaling_initializer':
        enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer.
                           DESCRIPTOR.enum_types_by_name['Mode'])
        mode = enum_descriptor.values_by_number[
            initializer.variance_scaling_initializer.mode].name
        return slim.variance_scaling_initializer(
            factor=initializer.variance_scaling_initializer.factor,
            mode=mode,
            uniform=initializer.variance_scaling_initializer.uniform)
    if initializer_oneof == 'glorot_normal_initializer':
        return tf.glorot_normal_initializer()
    if initializer_oneof == 'glorot_uniform_initializer':
        return tf.glorot_uniform_initializer()

    raise ValueError(
        'Unknown initializer function: {}'.format(initializer_oneof))
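
A hedged sketch of driving this builder from a text-format config, assuming
the TF Object Detection API's hyperparams.proto, where an Initializer message
wraps the oneof read above:

from google.protobuf import text_format
from object_detection.protos import hyperparams_pb2

init_proto = hyperparams_pb2.Initializer()
text_format.Merge('truncated_normal_initializer { mean: 0.0 stddev: 0.03 }',
                  init_proto)
initializer = _build_initializer(init_proto)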
Example #9
def _build_initializer(initializer, build_for_keras=False):
    """Build a tf initializer from config.

  Args:
    initializer: hyperparams_pb2.Hyperparams.initializer proto.
    build_for_keras: Whether the initializers should be built for Keras
      operators. If false builds for Slim.

  Returns:
    tf initializer.

  Raises:
    ValueError: On unknown initializer.
  """
    initializer_oneof = initializer.WhichOneof('initializer_oneof')
    if initializer_oneof == 'truncated_normal_initializer':
        return tf.truncated_normal_initializer(
            mean=initializer.truncated_normal_initializer.mean,
            stddev=initializer.truncated_normal_initializer.stddev)
    if initializer_oneof == 'random_normal_initializer':
        return tf.random_normal_initializer(
            mean=initializer.random_normal_initializer.mean,
            stddev=initializer.random_normal_initializer.stddev)
    if initializer_oneof == 'variance_scaling_initializer':
        enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer.
                           DESCRIPTOR.enum_types_by_name['Mode'])
        mode = enum_descriptor.values_by_number[
            initializer.variance_scaling_initializer.mode].name
        if build_for_keras:
            if initializer.variance_scaling_initializer.uniform:
                return tf.variance_scaling_initializer(
                    scale=initializer.variance_scaling_initializer.factor,
                    mode=mode.lower(),
                    distribution='uniform')
            else:
                # In TF 1.9 release and earlier, the truncated_normal distribution was
                # not supported correctly. So, in these earlier versions of tensorflow,
                # the ValueError will be raised, and we manually truncate the
                # distribution scale.
                #
                # It is insufficient to just set distribution to `normal` from the
                # start, because the `normal` distribution in newer Tensorflow versions
                # creates a truncated distribution, whereas it created untruncated
                # distributions in older versions.
                try:
                    return tf.variance_scaling_initializer(
                        scale=initializer.variance_scaling_initializer.factor,
                        mode=mode.lower(),
                        distribution='truncated_normal')
                except ValueError:
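                    # Assumed rationale: 0.87962566103423978 is the stddev of
                    # a unit normal truncated to (-2, 2); dividing the scale
                    # by its square widens the plain normal to match the
                    # pre-truncation stddev that 'truncated_normal' would use.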
                    truncate_constant = 0.87962566103423978
                    truncated_scale = initializer.variance_scaling_initializer.factor / (
                        truncate_constant * truncate_constant)
                    return tf.variance_scaling_initializer(
                        scale=truncated_scale,
                        mode=mode.lower(),
                        distribution='normal')

        else:
            return slim.variance_scaling_initializer(
                factor=initializer.variance_scaling_initializer.factor,
                mode=mode,
                uniform=initializer.variance_scaling_initializer.uniform)
    if initializer_oneof is None:
        return None
    raise ValueError(
        'Unknown initializer function: {}'.format(initializer_oneof))
Example #10
def discriminator(x,
                  progress,
                  num_filters_fn,
                  resolution_schedule,
                  num_blocks=None,
                  kernel_size=3,
                  simple_arch=False,
                  scope='progressive_gan_discriminator',
                  reuse=None):
  """Discriminator network for the progressive GAN model.

  Args:
    x: A `Tensor` of NHWC format representing images of size `resolution`.
    progress: A scalar float `Tensor` of training progress.
    num_filters_fn: A function that maps `block_id` to # of filters for the
        block.
    resolution_schedule: An object of `ResolutionSchedule`.
    num_blocks: An integer of number of blocks. None means maximum number of
        blocks, i.e. `resolution_schedule.num_resolutions`. Defaults to None.
    kernel_size: An integer of convolution kernel size.
    simple_arch: Bool, use a simple architecture.
    scope: A string or variable scope.
    reuse: Whether to reuse `scope`. Defaults to None which means to inherit
        the reuse option of the parent scope.

  Returns:
    A `Tensor` of model output and a dictionary of model end points.
  """
  he_init = tf_slim.variance_scaling_initializer()

  if num_blocks is None:
    num_blocks = resolution_schedule.num_resolutions

  def _conv2d(scope, x, kernel_size, filters, padding='SAME'):
    return layers.custom_conv2d(
        x=x,
        filters=filters,
        kernel_size=kernel_size,
        padding=padding,
        activation=tf.nn.leaky_relu,
        he_initializer_slope=0.0,
        scope=scope)

  def _from_rgb(x, block_id):
    return _conv2d('from_rgb', x, 1, num_filters_fn(block_id))

  if resolution_schedule.scale_mode == 'H':
    strides = (resolution_schedule.scale_base, 1)
  else:
    strides = (resolution_schedule.scale_base,
               resolution_schedule.scale_base)

  end_points = {}

  with tf.variable_scope(scope, reuse=reuse):
    x0 = x
    end_points['rgb'] = x0

    lods = []
    for block_id in range(num_blocks, 0, -1):
      with tf.variable_scope(block_name(block_id)):
        scale = resolution_schedule.scale_factor(block_id)
        lod = resolution_schedule.downscale(x0, scale)
        end_points['downscaled_rgb_{}'.format(block_id)] = lod
        if simple_arch:
          lod = tf.layers.conv2d(
              lod,
              num_filters_fn(block_id),
              kernel_size=1,
              padding='SAME',
              name='from_rgb',
              kernel_initializer=he_init)
          lod = tf.nn.relu(lod)
        else:
          lod = _from_rgb(lod, block_id)
        # alpha_i is used to replace lod_select.
        alpha = _discriminator_alpha(block_id, progress)
        end_points['alpha_{}'.format(block_id)] = alpha
      lods.append((lod, alpha))

    lods_iter = iter(lods)
    x, _ = next(lods_iter)
    for block_id in range(num_blocks, 1, -1):
      with tf.variable_scope(block_name(block_id)):
        if simple_arch:
          x = tf.layers.conv2d(
              x,
              num_filters_fn(block_id-1),
              strides=strides,
              kernel_size=kernel_size,
              padding='SAME',
              name='conv',
              kernel_initializer=he_init)
          x = tf.nn.relu(x)
        else:
          x = _conv2d('conv0', x, kernel_size, num_filters_fn(block_id))
          x = _conv2d('conv1', x, kernel_size, num_filters_fn(block_id - 1))
          x = resolution_schedule.downscale(x, resolution_schedule.scale_base)
        lod, alpha = next(lods_iter)
        x = alpha * lod + (1.0 - alpha) * x

    with tf.variable_scope(block_name(1)):
      x = layers.scalar_concat(x, layers.minibatch_mean_stddev(x))
      if simple_arch:
        x = tf.reshape(x, [tf.shape(x)[0], -1])  # flatten
        x = tf.layers.dense(x, num_filters_fn(0), name='last_conv',
                            kernel_initializer=he_init)
        x = tf.reshape(x, [tf.shape(x)[0], 1, 1, num_filters_fn(0)])
        x = tf.nn.relu(x)
      else:
        x = _conv2d('conv0', x, kernel_size, num_filters_fn(1))
        x = _conv2d('conv1', x, resolution_schedule.start_resolutions,
                    num_filters_fn(0), 'VALID')
      end_points['last_conv'] = x
      if simple_arch:
        logits = tf.layers.dense(x, 1, name='logits',
                                 kernel_initializer=he_init)
      else:
        logits = layers.custom_dense(x=x, units=1, scope='logits')
      end_points['logits'] = logits

  return logits, end_points
Example #11
def generator(z,
              progress,
              num_filters_fn,
              resolution_schedule,
              num_blocks=None,
              kernel_size=3,
              colors=3,
              to_rgb_activation=None,
              simple_arch=False,
              scope='progressive_gan_generator',
              reuse=None):
  """Generator network for the progressive GAN model.

  Args:
    z: A `Tensor` of latent vector. The first dimension must be batch size.
    progress: A scalar float `Tensor` of training progress.
    num_filters_fn: A function that maps `block_id` to # of filters for the
        block.
    resolution_schedule: An object of `ResolutionSchedule`.
    num_blocks: An integer of number of blocks. None means maximum number of
        blocks, i.e. `resolution_schedule.num_resolutions`. Defaults to None.
    kernel_size: An integer of convolution kernel size.
    colors: Number of output color channels. Defaults to 3.
    to_rgb_activation: Activation function applied when output rgb.
    simple_arch: Whether to use a simpler architecture variant for lower
        memory usage and faster speed.
    scope: A string or variable scope.
    reuse: Whether to reuse `scope`. Defaults to None which means to inherit
        the reuse option of the parent scope.
  Returns:
    A `Tensor` of model output and a dictionary of model end points.
  """
  if num_blocks is None:
    num_blocks = resolution_schedule.num_resolutions

  start_h, start_w = resolution_schedule.start_resolutions
  final_h, final_w = resolution_schedule.final_resolutions

  def _conv2d(scope, x, kernel_size, filters, padding='SAME'):
    return layers.custom_conv2d(
        x=x,
        filters=filters,
        kernel_size=kernel_size,
        padding=padding,
        activation=lambda x: layers.pixel_norm(tf.nn.leaky_relu(x)),
        he_initializer_slope=0.0,
        scope=scope)

  def _to_rgb(x):
    return layers.custom_conv2d(
        x=x,
        filters=colors,
        kernel_size=1,
        padding='SAME',
        activation=to_rgb_activation,
        scope='to_rgb')

  he_init = tf_slim.variance_scaling_initializer()

  end_points = {}

  with tf.variable_scope(scope, reuse=reuse):
    with tf.name_scope('input'):
      x = tf_slim.flatten(z)
      end_points['latent_vector'] = x

    with tf.variable_scope(block_name(1)):
      if simple_arch:
        x_shape = tf.shape(x)
        x = tf.layers.dense(x, start_h*start_w*num_filters_fn(1),
                            kernel_initializer=he_init)
        x = tf.nn.relu(x)
        x = tf.reshape(x, [x_shape[0], start_h, start_w, num_filters_fn(1)])
      else:
        x = tf.expand_dims(tf.expand_dims(x, 1), 1)
        x = layers.pixel_norm(x)
        # Pad the 1 x 1 image to 2 * (start_h - 1) x 2 * (start_w - 1)
        # with zeros for the next conv.
        x = tf.pad(x, [[0] * 2, [start_h - 1] * 2, [start_w - 1] * 2, [0] * 2])
        # The output is start_h x start_w x num_filters_fn(1).
        x = _conv2d('conv0', x, (start_h, start_w), num_filters_fn(1), 'VALID')
        x = _conv2d('conv1', x, kernel_size, num_filters_fn(1))
      lods = [x]

    if resolution_schedule.scale_mode == 'H':
      strides = (resolution_schedule.scale_base, 1)
    else:
      strides = (resolution_schedule.scale_base,
                 resolution_schedule.scale_base)

    for block_id in range(2, num_blocks + 1):
      with tf.variable_scope(block_name(block_id)):
        if simple_arch:
          x = tf.layers.conv2d_transpose(
              x,
              num_filters_fn(block_id),
              kernel_size=kernel_size,
              strides=strides,
              padding='SAME',
              kernel_initializer=he_init)
          x = tf.nn.relu(x)
        else:
          x = resolution_schedule.upscale(x, resolution_schedule.scale_base)
          x = _conv2d('conv0', x, kernel_size, num_filters_fn(block_id))
          x = _conv2d('conv1', x, kernel_size, num_filters_fn(block_id))
        lods.append(x)

    outputs = []
    for block_id in range(1, num_blocks + 1):
      with tf.variable_scope(block_name(block_id)):
        if simple_arch:
          lod = lods[block_id - 1]
          lod = tf.layers.conv2d(
              lod,
              colors,
              kernel_size=1,
              padding='SAME',
              name='to_rgb',
              kernel_initializer=he_init)
          lod = to_rgb_activation(lod)
        else:
          lod = _to_rgb(lods[block_id - 1])
        scale = resolution_schedule.scale_factor(block_id)
        lod = resolution_schedule.upscale(lod, scale)
        end_points['upscaled_rgb_{}'.format(block_id)] = lod

        # alpha_i is used to replace lod_select. Note sum(alpha_i) is
        # guaranteed to be 1.
        alpha = _generator_alpha(block_id, progress)
        end_points['alpha_{}'.format(block_id)] = alpha

        outputs.append(lod * alpha)

    predictions = tf.add_n(outputs)
    batch_size = int(z.shape[0])
    predictions.set_shape([batch_size, final_h, final_w, colors])
    end_points['predictions'] = predictions

  return predictions, end_points
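
A hedged sketch of calling the generator above. ResolutionSchedule lives in
the same progressive GAN module as these networks; its constructor arguments
here are assumptions, as is the constant filter count:

import tensorflow as tf  # TF 1.x assumed

schedule = ResolutionSchedule(start_resolutions=(4, 4), scale_base=2,
                              num_resolutions=4)  # assumed signature
z = tf.random_normal([8, 128])
fake_images, end_points = generator(
    z,
    progress=tf.constant(1.0),  # fully progressed
    num_filters_fn=lambda block_id: 64,
    resolution_schedule=schedule,
    to_rgb_activation=tf.tanh)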
Example #12
def convolutional_alexnet_arg_scope(embed_config,
                                    trainable=True,
                                    is_training=False):
    """Defines the default arg scope.

  Args:
    embed_config: A dictionary which contains configurations for the embedding function.
    trainable: Whether the weights in the embedding function are trainable.
    is_training: Whether the embedding function is built for training.

  Returns:
    An `arg_scope` to use for the convolutional_alexnet models.
  """
    # Only consider the model to be in training mode if it's trainable.
    # This is vital for batch_norm since moving_mean and moving_variance
    # will get updated even if not trainable.
    is_model_training = trainable and is_training

    if get(embed_config, 'use_bn', True):
        batch_norm_scale = get(embed_config, 'bn_scale', True)
        batch_norm_decay = 1 - get(embed_config, 'bn_momentum', 3e-4)
        batch_norm_epsilon = get(embed_config, 'bn_epsilon', 1e-6)
        batch_norm_params = {
            "scale": batch_norm_scale,
            # Decay for the moving averages.
            "decay": batch_norm_decay,
            # Epsilon to prevent 0s in variance.
            "epsilon": batch_norm_epsilon,
            "trainable": trainable,
            "is_training": is_model_training,
            # Collection containing the moving mean and moving variance.
            "variables_collections": {
                "beta": None,
                "gamma": None,
                "moving_mean": ["moving_vars"],
                "moving_variance": ["moving_vars"],
            },
            'updates_collections':
            None,  # Ensure that updates are done within a frame
        }
        normalizer_fn = slim.batch_norm
    else:
        batch_norm_params = {}
        normalizer_fn = None

    weight_decay = get(embed_config, 'weight_decay', 5e-4)
    if trainable:
        weights_regularizer = slim.l2_regularizer(weight_decay)
    else:
        weights_regularizer = None

    init_method = get(embed_config, 'init_method', 'kaiming_normal')
    if is_model_training:
        logging.info('embedding init method -- {}'.format(init_method))
    if init_method == 'kaiming_normal':
        # The same setting as siamese-fc
        initializer = slim.variance_scaling_initializer(factor=2.0,
                                                        mode='FAN_OUT',
                                                        uniform=False)
    else:
        initializer = slim.xavier_initializer()

    with slim.arg_scope([slim.conv2d],
                        weights_regularizer=weights_regularizer,
                        weights_initializer=initializer,
                        padding='VALID',
                        trainable=trainable,
                        activation_fn=tf.nn.relu,
                        normalizer_fn=normalizer_fn,
                        normalizer_params=batch_norm_params):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            with slim.arg_scope([slim.batch_norm], is_training=True) as arg_sc:
                return arg_sc
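
A hypothetical configuration for the scope above, assuming get(cfg, key,
default) is a plain dict lookup helper (it is not shown in this example):

import tensorflow as tf  # TF 1.x assumed
import tf_slim as slim

embed_config = {
    'use_bn': True,
    'bn_momentum': 3e-4,
    'weight_decay': 5e-4,
    'init_method': 'kaiming_normal',
}
images = tf.placeholder(tf.float32, [None, 255, 255, 3])
with slim.arg_scope(convolutional_alexnet_arg_scope(
        embed_config, trainable=True, is_training=True)):
    net = slim.conv2d(images, 96, [11, 11], stride=2, scope='conv1')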
Example #13
    def run(self, inputs, trainable=True):
        """Runs model."""
        _, height, width, _ = inputs["input_a"].shape.as_list()
        with tf.variable_scope("FlowNetC"):
            with slim.arg_scope(
                [slim.conv2d, slim.conv2d_transpose],
                    # Only backprop this network if trainable.
                    trainable=trainable,
                    # He (aka MSRA) weight initialization.
                    weights_initializer=slim.variance_scaling_initializer(),
                    activation_fn=leaky_relu,
                    # We will do our own padding to match the original Caffe code.
                    padding="VALID"):

                weights_regularizer = slim.l2_regularizer(WEIGHT_DECAY)
                with slim.arg_scope([slim.conv2d],
                                    weights_regularizer=weights_regularizer):
                    with slim.arg_scope([slim.conv2d], stride=2):
                        conv_a_1 = slim.conv2d(pad(inputs["input_a"], 3),
                                               64,
                                               7,
                                               scope="conv1")
                        conv_a_2 = slim.conv2d(pad(conv_a_1, 2),
                                               128,
                                               5,
                                               scope="conv2")
                        conv_a_3 = slim.conv2d(pad(conv_a_2, 2),
                                               256,
                                               5,
                                               scope="conv3")

                        conv_b_1 = slim.conv2d(pad(inputs["input_b"], 3),
                                               64,
                                               7,
                                               scope="conv1",
                                               reuse=True)
                        conv_b_2 = slim.conv2d(pad(conv_b_1, 2),
                                               128,
                                               5,
                                               scope="conv2",
                                               reuse=True)
                        conv_b_3 = slim.conv2d(pad(conv_b_2, 2),
                                               256,
                                               5,
                                               scope="conv3",
                                               reuse=True)

                        # Compute cross correlation with leaky relu activation.
                        cc = correlation(conv_a_3, conv_b_3, 1, 20, 1, 2, 20)
                        cc_relu = leaky_relu(cc)

                    # Combine cross correlation results with convolution of feature map A.
                    net_a_conv = slim.conv2d(conv_a_3,
                                             32,
                                             1,
                                             scope="conv_redir")
                    # Concatenate along the channels axis.
                    net = tf.concat([net_a_conv, cc_relu], axis=3)

                    conv3_1 = slim.conv2d(pad(net), 256, 3, scope="conv3_1")
                    with slim.arg_scope([slim.conv2d],
                                        num_outputs=512,
                                        kernel_size=3):
                        conv4 = slim.conv2d(pad(conv3_1),
                                            stride=2,
                                            scope="conv4")
                        conv4_1 = slim.conv2d(pad(conv4), scope="conv4_1")
                        conv5 = slim.conv2d(pad(conv4_1),
                                            stride=2,
                                            scope="conv5")
                        conv5_1 = slim.conv2d(pad(conv5), scope="conv5_1")
                    conv6 = slim.conv2d(pad(conv5_1),
                                        1024,
                                        3,
                                        stride=2,
                                        scope="conv6")
                    conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope="conv6_1")
                    # START: Refinement Network.
                    with slim.arg_scope([slim.conv2d_transpose],
                                        biases_initializer=None):
                        predict_flow6 = slim.conv2d(pad(conv6_1),
                                                    2,
                                                    3,
                                                    scope="predict_flow6",
                                                    activation_fn=None)
                        deconv5 = antipad(
                            slim.conv2d_transpose(conv6_1,
                                                  512,
                                                  4,
                                                  stride=2,
                                                  scope="deconv5"))
                        upsample_flow6to5 = antipad(
                            slim.conv2d_transpose(predict_flow6,
                                                  2,
                                                  4,
                                                  stride=2,
                                                  scope="upsample_flow6to5",
                                                  activation_fn=None))
                        concat5 = tf.concat(
                            [conv5_1, deconv5, upsample_flow6to5], axis=3)

                        predict_flow5 = slim.conv2d(pad(concat5),
                                                    2,
                                                    3,
                                                    scope="predict_flow5",
                                                    activation_fn=None)
                        deconv4 = antipad(
                            slim.conv2d_transpose(concat5,
                                                  256,
                                                  4,
                                                  stride=2,
                                                  scope="deconv4"))
                        upsample_flow5to4 = antipad(
                            slim.conv2d_transpose(predict_flow5,
                                                  2,
                                                  4,
                                                  stride=2,
                                                  scope="upsample_flow5to4",
                                                  activation_fn=None))
                        concat4 = tf.concat(
                            [conv4_1, deconv4, upsample_flow5to4], axis=3)

                        predict_flow4 = slim.conv2d(pad(concat4),
                                                    2,
                                                    3,
                                                    scope="predict_flow4",
                                                    activation_fn=None)
                        deconv3 = antipad(
                            slim.conv2d_transpose(concat4,
                                                  128,
                                                  4,
                                                  stride=2,
                                                  scope="deconv3"))
                        upsample_flow4to3 = antipad(
                            slim.conv2d_transpose(predict_flow4,
                                                  2,
                                                  4,
                                                  stride=2,
                                                  scope="upsample_flow4to3",
                                                  activation_fn=None))
                        concat3 = tf.concat(
                            [conv3_1, deconv3, upsample_flow4to3], axis=3)

                        predict_flow3 = slim.conv2d(pad(concat3),
                                                    2,
                                                    3,
                                                    scope="predict_flow3",
                                                    activation_fn=None)
                        deconv2 = antipad(
                            slim.conv2d_transpose(concat3,
                                                  64,
                                                  4,
                                                  stride=2,
                                                  scope="deconv2"))
                        upsample_flow3to2 = antipad(
                            slim.conv2d_transpose(predict_flow3,
                                                  2,
                                                  4,
                                                  stride=2,
                                                  scope="upsample_flow3to2",
                                                  activation_fn=None))
                        concat2 = tf.concat(
                            [conv_a_2, deconv2, upsample_flow3to2], axis=3)

                        predict_flow2 = slim.conv2d(pad(concat2),
                                                    2,
                                                    3,
                                                    scope="predict_flow2",
                                                    activation_fn=None)
                    # END: Refinement Network.

                    flow = predict_flow2 * 20.0

                    flow = tf.image.resize_bilinear(flow,
                                                    tf.stack([height, width]),
                                                    align_corners=True)

                    return {
                        "predict_flow6": predict_flow6,
                        "predict_flow5": predict_flow5,
                        "predict_flow4": predict_flow4,
                        "predict_flow3": predict_flow3,
                        "predict_flow2": predict_flow2,
                        "flow": flow,
                    }
Example #14
    def run(self, inputs, trainable=True):
        """Runs model."""
        _, height, width, _ = inputs["input_a"].shape.as_list()
        with tf.variable_scope("FlowNet2"):
            # Forward pass through FlowNetCSS and FlowNetSD with weights frozen.
            net_css_predictions = self.net_css.run(inputs, trainable=False)
            net_sd_predictions = self.net_sd.run(inputs, trainable=False)

            sd_flow_norm = channel_norm(net_sd_predictions["flow"])
            css_flow_norm = channel_norm(net_css_predictions["flow"])

            flow_warp_sd = flow_warp(inputs["input_b"],
                                     net_sd_predictions["flow"])
            img_diff_sd = inputs["input_a"] - flow_warp_sd
            img_diff_sd_norm = channel_norm(img_diff_sd)

            flow_warp_css = flow_warp(inputs["input_b"],
                                      net_css_predictions["flow"])
            img_diff_css = inputs["input_a"] - flow_warp_css
            img_diff_css_norm = channel_norm(img_diff_css)

            input_to_fusion = tf.concat([
                inputs["input_a"], net_sd_predictions["flow"],
                net_css_predictions["flow"], sd_flow_norm, css_flow_norm,
                img_diff_sd_norm, img_diff_css_norm
            ],
                                        axis=3)

            # Fusion Network.
            with slim.arg_scope(
                [slim.conv2d, slim.conv2d_transpose],
                    # Only backprop this network if trainable.
                    trainable=trainable,
                    # He (aka MSRA) weight initialization.
                    weights_initializer=slim.variance_scaling_initializer(),
                    activation_fn=leaky_relu,
                    # We will do our own padding to match the original Caffe code.
                    padding="VALID"):

                weights_regularizer = slim.l2_regularizer(WEIGHT_DECAY)
                with slim.arg_scope([slim.conv2d],
                                    weights_regularizer=weights_regularizer):
                    fuse_conv0 = slim.conv2d(pad(input_to_fusion),
                                             64,
                                             3,
                                             scope="fuse_conv0")
                    fuse_conv1 = slim.conv2d(pad(fuse_conv0),
                                             64,
                                             3,
                                             stride=2,
                                             scope="fuse_conv1")
                    fuse_conv1_1 = slim.conv2d(pad(fuse_conv1),
                                               128,
                                               3,
                                               scope="fuse_conv1_1")
                    fuse_conv2 = slim.conv2d(pad(fuse_conv1_1),
                                             128,
                                             3,
                                             stride=2,
                                             scope="fuse_conv2")
                    fuse_conv2_1 = slim.conv2d(pad(fuse_conv2),
                                               128,
                                               3,
                                               scope="fuse_conv2_1")

                    predict_flow2 = slim.conv2d(pad(fuse_conv2_1),
                                                2,
                                                3,
                                                scope="predict_flow2",
                                                activation_fn=None)
                    fuse_deconv1 = antipad(
                        slim.conv2d_transpose(fuse_conv2_1,
                                              32,
                                              4,
                                              stride=2,
                                              scope="fuse_deconv1"))
                    fuse_upsample_flow2to1 = antipad(
                        slim.conv2d_transpose(predict_flow2,
                                              2,
                                              4,
                                              stride=2,
                                              scope="fuse_upsample_flow2to1",
                                              activation_fn=None))
                    concat1 = tf.concat(
                        [fuse_conv1_1, fuse_deconv1, fuse_upsample_flow2to1],
                        axis=3)
                    fuse_interconv1 = slim.conv2d(pad(concat1),
                                                  32,
                                                  3,
                                                  activation_fn=None,
                                                  scope="fuse_interconv1")

                    predict_flow1 = slim.conv2d(pad(fuse_interconv1),
                                                2,
                                                3,
                                                scope="predict_flow1",
                                                activation_fn=None)
                    fuse_deconv0 = antipad(
                        slim.conv2d_transpose(concat1,
                                              16,
                                              4,
                                              stride=2,
                                              scope="fuse_deconv0"))
                    fuse_upsample_flow1to0 = antipad(
                        slim.conv2d_transpose(predict_flow1,
                                              2,
                                              4,
                                              stride=2,
                                              scope="fuse_upsample_flow1to0",
                                              activation_fn=None))
                    concat0 = tf.concat(
                        [fuse_conv0, fuse_deconv0, fuse_upsample_flow1to0],
                        axis=3)
                    fuse_interconv0 = slim.conv2d(pad(concat0),
                                                  16,
                                                  3,
                                                  activation_fn=None,
                                                  scope="fuse_interconv0")

                    predict_flow0 = slim.conv2d(pad(fuse_interconv0),
                                                2,
                                                3,
                                                activation_fn=None,
                                                scope="predict_flow0")

                    flow = tf.image.resize_bilinear(predict_flow0,
                                                    tf.stack([height, width]),
                                                    align_corners=True)
                    return {
                        "predict_flow0": predict_flow0,
                        "flow": flow,
                    }
Example #15
    def run(self, inputs, trainable=True):
        """Runs model."""
        _, height, width, _ = inputs["input_a"].shape.as_list()
        with tf.variable_scope("FlowNetSD"):
            concat_inputs = tf.concat([inputs["input_a"], inputs["input_b"]],
                                      axis=3)
            with slim.arg_scope(
                [slim.conv2d, slim.conv2d_transpose],
                    # Only backprop this network if trainable.
                    trainable=trainable,
                    # He (aka MSRA) weight initialization.
                    weights_initializer=slim.variance_scaling_initializer(),
                    activation_fn=leaky_relu,
                    # We will do our own padding to match the original Caffe code.
                    padding="VALID"):

                weights_regularizer = slim.l2_regularizer(WEIGHT_DECAY)
                with slim.arg_scope([slim.conv2d],
                                    weights_regularizer=weights_regularizer):
                    conv0 = slim.conv2d(pad(concat_inputs),
                                        64,
                                        3,
                                        scope="conv0")
                    conv1 = slim.conv2d(pad(conv0),
                                        64,
                                        3,
                                        stride=2,
                                        scope="conv1")
                    conv1_1 = slim.conv2d(pad(conv1), 128, 3, scope="conv1_1")
                    conv2 = slim.conv2d(pad(conv1_1),
                                        128,
                                        3,
                                        stride=2,
                                        scope="conv2")
                    conv2_1 = slim.conv2d(pad(conv2), 128, 3, scope="conv2_1")
                    conv3 = slim.conv2d(pad(conv2_1),
                                        256,
                                        3,
                                        stride=2,
                                        scope="conv3")
                    conv3_1 = slim.conv2d(pad(conv3), 256, 3, scope="conv3_1")
                    conv4 = slim.conv2d(pad(conv3_1),
                                        512,
                                        3,
                                        stride=2,
                                        scope="conv4")
                    conv4_1 = slim.conv2d(pad(conv4), 512, 3, scope="conv4_1")
                    conv5 = slim.conv2d(pad(conv4_1),
                                        512,
                                        3,
                                        stride=2,
                                        scope="conv5")
                    conv5_1 = slim.conv2d(pad(conv5), 512, 3, scope="conv5_1")
                    conv6 = slim.conv2d(pad(conv5_1),
                                        1024,
                                        3,
                                        stride=2,
                                        scope="conv6")
                    conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope="conv6_1")
                    # START: Refinement Network.
                    with slim.arg_scope([slim.conv2d_transpose],
                                        biases_initializer=None):
                        predict_flow6 = slim.conv2d(pad(conv6_1),
                                                    2,
                                                    3,
                                                    scope="predict_flow6",
                                                    activation_fn=None)
                        deconv5 = antipad(
                            slim.conv2d_transpose(conv6_1,
                                                  512,
                                                  4,
                                                  stride=2,
                                                  scope="deconv5"))
                        upsample_flow6to5 = antipad(
                            slim.conv2d_transpose(predict_flow6,
                                                  2,
                                                  4,
                                                  stride=2,
                                                  scope="upsample_flow6to5",
                                                  activation_fn=None))
                        concat5 = tf.concat(
                            [conv5_1, deconv5, upsample_flow6to5], axis=3)
                        interconv5 = slim.conv2d(pad(concat5),
                                                 512,
                                                 3,
                                                 activation_fn=None,
                                                 scope="interconv5")

                        predict_flow5 = slim.conv2d(pad(interconv5),
                                                    2,
                                                    3,
                                                    scope="predict_flow5",
                                                    activation_fn=None)
                        deconv4 = antipad(
                            slim.conv2d_transpose(concat5,
                                                  256,
                                                  4,
                                                  stride=2,
                                                  scope="deconv4"))
                        upsample_flow5to4 = antipad(
                            slim.conv2d_transpose(predict_flow5,
                                                  2,
                                                  4,
                                                  stride=2,
                                                  scope="upsample_flow5to4",
                                                  activation_fn=None))
                        concat4 = tf.concat(
                            [conv4_1, deconv4, upsample_flow5to4], axis=3)
                        interconv4 = slim.conv2d(pad(concat4),
                                                 256,
                                                 3,
                                                 activation_fn=None,
                                                 scope="interconv4")

                        predict_flow4 = slim.conv2d(pad(interconv4),
                                                    2,
                                                    3,
                                                    scope="predict_flow4",
                                                    activation_fn=None)
                        deconv3 = antipad(
                            slim.conv2d_transpose(concat4,
                                                  128,
                                                  4,
                                                  stride=2,
                                                  scope="deconv3"))
                        upsample_flow4to3 = antipad(
                            slim.conv2d_transpose(predict_flow4,
                                                  2,
                                                  4,
                                                  stride=2,
                                                  scope="upsample_flow4to3",
                                                  activation_fn=None))
                        concat3 = tf.concat(
                            [conv3_1, deconv3, upsample_flow4to3], axis=3)
                        interconv3 = slim.conv2d(pad(concat3),
                                                 128,
                                                 3,
                                                 activation_fn=None,
                                                 scope="interconv3")

                        predict_flow3 = slim.conv2d(pad(interconv3),
                                                    2,
                                                    3,
                                                    scope="predict_flow3",
                                                    activation_fn=None)
                        deconv2 = antipad(
                            slim.conv2d_transpose(concat3,
                                                  64,
                                                  4,
                                                  stride=2,
                                                  scope="deconv2"))
                        upsample_flow3to2 = antipad(
                            slim.conv2d_transpose(predict_flow3,
                                                  2,
                                                  4,
                                                  stride=2,
                                                  scope="upsample_flow3to2",
                                                  activation_fn=None))
                        concat2 = tf.concat(
                            [conv2, deconv2, upsample_flow3to2], axis=3)
                        interconv2 = slim.conv2d(pad(concat2),
                                                 64,
                                                 3,
                                                 activation_fn=None,
                                                 scope="interconv2")

                        predict_flow2 = slim.conv2d(pad(interconv2),
                                                    2,
                                                    3,
                                                    scope="predict_flow2",
                                                    activation_fn=None)
                    # END: Refinement Network.

                    flow = predict_flow2 * 0.05
                    flow = tf.image.resize_bilinear(flow,
                                                    tf.stack([height, width]),
                                                    align_corners=True)

                    return {
                        "predict_flow6": predict_flow6,
                        "predict_flow5": predict_flow5,
                        "predict_flow4": predict_flow4,
                        "predict_flow3": predict_flow3,
                        "predict_flow2": predict_flow2,
                        "flow": flow,
                    }