Exemplo n.º 1
0
def resample_feature_map(feat,
                         level,
                         target_level,
                         is_training,
                         target_feat_dims=256,
                         conv2d_op=tf.layers.conv2d,
                         batch_norm_relu=nn_ops.BatchNormRelu(),
                         name=None):
    """Bring a feature map to the target channel count and pyramid level.

    Args:
      feat: input feature tensor. Index 3 of its static shape is read as the
        current number of channels.
      level: pyramid level that `feat` currently sits at.
      target_level: pyramid level the result should sit at.
      is_training: forwarded to `batch_norm_relu`.
      target_feat_dims: number of channels the result should have.
      conv2d_op: callable used for the 1x1 channel projection.
      batch_norm_relu: callable applied after the projection; it is invoked
        with `relu=False` and `name='bn'`.
      name: suffix used to build the `resample_<name>` variable scope.

    Returns:
      The feature tensor after the optional channel projection and the
      optional max-pooling (`level < target_level`) or nearest-neighbor
      up-sampling (`level > target_level`).
    """
    current_feat_dims = feat.get_shape().as_list()[3]

    with tf.variable_scope('resample_{}'.format(name)):
        # Channel projection happens first and only when the counts differ.
        if current_feat_dims != target_feat_dims:
            projected = conv2d_op(feat,
                                  filters=target_feat_dims,
                                  kernel_size=(1, 1),
                                  padding='same')
            feat = batch_norm_relu(projected,
                                   is_training=is_training,
                                   relu=False,
                                   name='bn')

        if target_level > level:
            # Moving up the pyramid: pool with a window equal to the stride,
            # both being 2 ** (number of levels to climb).
            stride = int(2**(target_level - level))
            feat = tf.layers.max_pooling2d(inputs=feat,
                                           pool_size=stride,
                                           strides=[stride] * 2,
                                           padding='SAME')
        elif target_level < level:
            # Moving down the pyramid: replicate pixels by the same factor.
            scale = int(2**(level - target_level))
            feat = spatial_transform_ops.nearest_upsampling(feat, scale=scale)

    return feat
Exemplo n.º 2
0
def resample_with_sepconv(feat,
                          target_width,
                          target_num_filters,
                          use_native_resize_op=False,
                          batch_norm_activation=nn_ops.BatchNormActivation(),
                          data_format='channels_last',
                          name=None,
                          is_training=False):
    """Match resolution and feature dimension to the target block.

    The static shape of `feat` is unpacked as
    `(_, height, width, num_filters)`; `data_format` is only forwarded to the
    convolution helpers and is not consulted when unpacking the shape.

    Args:
      feat: input feature tensor with a statically known width and filter
        count.
      target_width: width the output should have.
      target_num_filters: number of filters produced by the final 1x1
        convolution.
      use_native_resize_op: if True, up-sampling uses
        `tf.image.resize_nearest_neighbor`; otherwise
        `spatial_transform_ops.nearest_upsampling` is used.
      batch_norm_activation: callable applied after each convolution.
      data_format: forwarded to the `nn_ops` convolution helpers.
      name: suffix used to build the `resample_with_sepconv_<name>` variable
        scope.
      is_training: forwarded to `batch_norm_activation`.

    Returns:
      The resampled feature tensor after the final 1x1 convolution and
      batch norm (applied with `relu=False`).

    Raises:
      ValueError: if the static width or filter count is unknown, if the
        larger of `width` / `target_width` is not divisible by the smaller,
        or if the static height is unknown while `use_native_resize_op`
        requires it to compute the output size.
    """
    _, height, width, num_filters = feat.get_shape().as_list()
    if width is None or num_filters is None:
        raise ValueError('Shape of feat is None (shape:{}).'.format(
            feat.shape))

    with tf.variable_scope('resample_with_sepconv_{}'.format(name)):
        # Down-sample.
        if width > target_width:
            if width % target_width != 0:
                raise ValueError('width ({}) is not divisible by '
                                 'target_width ({}).'.format(
                                     width, target_width))

            while width > target_width:
                feat = nn_ops.depthwise_conv2d_fixed_padding(
                    inputs=feat,
                    kernel_size=3,
                    strides=2,
                    data_format=data_format)
                feat = batch_norm_activation(feat, is_training=is_training)
                # `width` is only a loop counter from here on. True division
                # is kept on purpose so the number of stride-2 steps is
                # unchanged for every input.
                width /= 2

        # Up-sample with NN interpolation.
        elif width < target_width:
            if target_width % width != 0:
                raise ValueError('target_width ({}) is not divisible by '
                                 'width ({}).'.format(target_width, width))
            scale = target_width // width
            if use_native_resize_op:
                # The native resize needs an explicit output size, so the
                # static height must be known; fail with a clear message
                # instead of a TypeError on `None * scale`.
                if height is None:
                    raise ValueError(
                        'Height of feat is None (shape:{}).'.format(
                            feat.shape))
                feat = tf.image.resize_nearest_neighbor(
                    feat, [height * scale, width * scale])
            else:
                feat = spatial_transform_ops.nearest_upsampling(feat,
                                                                scale=scale)

        # Match feature dimension to the target block.
        feat = nn_ops.conv2d_fixed_padding(inputs=feat,
                                           filters=target_num_filters,
                                           kernel_size=1,
                                           strides=1,
                                           data_format=data_format)
        feat = batch_norm_activation(feat, relu=False, is_training=is_training)

    return feat
Exemplo n.º 3
0
  def __call__(self, multilevel_features, is_training=None):
    """Builds the FPN feature pyramid from backbone features.

    Args:
      multilevel_features: a `dict` keyed by `int` feature level (continuous,
        e.g., [2, 3, 4, 5]) whose values are the backbone features, with
        shape [batch_size, height_l, width_l, num_filters].
      is_training: `bool`, forwarded to the batch norm layers when
        `self._use_batch_norm` is set.

    Returns:
      a `dict` keyed by `int` feature level covering
      [min_level, min_level + 1, ..., max_level]. The values are the FPN
      features with shape [batch_size, height_l, width_l, fpn_feat_dims].

    Raises:
      ValueError: if the lowest backbone level is above the FPN minimum level.
    """
    backbone_levels = list(multilevel_features.keys())
    lowest_backbone_level = min(backbone_levels)
    if lowest_backbone_level > self._min_level:
      raise ValueError(
          'The minimum backbone level %d should be '%(lowest_backbone_level) +
          'less or equal to FPN minimum level %d.:'%(self._min_level))
    # Highest level that is taken directly from the backbone.
    top_level = min(max(backbone_levels), self._max_level)

    with backend.get_graph().as_default(), tf.name_scope('fpn'):
      # Lateral connections, one per backbone-provided level.
      laterals = {
          level: self._lateral_conv2d_op[level](multilevel_features[level])
          for level in range(self._min_level, top_level + 1)
      }

      # Top-down path: start at the top and add the up-sampled result of the
      # level above to each lateral feature.
      feats = {top_level: laterals[top_level]}
      for level in reversed(range(self._min_level, top_level)):
        upsampled = spatial_transform_ops.nearest_upsampling(
            feats[level + 1], 2)
        feats[level] = upsampled + laterals[level]

      # Post-hoc convolution on every merged level.
      for level in range(self._min_level, top_level + 1):
        post_hoc_conv = self._post_hoc_conv2d_op[level]
        feats[level] = post_hoc_conv(feats[level])

      # Coarser levels above the backbone (introduced for RetinaNet). Every
      # such level except the first one applies a ReLU to its input.
      first_coarse_level = top_level + 1
      for level in range(first_coarse_level, self._max_level + 1):
        coarse_input = feats[level - 1]
        if level > first_coarse_level:
          coarse_input = tf.nn.relu(coarse_input)
        feats[level] = self._coarse_conv2d_op[level](coarse_input)

      # Optional batch norm over all output levels.
      if self._use_batch_norm:
        for level in range(self._min_level, self._max_level + 1):
          batch_norm = self._batch_norm_relus[level]
          feats[level] = batch_norm(feats[level], is_training=is_training)
    return feats
Exemplo n.º 4
0
def pyramid_feature_fusion(pyramid_feats, target_level):
  """Sums all pyramid levels after resampling them to the target level.

  Every level between the smallest and largest key of `pyramid_feats` is
  looked up, so the keys are expected to be continuous integers.

  NOTE(review): the up-sampling factor is `2 ** (level - target_level)`,
  which is fractional for levels below `target_level`. Whether
  `nearest_upsampling` supports that is not visible here -- confirm that
  callers only pass the lowest level as `target_level`.

  Args:
    pyramid_feats: a dictionary containing the feature pyramid, keyed by
      `int` level.
    target_level: `int` the target feature level for feature fusion.

  Returns:
    A float Tensor of shape [batch_size, feature_height, feature_width,
      feature_channel].
  """
  levels = pyramid_feats.keys()
  lowest_level, highest_level = min(levels), max(levels)

  def _resample(level):
    # The target level is used as is; every other level is rescaled.
    if level == target_level:
      return pyramid_feats[level]
    return spatial_transform_ops.nearest_upsampling(
        pyramid_feats[level], 2**(level - target_level))

  fusion_inputs = [
      _resample(level) for level in range(lowest_level, highest_level + 1)
  ]
  return tf.math.add_n(fusion_inputs)
Exemplo n.º 5
0
def resample_with_alpha(feat,
                        input_block_fn,
                        target_width,
                        target_num_filters,
                        target_block_fn,
                        alpha=1.0,
                        use_native_resize_op=False,
                        batch_norm_activation=nn_ops.BatchNormActivation(),
                        data_format='channels_last',
                        name=None,
                        is_training=False):
    """Resample `feat` to the width and filter count of the target block.

    The static shape of `feat` is unpacked as
    `(_, height, width, num_filters)`.

    Args:
      feat: input feature tensor with a statically known width and filter
        count.
      input_block_fn: when equal to 'bottleneck', the input filter count is
        divided by 4 before being scaled by `alpha`.
      target_width: width the output should have.
      target_num_filters: filter count of the target block; multiplied by 4
        when `target_block_fn` is 'bottleneck'.
      target_block_fn: block type of the target ('bottleneck' triggers the
        4x filter multiplier).
      alpha: scaling factor applied to the intermediate filter count.
      use_native_resize_op: if True, up-sampling always goes through
        `spatial_transform_ops.native_resize`.
      batch_norm_activation: callable applied after each convolution.
      data_format: forwarded to the convolution and pooling ops.
      name: suffix used to build the `resample_with_alpha_<name>` variable
        scope.
      is_training: forwarded to `batch_norm_activation`.

    Returns:
      The resampled feature tensor after the final 1x1 convolution and
      batch norm (applied with `relu=False`).

    Raises:
      ValueError: if the static width or filter count of `feat` is unknown.
    """
    _, height, width, num_filters = feat.get_shape().as_list()
    if any(dim is None for dim in (width, num_filters)):
        raise ValueError('Shape of feat is None (shape:{}).'.format(
            feat.shape))

    # Intermediate filter count: alpha times the (possibly quartered) input.
    base_num_filters = (num_filters / 4
                        if input_block_fn == 'bottleneck' else num_filters)
    new_num_filters = int(base_num_filters * alpha)

    with tf.variable_scope('resample_with_alpha_{}'.format(name)):
        # Initial 1x1 convolution down to the intermediate filter count.
        reduced = nn_ops.conv2d_fixed_padding(inputs=feat,
                                              filters=new_num_filters,
                                              kernel_size=1,
                                              strides=1,
                                              data_format=data_format)
        feat = batch_norm_activation(reduced, is_training=is_training)

        if width > target_width:
            # Down-sampling always starts with one stride-2 3x3 convolution.
            halved = nn_ops.conv2d_fixed_padding(inputs=feat,
                                                 filters=new_num_filters,
                                                 kernel_size=3,
                                                 strides=2,
                                                 data_format=data_format)
            feat = batch_norm_activation(halved, is_training=is_training)

            # Max-pool for whatever reduction is still outstanding.
            width_ratio = width // target_width
            if width_ratio > 2:
                evenly_divisible = width % target_width == 0
                stride_size = width_ratio // 2 if evenly_divisible else 2
                pool_size = 3 if width / target_width <= 4 else 5
                feat = tf.layers.max_pooling2d(inputs=feat,
                                               pool_size=pool_size,
                                               strides=stride_size,
                                               padding='SAME',
                                               data_format=data_format)

            # Resize as a last resort when the width still does not match,
            # e.g. when `width` is not divisible by `target_width`.
            current_width = feat.get_shape().as_list()[2]
            if current_width != target_width:
                feat = spatial_transform_ops.native_resize(
                    feat, [int(target_width / width * height), target_width])
        elif width < target_width:
            # Up-sample: native resize when requested or when the ratio is
            # not an integer, nearest-neighbor replication otherwise.
            if target_width % width != 0 or use_native_resize_op:
                feat = spatial_transform_ops.native_resize(
                    feat, [int(target_width / width * height), target_width])
            else:
                feat = spatial_transform_ops.nearest_upsampling(
                    feat, scale=target_width // width)

        # Final 1x1 convolution to the filter count of the target block.
        if target_block_fn == 'bottleneck':
            target_num_filters = target_num_filters * 4
        matched = nn_ops.conv2d_fixed_padding(inputs=feat,
                                              filters=target_num_filters,
                                              kernel_size=1,
                                              strides=1,
                                              data_format=data_format)
        feat = batch_norm_activation(matched,
                                     relu=False,
                                     is_training=is_training)

    return feat
Exemplo n.º 6
0
    def __call__(self, multilevel_features, is_training=False):
        """Builds the FPN feature pyramid from backbone features.

        Args:
          multilevel_features: a `dict` keyed by `int` feature level
            (continuous, e.g., [2, 3, 4, 5]) whose values are the backbone
            features, with shape [batch_size, height_l, width_l, num_filters].
          is_training: `bool`, forwarded to the batch norm layers.

        Returns:
          a `dict` keyed by `int` feature level covering
          [min_level, min_level + 1, ..., max_level]. The values are the FPN
          features with shape
          [batch_size, height_l, width_l, fpn_feat_dims].

        Raises:
          ValueError: if the lowest backbone level is above the FPN minimum
            level.
        """
        backbone_levels = multilevel_features.keys()
        lowest_backbone_level = min(backbone_levels)
        if lowest_backbone_level > self._min_level:
            raise ValueError('The minimum backbone level %d should be ' %
                             (lowest_backbone_level) +
                             'less or equal to FPN minimum level %d.:' %
                             (self._min_level))
        # Highest level that is taken directly from the backbone.
        top_level = min(max(backbone_levels), self._max_level)

        with tf.variable_scope('fpn'):
            # Lateral 1x1 convolutions, one per backbone-provided level.
            laterals = {}
            for level in range(self._min_level, top_level + 1):
                laterals[level] = self._conv2d_op(
                    multilevel_features[level],
                    filters=self._fpn_feat_dims,
                    kernel_size=(1, 1),
                    padding='same',
                    name='l%d' % level)

            # Top-down path: start at the top and add the up-sampled result
            # of the level above to each lateral feature.
            feats = {top_level: laterals[top_level]}
            for level in reversed(range(self._min_level, top_level)):
                upsampled = spatial_transform_ops.nearest_upsampling(
                    feats[level + 1], 2)
                feats[level] = upsampled + laterals[level]

            # Post-hoc 3x3 convolution on every merged level.
            for level in range(self._min_level, top_level + 1):
                feats[level] = self._conv2d_op(
                    feats[level],
                    filters=self._fpn_feat_dims,
                    strides=(1, 1),
                    kernel_size=(3, 3),
                    padding='same',
                    name='post_hoc_d%d' % level)

            # Coarser levels above the backbone (introduced for RetinaNet),
            # each from a stride-2 3x3 convolution of the level below. Every
            # such level except the first applies a ReLU to its input.
            first_coarse_level = top_level + 1
            for level in range(first_coarse_level, self._max_level + 1):
                coarse_input = feats[level - 1]
                if level > first_coarse_level:
                    coarse_input = tf.nn.relu(coarse_input)
                feats[level] = self._conv2d_op(
                    coarse_input,
                    filters=self._fpn_feat_dims,
                    strides=(2, 2),
                    kernel_size=(3, 3),
                    padding='same',
                    name='p%d' % level)

            # Batch norm (without ReLU) on every output level.
            for level in range(self._min_level, self._max_level + 1):
                feats[level] = self._batch_norm_relu(
                    feats[level],
                    relu=False,
                    is_training=is_training,
                    name='p%d-bn' % level)
        return feats