예제 #1
0
def resnet_v1_101(inputs, output_stride=8, is_training=True):
    """Build a ResNet-101 (v1) trunk and return its per-stage activations.

    The network is a root block (7x7 stride-2 ``conv1`` followed by a 2x2
    stride-2 ``pool1``) and four residual stages with 3, 4, 23 and 3 units.

    Args:
        inputs: Tensor fed into ``conv1`` (presumably
            [batch, height, width, channels] -- confirm against callers).
        output_stride: Requested ratio of input to output spatial resolution
            for the whole network. Must be a multiple of 4 because the root
            block already downsamples by 4. ``None`` is forwarded unchanged
            to ``resnet_utils.stack_blocks_dense`` (presumably meaning "use
            the nominal stride" -- confirm against that helper).
        is_training: Forwarded to ``slim.batch_norm`` as ``is_training``.

    Returns:
        A dict with keys ``'conv1'`` .. ``'conv5'`` mapping to the collected
        end points of ``conv1`` and ``block1`` .. ``block4`` respectively.

    Raises:
        ValueError: If ``output_stride`` is not None and not a multiple of 4.
    """
    # Validate first so that a bad stride fails before any graph is built.
    if output_stride is not None:
        if output_stride % 4 != 0:
            raise ValueError('The output_stride needs to be a multiple of 4.')
        # conv1 and pool1 each have stride 2, so only output_stride / 4 is
        # left for the residual stages. Floor division keeps an integer
        # stride an integer on Python 3.
        output_stride //= 4

    blocks = [
        resnet_v1.resnet_v1_block('block1',
                                  base_depth=64,
                                  num_units=3,
                                  stride=1),
        resnet_v1.resnet_v1_block('block2',
                                  base_depth=128,
                                  num_units=4,
                                  stride=2),
        resnet_v1.resnet_v1_block('block3',
                                  base_depth=256,
                                  num_units=23,
                                  stride=2),
        resnet_v1.resnet_v1_block('block4',
                                  base_depth=512,
                                  num_units=3,
                                  stride=2),
    ]

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with tf.variable_scope('resnet_v1_101', 'resnet_v1', [inputs]) as sc:
            # Every op listed below records its output in this collection so
            # the intermediate activations can be looked up by name later.
            end_points_collection = sc.original_name_scope + '_end_points'
            with slim.arg_scope([
                    slim.conv2d, resnet_v1.bottleneck,
                    resnet_utils.stack_blocks_dense
            ],
                                outputs_collections=end_points_collection):
                with slim.arg_scope([slim.batch_norm],
                                    is_training=is_training):
                    net = inputs
                    net = resnet_utils.conv2d_same(net,
                                                   64,
                                                   7,
                                                   stride=2,
                                                   scope='conv1')
                    net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool1')
                    net = resnet_utils.stack_blocks_dense(
                        net, blocks, output_stride)
                    # Convert end_points_collection into a dictionary of end_points.
                    end_points = slim.utils.convert_collection_to_dict(
                        end_points_collection)

        # Expose the stage outputs under short, scope-independent names.
        outputs = {}
        outputs['conv1'] = end_points['resnet_v1_101/conv1']
        outputs['conv2'] = end_points['resnet_v1_101/block1']
        outputs['conv3'] = end_points['resnet_v1_101/block2']
        outputs['conv4'] = end_points['resnet_v1_101/block3']
        outputs['conv5'] = end_points['resnet_v1_101/block4']

    return outputs
예제 #2
0
def resnet_v2_50(inputs, is_training=True):
    """Build a ResNet-50 (v2, pre-activation) trunk with global average pooling.

    The network is a root block (7x7 stride-2 ``conv1`` created without
    activation or normalizer, then a 3x3 stride-2 ``pool1``), four residual
    stages with 3, 4, 6 and 3 units, a final ``postnorm`` batch norm with
    ReLU, and a mean over axes 1 and 2.

    Args:
        inputs: Tensor fed into ``conv1`` (presumably
            [batch, height, width, channels] -- confirm against callers).
        is_training: Forwarded to ``slim.batch_norm`` as ``is_training``.

    Returns:
        The tensor produced by ``tf.reduce_mean`` over axes [1, 2] with
        ``keepdims=True``.
    """
    # (scope name, base depth, number of units, stride) for each stage.
    stage_specs = (
        ('block1', 64, 3, 2),
        ('block2', 128, 4, 2),
        ('block3', 256, 6, 2),
        ('block4', 512, 3, 1),
    )
    blocks = [
        resnet_v2.resnet_v2_block(stage_name,
                                  base_depth=depth,
                                  num_units=units,
                                  stride=stage_stride)
        for stage_name, depth, units, stage_stride in stage_specs
    ]

    with slim.arg_scope(resnet_v2.resnet_arg_scope()), \
            tf.variable_scope('resnet_v2_50', 'resnet_v2', [inputs]):
        scoped_ops = [
            slim.conv2d, resnet_v2.bottleneck,
            resnet_utils.stack_blocks_dense
        ]
        with slim.arg_scope(scoped_ops), \
                slim.arg_scope([slim.batch_norm], is_training=is_training):
            # The root convolution is created without activation or
            # normalizer.
            with slim.arg_scope([slim.conv2d],
                                activation_fn=None,
                                normalizer_fn=None):
                features = resnet_utils.conv2d_same(
                    inputs, 64, 7, stride=2, scope='conv1')
            features = slim.max_pool2d(
                features, [3, 3], stride=2, scope='pool1')
            features = resnet_utils.stack_blocks_dense(features, blocks)
            # This is needed because the pre-activation variant does not have
            # batch normalization or activation functions in the residual
            # unit output. See Appendix of [2].
            features = slim.batch_norm(
                features, activation_fn=nn_ops.relu, scope='postnorm')
            # Average over axes 1 and 2, keeping them as size-1 dimensions.
            features = tf.reduce_mean(
                features, [1, 2], name='pool5', keepdims=True)

    return features
예제 #3
0
def resnet_v1_50(inputs, is_training=True):
    """Build a ResNet-50 (v1) trunk with global average pooling.

    The network is a root block (7x7 stride-2 ``conv1`` followed by a 2x2
    stride-2 ``pool1``), four residual stages with 3, 4, 6 and 3 units, and a
    mean over axes 1 and 2.

    Args:
        inputs: Tensor fed into ``conv1`` (presumably
            [batch, height, width, channels] -- confirm against callers).
        is_training: Forwarded to ``slim.batch_norm`` as ``is_training``.

    Returns:
        The tensor produced by ``tf.reduce_mean`` over axes [1, 2] with
        ``keepdims=True``.
    """
    # (scope name, base depth, number of units, stride) for each stage.
    stage_specs = (
        ('block1', 64, 3, 2),
        ('block2', 128, 4, 2),
        ('block3', 256, 6, 2),
        ('block4', 512, 3, 1),
    )
    blocks = [
        resnet_v1.resnet_v1_block(stage_name,
                                  base_depth=depth,
                                  num_units=units,
                                  stride=stage_stride)
        for stage_name, depth, units, stage_stride in stage_specs
    ]

    with slim.arg_scope(resnet_v1.resnet_arg_scope()), \
            tf.variable_scope('resnet_v1_50', 'resnet_v1', [inputs]):
        scoped_ops = [
            slim.conv2d, resnet_v1.bottleneck,
            resnet_utils.stack_blocks_dense
        ]
        with slim.arg_scope(scoped_ops), \
                slim.arg_scope([slim.batch_norm], is_training=is_training):
            features = resnet_utils.conv2d_same(
                inputs, 64, 7, stride=2, scope='conv1')
            features = slim.max_pool2d(
                features, [2, 2], stride=2, scope='pool1')
            features = resnet_utils.stack_blocks_dense(features, blocks)
            # Average over axes 1 and 2, keeping them as size-1 dimensions.
            features = tf.reduce_mean(
                features, [1, 2], name='pool5', keepdims=True)

    return features
예제 #4
0
def resnet_v1(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              reuse=None,
              dropout=False,
              scope=None):
    """Generator for v1 ResNet models.

    This function generates a family of ResNet v1 models. See the
    resnet_v1_*() methods for specific model instantiations, obtained by
    selecting different block instantiations that produce ResNets of various
    depths.

    Training for image classification on Imagenet is usually done with
    [224, 224] inputs, resulting in [7, 7] feature maps at the output of the
    last ResNet block for the ResNets defined in [1] that have nominal stride
    equal to 32. However, for dense prediction tasks we advise that one uses
    inputs with spatial dimensions that are multiples of 32 plus 1, e.g.,
    [321, 321]. In this case the feature maps at the ResNet output will have
    spatial shape
    [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
    and corners exactly aligned with the input image corners, which greatly
    facilitates alignment of the features to the image. Using as input
    [225, 225] images results in [8, 8] feature maps at the output of the last
    ResNet block.

    For dense prediction tasks, the ResNet needs to run in fully-convolutional
    (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2]
    all have nominal stride equal to 32 and a good choice in FCN mode is to
    use output_stride=16 in order to increase the density of the computed
    features at small computational and memory overhead, cf.
    http://arxiv.org/abs/1606.00915.

    Args:
        inputs: A tensor of size [batch, height_in, width_in, channels].
        blocks: A list of length equal to the number of ResNet blocks. Each
            element is a resnet_utils.Block object describing the units in
            the block.
        num_classes: Number of predicted classes for classification tasks. If
            None we return the features before the logit layer.
        is_training: whether batch_norm layers are in training mode.
        global_pool: If True, we perform global average pooling before
            computing the logits. Set to True for image classification, False
            for dense prediction.
        output_stride: If None, then the output will be computed at the
            nominal network stride. If output_stride is not None, it
            specifies the requested ratio of input to output spatial
            resolution. When include_root_block is True it must be a multiple
            of 4.
        include_root_block: If True, include the initial convolution followed
            by max-pooling, if False excludes it.
        reuse: whether or not the network and its variables should be reused.
            To be able to reuse 'scope' must be given.
        dropout: Forwarded unchanged as the `dropout` keyword of
            resnet_utils.stack_blocks_dense (presumably toggles dropout
            inside the residual units -- confirm against that helper).
        scope: Optional variable_scope.

    Returns:
        net: A rank-4 tensor of size
            [batch, height_out, width_out, channels_out]. If global_pool is
            False, then height_out and width_out are reduced by a factor of
            output_stride compared to the respective height_in and width_in,
            else both height_out and width_out equal one. If num_classes is
            None, then net is the output of the last ResNet block,
            potentially after global average pooling. If num_classes is not
            None, net contains the pre-softmax activations.
        end_points: A dictionary from components of the network to the
            corresponding activation.

    Raises:
        ValueError: If the target output_stride is not valid, in particular
            if include_root_block is True and output_stride is not a multiple
            of 4.
    """
    # Validate before any variable scope is opened so a bad stride fails fast.
    if include_root_block and output_stride is not None:
        if output_stride % 4 != 0:
            raise ValueError(
                'The output_stride needs to be a multiple of 4.'
            )
        # The root block (stride-2 conv1 + stride-2 pool1) already downsamples
        # by 4, so only output_stride / 4 is left for the residual blocks.
        # Floor division keeps an integer stride an integer on Python 3.
        output_stride //= 4

    with variable_scope.variable_scope(scope,
                                       'resnet_v1', [inputs],
                                       reuse=reuse) as sc:
        # Ops listed in the arg_scope below record their outputs in this
        # collection so they can be returned as end_points.
        end_points_collection = sc.original_name_scope + '_end_points'
        with arg_scope(
            [layers.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with arg_scope([layers.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    net = resnet_utils.conv2d_same(net,
                                                   64,
                                                   7,
                                                   stride=2,
                                                   scope='conv1')
                    net = layers_lib.max_pool2d(net, [3, 3],
                                                stride=2,
                                                scope='pool1')
                net = resnet_utils.stack_blocks_dense(net,
                                                      blocks,
                                                      output_stride,
                                                      dropout=dropout)
                if global_pool:
                    # Global average pooling.
                    # NOTE(review): `keep_dims` is the older spelling of
                    # `keepdims`; confirm it is still accepted by the
                    # TensorFlow version in use.
                    net = math_ops.reduce_mean(net, [1, 2],
                                               name='pool5',
                                               keep_dims=True)
                if num_classes is not None:
                    # 1x1 convolution producing the pre-softmax logits.
                    net = layers.conv2d(net,
                                        num_classes, [1, 1],
                                        activation_fn=None,
                                        normalizer_fn=None,
                                        scope='logits')
                # Convert end_points_collection into a dictionary of end_points.
                end_points = utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = layers_lib.softmax(
                        net, scope='predictions')
                return net, end_points