def resnet_v1_101(inputs, output_stride=8, is_training=True):
    """Build a ResNet-101 (v1) backbone and return its per-stage activations.

    The network is a 7x7 stride-2 root convolution, a 2x2 stride-2 max-pool
    and four bottleneck blocks with 3, 4, 23 and 3 units respectively.

    Args:
      inputs: Input tensor fed to the root convolution.
        NOTE(review): presumably [batch, height, width, channels] -- confirm.
      output_stride: Requested ratio of input to output spatial resolution.
        Must be a multiple of 4 because the root block (conv1 + pool1)
        already subsamples by 4. If None, None is forwarded to
        resnet_utils.stack_blocks_dense, i.e. no stride limit is requested.
      is_training: Whether batch_norm layers are in training mode.

    Returns:
      A dict with keys 'conv1' .. 'conv5' holding the collected end points of
      the root convolution and of block1 .. block4, in that order.

    Raises:
      ValueError: If output_stride is not None and not a multiple of 4.
    """
    # Fail fast, before any graph state is created. The division is an
    # integer one so that the block stacker receives an int stride rather
    # than the float that `/` yields on Python 3.
    if output_stride is not None:
        if output_stride % 4 != 0:
            raise ValueError('The output_stride needs to be a multiple of 4.')
        output_stride //= 4

    blocks = [
        resnet_v1.resnet_v1_block('block1', base_depth=64, num_units=3,
                                  stride=1),
        resnet_v1.resnet_v1_block('block2', base_depth=128, num_units=4,
                                  stride=2),
        resnet_v1.resnet_v1_block('block3', base_depth=256, num_units=23,
                                  stride=2),
        resnet_v1.resnet_v1_block('block4', base_depth=512, num_units=3,
                                  stride=2),
    ]

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with tf.variable_scope('resnet_v1_101', 'resnet_v1', [inputs]) as sc:
            # Every conv / bottleneck / block output is registered in this
            # collection so it can be looked up by name below.
            end_points_collection = sc.original_name_scope + '_end_points'
            with slim.arg_scope(
                    [slim.conv2d, resnet_v1.bottleneck,
                     resnet_utils.stack_blocks_dense],
                    outputs_collections=end_points_collection):
                with slim.arg_scope([slim.batch_norm],
                                    is_training=is_training):
                    net = inputs
                    net = resnet_utils.conv2d_same(
                        net, 64, 7, stride=2, scope='conv1')
                    net = slim.max_pool2d(
                        net, [2, 2], stride=2, scope='pool1')
                    net = resnet_utils.stack_blocks_dense(
                        net, blocks, output_stride)

                    # Convert end_points_collection into a dictionary of
                    # end_points.
                    end_points = slim.utils.convert_collection_to_dict(
                        end_points_collection)

    outputs = {}
    outputs['conv1'] = end_points['resnet_v1_101/conv1']
    outputs['conv2'] = end_points['resnet_v1_101/block1']
    outputs['conv3'] = end_points['resnet_v1_101/block2']
    outputs['conv4'] = end_points['resnet_v1_101/block3']
    outputs['conv5'] = end_points['resnet_v1_101/block4']
    return outputs
def resnet_v2_50(inputs, is_training=True):
    """Build a ResNet-50 (v2, pre-activation) trunk with global pooling.

    The network is a 7x7 stride-2 root convolution (no activation, no
    normalizer), a 3x3 stride-2 max-pool, four bottleneck blocks with
    3, 4, 6 and 3 units, a final batch-norm + ReLU and a mean over
    axes 1 and 2.

    Args:
      inputs: Input tensor fed to the root convolution.
        NOTE(review): presumably [batch, height, width, channels] -- confirm.
      is_training: Whether batch_norm layers are in training mode.

    Returns:
      The tensor produced by averaging the final activations over axes 1
      and 2 with keepdims=True.
    """
    # (scope name, base depth, number of units, stride) for each stage.
    stage_specs = (
        ('block1', 64, 3, 2),
        ('block2', 128, 4, 2),
        ('block3', 256, 6, 2),
        ('block4', 512, 3, 1),
    )
    blocks = [
        resnet_v2.resnet_v2_block(
            stage, base_depth=depth, num_units=units, stride=stride)
        for stage, depth, units, stride in stage_specs
    ]

    with slim.arg_scope(resnet_v2.resnet_arg_scope()), \
            tf.variable_scope('resnet_v2_50', 'resnet_v2', [inputs]), \
            slim.arg_scope([slim.conv2d, resnet_v2.bottleneck,
                            resnet_utils.stack_blocks_dense]), \
            slim.arg_scope([slim.batch_norm], is_training=is_training):
        features = inputs

        # The v2 root convolution is left linear and un-normalized.
        with slim.arg_scope([slim.conv2d],
                            activation_fn=None, normalizer_fn=None):
            features = resnet_utils.conv2d_same(
                features, 64, 7, stride=2, scope='conv1')
        features = slim.max_pool2d(
            features, [3, 3], stride=2, scope='pool1')

        features = resnet_utils.stack_blocks_dense(features, blocks)

        # This is needed because the pre-activation variant does not have
        # batch normalization or activation functions in the residual unit
        # output. See Appendix of [2].
        features = slim.batch_norm(
            features, activation_fn=nn_ops.relu, scope='postnorm')

        # Global average pooling over axes 1 and 2.
        features = tf.reduce_mean(
            features, [1, 2], name='pool5', keepdims=True)

    return features
def resnet_v1_50(inputs, is_training=True):
    """Build a ResNet-50 (v1) trunk with global pooling.

    The network is a 7x7 stride-2 root convolution, a 2x2 stride-2 max-pool,
    four bottleneck blocks with 3, 4, 6 and 3 units, and a mean over
    axes 1 and 2.

    Args:
      inputs: Input tensor fed to the root convolution.
        NOTE(review): presumably [batch, height, width, channels] -- confirm.
      is_training: Whether batch_norm layers are in training mode.

    Returns:
      The tensor produced by averaging the last block's output over axes 1
      and 2 with keepdims=True.
    """
    # (scope name, base depth, number of units, stride) for each stage.
    stage_specs = (
        ('block1', 64, 3, 2),
        ('block2', 128, 4, 2),
        ('block3', 256, 6, 2),
        ('block4', 512, 3, 1),
    )
    blocks = [
        resnet_v1.resnet_v1_block(
            stage, base_depth=depth, num_units=units, stride=stride)
        for stage, depth, units, stride in stage_specs
    ]

    with slim.arg_scope(resnet_v1.resnet_arg_scope()), \
            tf.variable_scope('resnet_v1_50', 'resnet_v1', [inputs]), \
            slim.arg_scope([slim.conv2d, resnet_v1.bottleneck,
                            resnet_utils.stack_blocks_dense]), \
            slim.arg_scope([slim.batch_norm], is_training=is_training):
        features = resnet_utils.conv2d_same(
            inputs, 64, 7, stride=2, scope='conv1')
        features = slim.max_pool2d(
            features, [2, 2], stride=2, scope='pool1')
        features = resnet_utils.stack_blocks_dense(features, blocks)

        # Global average pooling over axes 1 and 2.
        features = tf.reduce_mean(
            features, [1, 2], name='pool5', keepdims=True)

    return features
def resnet_v1(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              reuse=None,
              dropout=False,
              scope=None):
    """Generator for v1 ResNet models.

    This function generates a family of ResNet v1 models. See the
    resnet_v1_*() methods for specific model instantiations, obtained by
    selecting different block instantiations that produce ResNets of various
    depths.

    Training for image classification on Imagenet is usually done with
    [224, 224] inputs, resulting in [7, 7] feature maps at the output of the
    last ResNet block for the ResNets defined in [1] that have nominal stride
    equal to 32. However, for dense prediction tasks we advise that one uses
    inputs with spatial dimensions that are multiples of 32 plus 1, e.g.,
    [321, 321]. In this case the feature maps at the ResNet output will have
    spatial shape [(height - 1) / output_stride + 1,
    (width - 1) / output_stride + 1] and corners exactly aligned with the
    input image corners, which greatly facilitates alignment of the features
    to the image. Using as input [225, 225] images results in [8, 8] feature
    maps at the output of the last ResNet block.

    For dense prediction tasks, the ResNet needs to run in
    fully-convolutional (FCN) mode and global_pool needs to be set to False.
    The ResNets in [1, 2] all have nominal stride equal to 32 and a good
    choice in FCN mode is to use output_stride=16 in order to increase the
    density of the computed features at small computational and memory
    overhead, cf. http://arxiv.org/abs/1606.00915.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels].
      blocks: A list of length equal to the number of ResNet blocks. Each
        element is a resnet_utils.Block object describing the units in the
        block.
      num_classes: Number of predicted classes for classification tasks. If
        None we return the features before the logit layer.
      is_training: whether batch_norm layers are in training mode.
      global_pool: If True, we perform global average pooling before
        computing the logits. Set to True for image classification, False
        for dense prediction.
      output_stride: If None, then the output will be computed at the
        nominal network stride. If output_stride is not None, it specifies
        the requested ratio of input to output spatial resolution.
      include_root_block: If True, include the initial convolution followed
        by max-pooling, if False excludes it.
      reuse: whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      dropout: Forwarded unchanged to resnet_utils.stack_blocks_dense.
        NOTE(review): presumably enables dropout inside the stacked units --
        confirm against resnet_utils.
      scope: Optional variable_scope.

    Returns:
      net: A rank-4 tensor of size [batch, height_out, width_out,
        channels_out]. If global_pool is False, then height_out and
        width_out are reduced by a factor of output_stride compared to the
        respective height_in and width_in, else both height_out and
        width_out equal one. If num_classes is None, then net is the output
        of the last ResNet block, potentially after global average pooling.
        If num_classes is not None, net contains the pre-softmax
        activations.
      end_points: A dictionary from components of the network to the
        corresponding activation.

    Raises:
      ValueError: If the target output_stride is not valid.
    """
    # Validate before opening any scope so that an invalid request does not
    # leave a partially created variable scope behind.
    if include_root_block and output_stride is not None:
        if output_stride % 4 != 0:
            raise ValueError('The output_stride needs to be a multiple of 4.')
        # The root block (stride-2 conv1 + stride-2 pool1) already subsamples
        # by 4. Floor division keeps the remaining stride an int; `/` would
        # hand a float to the block stacker on Python 3.
        output_stride //= 4

    with variable_scope.variable_scope(
            scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with arg_scope(
                [layers.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with arg_scope([layers.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    net = resnet_utils.conv2d_same(
                        net, 64, 7, stride=2, scope='conv1')
                    net = layers_lib.max_pool2d(
                        net, [3, 3], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(
                    net, blocks, output_stride, dropout=dropout)
                if global_pool:
                    # Global average pooling. `keepdims` is the supported
                    # spelling (the old `keep_dims` alias is deprecated) and
                    # matches the other builders in this file.
                    net = math_ops.reduce_mean(
                        net, [1, 2], name='pool5', keepdims=True)
                if num_classes is not None:
                    net = layers.conv2d(
                        net, num_classes, [1, 1],
                        activation_fn=None, normalizer_fn=None,
                        scope='logits')
                # Convert end_points_collection into a dictionary of
                # end_points.
                end_points = utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = layers_lib.softmax(
                        net, scope='predictions')
                return net, end_points