Example #1
    def _GetAttentionModel(
            self,
            images,
            num_classes,
            weight_decay=0.0001,
            attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
            attention_type=_SUPPORTED_ATTENTION_TYPES[0],
            kernel=1,
            training_resnet=False,
            training_attention=False,
            reuse=False):
        """Constructs attention model on resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels].
      num_classes: The number of output classes.
      weight_decay: The coefficient of the weight decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (e.g., [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is in
        training mode.
      reuse: Whether or not the layer and its variables should be reused.

    Returns:
      logits: A tensor of size [batch, num_classes].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
    """

        attention_feat, attention_prob, attention_score, feature_map, _ = (
            self.GetAttentionPrelogit(images,
                                      weight_decay,
                                      attention_nonlinear=attention_nonlinear,
                                      attention_type=attention_type,
                                      kernel=kernel,
                                      training_resnet=training_resnet,
                                      training_attention=training_attention,
                                      reuse=reuse))
        with arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay,
                                           batch_norm_scale=True)):
            with arg_scope([layers.batch_norm],
                           is_training=training_attention):
                with tf.compat.v1.variable_scope(_ATTENTION_VARIABLE_SCOPE,
                                                 values=[attention_feat],
                                                 reuse=reuse):
                    logits = layers.conv2d(attention_feat,
                                           num_classes, [1, 1],
                                           activation_fn=None,
                                           normalizer_fn=None,
                                           scope='logits')
                    logits = tf.squeeze(logits, [1, 2], name='spatial_squeeze')
        return logits, attention_prob, attention_score, feature_map
Example #2
    def GetAttentionPrelogit(
            self,
            images,
            weight_decay=0.0001,
            attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
            attention_type=_SUPPORTED_ATTENTION_TYPES[0],
            kernel=1,
            training_resnet=False,
            training_attention=False,
            reuse=False,
            use_batch_norm=True):
        """Constructs attention model on resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels].
      weight_decay: The coefficient of the weight decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (e.g., [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is in
        training mode.
      reuse: Whether or not the layer and its variables should be reused.
      use_batch_norm: Whether or not to use batch normalization.

    Returns:
      prelogits: A tensor of size [batch, 1, 1, channels].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
      end_points: Set of activations for external use.
    """
        # Construct Resnet50 features.
        with arg_scope(
                resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)):
            _, end_points = self.GetResnet50Subnetwork(
                images, is_training=training_resnet, reuse=reuse)

        feature_map = end_points[self._target_layer_type]

        # Construct attention subnetwork on top of features.
        with arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay,
                                           use_batch_norm=use_batch_norm)):
            with arg_scope([layers.batch_norm],
                           is_training=training_attention):
                (prelogits, attention_prob, attention_score,
                 end_points) = self._GetAttentionSubnetwork(
                     feature_map,
                     end_points,
                     attention_nonlinear=attention_nonlinear,
                     attention_type=attention_type,
                     kernel=kernel,
                     reuse=reuse)

        return prelogits, attention_prob, attention_score, feature_map, end_points
Example #3
def alexnet_v2_arg_scope(weight_decay=0.0005):
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected],
            activation_fn=nn_ops.relu,
            biases_initializer=init_ops.constant_initializer(0.1),
            weights_regularizer=regularizers.l2_regularizer(weight_decay)):
        with arg_scope([layers.conv2d], padding='SAME'):
            with arg_scope([layers_lib.max_pool2d], padding='VALID') as arg_sc:
                return arg_sc
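The arg scope returned above is meant to be re-entered by the caller before building the network. A minimal usage sketch, assuming a TF 1.x runtime where `tf.contrib` is available and `alexnet_v2` is defined as in Example #21 (the placeholder input is purely illustrative):

import tensorflow.compat.v1 as tf
from tensorflow.contrib.framework import arg_scope

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
# Re-enter the scope so conv2d / fully_connected pick up the relu activation,
# l2 weight regularizer and constant bias initializer configured above.
with arg_scope(alexnet_v2_arg_scope(weight_decay=0.0005)):
    logits, end_points = alexnet_v2(images, num_classes=1000, is_training=True)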
Example #4
def inception_v1_arg_scope(weight_decay=0.00004,
                           use_batch_norm=True,
                           batch_norm_var_collection='moving_vars'):
    """Defines the default InceptionV1 arg scope.

  Note: Although the original paper didn't use batch_norm, we found it useful.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    use_batch_norm: "If `True`, batch_norm is applied after each convolution.
    batch_norm_var_collection: The name of the collection for the batch norm
      variables.

  Returns:
    An `arg_scope` to use for the inception v1 model.
  """
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.9997,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # collection containing update_ops.
        'updates_collections': ops.GraphKeys.UPDATE_OPS,
        # collection containing the moving mean and moving variance.
        'variables_collections': {
            'beta': None,
            'gamma': None,
            'moving_mean': [batch_norm_var_collection],
            'moving_variance': [batch_norm_var_collection],
        }
    }
    if use_batch_norm:
        normalizer_fn = layers_lib.batch_norm
        normalizer_params = batch_norm_params
    else:
        normalizer_fn = None
        normalizer_params = {}
    # Set weight_decay for weights in Conv and FC layers.
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected],
            weights_regularizer=regularizers.l2_regularizer(weight_decay)):
        with arg_scope([layers.conv2d],
                       weights_initializer=initializers.
                       variance_scaling_initializer(),
                       activation_fn=nn_ops.relu,
                       normalizer_fn=normalizer_fn,
                       normalizer_params=normalizer_params) as sc:
            return sc
Example #5
 def testEndPointsV1(self):
     """Test the end points of a tiny v1 bottleneck network."""
     blocks = [
         resnet_v1.resnet_v1_block('block1',
                                   base_depth=1,
                                   num_units=2,
                                   stride=2),
         resnet_v1.resnet_v1_block('block2',
                                   base_depth=2,
                                   num_units=2,
                                   stride=1),
     ]
     inputs = create_input(2, 32, 16, 3)
     with arg_scope(resnet_utils.resnet_arg_scope()):
         _, end_points = self._resnet_plain(inputs, blocks, scope='tiny')
     expected = [
         'tiny/block1/unit_1/bottleneck_v1/shortcut',
         'tiny/block1/unit_1/bottleneck_v1/conv1',
         'tiny/block1/unit_1/bottleneck_v1/conv2',
         'tiny/block1/unit_1/bottleneck_v1/conv3',
         'tiny/block1/unit_2/bottleneck_v1/conv1',
         'tiny/block1/unit_2/bottleneck_v1/conv2',
         'tiny/block1/unit_2/bottleneck_v1/conv3',
         'tiny/block2/unit_1/bottleneck_v1/shortcut',
         'tiny/block2/unit_1/bottleneck_v1/conv1',
         'tiny/block2/unit_1/bottleneck_v1/conv2',
         'tiny/block2/unit_1/bottleneck_v1/conv3',
         'tiny/block2/unit_2/bottleneck_v1/conv1',
         'tiny/block2/unit_2/bottleneck_v1/conv2',
         'tiny/block2/unit_2/bottleneck_v1/conv3'
     ]
     self.assertItemsEqual(expected, end_points)
Example #6
 def testAtrousFullyConvolutionalValues(self):
     """Verify dense feature extraction with atrous convolution."""
     nominal_stride = 32
     for output_stride in [4, 8, 16, 32, None]:
         with arg_scope(resnet_utils.resnet_arg_scope()):
             with ops.Graph().as_default():
                 with self.cached_session() as sess:
                     random_seed.set_random_seed(0)
                     inputs = create_input(2, 81, 81, 3)
                     # Dense feature extraction followed by subsampling.
                     output, _ = self._resnet_small(
                         inputs,
                         None,
                         is_training=False,
                         global_pool=False,
                         output_stride=output_stride)
                     if output_stride is None:
                         factor = 1
                     else:
                         factor = nominal_stride // output_stride
                     output = resnet_utils.subsample(output, factor)
                     # Make the two networks use the same weights.
                     variable_scope.get_variable_scope().reuse_variables()
                     # Feature extraction at the nominal network rate.
                     expected, _ = self._resnet_small(inputs,
                                                      None,
                                                      is_training=False,
                                                      global_pool=False)
                     sess.run(variables.global_variables_initializer())
                     self.assertAllClose(output.eval(),
                                         expected.eval(),
                                         atol=2e-4,
                                         rtol=1e-4)
Example #7
def vgg_arg_scope(weight_decay=0.0005):
    """Defines the VGG arg scope.

  Args:
    weight_decay: The l2 regularization coefficient.

  Returns:
    An arg_scope.
  """
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected],
            activation_fn=nn_ops.relu,
            weights_regularizer=regularizers.l2_regularizer(weight_decay),
            biases_initializer=init_ops.zeros_initializer()):
        with arg_scope([layers.conv2d], padding='SAME') as arg_sc:
            return arg_sc
Example #8
    def testAtrousValuesBottleneck(self):
        """Verify the values of dense feature extraction by atrous convolution.

    Make sure that dense feature extraction by stack_blocks_dense() followed by
    subsampling gives identical results to feature extraction at the nominal
    network output stride using the simple self._stack_blocks_nondense() above.
    """
        block = resnet_v1.resnet_v1_block
        blocks = [
            block('block1', base_depth=1, num_units=2, stride=2),
            block('block2', base_depth=2, num_units=2, stride=2),
            block('block3', base_depth=4, num_units=2, stride=2),
            block('block4', base_depth=8, num_units=2, stride=1),
        ]
        nominal_stride = 8

        # Test both odd and even input dimensions.
        height = 30
        width = 31
        with arg_scope(resnet_utils.resnet_arg_scope()):
            with arg_scope([layers.batch_norm], is_training=False):
                for output_stride in [1, 2, 4, 8, None]:
                    with ops.Graph().as_default():
                        with self.cached_session() as sess:
                            random_seed.set_random_seed(0)
                            inputs = create_input(1, height, width, 3)
                            # Dense feature extraction followed by subsampling.
                            output = resnet_utils.stack_blocks_dense(
                                inputs, blocks, output_stride)
                            if output_stride is None:
                                factor = 1
                            else:
                                factor = nominal_stride // output_stride

                            output = resnet_utils.subsample(output, factor)
                            # Make the two networks use the same weights.
                            variable_scope.get_variable_scope(
                            ).reuse_variables()
                            # Feature extraction at the nominal network rate.
                            expected = self._stack_blocks_nondense(
                                inputs, blocks)
                            sess.run(variables.global_variables_initializer())
                            output, expected = sess.run([output, expected])
                            self.assertAllClose(output,
                                                expected,
                                                atol=1e-4,
                                                rtol=1e-4)
Example #9
 def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
     """A plain ResNet without extra layers before or after the ResNet blocks."""
     with variable_scope.variable_scope(scope, values=[inputs]):
         with arg_scope([layers.conv2d], outputs_collections='end_points'):
             net = resnet_utils.stack_blocks_dense(inputs, blocks,
                                                   output_stride)
             end_points = utils.convert_collection_to_dict('end_points')
             return net, end_points
Example #10
def resnet_arg_scope(weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
    """Defines the default ResNet arg scope.

    TODO(gpapan): The batch-normalization related default values above are
      appropriate for use in conjunction with the reference ResNet models
      released at https://github.com/KaimingHe/deep-residual-networks. When
      training ResNets from scratch, they might need to be tuned.

    Args:
      weight_decay: The weight decay to use for regularizing the model.
      batch_norm_decay: The moving average decay when estimating layer activation
        statistics in batch normalization.
      batch_norm_epsilon: Small constant to prevent division by zero when
        normalizing activations by their variance in batch normalization.
      batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
        activations in the batch normalization layer.

    Returns:
      An `arg_scope` to use for the resnet models.
    """
    batch_norm_params = {
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
        'updates_collections': ops.GraphKeys.UPDATE_OPS,
    }

    with arg_scope(
        [layers_lib.conv2d],
            weights_regularizer=regularizers.l2_regularizer(weight_decay),
            weights_initializer=initializers.variance_scaling_initializer(),
            activation_fn=nn_ops.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=batch_norm_params):
        with arg_scope([layers.batch_norm], **batch_norm_params):
            # The following implies padding='SAME' for pool1, which makes feature
            # alignment easier for dense prediction tasks. This is also used in
            # https://github.com/facebook/fb.resnet.torch. However the accompanying
            # code of 'Deep Residual Learning for Image Recognition' uses
            # padding='VALID' for pool1. You can switch to that choice by setting
            # tf.contrib.framework.arg_scope([tf.contrib.layers.max_pool2d], padding='VALID').
            with arg_scope([layers.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
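The padding comment above maps directly onto a nested arg_scope override. A minimal sketch, assuming a TF 1.x runtime with `tf.contrib` available, the `resnet_arg_scope` defined above, and the stock `resnet_v1_50` from `tf.contrib.slim.nets` (the placeholder input is illustrative):

import tensorflow.compat.v1 as tf
from tensorflow.contrib import layers
from tensorflow.contrib.framework import arg_scope
from tensorflow.contrib.slim.nets import resnet_v1

inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
with arg_scope(resnet_arg_scope(weight_decay=0.0001)):
    # Inner scopes win, so this restores the padding='VALID' behaviour of the
    # original 'Deep Residual Learning' code for pool1.
    with arg_scope([layers.max_pool2d], padding='VALID'):
        net, end_points = resnet_v1.resnet_v1_50(inputs, num_classes=1000)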
Example #11
 def testModelHasExpectedNumberOfParameters(self):
     batch_size = 5
     height, width = 299, 299
     inputs = random_ops.random_uniform((batch_size, height, width, 3))
     with arg_scope(inception_v3.inception_v3_arg_scope()):
         inception_v3.inception_v3_base(inputs)
     total_params, _ = model_analyzer.analyze_vars(
         variables_lib.get_model_variables())
     self.assertAlmostEqual(21802784, total_params)
Example #12
def truncated_vgg_16(inputs, is_training=True, scope='vgg_16'):
    """Oxford Net VGG 16-Layers version D Example.

    For use in SSD object detection network, which has this particular
    truncated version of VGG16 detailed in its paper.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      is_training: whether or not the model is being trained.
      scope: Optional scope for the variables.

    Returns:
      the last op containing the conv5 tensor and end_points dict.
    """
    with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with arg_scope(
            [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
                outputs_collections=end_points_collection):
            net = layers_lib.repeat(inputs,
                                    2,
                                    layers.conv2d,
                                    64, [3, 3],
                                    scope='conv1')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
            net = layers_lib.repeat(net,
                                    2,
                                    layers.conv2d,
                                    128, [3, 3],
                                    scope='conv2')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
            net = layers_lib.repeat(net,
                                    3,
                                    layers.conv2d,
                                    256, [3, 3],
                                    scope='conv3')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
            net = layers_lib.repeat(net,
                                    3,
                                    layers.conv2d,
                                    512, [3, 3],
                                    scope='conv4')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
            net = layers_lib.repeat(net,
                                    3,
                                    layers.conv2d,
                                    512, [3, 3],
                                    scope='conv5')
            # Convert end_points_collection into an end_point dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            return net, end_points
Example #13
def _recomputing_grad_fn(compute_fn, original_args, original_vars,
                         output_grads, grad_fn_variables, use_data_dep,
                         tupleize_grads, arg_scope, var_scope,
                         has_is_recompute_kwarg):
    """Grad fn for recompute_grad."""
    variables = grad_fn_variables or []

    # Identity ops around the inputs ensure correct gradient graph-walking.
    inputs = [array_ops.identity(x) for x in list(original_args)]

    # Recompute outputs
    # Use a control dependency to ensure that the recompute is not eliminated by
    # CSE and that it happens on the backwards pass.
    ctrl_dep_grads = [g for g in output_grads if g is not None]
    with framework_ops.control_dependencies(ctrl_dep_grads):
        if use_data_dep:
            inputs = _force_data_dependency(output_grads, inputs)
        # Re-enter scopes
        with arg_scope_lib.arg_scope(arg_scope):
            with variable_scope.variable_scope(var_scope, reuse=True):
                # Re-call the function and ensure that the touched variables are the
                # same as in the first call.
                with backprop.GradientTape() as tape:
                    fn_kwargs = {}
                    if has_is_recompute_kwarg:
                        fn_kwargs["is_recomputing"] = True
                    outputs = compute_fn(*inputs, **fn_kwargs)
                recompute_vars = set(
                    _as_ref(v) for v in tape.watched_variables())
                if original_vars != recompute_vars:
                    raise ValueError(_WRONG_VARS_ERR)

    if not isinstance(outputs, (list, tuple)):
        outputs = [outputs]
    outputs = list(outputs)

    # Compute gradients
    grads = _gradients(outputs,
                       inputs + variables,
                       output_grads,
                       stop_gradients=inputs)

    if tupleize_grads:
        if use_data_dep:
            grads = _tuple_with_data_dep(grads)
        else:
            grads = control_flow_ops.tuple(grads)

    grad_inputs = grads[:len(inputs)]
    grad_vars = grads[len(inputs):]
    return grad_inputs, grad_vars
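Per its docstring, this is the backward-pass helper behind `recompute_grad`: the forward function is re-run under the saved arg/variable scopes during backprop so intermediate activations never have to be stored. A minimal sketch of the same recompute-on-backward idea using the public `tf.recompute_grad` wrapper (assumes a TF 2.x runtime; the layer and sizes are arbitrary illustration values):

import tensorflow as tf

dense = tf.keras.layers.Dense(1024, activation='relu')

@tf.recompute_grad
def block(x):
    # Intermediate activations in here are dropped after the forward pass and
    # recomputed when gradients are requested.
    return dense(dense(x))

with tf.GradientTape() as tape:
    x = tf.random.normal([8, 1024])
    y = tf.reduce_sum(block(x))
grads = tape.gradient(y, dense.trainable_variables)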
Example #14
 def testClassificationEndPoints(self):
     global_pool = True
     num_classes = 10
     inputs = create_input(2, 224, 224, 3)
     with arg_scope(resnet_utils.resnet_arg_scope()):
         logits, end_points = self._resnet_small(inputs,
                                                 num_classes,
                                                 global_pool=global_pool,
                                                 scope='resnet')
     self.assertTrue(logits.op.name.startswith('resnet/logits'))
     self.assertListEqual(logits.get_shape().as_list(),
                          [2, 1, 1, num_classes])
     self.assertTrue('predictions' in end_points)
     self.assertListEqual(end_points['predictions'].get_shape().as_list(),
                          [2, 1, 1, num_classes])
Example #15
 def testFullyConvolutionalUnknownHeightWidth(self):
     batch = 2
     height, width = 65, 65
     global_pool = False
     inputs = create_input(batch, None, None, 3)
     with arg_scope(resnet_utils.resnet_arg_scope()):
         output, _ = self._resnet_small(inputs,
                                        None,
                                        global_pool=global_pool)
     self.assertListEqual(output.get_shape().as_list(),
                          [batch, None, None, 32])
     images = create_input(batch, height, width, 3)
     with self.cached_session() as sess:
         sess.run(variables.global_variables_initializer())
         output = sess.run(output, {inputs: images.eval()})
         self.assertEqual(output.shape, (batch, 3, 3, 32))
Example #16
 def testUnknownBatchSize(self):
     batch = 2
     height, width = 65, 65
     global_pool = True
     num_classes = 10
     inputs = create_input(None, height, width, 3)
     with arg_scope(resnet_utils.resnet_arg_scope()):
         logits, _ = self._resnet_small(inputs,
                                        num_classes,
                                        global_pool=global_pool,
                                        scope='resnet')
     self.assertTrue(logits.op.name.startswith('resnet/logits'))
     self.assertListEqual(logits.get_shape().as_list(),
                          [None, 1, 1, num_classes])
     images = create_input(batch, height, width, 3)
     with self.cached_session() as sess:
         sess.run(variables.global_variables_initializer())
         output = sess.run(logits, {inputs: images.eval()})
         self.assertEqual(output.shape, (batch, 1, 1, num_classes))
Example #17
 def testFullyConvolutionalEndpointShapes(self):
     global_pool = False
     num_classes = 10
     inputs = create_input(2, 321, 321, 3)
     with arg_scope(resnet_utils.resnet_arg_scope()):
         _, end_points = self._resnet_small(inputs,
                                            num_classes,
                                            global_pool=global_pool,
                                            scope='resnet')
         endpoint_to_shape = {
             'resnet/block1': [2, 41, 41, 4],
             'resnet/block2': [2, 21, 21, 8],
             'resnet/block3': [2, 11, 11, 16],
             'resnet/block4': [2, 11, 11, 32]
         }
         for endpoint in endpoint_to_shape:
             shape = endpoint_to_shape[endpoint]
             self.assertListEqual(
                 end_points[endpoint].get_shape().as_list(), shape)
Example #18
 def testClassificationShapes(self):
     global_pool = True
     num_classes = 10
     inputs = create_input(2, 224, 224, 3)
     with arg_scope(resnet_utils.resnet_arg_scope()):
         _, end_points = self._resnet_small(inputs,
                                            num_classes,
                                            global_pool=global_pool,
                                            scope='resnet')
         endpoint_to_shape = {
             'resnet/block1': [2, 28, 28, 4],
             'resnet/block2': [2, 14, 14, 8],
             'resnet/block3': [2, 7, 7, 16],
             'resnet/block4': [2, 7, 7, 32]
         }
         for endpoint in endpoint_to_shape:
             shape = endpoint_to_shape[endpoint]
             self.assertListEqual(
                 end_points[endpoint].get_shape().as_list(), shape)
Example #19
def resnet_v2(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              reuse=None,
              scope=None):
    """Generator for v2 (preactivation) ResNet models.

  This function generates a family of ResNet v2 models. See the resnet_v2_*()
  methods for specific model instantiations, obtained by selecting different
  block instantiations that produce ResNets of various depths.

  Training for image classification on Imagenet is usually done with [224, 224]
  inputs, resulting in [7, 7] feature maps at the output of the last ResNet
  block for the ResNets defined in [1] that have nominal stride equal to 32.
  However, for dense prediction tasks we advise that one uses inputs with
  spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
  this case the feature maps at the ResNet output will have spatial shape
  [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
  and corners exactly aligned with the input image corners, which greatly
  facilitates alignment of the features to the image. Using as input [225, 225]
  images results in [8, 8] feature maps at the output of the last ResNet block.

  For dense prediction tasks, the ResNet needs to run in fully-convolutional
  (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
  have nominal stride equal to 32 and a good choice in FCN mode is to use
  output_stride=16 in order to increase the density of the computed features at
  small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    blocks: A list of length equal to the number of ResNet blocks. Each element
      is a resnet_utils.Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: whether batch_norm layers are in training mode.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    include_root_block: If True, include the initial convolution followed by
      max-pooling, if False excludes it. If excluded, `inputs` should be the
      results of an activation-less convolution.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse, 'scope' must be given.
    scope: Optional variable_scope.


  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
    with variable_scope.variable_scope(scope,
                                       'resnet_v2', [inputs],
                                       reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with arg_scope(
            [layers_lib.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with arg_scope([layers.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        output_stride /= 4
                    # We do not include batch normalization or activation functions in
                    # conv1 because the first ResNet unit will perform these. Cf.
                    # Appendix of [2].
                    with arg_scope([layers_lib.conv2d],
                                   activation_fn=None,
                                   normalizer_fn=None):
                        net = resnet_utils.conv2d_same(net,
                                                       64,
                                                       7,
                                                       stride=2,
                                                       scope='conv1')
                    net = layers.max_pool2d(net, [3, 3],
                                            stride=2,
                                            scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)
                # This is needed because the pre-activation variant does not have batch
                # normalization or activation functions in the residual unit output. See
                # Appendix of [2].
                net = layers.batch_norm(net,
                                        activation_fn=nn_ops.relu,
                                        scope='postnorm')
                if global_pool:
                    # Global average pooling.
                    net = math_ops.reduce_mean(net, [1, 2],
                                               name='pool5',
                                               keepdims=True)
                if num_classes is not None:
                    net = layers_lib.conv2d(net,
                                            num_classes, [1, 1],
                                            activation_fn=None,
                                            normalizer_fn=None,
                                            scope='logits')
                # Convert end_points_collection into a dictionary of end_points.
                end_points = utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = layers.softmax(
                        net, scope='predictions')
                return net, end_points
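The spatial-shape rule quoted in the docstring ([(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] for inputs that are a multiple of 32 plus 1) is easy to sanity-check. A small sketch of the arithmetic; the helper name is illustrative, not part of the library:

def resnet_output_size(size_in, output_stride):
    # Holds when size_in is a multiple of output_stride plus 1.
    return (size_in - 1) // output_stride + 1

assert resnet_output_size(321, 16) == 21  # FCN mode, output_stride=16
assert resnet_output_size(321, 32) == 11  # nominal stride of 32
assert resnet_output_size(225, 32) == 8   # the [225, 225] -> [8, 8] case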
Example #20
def overfeat(inputs,
             num_classes=1000,
             is_training=True,
             dropout_keep_prob=0.5,
             spatial_squeeze=True,
             scope='overfeat'):
  """Contains the model definition for the OverFeat network.

  The definition for the network was obtained from:
    OverFeat: Integrated Recognition, Localization and Detection using
    Convolutional Networks
    Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and
    Yann LeCun, 2014
    http://arxiv.org/abs/1312.6229

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 231x231. To use in fully
        convolutional mode, set spatial_squeeze to false.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not the spatial dimensions of the outputs should
      be squeezed. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.

  """
  with variable_scope.variable_scope(scope, 'overfeat', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers.conv2d(
          inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers.conv2d(net, 512, [3, 3], scope='conv3')
      net = layers.conv2d(net, 1024, [3, 3], scope='conv4')
      net = layers.conv2d(net, 1024, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
      with arg_scope(
          [layers.conv2d],
          weights_initializer=trunc_normal(0.005),
          biases_initializer=init_ops.constant_initializer(0.1)):
        # Use conv2d instead of fully_connected layers.
        net = layers.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6')
        net = layers_lib.dropout(
            net, dropout_keep_prob, is_training=is_training, scope='dropout6')
        net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
        net = layers_lib.dropout(
            net, dropout_keep_prob, is_training=is_training, scope='dropout7')
        net = layers.conv2d(
            net,
            num_classes, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            biases_initializer=init_ops.zeros_initializer(),
            scope='fc8')
      # Convert end_points_collection into an end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
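The Note above describes two call modes; a minimal sketch of both, assuming a TF 1.x runtime with `tf.contrib` available and `overfeat` defined as in this example (the second scope name is hypothetical, used only to avoid variable clashes when both networks live in the same graph):

import tensorflow.compat.v1 as tf

# Classification mode: 231x231 inputs collapse to 1x1 logits, which
# spatial_squeeze then removes.
images = tf.placeholder(tf.float32, [None, 231, 231, 3])
logits, _ = overfeat(images, num_classes=1000, spatial_squeeze=True)
# logits: [batch, 1000]

# Fully convolutional mode: larger inputs, keep the spatial grid of logits.
large = tf.placeholder(tf.float32, [None, 463, 463, 3])
logit_map, _ = overfeat(large, num_classes=1000, spatial_squeeze=False,
                        scope='overfeat_fcn')
# logit_map: [batch, h, w, 1000]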
Example #21
def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2'):
    """AlexNet version 2.

  Described in: http://arxiv.org/pdf/1404.5997v2.pdf
  Parameters from:
  github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
  layers-imagenet-1gpu.cfg

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224. To use in fully
        convolutional mode, set spatial_squeeze to false.
        The LRN layers have been removed and the initializers changed from
        random_normal_initializer to xavier_initializer.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not the spatial dimensions of the outputs should
      be squeezed. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
    with variable_scope.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with arg_scope(
            [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
                outputs_collections=[end_points_collection]):
            net = layers.conv2d(inputs,
                                64, [11, 11],
                                4,
                                padding='VALID',
                                scope='conv1')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool1')
            net = layers.conv2d(net, 192, [5, 5], scope='conv2')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool2')
            net = layers.conv2d(net, 384, [3, 3], scope='conv3')
            net = layers.conv2d(net, 384, [3, 3], scope='conv4')
            net = layers.conv2d(net, 256, [3, 3], scope='conv5')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool5')

            # Use conv2d instead of fully_connected layers.
            with arg_scope(
                [layers.conv2d],
                    weights_initializer=trunc_normal(0.005),
                    biases_initializer=init_ops.constant_initializer(0.1)):
                net = layers.conv2d(net,
                                    4096, [5, 5],
                                    padding='VALID',
                                    scope='fc6')
                net = layers_lib.dropout(net,
                                         dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout6')
                net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
                net = layers_lib.dropout(net,
                                         dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout7')
                net = layers.conv2d(
                    net,
                    num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    scope='fc8')

            # Convert end_points_collection into an end_point dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
Example #22
def inception_v1_base(inputs, final_endpoint='Mixed_5c', scope='InceptionV1'):
    """Defines the Inception V1 base architecture.

  This architecture is defined in:
    Going deeper with convolutions
    Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
    Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
    http://arxiv.org/pdf/1409.4842v1.pdf.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
      'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
      'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c']
    scope: Optional variable_scope.

  Returns:
    A tuple `(net, end_points)`, where `net` is the output of the requested
    final endpoint and `end_points` is a dictionary from components of the
    network to the corresponding activations.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values.
  """
    end_points = {}
    with variable_scope.variable_scope(scope, 'InceptionV1', [inputs]):
        with arg_scope([layers.conv2d, layers_lib.fully_connected],
                       weights_initializer=trunc_normal(0.01)):
            with arg_scope([layers.conv2d, layers_lib.max_pool2d],
                           stride=1,
                           padding='SAME'):
                end_point = 'Conv2d_1a_7x7'
                net = layers.conv2d(inputs,
                                    64, [7, 7],
                                    stride=2,
                                    scope=end_point)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points
                end_point = 'MaxPool_2a_3x3'
                net = layers_lib.max_pool2d(net, [3, 3],
                                            stride=2,
                                            scope=end_point)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points
                end_point = 'Conv2d_2b_1x1'
                net = layers.conv2d(net, 64, [1, 1], scope=end_point)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points
                end_point = 'Conv2d_2c_3x3'
                net = layers.conv2d(net, 192, [3, 3], scope=end_point)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points
                end_point = 'MaxPool_3a_3x3'
                net = layers_lib.max_pool2d(net, [3, 3],
                                            stride=2,
                                            scope=end_point)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points

                end_point = 'Mixed_3b'
                with variable_scope.variable_scope(end_point):
                    with variable_scope.variable_scope('Branch_0'):
                        branch_0 = layers.conv2d(net,
                                                 64, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                    with variable_scope.variable_scope('Branch_1'):
                        branch_1 = layers.conv2d(net,
                                                 96, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_1 = layers.conv2d(branch_1,
                                                 128, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_2'):
                        branch_2 = layers.conv2d(net,
                                                 16, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_2 = layers.conv2d(branch_2,
                                                 32, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_3'):
                        branch_3 = layers_lib.max_pool2d(
                            net, [3, 3], scope='MaxPool_0a_3x3')
                        branch_3 = layers.conv2d(branch_3,
                                                 32, [1, 1],
                                                 scope='Conv2d_0b_1x1')
                    net = array_ops.concat(
                        [branch_0, branch_1, branch_2, branch_3], 3)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points

                end_point = 'Mixed_3c'
                with variable_scope.variable_scope(end_point):
                    with variable_scope.variable_scope('Branch_0'):
                        branch_0 = layers.conv2d(net,
                                                 128, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                    with variable_scope.variable_scope('Branch_1'):
                        branch_1 = layers.conv2d(net,
                                                 128, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_1 = layers.conv2d(branch_1,
                                                 192, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_2'):
                        branch_2 = layers.conv2d(net,
                                                 32, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_2 = layers.conv2d(branch_2,
                                                 96, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_3'):
                        branch_3 = layers_lib.max_pool2d(
                            net, [3, 3], scope='MaxPool_0a_3x3')
                        branch_3 = layers.conv2d(branch_3,
                                                 64, [1, 1],
                                                 scope='Conv2d_0b_1x1')
                    net = array_ops.concat(
                        [branch_0, branch_1, branch_2, branch_3], 3)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points

                end_point = 'MaxPool_4a_3x3'
                net = layers_lib.max_pool2d(net, [3, 3],
                                            stride=2,
                                            scope=end_point)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points

                end_point = 'Mixed_4b'
                with variable_scope.variable_scope(end_point):
                    with variable_scope.variable_scope('Branch_0'):
                        branch_0 = layers.conv2d(net,
                                                 192, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                    with variable_scope.variable_scope('Branch_1'):
                        branch_1 = layers.conv2d(net,
                                                 96, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_1 = layers.conv2d(branch_1,
                                                 208, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_2'):
                        branch_2 = layers.conv2d(net,
                                                 16, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_2 = layers.conv2d(branch_2,
                                                 48, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_3'):
                        branch_3 = layers_lib.max_pool2d(
                            net, [3, 3], scope='MaxPool_0a_3x3')
                        branch_3 = layers.conv2d(branch_3,
                                                 64, [1, 1],
                                                 scope='Conv2d_0b_1x1')
                    net = array_ops.concat(
                        [branch_0, branch_1, branch_2, branch_3], 3)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points

                end_point = 'Mixed_4c'
                with variable_scope.variable_scope(end_point):
                    with variable_scope.variable_scope('Branch_0'):
                        branch_0 = layers.conv2d(net,
                                                 160, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                    with variable_scope.variable_scope('Branch_1'):
                        branch_1 = layers.conv2d(net,
                                                 112, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_1 = layers.conv2d(branch_1,
                                                 224, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_2'):
                        branch_2 = layers.conv2d(net,
                                                 24, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_2 = layers.conv2d(branch_2,
                                                 64, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_3'):
                        branch_3 = layers_lib.max_pool2d(
                            net, [3, 3], scope='MaxPool_0a_3x3')
                        branch_3 = layers.conv2d(branch_3,
                                                 64, [1, 1],
                                                 scope='Conv2d_0b_1x1')
                    net = array_ops.concat(
                        [branch_0, branch_1, branch_2, branch_3], 3)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points

                end_point = 'Mixed_4d'
                with variable_scope.variable_scope(end_point):
                    with variable_scope.variable_scope('Branch_0'):
                        branch_0 = layers.conv2d(net,
                                                 128, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                    with variable_scope.variable_scope('Branch_1'):
                        branch_1 = layers.conv2d(net,
                                                 128, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_1 = layers.conv2d(branch_1,
                                                 256, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_2'):
                        branch_2 = layers.conv2d(net,
                                                 24, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_2 = layers.conv2d(branch_2,
                                                 64, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_3'):
                        branch_3 = layers_lib.max_pool2d(
                            net, [3, 3], scope='MaxPool_0a_3x3')
                        branch_3 = layers.conv2d(branch_3,
                                                 64, [1, 1],
                                                 scope='Conv2d_0b_1x1')
                    net = array_ops.concat(
                        [branch_0, branch_1, branch_2, branch_3], 3)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points

                end_point = 'Mixed_4e'
                with variable_scope.variable_scope(end_point):
                    with variable_scope.variable_scope('Branch_0'):
                        branch_0 = layers.conv2d(net,
                                                 112, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                    with variable_scope.variable_scope('Branch_1'):
                        branch_1 = layers.conv2d(net,
                                                 144, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_1 = layers.conv2d(branch_1,
                                                 288, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_2'):
                        branch_2 = layers.conv2d(net,
                                                 32, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_2 = layers.conv2d(branch_2,
                                                 64, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_3'):
                        branch_3 = layers_lib.max_pool2d(
                            net, [3, 3], scope='MaxPool_0a_3x3')
                        branch_3 = layers.conv2d(branch_3,
                                                 64, [1, 1],
                                                 scope='Conv2d_0b_1x1')
                    net = array_ops.concat(
                        [branch_0, branch_1, branch_2, branch_3], 3)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points

                end_point = 'Mixed_4f'
                with variable_scope.variable_scope(end_point):
                    with variable_scope.variable_scope('Branch_0'):
                        branch_0 = layers.conv2d(net,
                                                 256, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                    with variable_scope.variable_scope('Branch_1'):
                        branch_1 = layers.conv2d(net,
                                                 160, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_1 = layers.conv2d(branch_1,
                                                 320, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_2'):
                        branch_2 = layers.conv2d(net,
                                                 32, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_2 = layers.conv2d(branch_2,
                                                 128, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_3'):
                        branch_3 = layers_lib.max_pool2d(
                            net, [3, 3], scope='MaxPool_0a_3x3')
                        branch_3 = layers.conv2d(branch_3,
                                                 128, [1, 1],
                                                 scope='Conv2d_0b_1x1')
                    net = array_ops.concat(
                        [branch_0, branch_1, branch_2, branch_3], 3)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points

                end_point = 'MaxPool_5a_2x2'
                net = layers_lib.max_pool2d(net, [2, 2],
                                            stride=2,
                                            scope=end_point)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points

                end_point = 'Mixed_5b'
                with variable_scope.variable_scope(end_point):
                    with variable_scope.variable_scope('Branch_0'):
                        branch_0 = layers.conv2d(net,
                                                 256, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                    with variable_scope.variable_scope('Branch_1'):
                        branch_1 = layers.conv2d(net,
                                                 160, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_1 = layers.conv2d(branch_1,
                                                 320, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_2'):
                        branch_2 = layers.conv2d(net,
                                                 32, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        # Note: this conv's scope is 'Conv2d_0a_3x3', unlike
                        # the 'Conv2d_0b_3x3' naming used in the other blocks.
                        branch_2 = layers.conv2d(branch_2,
                                                 128, [3, 3],
                                                 scope='Conv2d_0a_3x3')
                    with variable_scope.variable_scope('Branch_3'):
                        branch_3 = layers_lib.max_pool2d(
                            net, [3, 3], scope='MaxPool_0a_3x3')
                        branch_3 = layers.conv2d(branch_3,
                                                 128, [1, 1],
                                                 scope='Conv2d_0b_1x1')
                    net = array_ops.concat(
                        [branch_0, branch_1, branch_2, branch_3], 3)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points

                end_point = 'Mixed_5c'
                with variable_scope.variable_scope(end_point):
                    with variable_scope.variable_scope('Branch_0'):
                        branch_0 = layers.conv2d(net,
                                                 384, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                    with variable_scope.variable_scope('Branch_1'):
                        branch_1 = layers.conv2d(net,
                                                 192, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_1 = layers.conv2d(branch_1,
                                                 384, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_2'):
                        branch_2 = layers.conv2d(net,
                                                 48, [1, 1],
                                                 scope='Conv2d_0a_1x1')
                        branch_2 = layers.conv2d(branch_2,
                                                 128, [3, 3],
                                                 scope='Conv2d_0b_3x3')
                    with variable_scope.variable_scope('Branch_3'):
                        branch_3 = layers_lib.max_pool2d(
                            net, [3, 3], scope='MaxPool_0a_3x3')
                        branch_3 = layers.conv2d(branch_3,
                                                 128, [1, 1],
                                                 scope='Conv2d_0b_1x1')
                    net = array_ops.concat(
                        [branch_0, branch_1, branch_2, branch_3], 3)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points
        raise ValueError('Unknown final endpoint %s' % final_endpoint)
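
Each Mixed_* block above stores its output in end_points under its own name and returns early once final_endpoint is reached, so the base network can be truncated at any named layer. A minimal usage sketch of that behavior (assuming a TF 1.x graph-mode setup with tf.contrib available and the inception_v1_base defined above in scope; the shapes in the comments are illustrative):

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()  # graph mode, as the contrib-style layers above require

# Hypothetical input batch: 8 RGB images at the default 224x224 resolution.
images = tf.placeholder(tf.float32, [8, 224, 224, 3])

# Build the base network only up to (and including) 'Mixed_4f'; the later
# blocks are skipped by the early returns shown above.
net, end_points = inception_v1_base(images, final_endpoint='Mixed_4f')

print(net.shape)                  # expected (8, 14, 14, 832) at this endpoint
print(sorted(end_points.keys()))  # every endpoint up to and including 'Mixed_4f'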
Example #23
def inception_v1(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 prediction_fn=layers_lib.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV1'):
    """Defines the Inception V1 architecture.

  This architecture is defined in:

    Going deeper with convolutions
    Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
    Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
    http://arxiv.org/pdf/1409.4842v1.pdf.

  The default image size used to train this network is 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the fraction of activation values that are retained.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits has shape [B, C]; if False, logits has
        shape [B, 1, 1, C], where B is batch_size and C is the number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse, 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes]
    end_points: a dictionary from components of the network to the corresponding
      activation.
  """
    with variable_scope.variable_scope(scope,
                                       'InceptionV1', [inputs, num_classes],
                                       reuse=reuse) as scope:
        with arg_scope([layers_lib.batch_norm, layers_lib.dropout],
                       is_training=is_training):
            net, end_points = inception_v1_base(inputs, scope=scope)
            # Final pooling and prediction
            with variable_scope.variable_scope('Logits'):
                # 7x7 average pooling down to a 1x1 feature map; note the
                # scope name says 'MaxPool' even though avg_pool2d is used.
                net = layers_lib.avg_pool2d(net, [7, 7],
                                            stride=1,
                                            scope='MaxPool_0a_7x7')
                net = layers_lib.dropout(net,
                                         dropout_keep_prob,
                                         scope='Dropout_0b')
                logits = layers.conv2d(net,
                                       num_classes, [1, 1],
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       scope='Conv2d_0c_1x1')
                if spatial_squeeze:
                    logits = array_ops.squeeze(logits, [1, 2],
                                               name='SpatialSqueeze')

                end_points['Logits'] = logits
                end_points['Predictions'] = prediction_fn(logits,
                                                          scope='Predictions')
    return logits, end_points
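
A similarly minimal usage sketch for the full classifier (hypothetical batch size and class count, same TF 1.x graph-mode assumptions as the sketch above):

images = tf.compat.v1.placeholder(tf.float32, [16, 224, 224, 3])
logits, end_points = inception_v1(images, num_classes=10, is_training=False)

# With spatial_squeeze=True (the default), logits has shape [16, 10]; the
# softmax output is also exposed under end_points['Predictions'].
probabilities = end_points['Predictions']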
Example #24
def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          scope='vgg_a'):
    """Oxford Net VGG 11-Layers version A Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not to squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions, and an end_points dict of
    activations.
  """
    with variable_scope.variable_scope(scope, 'vgg_a', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with arg_scope([layers.conv2d, layers_lib.max_pool2d],
                       outputs_collections=end_points_collection):
            net = layers_lib.repeat(inputs,
                                    1,
                                    layers.conv2d,
                                    64, [3, 3],
                                    scope='conv1')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
            net = layers_lib.repeat(net,
                                    1,
                                    layers.conv2d,
                                    128, [3, 3],
                                    scope='conv2')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
            net = layers_lib.repeat(net,
                                    2,
                                    layers.conv2d,
                                    256, [3, 3],
                                    scope='conv3')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
            net = layers_lib.repeat(net,
                                    2,
                                    layers.conv2d,
                                    512, [3, 3],
                                    scope='conv4')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
            net = layers_lib.repeat(net,
                                    2,
                                    layers.conv2d,
                                    512, [3, 3],
                                    scope='conv5')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
            # Use conv2d instead of fully_connected layers.
            net = layers.conv2d(net,
                                4096, [7, 7],
                                padding='VALID',
                                scope='fc6')
            net = layers_lib.dropout(net,
                                     dropout_keep_prob,
                                     is_training=is_training,
                                     scope='dropout6')
            net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
            net = layers_lib.dropout(net,
                                     dropout_keep_prob,
                                     is_training=is_training,
                                     scope='dropout7')
            net = layers.conv2d(net,
                                num_classes, [1, 1],
                                activation_fn=None,
                                normalizer_fn=None,
                                scope='fc8')
            # Convert end_points_collection into an end_points dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
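
A corresponding sketch for vgg_a (hypothetical sizes; the 224x224 input matters because fc6 is a 7x7 convolution with VALID padding, so other resolutions change the spatial shape of the output):

images = tf.compat.v1.placeholder(tf.float32, [4, 224, 224, 3])
net, end_points = vgg_a(images, num_classes=1000, is_training=False)

# With spatial_squeeze=True (the default), net is the fc8 output with shape
# [4, 1000]; intermediate activations are available in end_points, keyed by
# their variable scope names.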