Example #1
 def test_aliases(self):
     t1 = constant_op.constant(1.0, name='t1')
     t2 = constant_op.constant(2.0, name='t2')
     utils.collect_named_outputs('end_points', 'a1', t1)
     utils.collect_named_outputs('end_points', 'a2', t2)
     self.assertEqual(t1.aliases, ['a1'])
     self.assertEqual(t2.aliases, ['a2'])
Example #2
 def test_gather_aliases(self):
     t1 = constant_op.constant(1.0, name='t1')
     t2 = constant_op.constant(2.0, name='t2')
     t3 = constant_op.constant(2.0, name='t3')
     utils.collect_named_outputs('end_points', 'a1', t1)
     utils.collect_named_outputs('end_points', 'a2', t2)
     ops.add_to_collection('end_points', t3)
     aliases = utils.gather_tensors_aliases(
         ops.get_collection('end_points'))
     self.assertEqual(aliases, ['a1', 'a2', 't3'])
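
Read together, Examples #1 and #2 pin down the contract of the aliasing utilities: collect_named_outputs records the alias on the tensor and appends the tensor to the named collection, while gather_tensors_aliases reads the aliases back and falls back to the op name (hence 't3') for tensors added to the collection directly. A minimal pure-Python sketch of that contract follows; the class and helpers are hypothetical stand-ins, not the tf.contrib implementation.

# Sketch only: hypothetical stand-ins illustrating the behavior the tests
# above rely on, not the actual tf.contrib.layers utils code.
_collections = {}

class FakeTensor(object):
    def __init__(self, name):
        self.name = name  # TensorFlow tensor names look like 't3:0'

def collect_named_outputs(collection, alias, tensor):
    if collection:
        # Remember the alias on the tensor and add it to the collection.
        tensor.aliases = getattr(tensor, 'aliases', []) + [alias]
        _collections.setdefault(collection, []).append(tensor)
    return tensor

def gather_tensors_aliases(tensors):
    aliases = []
    for t in tensors:
        if hasattr(t, 'aliases'):
            aliases.extend(t.aliases)
        else:
            # No alias recorded: fall back to the op name, 't3:0' -> 't3'.
            aliases.append(t.name.split(':')[0])
    return aliases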
Example #3
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               rate=1,
               outputs_collections=None,
               scope=None):
    """Bottleneck residual unit variant with BN after convolutions.

    This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
    its definition. Note that we use the bottleneck variant here, which has an
    extra bottleneck layer.

    When putting together two consecutive ResNet blocks that use this unit, one
    should use stride = 2 in the last unit of the first block.

    Args:
      inputs: A tensor of size [batch, height, width, channels].
      depth: The depth of the ResNet unit output.
      depth_bottleneck: The depth of the bottleneck layers.
      stride: The ResNet unit's stride. Determines the amount of downsampling of
        the unit's output compared to its input.
      rate: An integer, rate for atrous convolution.
      outputs_collections: Collection to add the ResNet unit output.
      scope: Optional variable_scope.

    Returns:
      The ResNet unit's output.
    """
    with variable_scope.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
        depth_in = utils.last_dimension(inputs.get_shape(), min_rank=4)
        if depth == depth_in:
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
        else:
            shortcut = layers.conv2d(inputs,
                                     depth, [1, 1],
                                     stride=stride,
                                     activation_fn=None,
                                     scope='shortcut')

        residual = layers.conv2d(inputs,
                                 depth_bottleneck, [1, 1],
                                 stride=1,
                                 scope='conv1')
        residual = resnet_utils.conv2d_same(residual,
                                            depth_bottleneck,
                                            3,
                                            stride,
                                            rate=rate,
                                            scope='conv2')
        residual = layers.conv2d(residual,
                                 depth, [1, 1],
                                 stride=1,
                                 activation_fn=None,
                                 scope='conv3')

        output = nn_ops.relu(shortcut + residual)

        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           output)
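
For context, bottleneck is the unit_fn that stack_blocks_dense (Example #6 below) stacks into a full network. A hedged sketch of how such a unit is typically packaged into a ResNet block, assuming resnet_utils.Block is a (scope, unit_fn, args) namedtuple whose args is a list of per-unit keyword dicts, which is how stack_blocks_dense consumes it:

# Sketch only: assumes resnet_utils.Block is a namedtuple with fields
# (scope, unit_fn, args), where args is a list of kwargs dicts per unit,
# matching how stack_blocks_dense unpacks each unit in Example #6.
block1 = resnet_utils.Block(
    'block1', bottleneck,
    # Two units that preserve resolution, then one that downsamples:
    # stride=2 in the last unit of the block, as the docstring recommends.
    [{'depth': 256, 'depth_bottleneck': 64, 'stride': 1}] * 2 +
    [{'depth': 256, 'depth_bottleneck': 64, 'stride': 2}])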
Example #4
 def test_convert_collection_to_dict_clear_collection(self):
     t1 = constant_op.constant(1.0, name='t1')
     t2 = constant_op.constant(2.0, name='t2')
     utils.collect_named_outputs('end_points', 'a1', t1)
     utils.collect_named_outputs('end_points', 'a21', t2)
     utils.collect_named_outputs('end_points', 'a22', t2)
     utils.convert_collection_to_dict('end_points', clear_collection=True)
     self.assertEqual(ops.get_collection('end_points'), [])
Example #5
 def test_convert_collection_to_dict(self):
     t1 = constant_op.constant(1.0, name='t1')
     t2 = constant_op.constant(2.0, name='t2')
     utils.collect_named_outputs('end_points', 'a1', t1)
     utils.collect_named_outputs('end_points', 'a21', t2)
     utils.collect_named_outputs('end_points', 'a22', t2)
     end_points = utils.convert_collection_to_dict('end_points')
     self.assertEqual(end_points['a1'], t1)
     self.assertEqual(end_points['a21'], t2)
     self.assertEqual(end_points['a22'], t2)
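
This is the dictionary half of the end_points pattern used by the layer functions in this section: bottleneck (Example #3) and instance_norm/group_norm below each call collect_named_outputs(outputs_collections, sc.name, output), and the collection is later flattened into a dict keyed by alias. A small hedged sketch, where `images` is a hypothetical input tensor:

# Sketch only: `images` is a hypothetical 4-D input tensor.
net = bottleneck(images, depth=256, depth_bottleneck=64, stride=2,
                 outputs_collections='end_points')
end_points = utils.convert_collection_to_dict('end_points')
# The key is the alias the unit registered, i.e. its scope name,
# e.g. 'bottleneck_v1' when there is no enclosing variable scope.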
Example #6
def stack_blocks_dense(net,
                       blocks,
                       output_stride=None,
                       outputs_collections=None):
    """Stacks ResNet `Blocks` and controls output feature density.

    First, this function creates scopes for the ResNet in the form of
    'block_name/unit_1', 'block_name/unit_2', etc.

    Second, this function allows the user to explicitly control the ResNet
    output_stride, which is the ratio of the input to output spatial resolution.
    This is useful for dense prediction tasks such as semantic segmentation or
    object detection.

    Most ResNets consist of 4 ResNet blocks and subsample the activations by a
    factor of 2 when transitioning between consecutive ResNet blocks. This results
    in a nominal ResNet output_stride equal to 8. If we set the output_stride to
    half the nominal network stride (e.g., output_stride=4), then we compute
    responses twice.

    Control of the output feature density is implemented by atrous convolution.

    Args:
      net: A `Tensor` of size [batch, height, width, channels].
      blocks: A list of length equal to the number of ResNet `Blocks`. Each
        element is a ResNet `Block` object describing the units in the `Block`.
      output_stride: If `None`, then the output will be computed at the nominal
        network stride. If output_stride is not `None`, it specifies the requested
        ratio of input to output spatial resolution, which needs to be equal to
        the product of unit strides from the start up to some level of the ResNet.
        For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1,
        then valid values for the output_stride are 1, 2, 6, 24 or None (which
        is equivalent to output_stride=24).
      outputs_collections: Collection to add the ResNet block outputs.

    Returns:
      net: Output tensor with stride equal to the specified output_stride.

    Raises:
      ValueError: If the target output_stride is not valid.
    """
    # The current_stride variable keeps track of the effective stride of the
    # activations. This allows us to invoke atrous convolution whenever applying
    # the next residual unit would result in the activations having stride larger
    # than the target output_stride.
    current_stride = 1

    # The atrous convolution rate parameter.
    rate = 1

    for block in blocks:
        with variable_scope.variable_scope(block.scope, 'block', [net]) as sc:
            for i, unit in enumerate(block.args):
                if output_stride is not None and current_stride > output_stride:
                    raise ValueError(
                        'The target output_stride cannot be reached.')

                with variable_scope.variable_scope('unit_%d' % (i + 1),
                                                   values=[net]):
                    # If we have reached the target output_stride, then we need to employ
                    # atrous convolution with stride=1 and multiply the atrous rate by the
                    # current unit's stride for use in subsequent layers.
                    if output_stride is not None and current_stride == output_stride:
                        net = block.unit_fn(net,
                                            rate=rate,
                                            **dict(unit, stride=1))
                        rate *= unit.get('stride', 1)

                    else:
                        net = block.unit_fn(net, rate=1, **unit)
                        current_stride *= unit.get('stride', 1)
            net = utils.collect_named_outputs(outputs_collections, sc.name,
                                              net)

    if output_stride is not None and current_stride != output_stride:
        raise ValueError('The target output_stride cannot be reached.')

    return net
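
The current_stride / rate bookkeeping above can be traced without building a graph. The following pure-Python sketch mirrors the control flow of the loop (blocks are reduced to lists of per-unit strides) and shows which units switch to stride 1 with an increased atrous rate once the requested output_stride has been reached; it is an illustration of the logic, not part of the library.

# Pure-Python trace of the stride/rate logic in stack_blocks_dense.
# Each block is represented only by its per-unit nominal strides.
def trace_strides(blocks, output_stride=None):
    current_stride, rate = 1, 1
    plan = []
    for strides in blocks:
        for s in strides:
            if output_stride is not None and current_stride == output_stride:
                # Target density reached: run the unit with stride 1 and
                # grow the atrous rate by the unit's nominal stride.
                plan.append((1, rate))
                rate *= s
            else:
                plan.append((s, 1))
                current_stride *= s
    if output_stride is not None and current_stride != output_stride:
        raise ValueError('The target output_stride cannot be reached.')
    return plan

# Three blocks whose last unit downsamples, with output_stride=2:
# from the second block on every unit runs at stride 1, and the atrous
# rate reaches 2 in the third block.
print(trace_strides([[1, 2], [1, 2], [1, 2]], output_stride=2))
# [(1, 1), (2, 1), (1, 1), (1, 1), (1, 2), (1, 2)]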
Example #7
 def test_collect(self):
     t1 = constant_op.constant(1.0, name='t1')
     t2 = constant_op.constant(2.0, name='t2')
     utils.collect_named_outputs('end_points', 'a1', t1)
     utils.collect_named_outputs('end_points', 'a2', t2)
     self.assertEqual(ops.get_collection('end_points'), [t1, t2])
Example #8
def instance_norm(inputs,
                  center=True,
                  scale=True,
                  epsilon=1e-6,
                  activation_fn=None,
                  param_initializers=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  data_format=DATA_FORMAT_NHWC,
                  scope=None):
    """Functional interface for the instance normalization layer.

    Reference: https://arxiv.org/abs/1607.08022.

      "Instance Normalization: The Missing Ingredient for Fast Stylization"
      Dmitry Ulyanov, Andrea Vedaldi, Victor Lempitsky

    Args:
      inputs: A tensor with 2 or more dimensions, where the first dimension has
        `batch_size`. The normalization is over the spatial dimensions, i.e. all
        but the first (batch) and last (channels) dimensions if `data_format` is
        `NHWC`, and all but the first two dimensions if `data_format` is `NCHW`.
      center: If True, add offset of `beta` to normalized tensor. If False, `beta`
        is ignored.
      scale: If True, multiply by `gamma`. If False, `gamma` is
        not used. When the next layer is linear (also e.g. `nn.relu`), this can be
        disabled since the scaling can be done by the next layer.
      epsilon: Small float added to variance to avoid dividing by zero.
      activation_fn: Activation function, default set to None to skip it and
        maintain a linear activation.
      param_initializers: Optional initializers for beta and gamma.
      reuse: Whether or not the layer and its variables should be reused. To be
        able to reuse the layer, `scope` must be given.
      variables_collections: Optional collections for the variables.
      outputs_collections: Collections to add the outputs.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      data_format: A string. `NHWC` (default) and `NCHW` are supported.
      scope: Optional scope for `variable_scope`.

    Returns:
      A `Tensor` representing the output of the operation.

    Raises:
      ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
      ValueError: If the rank of `inputs` is undefined.
      ValueError: If the channels dimension of `inputs` is undefined.
    """
    inputs = ops.convert_to_tensor(inputs)
    inputs_shape = inputs.shape
    inputs_rank = inputs.shape.ndims

    if inputs_rank is None:
        raise ValueError('Inputs %s has undefined rank.' % inputs.name)
    if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
        raise ValueError('data_format has to be either NCHW or NHWC.')

    with variable_scope.variable_scope(scope,
                                       'InstanceNorm', [inputs],
                                       reuse=reuse) as sc:
        if data_format == DATA_FORMAT_NCHW:
            reduction_axis = 1
            # For NCHW format, rather than relying on implicit broadcasting, we
            # explicitly reshape the params to params_shape_broadcast when computing
            # the moments and the batch normalization.
            params_shape_broadcast = list(
                [1, tf.dimension_value(inputs_shape[1])] + [1] *
                (inputs_rank - 2))
        else:
            reduction_axis = inputs_rank - 1
            params_shape_broadcast = None
        moments_axes = list(range(inputs_rank))
        del moments_axes[reduction_axis]
        del moments_axes[0]
        params_shape = inputs_shape[reduction_axis:reduction_axis + 1]
        if not params_shape.is_fully_defined():
            raise ValueError('Inputs %s has undefined channels dimension %s.' %
                             (inputs.name, params_shape))

        # Allocate parameters for the beta and gamma of the normalization.
        beta, gamma = None, None
        dtype = inputs.dtype.base_dtype
        if param_initializers is None:
            param_initializers = {}
        if center:
            beta_collections = utils.get_variable_collections(
                variables_collections, 'beta')
            beta_initializer = param_initializers.get(
                'beta', init_ops.zeros_initializer())
            beta = variables.model_variable('beta',
                                            shape=params_shape,
                                            dtype=dtype,
                                            initializer=beta_initializer,
                                            collections=beta_collections,
                                            trainable=trainable)
            if params_shape_broadcast:
                beta = array_ops.reshape(beta, params_shape_broadcast)
        if scale:
            gamma_collections = utils.get_variable_collections(
                variables_collections, 'gamma')
            gamma_initializer = param_initializers.get(
                'gamma', init_ops.ones_initializer())
            gamma = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=gamma_initializer,
                                             collections=gamma_collections,
                                             trainable=trainable)
            if params_shape_broadcast:
                gamma = array_ops.reshape(gamma, params_shape_broadcast)

        # Calculate the moments (instance activations).
        mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)

        # Compute instance normalization.
        outputs = nn.batch_normalization(inputs,
                                         mean,
                                         variance,
                                         beta,
                                         gamma,
                                         epsilon,
                                         name='instancenorm')
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           outputs)
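
As set up by moments_axes above, the statistics are computed per example and per channel, over the spatial positions only. A NumPy reference for the NHWC case, offered as an illustration of the math rather than the TensorFlow kernel:

import numpy as np

def instance_norm_ref(x, beta=None, gamma=None, epsilon=1e-6):
    # x: [batch, height, width, channels]; mirrors moments_axes == [1, 2]
    # for a rank-4 NHWC input in the code above.
    mean = x.mean(axis=(1, 2), keepdims=True)
    var = x.var(axis=(1, 2), keepdims=True)
    out = (x - mean) / np.sqrt(var + epsilon)
    if gamma is not None:  # per-channel scale, shape [channels]
        out = out * gamma
    if beta is not None:   # per-channel offset, shape [channels]
        out = out + beta
    return out

x = np.random.randn(2, 8, 8, 3).astype(np.float32)
y = instance_norm_ref(x, beta=np.zeros(3), gamma=np.ones(3))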
Example #9
def group_norm(inputs,
               groups=32,
               channels_axis=-1,
               reduction_axes=(-3, -2),
               center=True,
               scale=True,
               epsilon=1e-6,
               activation_fn=None,
               param_initializers=None,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               scope=None,
               mean_close_to_zero=False):
    """Functional interface for the group normalization layer.

    Reference: https://arxiv.org/abs/1803.08494.

      "Group Normalization", Yuxin Wu, Kaiming He

    Args:
      inputs: A Tensor with at least 2 dimensions, one of which is channels. All
        shape dimensions except for batch must be fully defined.
      groups: Integer. Divide the channels into this number of groups over which
        normalization statistics are computed. This number must be commensurate
        with the number of channels in `inputs`.
      channels_axis: An integer. Specifies the index of the channels axis, which
        will be broken into `groups`; normalization statistics are computed
        across each group. Must be mutually exclusive with `reduction_axes`.
        Preferred usage is to specify negative integers to be agnostic as to
        whether a batch dimension is included.
      reduction_axes: Tuple of integers. Specifies the dimensions over which
        statistics will be accumulated. Must be mutually exclusive with
        `channels_axis`. Statistics are accumulated only across the axes in
        `reduction_axes` and, within each group, along `channels_axis`.
        Preferred usage is to specify negative integers to be agnostic to
        whether a batch dimension is included.

        Some sample usage cases:
          NHWC format: channels_axis=-1, reduction_axes=[-3, -2]
          NCHW format: channels_axis=-3, reduction_axes=[-2, -1]

      center: If True, add offset of `beta` to normalized tensor. If False, `beta`
        is ignored.
      scale: If True, multiply by `gamma`. If False, `gamma` is
        not used. When the next layer is linear (also e.g. `nn.relu`), this can be
        disabled since the scaling can be done by the next layer.
      epsilon: Small float added to variance to avoid dividing by zero.
      activation_fn: Activation function, default set to None to skip it and
        maintain a linear activation.
      param_initializers: Optional initializers for beta and gamma.
      reuse: Whether or not the layer and its variables should be reused. To be
        able to reuse the layer, `scope` must be given.
      variables_collections: Optional collections for the variables.
      outputs_collections: Collections to add the outputs.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      scope: Optional scope for `variable_scope`.
      mean_close_to_zero: The mean of `input` before ReLU will be close to zero
        when batch size >= 4k for Resnet-50 on TPU. If `True`, use
        `nn.sufficient_statistics` and `nn.normalize_moments` to calculate the
        variance. This is the same behavior as `fused` equals `True` in batch
        normalization. If `False`, use `nn.moments` to calculate the variance.
        When `mean` is close to zero, like 1e-4, using `mean` to calculate the
        variance may give poor results due to repeated roundoff error and
        denormalization in `mean`. When `mean` is large, like 1e2,
        sum(`input`^2) is so large that only the high-order digits of the
        elements are accumulated. Thus, using sum((`input` - `mean`)^2)/n to
        calculate the variance is more accurate than (sum(`input`^2)/n -
        `mean`^2) when `mean` is large.


    Returns:
      A `Tensor` representing the output of the operation.

    Raises:
      ValueError: If the rank of `inputs` is undefined.
      ValueError: If the channels dimension or a reduction-axis dimension of
        `inputs` is undefined.
      ValueError: If number of groups is not commensurate with number of channels.
      ValueError: If reduction_axes or channels_axis are out of bounds.
      ValueError: If reduction_axes are not mutually exclusive with channels_axis.
    """
    inputs = ops.convert_to_tensor(inputs)

    if inputs.shape.ndims is None:
        raise ValueError('Inputs %s has undefined rank.' % inputs.name)
    if channels_axis > (inputs.shape.ndims - 1):
        raise ValueError('Axis is out of bounds.')

    # Use dynamic shape for not fully defined dimensions in the inputs.
    dynamic_shape = array_ops.shape(inputs)
    input_shape_list = []
    for i, dim in enumerate(inputs.shape):
        if tf.dimension_value(dim) is None:
            input_shape_list.append(dynamic_shape[i])
        else:
            input_shape_list.append(dim)

    # Standardize the channels_axis to be positive and identify # of channels.
    if channels_axis < 0:
        channels_axis = inputs.shape.ndims + channels_axis
    channels = tf.dimension_value(inputs.shape[channels_axis])

    if channels is None:
        raise ValueError('Inputs %s has undefined channel dimension: %d.' %
                         (inputs.name, channels_axis))

    # Standardize the reduction_axes to be positive.
    reduction_axes = list(reduction_axes)
    for i in range(len(reduction_axes)):
        if reduction_axes[i] < 0:
            reduction_axes[i] += inputs.shape.ndims

    for a in reduction_axes:
        if a >= inputs.shape.ndims:
            raise ValueError('Axis is out of bounds.')
        if tf.dimension_value(inputs.shape[a]) is None:
            raise ValueError('Inputs %s has undefined dimensions %d.' %
                             (inputs.name, a))
        if channels_axis == a:
            raise ValueError('reduction_axis must be mutually exclusive '
                             'with channels_axis')
    if groups > channels:
        raise ValueError('Invalid groups %d for %d channels.' %
                         (groups, channels))
    if channels % groups != 0:
        raise ValueError('%d channels is not commensurate with %d groups.' %
                         (channels, groups))

    # Determine axes before channels. Some examples of common image formats:
    #  'NCHW': before = [N], after = [HW]
    #  'NHWC': before = [NHW], after = []
    axes_before_channels = input_shape_list[:channels_axis]
    axes_after_channels = input_shape_list[channels_axis + 1:]

    # Manually broadcast the parameters to conform to the number of groups.
    params_shape_broadcast = ([1] * len(axes_before_channels) +
                              [groups, channels // groups] +
                              [1] * len(axes_after_channels))

    # Reshape the input by the group within the channel dimension.
    inputs_shape = (axes_before_channels + [groups, channels // groups] +
                    axes_after_channels)
    inputs = array_ops.reshape(inputs, inputs_shape)

    # Determine the dimensions across which moments are calculated.
    moments_axes = [channels_axis + 1]
    for a in reduction_axes:
        if a > channels_axis:
            moments_axes.append(a + 1)
        else:
            moments_axes.append(a)

    with variable_scope.variable_scope(scope,
                                       'GroupNorm', [inputs],
                                       reuse=reuse) as sc:
        # Note that the params_shape is the number of channels always.
        params_shape = [channels]

        # Allocate parameters for the beta and gamma of the normalization.
        beta, gamma = None, None
        dtype = inputs.dtype.base_dtype
        if param_initializers is None:
            param_initializers = {}
        if center:
            beta_collections = utils.get_variable_collections(
                variables_collections, 'beta')
            beta_initializer = param_initializers.get(
                'beta', init_ops.zeros_initializer())
            beta = variables.model_variable('beta',
                                            shape=params_shape,
                                            dtype=dtype,
                                            initializer=beta_initializer,
                                            collections=beta_collections,
                                            trainable=trainable)
            beta = array_ops.reshape(beta, params_shape_broadcast)

        if scale:
            gamma_collections = utils.get_variable_collections(
                variables_collections, 'gamma')
            gamma_initializer = param_initializers.get(
                'gamma', init_ops.ones_initializer())
            gamma = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=gamma_initializer,
                                             collections=gamma_collections,
                                             trainable=trainable)
            gamma = array_ops.reshape(gamma, params_shape_broadcast)

        # Calculate the moments.
        if mean_close_to_zero:
            # One pass algorithm returns better result when mean is close to zero.
            counts, means_ss, variance_ss, _ = nn.sufficient_statistics(
                inputs, moments_axes, keep_dims=True)
            mean, variance = nn.normalize_moments(counts,
                                                  means_ss,
                                                  variance_ss,
                                                  shift=None)
        else:
            mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)

        # Compute normalization.
        gain = math_ops.rsqrt(variance + epsilon)
        offset = -mean * gain
        if gamma is not None:
            gain *= gamma
            offset *= gamma
        if beta is not None:
            offset += beta
        outputs = inputs * gain + offset

        # Collapse the groups into the channel dimension.
        outputs = array_ops.reshape(outputs, input_shape_list)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           outputs)
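
The reshape-and-normalize structure above has a compact NumPy analogue for the common NHWC configuration (channels_axis=-1, reduction_axes=(-3, -2)): split the channel axis into [groups, channels // groups], take moments over the spatial axes and the within-group channel axis, then fold the groups back. The reference below is an illustration of that math, not the TensorFlow implementation:

import numpy as np

def group_norm_ref(x, groups=32, beta=None, gamma=None, epsilon=1e-6):
    # x: [batch, height, width, channels], mirroring the NHWC case above.
    n, h, w, c = x.shape
    assert c % groups == 0, 'channels must be divisible by groups'
    xg = x.reshape(n, h, w, groups, c // groups)
    # Moments over H, W and the within-group channel axis, per (sample, group),
    # matching moments_axes == [4, 1, 2] for the reshaped tensor above.
    mean = xg.mean(axis=(1, 2, 4), keepdims=True)
    var = xg.var(axis=(1, 2, 4), keepdims=True)
    out = (xg - mean) / np.sqrt(var + epsilon)
    out = out.reshape(n, h, w, c)
    if gamma is not None:  # per-channel scale, shape [channels]
        out = out * gamma
    if beta is not None:   # per-channel offset, shape [channels]
        out = out + beta
    return out

x = np.random.randn(2, 8, 8, 64).astype(np.float32)
y = group_norm_ref(x, groups=32, beta=np.zeros(64), gamma=np.ones(64))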