def __init__(self,
               num_classes,
               endpoints_num_filters=0,
               aggregation='top',
               dropout_rate=0.0,
               batch_norm_activation=nn_ops.BatchNormActivation(),
               data_format='channels_last'):
    """Initialize params to build classification head.

    Args:
      num_classes: the number of classes, including one background class.
      endpoints_num_filters: the number of filters of the optional embedding
        layer after the multiscale feature aggregation. If 0, no additional
        embedding layer is applied.
      aggregation: the method to aggregate the multiscale feature maps. If
        `top`, the feature map of the highest level will be directly used.
        If `all`, all levels will be used by nearest-neighbor upsampling and
        averaging to the same size as the lowest level (the number of filters
        for all levels should match).
      dropout_rate: the dropout rate of the optional dropout layer. If 0.0, no
        additional dropout layer is applied.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
      data_format: An optional string from: `channels_last`, `channels_first`.
        Defaults to `channels_last`.
    """
    self._num_classes = num_classes
    self._endpoints_num_filters = endpoints_num_filters
    self._aggregation = aggregation
    self._dropout_rate = dropout_rate
    self._batch_norm_activation = batch_norm_activation
    self._data_format = data_format
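A hedged instantiation sketch for the head above; the enclosing class name is not shown, so `ClassificationHead` below is only a placeholder.
# Hedged sketch: `ClassificationHead` stands in for the (unnamed) enclosing class.
head = ClassificationHead(num_classes=1001,           # includes the background class
                          endpoints_num_filters=256,  # adds the optional embedding layer
                          aggregation='all',          # average all pyramid levels
                          dropout_rate=0.2)           # adds the optional dropout layer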
Example #2
def spinenet_builder(
        model_id,
        min_level=3,
        max_level=7,
        block_specs=build_block_specs(),
        use_native_resize_op=False,
        activation='swish',
        batch_norm_activation=nn_ops.BatchNormActivation(activation='swish'),
        init_drop_connect_rate=None,
        data_format='channels_last'):
    """Builds the SpineNet network."""
    if model_id not in SCALING_MAP:
        raise ValueError(
            'SpineNet {} is not a valid architecture.'.format(model_id))
    scaling_params = SCALING_MAP[model_id]
    return SpineNet(
        min_level=min_level,
        max_level=max_level,
        block_specs=block_specs,
        endpoints_num_filters=scaling_params['endpoints_num_filters'],
        resample_alpha=scaling_params['resample_alpha'],
        use_native_resize_op=use_native_resize_op,
        block_repeats=scaling_params['block_repeats'],
        filter_size_scale=scaling_params['filter_size_scale'],
        activation=activation,
        batch_norm_activation=batch_norm_activation,
        init_drop_connect_rate=init_drop_connect_rate,
        data_format=data_format)
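A minimal usage sketch; the valid `model_id` values are the keys of SCALING_MAP, which is not shown here, so '49' below is only a placeholder.
# Hedged sketch: assumes '49' is a key of SCALING_MAP (not shown above).
backbone = spinenet_builder(model_id='49', min_level=3, max_level=7)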
Example #3
def resample_feature_map(feat, level, target_level, is_training,
                         target_feat_dims=256,
                         conv2d_op=tf.layers.conv2d,
                         batch_norm_activation=nn_ops.BatchNormActivation(),
                         name=None):
  """Resample input feature map to have target number of channels and width."""
  feat_dims = feat.get_shape().as_list()[3]
  with tf.variable_scope('resample_{}'.format(name)):
    if feat_dims != target_feat_dims:
      feat = conv2d_op(
          feat, filters=target_feat_dims, kernel_size=(1, 1), padding='same')
      feat = batch_norm_activation(
          feat,
          is_training=is_training,
          relu=False,
          name='bn')
    if level < target_level:
      stride = int(2**(target_level-level))
      feat = tf.layers.max_pooling2d(
          inputs=feat,
          pool_size=stride,
          strides=[stride, stride],
          padding='SAME')
    elif level > target_level:
      scale = int(2**(level - target_level))
      feat = spatial_transform_ops.nearest_upsampling(feat, scale=scale)
  return feat
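A small usage sketch of the resampler above (illustrative shapes, channels_last): resampling a level-3 map to level 5 applies a 1x1 projection to `target_feat_dims` followed by a stride-4 max pool.
feat_p3 = tf.ones([8, 64, 64, 128])  # [batch, height, width, channels]
feat_p5 = resample_feature_map(feat_p3, level=3, target_level=5,
                               is_training=False, name='p3_to_p5')
# feat_p5 has shape [8, 16, 16, 256]: 1x1 conv to 256 filters, then 2**(5-3) pooling.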
Example #4
    def __init__(self,
                 min_level=3,
                 max_level=7,
                 fpn_feat_dims=256,
                 use_separable_conv=False,
                 use_batch_norm=True,
                 batch_norm_activation=nn_ops.BatchNormActivation()):
        """FPN initialization function.

    Args:
      min_level: `int` minimum level in FPN output feature maps.
      max_level: `int` maximum level in FPN output feature maps.
      fpn_feat_dims: `int` number of filters in FPN layers.
      use_separable_conv: `bool`, if True, use separable convolution for
        convolutions in FPN layers.
      use_batch_norm: 'bool', indicating whether batchnorm layers are added.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
    """
        self._min_level = min_level
        self._max_level = max_level
        self._fpn_feat_dims = fpn_feat_dims
        if use_separable_conv:
            self._conv2d_op = functools.partial(tf.layers.separable_conv2d,
                                                depth_multiplier=1)
        else:
            self._conv2d_op = tf.layers.conv2d
        self._use_batch_norm = use_batch_norm
        self._batch_norm_activation = batch_norm_activation
Example #5
    def __init__(
        self,
        num_classes,
        num_attributes,
        num_convs=0,
        num_filters=256,
        use_separable_conv=False,
        num_fcs=2,
        fc_dims=1024,
        activation='relu',
        use_batch_norm=True,
        batch_norm_activation=nn_ops.BatchNormActivation(activation='relu')):
        """Initialize params to build Fast R-CNN head with attribute prediction.
    Args:
      num_classes: an integer for the number of classes.
      num_attributes: an integer for the number of attributes.
      num_convs: `int` number that represents the number of the intermediate
        conv layers before the FC layers.
      num_filters: `int` number that represents the number of filters of the
        intermediate conv layers.
      use_separable_conv: `bool`, indicating whether separable conv layers are
        used.
      num_fcs: `int` number that represents the number of FC layers before the
        predictions.
      fc_dims: `int` number that represents the dimension of the FC layers.
      activation: activation function. Support 'relu' and 'swish'.
      use_batch_norm: 'bool', indicating whether batchnorm layers are added.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
    """
        self._num_classes = num_classes
        self._num_attributes = num_attributes

        self._num_convs = num_convs
        self._num_filters = num_filters
        if use_separable_conv:
            self._conv2d_op = functools.partial(
                tf.layers.separable_conv2d,
                depth_multiplier=1,
                bias_initializer=tf.zeros_initializer())
        else:
            self._conv2d_op = functools.partial(
                tf.layers.conv2d,
                kernel_initializer=tf.keras.initializers.VarianceScaling(
                    scale=2, mode='fan_out',
                    distribution='untruncated_normal'),
                bias_initializer=tf.zeros_initializer())

        self._num_fcs = num_fcs
        self._fc_dims = fc_dims
        if activation == 'relu':
            self._activation = tf.nn.relu
        elif activation == 'swish':
            self._activation = tf.nn.swish
        else:
            raise ValueError(
                'Activation {} not implemented.'.format(activation))
        self._use_batch_norm = use_batch_norm
        self._batch_norm_activation = batch_norm_activation
Example #6
def batch_norm_activation_generator(params):
  return nn_ops.BatchNormActivation(
      momentum=params.batch_norm_momentum,
      epsilon=params.batch_norm_epsilon,
      trainable=params.batch_norm_trainable,
      use_sync_bn=params.use_sync_bn,
      activation=params.activation)
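A minimal sketch of the `params` interface this generator expects; the field names are taken from the attribute accesses above and the values are only illustrative.
import collections

# Any config object exposing these attributes works; a namedtuple is used here
# purely for illustration.
BNParams = collections.namedtuple(
    'BNParams', ['batch_norm_momentum', 'batch_norm_epsilon',
                 'batch_norm_trainable', 'use_sync_bn', 'activation'])
params = BNParams(batch_norm_momentum=0.997, batch_norm_epsilon=1e-4,
                  batch_norm_trainable=True, use_sync_bn=False, activation='relu')
bn_act = batch_norm_activation_generator(params)  # an nn_ops.BatchNormActivation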
Example #7
    def __init__(self,
                 min_level=3,
                 max_level=7,
                 block_specs=build_block_specs(),
                 endpoints_num_filters=48,
                 use_native_resize_op=False,
                 se_ratio=0.2,
                 block_repeats=1,
                 filter_size_scale=1.0,
                 activation='swish',
                 batch_norm_activation=nn_ops.BatchNormActivation(
                     activation='swish'),
                 init_drop_connect_rate=None,
                 data_format='channels_last'):
        """SpineNetMBConv initialization function.

    Args:
      min_level: `int` minimum level in SpineNet endpoints.
      max_level: `int` maximum level in SpineNet endpoints.
      block_specs: a list of BlockSpec objects that specifies the SpineNet
        network topology. By default, the previously discovered architecture is
        used.
      endpoints_num_filters: `int` feature dimension applied to endpoints before
        sharing conv layers in head.
      use_native_resize_op: Whether to use the native
        tf.image.resize_nearest_neighbor op or the broadcast implementation to do
        upsampling.
      se_ratio: squeeze and excitation ratio for MBConv blocks.
      block_repeats: `int` number of repeats per block.
      filter_size_scale: `float` a scaling factor to uniformly scale feature
        dimension in SpineNet.
      activation: the activation function after cross-scale feature fusion.
        Support 'relu' and 'swish'.
      batch_norm_activation: An operation that includes a batch normalization
        layer followed by an optional activation layer.
      init_drop_connect_rate: `float` initial drop connect rate.
      data_format: An optional string from: "channels_last", "channels_first".
        Defaults to "channels_last".
    """
        self._min_level = min_level
        self._max_level = max_level
        self._block_specs = block_specs
        self._endpoints_num_filters = endpoints_num_filters
        self._use_native_resize_op = use_native_resize_op
        self._se_ratio = se_ratio
        self._block_repeats = block_repeats
        self._filter_size_scale = filter_size_scale
        if activation == 'relu':
            self._activation = tf.nn.relu
        elif activation == 'swish':
            self._activation = tf.nn.swish
        else:
            raise ValueError(
                'Activation {} not implemented.'.format(activation))
        self._batch_norm_activation = batch_norm_activation
        self._init_dc_rate = init_drop_connect_rate
        self._data_format = data_format
        self._dropblock = nn_ops.Dropblock()
Example #8
  def __init__(self,
               min_level,
               max_level,
               anchors_per_location,
               num_convs=2,
               num_filters=256,
               use_separable_conv=False,
               activation='relu',
               use_batch_norm=True,
               batch_norm_activation=nn_ops.BatchNormActivation(
                   activation='relu')):
    """Initialize params to build Region Proposal Network head.

    Args:
      min_level: `int` number of minimum feature level.
      max_level: `int` number of maximum feature level.
      anchors_per_location: `int` number of anchors per pixel location.
      num_convs: `int` number that represents the number of the intermediate
        conv layers before the prediction.
      num_filters: `int` number that represents the number of filters of the
        intermediate conv layers.
      use_separable_conv: `bool`, indicating whether separable conv layers are
        used.
      activation: activation function. Support 'relu' and 'swish'.
      use_batch_norm: 'bool', indicating whether batchnorm layers are added.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
    """
    self._min_level = min_level
    self._max_level = max_level
    self._anchors_per_location = anchors_per_location

    self._num_convs = num_convs
    self._num_filters = num_filters
    if use_separable_conv:
      self._conv2d_op = functools.partial(
          tf.layers.separable_conv2d,
          depth_multiplier=1,
          bias_initializer=tf.zeros_initializer())
    else:
      self._conv2d_op = functools.partial(
          tf.layers.conv2d,
          kernel_initializer=tf.random_normal_initializer(stddev=0.01),
          bias_initializer=tf.zeros_initializer())

    self._use_batch_norm = use_batch_norm
    if activation == 'relu':
      self._activation = tf.nn.relu
    elif activation == 'swish':
      self._activation = tf.nn.swish
    else:
      raise ValueError('Activation {} not implemented.'.format(activation))
    self._batch_norm_activation = batch_norm_activation
Example #9
def resample_with_sepconv(feat,
                          target_width,
                          target_num_filters,
                          use_native_resize_op=False,
                          batch_norm_activation=nn_ops.BatchNormActivation(),
                          data_format='channels_last',
                          name=None,
                          is_training=False):
    """Match resolution and feature dimension to the target block."""
    _, height, width, num_filters = feat.get_shape().as_list()
    if width is None or num_filters is None:
        raise ValueError('Shape of feat is None (shape:{}).'.format(
            feat.shape))

    with tf.variable_scope('resample_with_sepconv_{}'.format(name)):
        # Down-sample.
        if width > target_width:
            if width % target_width != 0:
                raise ValueError('width ({}) is not divisible by '
                                 'target_width ({}).'.format(
                                     width, target_width))

            while width > target_width:
                feat = nn_ops.depthwise_conv2d_fixed_padding(
                    inputs=feat,
                    kernel_size=3,
                    strides=2,
                    data_format=data_format)
                feat = batch_norm_activation(feat, is_training=is_training)
                width /= 2

        # Up-sample with NN interpolation.
        elif width < target_width:
            if target_width % width != 0:
                raise ValueError('target_width ({}) is not divisible by '
                                 'width ({}).'.format(target_width, width))
            scale = target_width // width
            if use_native_resize_op:
                feat = tf.image.resize_nearest_neighbor(
                    feat, [height * scale, width * scale])
            else:
                feat = spatial_transform_ops.nearest_upsampling(feat,
                                                                scale=scale)

        # Match feature dimension to the target block.
        feat = nn_ops.conv2d_fixed_padding(inputs=feat,
                                           filters=target_num_filters,
                                           kernel_size=1,
                                           strides=1,
                                           data_format=data_format)
        feat = batch_norm_activation(feat, relu=False, is_training=is_training)

    return feat
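A usage sketch for the resampler above (illustrative shapes, channels_last): reducing the width from 64 to 16 applies two stride-2 separable convs, then a 1x1 conv matches the feature dimension.
feat = tf.ones([2, 64, 64, 40])
out = resample_with_sepconv(feat, target_width=16, target_num_filters=96,
                            name='p3_to_p5', is_training=False)
# out has shape [2, 16, 16, 96].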
Example #10
def block_group(inputs,
                filters,
                strides,
                block_fn_cand,
                block_repeats,
                activation=tf.nn.swish,
                batch_norm_activation=nn_ops.BatchNormActivation(),
                dropblock=nn_ops.Dropblock(),
                drop_connect_rate=None,
                data_format='channels_last',
                name=None,
                is_training=False):
  """Creates one group of blocks for SpineNet."""
  block_fn_candidates = {
      'bottleneck': nn_blocks.bottleneck_block,
      'residual': nn_blocks.residual_block,
  }
  if block_fn_cand not in block_fn_candidates:
    raise ValueError('Block function {} not implemented.'.format(block_fn_cand))

  block_fn = block_fn_candidates[block_fn_cand]
  _, _, _, num_filters = inputs.get_shape().as_list()

  if block_fn_cand == 'bottleneck':
    use_projection = not (num_filters == (filters * 4) and strides == 1)
  else:
    use_projection = not (num_filters == filters and strides == 1)

  # Only the first block per block_group uses projection shortcut and strides.
  inputs = block_fn(
      inputs,
      filters,
      strides,
      use_projection=use_projection,
      activation=activation,
      batch_norm_activation=batch_norm_activation,
      dropblock=dropblock,
      drop_connect_rate=drop_connect_rate,
      data_format=data_format,
      is_training=is_training)
  for _ in range(1, block_repeats):
    inputs = block_fn(
        inputs,
        filters,
        1,
        use_projection=False,
        activation=activation,
        batch_norm_activation=batch_norm_activation,
        dropblock=dropblock,
        drop_connect_rate=drop_connect_rate,
        data_format=data_format,
        is_training=is_training)
  return tf.identity(inputs, name)
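A short usage sketch: with the 'bottleneck' block function the group outputs 4 * filters channels, and a projection shortcut is used whenever the input channel count differs from that.
x = tf.ones([2, 32, 32, 128])
y = block_group(x, 64, 1, 'bottleneck', 2, name='spinenet_block_group',
                is_training=False)
# y has shape [2, 32, 32, 256]; use_projection is True since 128 != 64 * 4.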
Example #11
File: nasfpn.py Project: zwq1230/tpu
def block_group(inputs,
                filters,
                strides,
                block_fn,
                block_repeats,
                conv2d_op=None,
                activation=tf.nn.swish,
                batch_norm_activation=nn_ops.BatchNormActivation(),
                dropblock=nn_ops.Dropblock(),
                drop_connect_rate=None,
                data_format='channels_last',
                name=None,
                is_training=False):
    """Creates one group of blocks for NAS-FPN."""
    if block_fn == 'conv':
        inputs = conv2d_op(inputs,
                           filters=filters,
                           kernel_size=(3, 3),
                           padding='same',
                           data_format=data_format,
                           name='conv')
        inputs = batch_norm_activation(inputs,
                                       is_training=is_training,
                                       relu=False,
                                       name='bn')
        inputs = dropblock(inputs, is_training=is_training)
        return inputs

    if block_fn != 'bottleneck':
        raise ValueError('Block function {} not implemented.'.format(block_fn))
    _, _, _, num_filters = inputs.get_shape().as_list()
    block_fn = nn_blocks.bottleneck_block
    use_projection = not (num_filters == (filters * 4) and strides == 1)

    return resnet.block_group(inputs=inputs,
                              filters=filters,
                              strides=strides,
                              use_projection=use_projection,
                              block_fn=block_fn,
                              block_repeats=block_repeats,
                              activation=activation,
                              batch_norm_activation=batch_norm_activation,
                              dropblock=dropblock,
                              drop_connect_rate=drop_connect_rate,
                              data_format=data_format,
                              name=name,
                              is_training=is_training)
Example #12
  def __init__(self,
               min_level,
               max_level,
               num_classes,
               anchors_per_location,
               num_convs=4,
               num_filters=256,
               use_separable_conv=False,
               activation='relu',
               use_batch_norm=True,
               batch_norm_activation=nn_ops.BatchNormActivation(
                   activation='relu')):
    """Initialize params to build RetinaNet head.

    Args:
      min_level: `int` number of minimum feature level.
      max_level: `int` number of maximum feature level.
      num_classes: `int` number of classification categories.
      anchors_per_location: `int` number of anchors per pixel location.
      num_convs: `int` number of stacked convolution before the last prediction
        layer.
      num_filters: `int` number of filters used in the head architecture.
      use_separable_conv: `bool` to indicate whether to use separable
        convolution.
      activation: activation function. Support 'relu' and 'swish'.
      use_batch_norm: 'bool', indicating whether batchnorm layers are added.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
    """
    self._min_level = min_level
    self._max_level = max_level

    self._num_classes = num_classes
    self._anchors_per_location = anchors_per_location

    self._num_convs = num_convs
    self._num_filters = num_filters
    self._use_separable_conv = use_separable_conv
    if activation == 'relu':
      self._activation = tf.nn.relu
    elif activation == 'swish':
      self._activation = tf.nn.swish
    else:
      raise ValueError('Activation {} not implemented.'.format(activation))
    self._use_batch_norm = use_batch_norm
    self._batch_norm_activation = batch_norm_activation
Example #13
    def __init__(self,
                 block_specs=build_block_specs(),
                 batch_norm_activation=nn_ops.BatchNormActivation(),
                 data_format='channels_last'):
        """EfficientNet initialization function.

    Args:
      block_specs: a list of BlockSpec objects that specifies the EfficientNet
        network. By default, the previously discovered EfficientNet-A1 is used.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
      data_format: An optional string from: "channels_last", "channels_first".
        Defaults to "channels_last".
    """
        self._block_specs = block_specs
        self._batch_norm_activation = batch_norm_activation
        self._data_format = data_format
Example #14
def block_group(inputs,
                in_filters,
                out_filters,
                strides,
                expand_ratio,
                block_repeats,
                se_ratio=0.2,
                batch_norm_activation=nn_ops.BatchNormActivation(),
                dropblock=nn_ops.Dropblock(),
                drop_connect_rate=None,
                data_format='channels_last',
                name=None,
                is_training=False):
    """Creates one group of blocks for Mobile SpineNet."""
    # Apply strides only to the first block in block_group.
    inputs = nn_blocks.mbconv_block(
        inputs,
        in_filters,
        out_filters,
        expand_ratio,
        strides,
        se_ratio=se_ratio,
        batch_norm_activation=batch_norm_activation,
        dropblock=dropblock,
        drop_connect_rate=drop_connect_rate,
        data_format=data_format,
        is_training=is_training)
    for _ in range(1, block_repeats):
        inputs = nn_blocks.mbconv_block(
            inputs,
            out_filters,
            out_filters,
            expand_ratio,
            1,  # strides
            se_ratio=se_ratio,
            batch_norm_activation=batch_norm_activation,
            dropblock=dropblock,
            drop_connect_rate=drop_connect_rate,
            data_format=data_format,
            is_training=is_training)
    return tf.identity(inputs, name)
Example #15
  def __init__(self,
               num_classes,
               level,
               num_convs=2,
               upsample_factor=1,
               upsample_num_filters=256,
               activation='relu',
               use_batch_norm=True,
               batch_norm_activation=nn_ops.BatchNormActivation(
                   activation='relu')):
    """Initialize params to build segmentation head.

    Args:
      num_classes: `int` number of mask classification categories. The number of
        classes does not include the background class.
      level: `int` feature level used for prediction.
      num_convs: `int` number of stacked convolution before the last prediction
        layer.
      upsample_factor: `int` number to specify the upsampling factor to generate
        finer mask. Default 1 means no upsampling is applied.
      upsample_num_filters: `int` number to specify the number of filters used
        in deconv for the upsampling operation. Default is 256.
      activation: activation function. Support 'relu' and 'swish'.
      use_batch_norm: 'bool', indicating whether batchnorm layers are added.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
    """
    self._num_classes = num_classes
    self._level = level
    self._num_convs = num_convs
    self._upsample_factor = upsample_factor
    self._upsample_num_filters = upsample_num_filters
    if activation == 'relu':
      self._activation = tf.nn.relu
    elif activation == 'swish':
      self._activation = tf.nn.swish
    else:
      raise ValueError('Activation {} not implemented.'.format(activation))
    self._use_batch_norm = use_batch_norm
    self._batch_norm_activation = batch_norm_activation
Example #16
def resample_with_alpha(feat,
                        input_block_fn,
                        target_width,
                        target_num_filters,
                        target_block_fn,
                        alpha=1.0,
                        use_native_resize_op=False,
                        batch_norm_activation=nn_ops.BatchNormActivation(),
                        data_format='channels_last',
                        name=None,
                        is_training=False):
    """Match resolution and feature dimension to the target block."""
    _, height, width, num_filters = feat.get_shape().as_list()
    if width is None or num_filters is None:
        raise ValueError('Shape of feat is None (shape:{}).'.format(
            feat.shape))

    if input_block_fn == 'bottleneck':
        num_filters /= 4
    new_num_filters = int(num_filters * alpha)

    with tf.variable_scope('resample_with_alpha_{}'.format(name)):
        # First 1x1 conv to reduce feature dimension to alpha*.
        feat = nn_ops.conv2d_fixed_padding(inputs=feat,
                                           filters=new_num_filters,
                                           kernel_size=1,
                                           strides=1,
                                           data_format=data_format)
        feat = batch_norm_activation(feat, is_training=is_training)

        # Down-sample.
        if width > target_width:
            # Apply stride-2 conv to reduce feature map size to 1/2.
            feat = nn_ops.conv2d_fixed_padding(inputs=feat,
                                               filters=new_num_filters,
                                               kernel_size=3,
                                               strides=2,
                                               data_format=data_format)
            feat = batch_norm_activation(feat, is_training=is_training)
            # Apply maxpool to further reduce feature map size if necessary.
            if width // target_width > 2:
                if width % target_width != 0:
                    stride_size = 2
                else:
                    stride_size = width // target_width // 2
                feat = tf.layers.max_pooling2d(
                    inputs=feat,
                    pool_size=3 if width / target_width <= 4 else 5,
                    strides=stride_size,
                    padding='SAME',
                    data_format=data_format)
            # Use NN interpolation to resize if necessary. This could happen in
            # cases where `width` is not divisible by `target_width`.
            if feat.get_shape().as_list()[2] != target_width:
                feat = spatial_transform_ops.native_resize(
                    feat, [int(target_width / width * height), target_width])
        # Up-sample with NN interpolation.
        elif width < target_width:
            if target_width % width != 0 or use_native_resize_op:
                feat = spatial_transform_ops.native_resize(
                    feat, [int(target_width / width * height), target_width])
            else:
                scale = target_width // width
                feat = spatial_transform_ops.nearest_upsampling(feat,
                                                                scale=scale)

        # Match feature dimension to the target block.
        if target_block_fn == 'bottleneck':
            target_num_filters *= 4
        feat = nn_ops.conv2d_fixed_padding(inputs=feat,
                                           filters=target_num_filters,
                                           kernel_size=1,
                                           strides=1,
                                           data_format=data_format)
        feat = batch_norm_activation(feat, relu=False, is_training=is_training)

    return feat
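A usage sketch (illustrative values, channels_last): a bottleneck input with 256 channels and width 64 is reduced to alpha * 64 = 32 intermediate channels, downsampled to width 32, then expanded to the target bottleneck dimension.
feat = tf.ones([2, 64, 64, 256])
out = resample_with_alpha(feat, input_block_fn='bottleneck', target_width=32,
                          target_num_filters=128, target_block_fn='bottleneck',
                          alpha=0.5, name='resample_0', is_training=False)
# out has shape [2, 32, 32, 512]: 128 * 4 filters for the bottleneck target.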
Example #17
    def __init__(self,
                 resnet_depth,
                 dropblock=nn_ops.Dropblock(),
                 activation='relu',
                 batch_norm_activation=nn_ops.BatchNormActivation(),
                 init_drop_connect_rate=None,
                 data_format='channels_last',
                 space_to_depth_block_size=1):
        """ResNet initialization function.

    Args:
      resnet_depth: `int` depth of ResNet backbone model.
      dropblock: a dropblock layer.
      activation: activation function. Support 'relu' and 'swish'.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
      init_drop_connect_rate: a 'float' number that specifies the initial drop
        connection rate. Note that the default `None` means no drop connection
        is applied.
      data_format: `str` either "channels_first" for `[batch, channels, height,
        width]` or "channels_last for `[batch, height, width, channels]`.
      space_to_depth_block_size: an integer indicating the block size of the
        space-to-depth convolution for conv0. `0` means the original ResNet
        conv2d is used.
    """
        self._resnet_depth = resnet_depth

        self._dropblock = dropblock
        if activation == 'relu':
            self._activation = tf.nn.relu
        elif activation == 'swish':
            self._activation = tf.nn.swish
        else:
            raise ValueError(
                'Activation {} not implemented.'.format(activation))
        self._batch_norm_activation = batch_norm_activation
        self._init_drop_connect_rate = init_drop_connect_rate

        self._data_format = data_format
        self._space_to_depth_block_size = space_to_depth_block_size

        model_params = {
            10: {
                'block': nn_blocks.residual_block,
                'layers': [1, 1, 1, 1]
            },
            14: {
                'block': nn_blocks.bottleneck_block,
                'layers': [1, 1, 1, 1]
            },
            18: {
                'block': nn_blocks.residual_block,
                'layers': [2, 2, 2, 2]
            },
            26: {
                'block': nn_blocks.bottleneck_block,
                'layers': [2, 2, 2, 2]
            },
            34: {
                'block': nn_blocks.residual_block,
                'layers': [3, 4, 6, 3]
            },
            50: {
                'block': nn_blocks.bottleneck_block,
                'layers': [3, 4, 6, 3]
            },
            101: {
                'block': nn_blocks.bottleneck_block,
                'layers': [3, 4, 23, 3]
            },
            152: {
                'block': nn_blocks.bottleneck_block,
                'layers': [3, 8, 36, 3]
            },
            200: {
                'block': nn_blocks.bottleneck_block,
                'layers': [3, 24, 36, 3]
            }
        }

        if resnet_depth not in model_params:
            valid_resnet_depths = ', '.join(
                [str(depth) for depth in sorted(model_params.keys())])
            raise ValueError(
                'The resnet_depth should be in [%s]. Not a valid resnet_depth:'
                % (valid_resnet_depths), self._resnet_depth)
        params = model_params[resnet_depth]
        self._resnet_fn = self.resnet_v1_generator(
            params['block'], params['layers'], self._space_to_depth_block_size)
Example #18
def block_group(inputs,
                filters,
                strides,
                use_projection,
                block_fn,
                block_repeats,
                activation=tf.nn.relu,
                batch_norm_activation=nn_ops.BatchNormActivation(),
                dropblock=nn_ops.Dropblock(),
                drop_connect_rate=None,
                data_format='channels_last',
                name=None,
                is_training=False):
    """Builds one group of blocks.

  Args:
    inputs: a `Tensor` of size `[batch, channels, height, width]`.
    filters: an `int` number of filters for the first two convolutions.
    strides: an `int` block stride. If greater than 1, this block will
      ultimately downsample the input.
    use_projection: a `bool` for whether this block should use a projection
      shortcut (versus the default identity shortcut). This is usually `True`
      for the first block of a block group, which may change the number of
      filters and the resolution.
    block_fn: the `function` for the block to use within the model
    block_repeats: an `int` number of blocks to repeat in the group.
    activation: activation function. Support 'relu' and 'swish'.
    batch_norm_activation: an operation that includes a batch normalization
      layer followed by an optional activation layer.
    dropblock: a drop block layer that is added after convolutions. Note that
      the default implementation does not apply any drop block.
    drop_connect_rate: a 'float' number that specifies the drop connection rate
      of the block. Note that the default `None` means no drop connection is
      applied.
    data_format: a `str` that specifies the data format.
    name: a `str` name for the Tensor output of the block layer.
    is_training: a `bool` if True, the model is in training mode.

  Returns:
    The output `Tensor` of the block layer.
  """
    # Only the first block per block_group uses projection shortcut and strides.
    inputs = block_fn(inputs,
                      filters,
                      strides,
                      use_projection=use_projection,
                      activation=activation,
                      batch_norm_activation=batch_norm_activation,
                      dropblock=dropblock,
                      drop_connect_rate=drop_connect_rate,
                      data_format=data_format,
                      is_training=is_training)
    for _ in range(1, block_repeats):
        inputs = block_fn(inputs,
                          filters,
                          1,
                          use_projection=False,
                          activation=activation,
                          batch_norm_activation=batch_norm_activation,
                          dropblock=dropblock,
                          drop_connect_rate=drop_connect_rate,
                          data_format=data_format,
                          is_training=is_training)
    return tf.identity(inputs, name)
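A usage sketch for a ResNet-style group (illustrative shapes): the first block downsamples with stride 2 and a projection shortcut, and the remaining repeats keep the resolution.
x = tf.ones([2, 56, 56, 256])
y = block_group(x, filters=128, strides=2, use_projection=True,
                block_fn=nn_blocks.bottleneck_block, block_repeats=4,
                name='block_group_2', is_training=False)
# y has shape [2, 28, 28, 512]: bottleneck blocks output 4 * filters channels.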
Example #19
    def __call__(self, images, is_training=False):
        """Generate a multiscale feature pyramid.

    Args:
      images: The input image tensor.
      is_training: `bool` if True, the model is in training mode.

    Returns:
      a `dict` containing `int` keys for continuous feature levels
      [min_level, min_level + 1, ..., max_level]. The values are corresponding
      features with shape [batch_size, height_l, width_l,
      endpoints_num_filters].
    """
        x = images
        with tf.variable_scope('efficientnet'):
            x = nn_ops.conv2d_fixed_padding(inputs=x,
                                            filters=32,
                                            kernel_size=3,
                                            strides=2,
                                            data_format=self._data_format)
            x = tf.identity(x, 'initial_conv')
            x = self._batch_norm_activation(x, is_training=is_training)

            endpoints = []
            for i, block_spec in enumerate(self._block_specs):
                bn_act = nn_ops.BatchNormActivation(
                    activation=block_spec.act_fn)
                with tf.variable_scope('block_{}'.format(i)):
                    for j in range(block_spec.num_repeats):
                        strides = (
                            1 if j > 0 else
                            efficientnet_constants.EFFICIENTNET_STRIDES[i])

                        if block_spec.block_fn == 'conv':
                            x = nn_ops.conv2d_fixed_padding(
                                inputs=x,
                                filters=block_spec.output_filters,
                                kernel_size=block_spec.kernel_size,
                                strides=strides,
                                data_format=self._data_format)
                            x = bn_act(x, is_training=is_training)
                        elif block_spec.block_fn == 'mbconv':
                            x_shape = x.get_shape().as_list()
                            in_filters = (x_shape[1] if self._data_format
                                          == 'channels_first' else x_shape[-1])
                            x = nn_blocks.mbconv_block(
                                inputs=x,
                                in_filters=in_filters,
                                out_filters=block_spec.output_filters,
                                expand_ratio=block_spec.expand_ratio,
                                strides=strides,
                                kernel_size=block_spec.kernel_size,
                                se_ratio=block_spec.se_ratio,
                                batch_norm_activation=bn_act,
                                data_format=self._data_format,
                                is_training=is_training)
                        elif block_spec.block_fn == 'fused_mbconv':
                            x_shape = x.get_shape().as_list()
                            in_filters = (x_shape[1] if self._data_format
                                          == 'channels_first' else x_shape[-1])
                            x = nn_blocks.fused_mbconv_block(
                                inputs=x,
                                in_filters=in_filters,
                                out_filters=block_spec.output_filters,
                                expand_ratio=block_spec.expand_ratio,
                                strides=strides,
                                kernel_size=block_spec.kernel_size,
                                se_ratio=block_spec.se_ratio,
                                batch_norm_activation=bn_act,
                                data_format=self._data_format,
                                is_training=is_training)
                        else:
                            raise ValueError(
                                'Un-supported block_fn `{}`!'.format(
                                    block_spec.block_fn))
                    x = tf.identity(x, 'endpoints')
                    endpoints.append(x)

        return {
            2: endpoints[1],
            3: endpoints[2],
            4: endpoints[4],
            5: endpoints[6]
        }
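A hedged usage sketch, assuming this __call__ and the __init__ in Example #13 belong to the same backbone class (called `EfficientNet` here only for illustration).
images = tf.ones([8, 224, 224, 3])
backbone = EfficientNet()                        # default block specs
endpoints = backbone(images, is_training=False)  # dict with keys 2, 3, 4, 5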
Example #20
    def __init__(
            self,
            num_classes,
            num_convs=0,
            num_filters=256,
            use_separable_conv=False,
            num_fcs=2,
            fc_dims=1024,
            # for vild classifier: start
            clip_dim=512,
            classifier_weight_path=None,
            normalize_classifier=False,
            normalize_visual=False,
            temperature=1.0,
            # feature distillation
            visual_feature_distill=None,
            max_distill_rois=300,
            # for vild classifier: end
            activation='relu',
            use_batch_norm=True,
            batch_norm_activation=nn_ops.BatchNormActivation(
                activation='relu'),
            class_agnostic_bbox_pred=False):
        """Initialize params to build Fast R-CNN box head.


    Args:
      num_classes: an integer for the number of classes.
      num_convs: `int` number that represents the number of the intermediate
        conv layers before the FC layers.
      num_filters: `int` number that represents the number of filters of the
        intermediate conv layers.
      use_separable_conv: `bool`, indicating whether separable conv layers are
        used.
      num_fcs: `int` number that represents the number of FC layers before the
        predictions.
      fc_dims: `int` number that represents the dimension of the FC layers.
      clip_dim: `int` number that represents the dimension of the CLIP text
        embeddings.
      classifier_weight_path: `str` for the text embeddings used as classifier.
      normalize_classifier: `bool`, indicating whether to normalize the
        classifier.
      normalize_visual: `bool`, indicating whether to normalize the visual
        features used for classification.
      temperature: `float`, temperature applied to the logits.
      visual_feature_distill: None or `str` in ['vanilla', 'double_branch'] to
        specify the type of visual feature distillation.
      max_distill_rois: `int`, specify the number of precomputed rois used for
        distillation.
      activation: activation function. Support 'relu' and 'swish'.
      use_batch_norm: 'bool', indicating whether batchnorm layers are added.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
      class_agnostic_bbox_pred: `bool`, indicating whether bboxes should be
        predicted for every class or not.
    """
        self._num_classes = num_classes

        self._num_convs = num_convs
        self._num_filters = num_filters
        if use_separable_conv:
            self._conv2d_op = functools.partial(
                tf.layers.separable_conv2d,
                depth_multiplier=1,
                bias_initializer=tf.zeros_initializer())
        else:
            self._conv2d_op = functools.partial(
                tf.layers.conv2d,
                kernel_initializer=tf.keras.initializers.VarianceScaling(
                    scale=2, mode='fan_out',
                    distribution='untruncated_normal'),
                bias_initializer=tf.zeros_initializer())

        self._num_fcs = num_fcs
        self._fc_dims = fc_dims
        if activation == 'relu':
            self._activation = tf.nn.relu
        elif activation == 'swish':
            self._activation = tf.nn.swish
        else:
            raise ValueError(
                'Activation {} not implemented.'.format(activation))
        self._use_batch_norm = use_batch_norm
        self._batch_norm_activation = batch_norm_activation
        self._class_agnostic_bbox_pred = class_agnostic_bbox_pred

        # clip classifier related
        self._clip_dim = clip_dim

        self._classifier_weight_path = classifier_weight_path
        assert tf.gfile.Exists(self._classifier_weight_path)

        self._normalize_classifier = normalize_classifier
        self._normalize_visual = normalize_visual
        self._temperature = temperature

        # feature distill
        self._feat_distill = visual_feature_distill
        self._max_distill_rois = max_distill_rois

        assert self._normalize_classifier and self._normalize_visual
Example #21
def fused_mbconv_block(inputs,
                       in_filters,
                       out_filters,
                       expand_ratio,
                       strides,
                       kernel_size=3,
                       se_ratio=None,
                       batch_norm_activation=nn_ops.BatchNormActivation(),
                       dropblock=nn_ops.Dropblock(),
                       drop_connect_rate=None,
                       data_format='channels_last',
                       is_training=False):
    """The fused bottleneck block with BN and DropBlock after convolutions.

  Args:
    inputs: a `Tensor` of size `[batch, channels, height, width]`.
    in_filters: an `int` number of filters for the input feature map.
    out_filters: an `int` number of filters for the output feature map.
    expand_ratio: an `int` number as the feature dimension expansion ratio.
    strides: an `int` block stride. If greater than 1, this block will ultimately
      downsample the input.
    kernel_size: kernel size for the fused expansion convolution.
    se_ratio: squeeze and excitation ratio.
    batch_norm_activation: an operation that includes a batch normalization
      layer followed by an optional activation layer.
    dropblock: a drop block layer that is added after convolutions. Note that
      the default implementation does not apply any drop block.
    drop_connect_rate: a 'float' number that specifies the drop connection rate
      of the block. Note that the default `None` means no drop connection is
      applied.
    data_format: a `str` that specifies the data format.
    is_training: a `bool` if True, the model is in training mode.

  Returns:
    The output `Tensor` of the block.
  """
    tf.logging.info('-----> Building fused mbconv block.')
    shortcut = inputs

    # Fused expansion conv: a single `kernel_size` strided conv replaces the 1x1
    # expansion and depthwise convs of a regular MBConv block.
    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=in_filters * expand_ratio,
                                         kernel_size=kernel_size,
                                         strides=strides,
                                         data_format=data_format)
    inputs = batch_norm_activation(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    # Squeeze and excitation.
    if se_ratio is not None and se_ratio > 0 and se_ratio <= 1:
        inputs = nn_ops.squeeze_excitation(inputs,
                                           in_filters,
                                           se_ratio,
                                           expand_ratio=expand_ratio,
                                           data_format=data_format)

    # Final 1x1 conv to project back to `out_filters` (inverted bottleneck).
    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=out_filters,
                                         kernel_size=1,
                                         strides=1,
                                         data_format=data_format)
    inputs = batch_norm_activation(inputs, relu=False, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    if in_filters == out_filters and strides == 1:
        if drop_connect_rate:
            inputs = nn_ops.drop_connect(inputs, is_training,
                                         drop_connect_rate)
        inputs = tf.add(inputs, shortcut)

    return inputs
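A usage sketch: with in_filters == out_filters and strides == 1, the residual shortcut is added; the expansion conv uses `kernel_size` (3 by default).
x = tf.ones([2, 32, 32, 24])
y = fused_mbconv_block(x, in_filters=24, out_filters=24, expand_ratio=4,
                       strides=1, se_ratio=0.25, is_training=False)
# y has shape [2, 32, 32, 24] and includes the identity shortcut.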
Example #22
def bottleneck_block(inputs,
                     filters,
                     strides,
                     use_projection,
                     activation=tf.nn.relu,
                     batch_norm_activation=nn_ops.BatchNormActivation(),
                     dropblock=nn_ops.Dropblock(),
                     drop_connect_rate=None,
                     data_format='channels_last',
                     is_training=False):
    """The bottleneck block with BN and DropBlock after convolutions.

  Args:
    inputs: a `Tensor` of size `[batch, channels, height, width]`.
    filters: an `int` number of filters for the first two convolutions. Note that
      the third and final convolution will use 4 times as many filters.
    strides: an `int` block stride. If greater than 1, this block will
      ultimately downsample the input.
    use_projection: a `bool` for whether this block should use a projection
      shortcut (versus the default identity shortcut). This is usually `True`
      for the first block of a block group, which may change the number of
      filters and the resolution.
    activation: activation function. Support 'relu' and 'swish'.
    batch_norm_activation: an operation that includes a batch normalization
      layer followed by an optional activation layer.
    dropblock: a drop block layer that is added after convolutions. Note that
      the default implementation does not apply any drop block.
    drop_connect_rate: a 'float' number that specifies the drop connection rate
      of the block. Note that the default `None` means no drop connection is
      applied.
    data_format: a `str` that specifies the data format.
    is_training: a `bool` if True, the model is in training mode.

  Returns:
    The output `Tensor` of the block.
  """
    logging.info('-----> Building bottleneck block.')
    shortcut = inputs
    if use_projection:
        out_filters = 4 * filters
        shortcut = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                               filters=out_filters,
                                               kernel_size=1,
                                               strides=strides,
                                               data_format=data_format)
        shortcut = batch_norm_activation(shortcut,
                                         relu=False,
                                         is_training=is_training)
    shortcut = dropblock(shortcut, is_training=is_training)

    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=filters,
                                         kernel_size=1,
                                         strides=1,
                                         data_format=data_format)
    inputs = batch_norm_activation(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=filters,
                                         kernel_size=3,
                                         strides=strides,
                                         data_format=data_format)
    inputs = batch_norm_activation(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=4 * filters,
                                         kernel_size=1,
                                         strides=1,
                                         data_format=data_format)
    inputs = batch_norm_activation(inputs, relu=False, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    if drop_connect_rate:
        inputs = nn_ops.drop_connect(inputs, is_training, drop_connect_rate)

    return activation(inputs + shortcut)
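A usage sketch (assuming channels_last, the default data_format): the block outputs 4 * filters channels, so use_projection=True is needed whenever the input has a different channel count.
x = tf.ones([2, 56, 56, 64])
y = bottleneck_block(x, filters=64, strides=1, use_projection=True,
                     is_training=False)
# y has shape [2, 56, 56, 256].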
Example #23
    def __init__(self,
                 min_level=3,
                 max_level=7,
                 block_specs=build_block_specs(),
                 endpoints_num_filters=256,
                 resample_alpha=0.5,
                 use_native_resize_op=False,
                 block_repeats=1,
                 filter_size_scale=1.0,
                 activation='swish',
                 batch_norm_activation=nn_ops.BatchNormActivation(
                     activation='swish'),
                 init_drop_connect_rate=None,
                 data_format='channels_last'):
        """SpineNet initialization function.

    Args:
      min_level: an `int` representing the minimum level in SpineNet endpoints.
      max_level: an `int` representing the maximum level in SpineNet endpoints.
      block_specs: a list of BlockSpec objects that specifies the SpineNet
        network topology. By default, the previously discovered architecture is
        used.
      endpoints_num_filters: an `int` representing the final feature dimension
        of endpoints before the shared conv layers in head.
      resample_alpha: a `float` representing the scaling factor to scale feature
        dimension before resolution resampling.
      use_native_resize_op: Whether to use the native
        tf.image.resize_nearest_neighbor op or the broadcast implementation to do
        upsampling.
      block_repeats: an `int` representing the number of repeats per block
        group.
      filter_size_scale: a `float` representing the scaling factor to uniformly
        scale feature dimension in SpineNet.
      activation: activation function. Support 'relu' and 'swish'.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
      init_drop_connect_rate: a 'float' number that specifies the initial drop
        connection rate. Note that the default `None` means no drop connection
        is applied.
      data_format: An optional string from: "channels_last", "channels_first".
        Defaults to "channels_last".
    """
        self._min_level = min_level
        self._max_level = max_level
        self._block_specs = block_specs
        self._endpoints_num_filters = endpoints_num_filters
        self._use_native_resize_op = use_native_resize_op
        self._resample_alpha = resample_alpha
        self._block_repeats = block_repeats
        self._filter_size_scale = filter_size_scale
        if activation == 'relu':
            self._activation = tf.nn.relu
        elif activation == 'swish':
            self._activation = tf.nn.swish
        else:
            raise ValueError(
                'Activation {} not implemented.'.format(activation))
        self._batch_norm_activation = batch_norm_activation
        self._init_drop_connect_rate = init_drop_connect_rate
        self._data_format = data_format
        self._dropblock = nn_ops.Dropblock(
        )  # Hard-code it to not use DropBlock.
        self._init_block_fn = 'bottleneck'
        self._num_init_blocks = 2
Example #24
File: nasfpn.py Project: zwq1230/tpu
    def __init__(self,
                 min_level=3,
                 max_level=7,
                 block_specs=build_block_specs(),
                 fpn_feat_dims=256,
                 num_repeats=7,
                 use_separable_conv=False,
                 dropblock=nn_ops.Dropblock(),
                 block_fn='conv',
                 block_repeats=1,
                 activation='relu',
                 batch_norm_activation=nn_ops.BatchNormActivation(
                     activation='relu'),
                 init_drop_connect_rate=None,
                 data_format='channels_last',
                 use_sum_for_combination=False):
        """NAS-FPN initialization function.

    Args:
      min_level: `int` minimum level in NAS-FPN output feature maps.
      max_level: `int` maximum level in NAS-FPN output feature maps.
      block_specs: a list of BlockSpec objects that specifies the SpineNet
        network topology. By default, the previously discovered architecture is
        used.
      fpn_feat_dims: `int` number of filters in FPN layers.
      num_repeats: number of repeats for feature pyramid network.
      use_separable_conv: `bool`, if True, use separable convolution for
        convolutions in NAS-FPN layers.
      dropblock: a Dropblock layer.
      block_fn: `string` representing the type of block group to use: 'conv' or
        'bottleneck'.
      block_repeats: `int` representing the number of repeats per block group
        when block group is bottleneck.
      activation: activation function. Support 'relu' and 'swish'.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
      init_drop_connect_rate: a 'float' number that specifies the initial drop
        connection rate. Note that the default `None` means no drop connection
        is applied.
      data_format: An optional string from: "channels_last", "channels_first".
        Defaults to "channels_last".
      use_sum_for_combination: `bool`, if True only 'sum' is used for combining
        two nodes.
    """
        self._min_level = min_level
        self._max_level = max_level
        self._block_specs = block_specs
        self._fpn_feat_dims = fpn_feat_dims
        self._num_repeats = num_repeats
        self._block_fn = block_fn
        self._block_repeats = block_repeats
        if use_separable_conv:
            self._conv2d_op = functools.partial(tf.layers.separable_conv2d,
                                                depth_multiplier=1)
        else:
            self._conv2d_op = tf.layers.conv2d
        self._dropblock = dropblock
        if activation == 'relu':
            self._activation = tf.nn.relu
        elif activation == 'swish':
            self._activation = tf.nn.swish
        else:
            raise ValueError(
                'Activation {} not implemented.'.format(activation))
        self._batch_norm_activation = batch_norm_activation
        self._init_drop_connect_rate = init_drop_connect_rate
        self._data_format = data_format
        self._resample_feature_map = functools.partial(
            resample_feature_map,
            target_feat_dims=fpn_feat_dims,
            conv2d_op=self._conv2d_op,
            batch_norm_activation=batch_norm_activation,
            data_format=self._data_format)
        self._use_sum_for_combination = use_sum_for_combination
Example #25
  def __init__(self,
               min_level=3,
               max_level=7,
               fpn_feat_dims=256,
               num_repeats=7,
               use_separable_conv=False,
               dropblock=nn_ops.Dropblock(),
               block_fn='conv',
               block_repeats=1,
               activation='swish',
               batch_norm_activation=nn_ops.BatchNormActivation(),
               init_drop_connect_rate=None):
    """NAS-FPN initialization function.

    Args:
      min_level: `int` minimum level in NAS-FPN output feature maps.
      max_level: `int` maximum level in NAS-FPN output feature maps.
      fpn_feat_dims: `int` number of filters in FPN layers.
      num_repeats: number of repeats for feature pyramid network.
      use_separable_conv: `bool`, if True, use separable convolution for
        convolutions in NAS-FPN layers.
      dropblock: a Dropblock layer.
      block_fn: `string` representing the type of block group to use: 'conv' or
        'bottleneck'.
      block_repeats: `int` representing the number of repeats per block group
        when block group is bottleneck.
      activation: activation function. Support 'relu' and 'swish'.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
      init_drop_connect_rate: a 'float' number that specifies the initial drop
        connection rate. Note that the default `None` means no drop connection
        is applied.
    """

    self._min_level = min_level
    self._max_level = max_level
    if min_level == 3 and max_level == 7:
      model_config = [
          3, 1, 1, 3,
          3, 0, 1, 5,
          4, 0, 0, 6,  # Output to level 3.
          3, 0, 6, 7,  # Output to level 4.
          2, 1, 7, 8,  # Output to level 5.
          0, 1, 6, 9,  # Output to level 7.
          1, 1, 9, 10]  # Output to level 6.
    else:
      raise ValueError('The NAS-FPN with min level {} and max level {} '
                       'is not supported.'.format(min_level, max_level))
    self._config = Config(model_config, self._min_level, self._max_level)
    self._num_repeats = num_repeats
    self._fpn_feat_dims = fpn_feat_dims
    self._block_fn = block_fn
    self._block_repeats = block_repeats
    if use_separable_conv:
      self._conv2d_op = functools.partial(
          tf.layers.separable_conv2d, depth_multiplier=1)
    else:
      self._conv2d_op = tf.layers.conv2d
    self._dropblock = dropblock
    if activation == 'relu':
      self._activation = tf.nn.relu
    elif activation == 'swish':
      self._activation = tf.nn.swish
    else:
      raise ValueError('Activation {} not implemented.'.format(activation))
    self._batch_norm_activation = batch_norm_activation
    self._init_drop_connect_rate = init_drop_connect_rate
    self._resample_feature_map = functools.partial(
        resample_feature_map,
        target_feat_dims=fpn_feat_dims,
        conv2d_op=self._conv2d_op,
        batch_norm_activation=batch_norm_activation)