Example #1
  def __init__(self,
               num_classes,
               num_convs=0,
               num_filters=256,
               use_separable_conv=False,
               num_fcs=2,
               fc_dims=1024,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               kernel_regularizer=None,
               bias_regularizer=None,
               **kwargs):
    """Initialize params to build the detection head.

    Args:
      num_classes: an `int` for the number of classes.
      num_convs: `int` number that represents the number of the intermediate
        conv layers before the FC layers.
      num_filters: `int` number that represents the number of filters of the
        intermediate conv layers.
      use_separable_conv: `bool`, indicating whether separable conv layers are
        used.
      num_fcs: `int` number that represents the number of FC layers before the
        predictions.
      fc_dims: `int` number that represents the dimension of the FC layers.
      activation: `string`, indicating which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: `bool`, whether to use synchronized batch normalization
        across different replicas.
      norm_momentum: `float`, the momentum parameter of the normalization
        layers.
      norm_epsilon: `float`, the epsilon parameter of the normalization layers.
      kernel_regularizer: `tf.keras.regularizers.Regularizer` object for layer
        kernel.
      bias_regularizer: `tf.keras.regularizers.Regularizer` object for bias.
      **kwargs: other keyword arguments passed to Layer.
    """
    super(DetectionHead, self).__init__(**kwargs)
    self._config_dict = {
        'num_classes': num_classes,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'use_separable_conv': use_separable_conv,
        'num_fcs': num_fcs,
        'fc_dims': fc_dims,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }

    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)
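
A minimal construction sketch for the head above. The import path is an
assumption (the snippet does not show the enclosing module), and the argument
values are illustrative only:

import tensorflow as tf

# Hypothetical import; DetectionHead's module is not shown in the snippet.
# from official.vision.modeling.heads.instance_heads import DetectionHead

head = DetectionHead(
    num_classes=91,  # e.g. a COCO-style category count; purely illustrative
    num_convs=0,     # FC-only head: no intermediate conv layers
    num_fcs=2,
    fc_dims=1024,
    activation='relu',
    kernel_regularizer=tf.keras.regularizers.l2(4e-5))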
Example #2
def build_encoder(config: EncoderConfig,
                  embedding_layer: Optional[tf.keras.layers.Layer] = None,
                  encoder_cls=None,
                  bypass_config: bool = False):
    """Instantiate a Transformer encoder network from EncoderConfig.

  Args:
    config: the one-of encoder config, which provides encoder parameters of a
      chosen encoder.
    embedding_layer: an external embedding layer passed to the encoder.
    encoder_cls: an external encoder cls not included in the supported encoders,
      usually used by gin.configurable.
    bypass_config: whether to ignore config instance to create the object with
      `encoder_cls`.

  Returns:
    An encoder instance.
  """
    encoder_type = config.type
    encoder_cfg = config.get()
    encoder_cls = encoder_cls or ENCODER_CLS[encoder_type]
    logging.info("Encoder class: %s to build...", encoder_cls.__name__)
    if bypass_config:
        return encoder_cls()
    if encoder_cls.__name__ == "EncoderScaffold":
        embedding_cfg = dict(
            vocab_size=encoder_cfg.vocab_size,
            type_vocab_size=encoder_cfg.type_vocab_size,
            hidden_size=encoder_cfg.hidden_size,
            max_seq_length=encoder_cfg.max_position_embeddings,
            initializer=tf.keras.initializers.TruncatedNormal(
                stddev=encoder_cfg.initializer_range),
            dropout_rate=encoder_cfg.dropout_rate,
        )
        hidden_cfg = dict(
            num_attention_heads=encoder_cfg.num_attention_heads,
            intermediate_size=encoder_cfg.intermediate_size,
            intermediate_activation=tf_utils.get_activation(
                encoder_cfg.hidden_activation),
            dropout_rate=encoder_cfg.dropout_rate,
            attention_dropout_rate=encoder_cfg.attention_dropout_rate,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=encoder_cfg.initializer_range),
        )
        kwargs = dict(
            embedding_cfg=embedding_cfg,
            hidden_cfg=hidden_cfg,
            num_hidden_instances=encoder_cfg.num_layers,
            pooled_output_dim=encoder_cfg.hidden_size,
            pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=encoder_cfg.initializer_range),
            return_all_layer_outputs=encoder_cfg.return_all_encoder_outputs,
            dict_outputs=True)
        return encoder_cls(**kwargs)

    if encoder_type == "mobilebert":
        return encoder_cls(
            word_vocab_size=encoder_cfg.word_vocab_size,
            word_embed_size=encoder_cfg.word_embed_size,
            type_vocab_size=encoder_cfg.type_vocab_size,
            max_sequence_length=encoder_cfg.max_sequence_length,
            num_blocks=encoder_cfg.num_blocks,
            hidden_size=encoder_cfg.hidden_size,
            num_attention_heads=encoder_cfg.num_attention_heads,
            intermediate_size=encoder_cfg.intermediate_size,
            intermediate_act_fn=encoder_cfg.hidden_activation,
            hidden_dropout_prob=encoder_cfg.hidden_dropout_prob,
            attention_probs_dropout_prob=(
                encoder_cfg.attention_probs_dropout_prob),
            intra_bottleneck_size=encoder_cfg.intra_bottleneck_size,
            initializer_range=encoder_cfg.initializer_range,
            use_bottleneck_attention=encoder_cfg.use_bottleneck_attention,
            key_query_shared_bottleneck=(
                encoder_cfg.key_query_shared_bottleneck),
            num_feedforward_networks=encoder_cfg.num_feedforward_networks,
            normalization_type=encoder_cfg.normalization_type,
            classifier_activation=encoder_cfg.classifier_activation)

    if encoder_type == "albert":
        return encoder_cls(
            vocab_size=encoder_cfg.vocab_size,
            embedding_width=encoder_cfg.embedding_width,
            hidden_size=encoder_cfg.hidden_size,
            num_layers=encoder_cfg.num_layers,
            num_attention_heads=encoder_cfg.num_attention_heads,
            max_sequence_length=encoder_cfg.max_position_embeddings,
            type_vocab_size=encoder_cfg.type_vocab_size,
            intermediate_size=encoder_cfg.intermediate_size,
            activation=tf_utils.get_activation(encoder_cfg.hidden_activation),
            dropout_rate=encoder_cfg.dropout_rate,
            attention_dropout_rate=encoder_cfg.attention_dropout_rate,
            initializer=tf.keras.initializers.TruncatedNormal(
                stddev=encoder_cfg.initializer_range),
            dict_outputs=True)

    if encoder_type == "bigbird":
        return encoder_cls(
            vocab_size=encoder_cfg.vocab_size,
            hidden_size=encoder_cfg.hidden_size,
            num_layers=encoder_cfg.num_layers,
            num_attention_heads=encoder_cfg.num_attention_heads,
            intermediate_size=encoder_cfg.intermediate_size,
            activation=tf_utils.get_activation(encoder_cfg.hidden_activation),
            dropout_rate=encoder_cfg.dropout_rate,
            attention_dropout_rate=encoder_cfg.attention_dropout_rate,
            num_rand_blocks=encoder_cfg.num_rand_blocks,
            block_size=encoder_cfg.block_size,
            max_position_embeddings=encoder_cfg.max_position_embeddings,
            type_vocab_size=encoder_cfg.type_vocab_size,
            initializer=tf.keras.initializers.TruncatedNormal(
                stddev=encoder_cfg.initializer_range),
            embedding_width=encoder_cfg.embedding_width)

    if encoder_type == "xlnet":
        return encoder_cls(
            vocab_size=encoder_cfg.vocab_size,
            num_layers=encoder_cfg.num_layers,
            hidden_size=encoder_cfg.hidden_size,
            num_attention_heads=encoder_cfg.num_attention_heads,
            head_size=encoder_cfg.head_size,
            inner_size=encoder_cfg.inner_size,
            dropout_rate=encoder_cfg.dropout_rate,
            attention_dropout_rate=encoder_cfg.attention_dropout_rate,
            attention_type=encoder_cfg.attention_type,
            bi_data=encoder_cfg.bi_data,
            two_stream=encoder_cfg.two_stream,
            tie_attention_biases=encoder_cfg.tie_attention_biases,
            memory_length=encoder_cfg.memory_length,
            clamp_length=encoder_cfg.clamp_length,
            reuse_length=encoder_cfg.reuse_length,
            inner_activation=encoder_cfg.inner_activation,
            use_cls_mask=encoder_cfg.use_cls_mask,
            embedding_width=encoder_cfg.embedding_width,
            initializer=tf.keras.initializers.RandomNormal(
                stddev=encoder_cfg.initializer_range))

    # Uses the default BERTEncoder configuration schema to create the encoder.
    # If it does not match, please add a switch branch by the encoder type.
    return encoder_cls(
        vocab_size=encoder_cfg.vocab_size,
        hidden_size=encoder_cfg.hidden_size,
        num_layers=encoder_cfg.num_layers,
        num_attention_heads=encoder_cfg.num_attention_heads,
        intermediate_size=encoder_cfg.intermediate_size,
        activation=tf_utils.get_activation(encoder_cfg.hidden_activation),
        dropout_rate=encoder_cfg.dropout_rate,
        attention_dropout_rate=encoder_cfg.attention_dropout_rate,
        max_sequence_length=encoder_cfg.max_position_embeddings,
        type_vocab_size=encoder_cfg.type_vocab_size,
        initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
        embedding_width=encoder_cfg.embedding_size,
        embedding_layer=embedding_layer,
        return_all_encoder_outputs=encoder_cfg.return_all_encoder_outputs,
        dict_outputs=True)
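
A hedged usage sketch for the factory above: it dispatches on `config.type`
via ENCODER_CLS, so building the default BERT-style branch might look like
this (the EncoderConfig import path is an assumption):

# Assumed import; the snippet only shows the EncoderConfig type annotation.
# from official.nlp.configs.encoders import EncoderConfig

config = EncoderConfig(type='bert')  # assuming 'bert' is a key in ENCODER_CLS
encoder = build_encoder(config)
# Control falls through to the final branch, returning a BertEncoder-style
# Keras network with dict outputs (dict_outputs=True).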
Example #3
    def __init__(
            self,
            backbone,
            normalize_feature,
            hidden_dim,
            hidden_layer_num,
            hidden_norm_args,
            projection_dim,
            input_specs: Optional[Mapping[str,
                                          tf.keras.layers.InputSpec]] = None,
            dropout_rate: float = 0.0,
            aggregate_endpoints: bool = False,
            kernel_initializer='random_uniform',
            kernel_regularizer=None,
            bias_regularizer=None,
            **kwargs):
        """Video Classification initialization function.

    Args:
      backbone: a 3d backbone network.
      normalize_feature: whether to normalize the backbone features.
      hidden_dim: `int` number of hidden units in MLP.
      hidden_layer_num: `int` number of hidden layers in MLP.
      hidden_norm_args: norm arguments for the MLP; an object exposing
        `use_sync_bn`, `norm_momentum`, `norm_epsilon` and `activation`.
      projection_dim: `int` number of output dimensions for the MLP.
      input_specs: a mapping from input name to `tf.keras.layers.InputSpec`.
      dropout_rate: `float` rate for dropout regularization.
      aggregate_endpoints: `bool`, whether to aggregate all endpoints or only
        use the final endpoint.
      kernel_initializer: kernel initializer for the dense layer.
      kernel_regularizer: tf.keras.regularizers.Regularizer object. Default to
        None.
      bias_regularizer: tf.keras.regularizers.Regularizer object. Default to
        None.
      **kwargs: keyword arguments to be passed.
    """
        if not input_specs:
            input_specs = {
                'image': layers.InputSpec(shape=[None, None, None, None, 3])
            }
        self._self_setattr_tracking = False
        self._config_dict = {
            'backbone': backbone,
            'normalize_feature': normalize_feature,
            'hidden_dim': hidden_dim,
            'hidden_layer_num': hidden_layer_num,
            'use_sync_bn': hidden_norm_args.use_sync_bn,
            'norm_momentum': hidden_norm_args.norm_momentum,
            'norm_epsilon': hidden_norm_args.norm_epsilon,
            'activation': hidden_norm_args.activation,
            'projection_dim': projection_dim,
            'input_specs': input_specs,
            'dropout_rate': dropout_rate,
            'aggregate_endpoints': aggregate_endpoints,
            'kernel_initializer': kernel_initializer,
            'kernel_regularizer': kernel_regularizer,
            'bias_regularizer': bias_regularizer,
        }
        self._input_specs = input_specs
        self._kernel_regularizer = kernel_regularizer
        self._bias_regularizer = bias_regularizer
        self._backbone = backbone

        inputs = {
            k: tf.keras.Input(shape=v.shape[1:])
            for k, v in input_specs.items()
        }
        endpoints = backbone(inputs['image'])

        if aggregate_endpoints:
            pooled_feats = []
            for endpoint in endpoints.values():
                x_pool = tf.keras.layers.GlobalAveragePooling3D()(endpoint)
                pooled_feats.append(x_pool)
            x = tf.concat(pooled_feats, axis=1)
        else:
            x = endpoints[max(endpoints.keys())]
            x = tf.keras.layers.GlobalAveragePooling3D()(x)

        # L2 Normalize feature after backbone
        if normalize_feature:
            x = tf.nn.l2_normalize(x, axis=-1)

        # MLP hidden layers
        for _ in range(hidden_layer_num):
            x = tf.keras.layers.Dense(hidden_dim)(x)
            if self._config_dict['use_sync_bn']:
                x = tf.keras.layers.experimental.SyncBatchNormalization(
                    momentum=self._config_dict['norm_momentum'],
                    epsilon=self._config_dict['norm_epsilon'])(x)
            else:
                x = tf.keras.layers.BatchNormalization(
                    momentum=self._config_dict['norm_momentum'],
                    epsilon=self._config_dict['norm_epsilon'])(x)
            x = tf_utils.get_activation(self._config_dict['activation'])(x)

        # Projection head
        x = tf.keras.layers.Dense(projection_dim)(x)

        super(VideoSSLModel, self).__init__(inputs=inputs, outputs=x, **kwargs)
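
A construction sketch for the model above. The toy 3D backbone below is a
stand-in just to exercise the constructor (real usage would pass a 3D ResNet
or similar), and hidden_norm_args only needs the four attributes read in the
constructor, so a SimpleNamespace substitutes for the real config object:

import tensorflow as tf
from types import SimpleNamespace

# Toy stand-in backbone returning an endpoint dict, keyed by level string.
video_in = tf.keras.Input(shape=[None, None, None, 3])
feat = tf.keras.layers.Conv3D(8, 3, strides=2, padding='same')(video_in)
backbone = tf.keras.Model(video_in, {'1': feat})

norm_args = SimpleNamespace(
    use_sync_bn=False, norm_momentum=0.99, norm_epsilon=1e-3,
    activation='relu')

model = VideoSSLModel(
    backbone=backbone,
    normalize_feature=True,
    hidden_dim=2048,
    hidden_layer_num=3,
    hidden_norm_args=norm_args,
    projection_dim=128)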
Example #4
    def _make_block_basic(self,
                          input_tensor,
                          first_block=True,
                          filters=64,
                          stride=2,
                          radix=1,
                          avd=False,
                          avd_first=False,
                          is_first=False):
        """Conv2d_BN_Relu->Bn_Relu_Conv2d
    """
        x = input_tensor
        x = BatchNormalization(axis=self.channel_axis, epsilon=1.001e-5)(x)
        x = tf_utils.get_activation(self.activation)(x)

        short_cut = x
        inplanes = input_tensor.shape[-1]
        if stride != 1 or inplanes != filters * self.block_expansion:
            if self.avg_down:
                if self.dilation == 1:
                    short_cut = AveragePooling2D(
                        pool_size=stride,
                        strides=stride,
                        padding="same",
                        data_format="channels_last")(short_cut)
                else:
                    short_cut = AveragePooling2D(
                        pool_size=1,
                        strides=1,
                        padding="same",
                        data_format="channels_last")(short_cut)
                short_cut = Conv2D(filters,
                                   kernel_size=1,
                                   strides=1,
                                   padding="same",
                                   kernel_initializer="he_normal",
                                   use_bias=False,
                                   data_format="channels_last")(short_cut)
            else:
                short_cut = Conv2D(filters,
                                   kernel_size=1,
                                   strides=stride,
                                   padding="same",
                                   kernel_initializer="he_normal",
                                   use_bias=False,
                                   data_format="channels_last")(short_cut)

        group_width = int(filters *
                          (self.bottleneck_width / 64.0)) * self.cardinality
        avd = avd and (stride > 1 or is_first)

        if avd:
            avd_layer = AveragePooling2D(pool_size=3,
                                         strides=stride,
                                         padding="same",
                                         data_format="channels_last")
            stride = 1

        if avd and avd_first:
            x = avd_layer(x)

        if radix >= 1:
            x = self._SplAtConv2d(x,
                                  filters=group_width,
                                  kernel_size=3,
                                  stride=stride,
                                  dilation=self.dilation,
                                  groups=self.cardinality,
                                  radix=radix)
        else:
            x = Conv2D(filters,
                       kernel_size=3,
                       strides=stride,
                       padding="same",
                       kernel_initializer="he_normal",
                       dilation_rate=self.dilation,
                       use_bias=False,
                       data_format="channels_last")(x)

        if avd and not avd_first:
            x = avd_layer(x)

        x = BatchNormalization(axis=self.channel_axis, epsilon=1.001e-5)(x)
        x = tf_utils.get_activation(self.activation)(x)
        x = Conv2D(filters,
                   kernel_size=3,
                   strides=1,
                   padding="same",
                   kernel_initializer="he_normal",
                   dilation_rate=self.dilation,
                   use_bias=False,
                   data_format="channels_last")(x)
        m2 = Add()([x, short_cut])
        return m2
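
The method above is private to the backbone class. As a standalone reference,
the pre-activation ordering it relies on (BN, then activation, then conv) can
be distilled into a few lines; this is a sketch of the pattern, not part of
the original class:

import tensorflow as tf

def preact_conv(x, filters, stride=1):
    # BN -> activation -> Conv2D, the ordering used by _make_block_basic.
    x = tf.keras.layers.BatchNormalization(axis=-1, epsilon=1.001e-5)(x)
    x = tf.keras.layers.Activation('relu')(x)
    return tf.keras.layers.Conv2D(
        filters, kernel_size=3, strides=stride, padding='same',
        kernel_initializer='he_normal', use_bias=False)(x)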
Example #5
    def _build_scale_permuted_network(self,
                                      net,
                                      input_width,
                                      weighted_fusion=False):
        """Builds scale-permuted network."""
        net_sizes = [int(math.ceil(input_width / 2**2))] * len(net)
        net_block_fns = [self._init_block_fn] * len(net)
        num_outgoing_connections = [0] * len(net)

        endpoints = {}
        for i, block_spec in enumerate(self._block_specs):
            # Find out specs for the target block.
            target_width = int(math.ceil(input_width / 2**block_spec.level))
            target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] *
                                     self._filter_size_scale)
            target_block_fn = block_spec.block_fn

            # Resample then merge input0 and input1.
            parents = []
            input0 = block_spec.input_offsets[0]
            input1 = block_spec.input_offsets[1]

            x0 = self._resample_with_alpha(
                inputs=net[input0],
                input_width=net_sizes[input0],
                input_block_fn=net_block_fns[input0],
                target_width=target_width,
                target_num_filters=target_num_filters,
                target_block_fn=target_block_fn,
                alpha=self._resample_alpha)
            parents.append(x0)
            num_outgoing_connections[input0] += 1

            x1 = self._resample_with_alpha(
                inputs=net[input1],
                input_width=net_sizes[input1],
                input_block_fn=net_block_fns[input1],
                target_width=target_width,
                target_num_filters=target_num_filters,
                target_block_fn=target_block_fn,
                alpha=self._resample_alpha)
            parents.append(x1)
            num_outgoing_connections[input1] += 1

            # Merge 0 outdegree blocks to the output block.
            if block_spec.is_output:
                for j, (j_feat, j_connections) in enumerate(
                        zip(net, num_outgoing_connections)):
                    if j_connections == 0 and (j_feat.shape[2] == target_width
                                               and j_feat.shape[3]
                                               == x0.shape[3]):
                        parents.append(j_feat)
                        num_outgoing_connections[j] += 1

            if weighted_fusion:
                dtype = parents[0].dtype
                parent_weights = [
                    tf.nn.relu(
                        tf.cast(tf.Variable(1.0,
                                            name='block{}_fusion{}'.format(
                                                i, j)),
                                dtype=dtype)) for j in range(len(parents))
                ]
                weights_sum = tf.add_n(parent_weights)
                parents = [
                    parents[i] * parent_weights[i] / (weights_sum + 0.0001)
                    for i in range(len(parents))
                ]

            # Fuse all parent nodes then build a new block.
            x = tf_utils.get_activation(self._activation_fn)(tf.add_n(parents))
            x = self._block_group(
                inputs=x,
                filters=target_num_filters,
                strides=1,
                block_fn_cand=target_block_fn,
                block_repeats=self._block_repeats,
                stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
                    self._init_stochastic_depth_rate, i + 1,
                    len(self._block_specs)),
                name='scale_permuted_block_{}'.format(i + 1))

            net.append(x)
            net_sizes.append(target_width)
            net_block_fns.append(target_block_fn)
            num_outgoing_connections.append(0)

            # Save output feats.
            if block_spec.is_output:
                if block_spec.level in endpoints:
                    raise ValueError(
                        'Duplicate feats found for output level {}.'.format(
                            block_spec.level))
                if (block_spec.level < self._min_level
                        or block_spec.level > self._max_level):
                    raise ValueError(
                        'Output level is out of range [{}, {}]'.format(
                            self._min_level, self._max_level))
                endpoints[str(block_spec.level)] = x

        return endpoints
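
For reference, the weighted-fusion step above can be sketched standalone:
each parent feature map is scaled by a learnable non-negative weight,
normalized by the sum of all weights plus a small epsilon. This is a
distillation of the pattern, not the class method itself:

import tensorflow as tf

def weighted_fuse(parents):
    # One learnable scalar per parent, kept non-negative via relu.
    weights = [tf.nn.relu(tf.Variable(1.0)) for _ in parents]
    total = tf.add_n(weights) + 1e-4  # epsilon matches the 0.0001 above
    return tf.add_n([p * w / total for p, w in zip(parents, weights)])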
Example #6
    def __init__(self,
                 model_id,
                 input_specs=layers.InputSpec(shape=[None, None, None, 3]),
                 se_ratio=0.0,
                 stochastic_depth_drop_rate=0.0,
                 kernel_initializer='VarianceScaling',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activation='relu',
                 use_sync_bn=False,
                 norm_momentum=0.99,
                 norm_epsilon=0.001,
                 **kwargs):
        """EfficientNet initialization function.

    Args:
      model_id: `str` model id of EfficientNet.
      input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
      se_ratio: `float` squeeze and excitation ratio for inverted bottleneck
        blocks.
      stochastic_depth_drop_rate: `float` drop rate for drop connect layer.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      activation: `str` name of the activation function.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      **kwargs: keyword arguments to be passed.
    """
        self._model_id = model_id
        self._input_specs = input_specs
        self._se_ratio = se_ratio
        self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
        self._use_sync_bn = use_sync_bn
        self._activation = activation
        self._kernel_initializer = kernel_initializer
        self._norm_momentum = norm_momentum
        self._norm_epsilon = norm_epsilon
        self._kernel_regularizer = kernel_regularizer
        self._bias_regularizer = bias_regularizer
        if use_sync_bn:
            self._norm = layers.experimental.SyncBatchNormalization
        else:
            self._norm = layers.BatchNormalization

        if tf.keras.backend.image_data_format() == 'channels_last':
            bn_axis = -1
        else:
            bn_axis = 1

        # Build EfficientNet.
        inputs = tf.keras.Input(shape=input_specs.shape[1:])
        width_scale = SCALING_MAP[model_id]['width_scale']
        depth_scale = SCALING_MAP[model_id]['depth_scale']

        # Build stem.
        x = layers.Conv2D(filters=nn_layers.round_filters(32, width_scale),
                          kernel_size=3,
                          strides=2,
                          use_bias=False,
                          padding='same',
                          kernel_initializer=self._kernel_initializer,
                          kernel_regularizer=self._kernel_regularizer,
                          bias_regularizer=self._bias_regularizer)(inputs)
        x = self._norm(axis=bn_axis,
                       momentum=norm_momentum,
                       epsilon=norm_epsilon)(x)
        x = tf_utils.get_activation(activation)(x)

        # Build intermediate blocks.
        endpoints = {}
        endpoint_level = 2
        decoded_specs = block_spec_decoder(EN_B0_BLOCK_SPECS, width_scale,
                                           depth_scale)

        for i, specs in enumerate(decoded_specs):
            x = self._block_group(inputs=x,
                                  specs=specs,
                                  name='block_group_{}'.format(i))
            if specs.is_output:
                endpoints[str(endpoint_level)] = x
                endpoint_level += 1

        # Build output specs for downstream tasks.
        self._output_specs = {
            l: endpoints[l].get_shape() for l in endpoints
        }

        # Build the final conv for classification.
        x = layers.Conv2D(filters=nn_layers.round_filters(1280, width_scale),
                          kernel_size=1,
                          strides=1,
                          use_bias=False,
                          padding='same',
                          kernel_initializer=self._kernel_initializer,
                          kernel_regularizer=self._kernel_regularizer,
                          bias_regularizer=self._bias_regularizer)(x)
        x = self._norm(axis=bn_axis,
                       momentum=norm_momentum,
                       epsilon=norm_epsilon)(x)
        endpoints[str(endpoint_level)] = tf_utils.get_activation(activation)(x)

        super(EfficientNet, self).__init__(inputs=inputs,
                                           outputs=endpoints,
                                           **kwargs)
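
A usage sketch for the backbone above; the model id key is an assumption
about SCALING_MAP (only its indexing is shown), and the import path is
hypothetical:

import tensorflow as tf

# Hypothetical import path for the backbone defined above.
# from official.vision.modeling.backbones.efficientnet import EfficientNet

backbone = EfficientNet(model_id='b0')  # assuming SCALING_MAP has a 'b0' key
feats = backbone(tf.ones([1, 224, 224, 3]))
# `feats` is the endpoint dict built above, keyed by stringified level.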
Example #7
    def __init__(self,
                 model_id,
                 input_specs=InputSpec(shape=[None, None, None, 3]),
                 stem_type='v1',
                 activation="relu",
                 dropout_rate=0.2,
                 radix=2,
                 groups=1,
                 bottleneck_width=64,
                 block_expansion=4,
                 avg_down=True,
                 avd=True,
                 avd_first=False,
                 preact=False,
                 using_basic_block=False,
                 using_cb=False):
        self.channel_axis = -1  # channels-last only; not configurable
        self.model_id = model_id
        self.activation = activation
        self.input_specs = input_specs
        self.dropout_rate = dropout_rate

        self.blocks_set = RESNEST_SPECS[model_id]['blocks_set']
        self.radix = radix
        self.cardinality = groups
        self.bottleneck_width = bottleneck_width

        self.deep_stem = stem_type == 'v1'
        self.stem_width = RESNEST_SPECS[model_id]['stem_width']
        self.block_expansion = block_expansion
        self.avg_down = avg_down
        self.avd = avd
        self.avd_first = avd_first

        self.dilation = 1
        self.preact = preact
        self.using_basic_block = using_basic_block
        self.using_cb = using_cb

        input_sig = Input(shape=self.input_specs.shape[1:])
        x = self._make_stem(input_sig,
                            stem_width=self.stem_width,
                            deep_stem=self.deep_stem)

        if not self.preact:
            x = BatchNormalization(axis=self.channel_axis, epsilon=1.001e-5)(x)
            x = tf_utils.get_activation(self.activation)(x)
        print("stem_out", x.shape)

        x = MaxPool2D(pool_size=3,
                      strides=2,
                      padding="same",
                      data_format="channels_last")(x)
        print("MaxPool2D out", x.shape)

        if self.preact:
            x = BatchNormalization(axis=self.channel_axis, epsilon=1.001e-5)(x)
            x = tf_utils.get_activation(self.activation)(x)

        endpoints = {}
        i = 0
        if self.using_cb:
            second_x = x
            second_x = self._make_layer(x,
                                        blocks=self.blocks_set[0],
                                        filters=64,
                                        stride=1,
                                        is_first=False)
            second_x_tmp = self._make_Composite_layer(second_x,
                                                      filters=x.shape[-1],
                                                      upsample=False)
            logging.info('layer 0 db_com shape: %s', second_x_tmp.shape)
            x = Add()([second_x_tmp, x])
        x = self._make_layer(x,
                             blocks=self.blocks_set[0],
                             filters=64,
                             stride=1,
                             is_first=False)
        endpoints[str(i + 2)] = x
        print("-" * 5, "layer 0 out", x.shape, "-" * 5)

        b1_b3_filters = [64, 128, 256, 512]
        for i in range(1, 4):
            if self.using_cb:
                second_x = self._make_layer(x,
                                            blocks=self.blocks_set[i],
                                            filters=b1_b3_filters[i],
                                            stride=2)
                second_x_tmp = self._make_Composite_layer(second_x,
                                                          filters=x.shape[-1])
                logging.info('layer %d db_com out shape: %s', i,
                             second_x_tmp.shape)
                x = Add()([second_x_tmp, x])
            x = self._make_layer(x,
                                 blocks=self.blocks_set[i],
                                 filters=b1_b3_filters[i],
                                 stride=2)
            logging.info('layer %d out shape: %s', i, x.shape)
            endpoints[str(i + 2)] = x

        self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}
        logging.info('output specs: %s', self._output_specs)

        super(ResNest, self).__init__(inputs=input_sig, outputs=endpoints)
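
A construction sketch; the model_id key is an assumption about RESNEST_SPECS,
whose contents the snippet does not show:

import tensorflow as tf

model = ResNest(model_id='resnest50')  # assuming such a key in RESNEST_SPECS
outputs = model(tf.ones([1, 224, 224, 3]))
# `outputs` is the endpoint dict keyed '2' through '5', as built above.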
Example #8
    def __init__(self,
                 params: yt8m_cfg.DbofModel,
                 num_frames: int = 30,
                 num_classes: int = 3862,
                 input_specs: layers.InputSpec = layers.InputSpec(
                     shape=[None, None, 1152]),
                 kernel_regularizer: Optional[
                     tf.keras.regularizers.Regularizer] = None,
                 activation: str = "relu",
                 use_sync_bn: bool = False,
                 norm_momentum: float = 0.99,
                 norm_epsilon: float = 0.001,
                 **kwargs):
        """YT8M initialization function.

    Args:
      params: model configuration parameters.
      num_frames: `int` number of frames in a single input.
      num_classes: `int` number of classes in dataset.
      input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
        [batch_size x num_frames x num_features]
      kernel_regularizer: tf.keras.regularizers.Regularizer object. Default to
        None.
      activation: A `str` of name of the activation function.
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      **kwargs: keyword arguments to be passed.
    """

        self._self_setattr_tracking = False
        self._config_dict = {
            "input_specs": input_specs,
            "num_classes": num_classes,
            "num_frames": num_frames,
            "params": params
        }
        self._num_classes = num_classes
        self._input_specs = input_specs
        self._act_fn = tf_utils.get_activation(activation)
        if use_sync_bn:
            self._norm = layers.experimental.SyncBatchNormalization
        else:
            self._norm = layers.BatchNormalization
        if tf.keras.backend.image_data_format() == "channels_last":
            bn_axis = -1
        else:
            bn_axis = 1

        # [batch_size x num_frames x num_features]
        feature_size = input_specs.shape[-1]
        # shape 'excluding' batch_size
        model_input = tf.keras.Input(shape=self._input_specs.shape[1:])
        reshaped_input = tf.reshape(model_input, [-1, feature_size])
        tf.summary.histogram("input_hist", model_input)

        # configure model
        if params.add_batch_norm:
            reshaped_input = self._norm(axis=bn_axis,
                                        momentum=norm_momentum,
                                        epsilon=norm_epsilon,
                                        name="input_bn")(reshaped_input)

        # activation = reshaped input * cluster weights
        if params.cluster_size > 0:
            activation = layers.Dense(
                params.cluster_size,
                kernel_regularizer=kernel_regularizer,
                kernel_initializer=tf.random_normal_initializer(
                    stddev=1 / tf.sqrt(tf.cast(feature_size, tf.float32))))(
                        reshaped_input)

        if params.add_batch_norm:
            activation = self._norm(axis=bn_axis,
                                    momentum=norm_momentum,
                                    epsilon=norm_epsilon,
                                    name="cluster_bn")(activation)
        else:
            cluster_biases = tf.Variable(
                tf.random_normal_initializer(
                    stddev=1 / tf.sqrt(tf.cast(feature_size, tf.float32)))(
                        shape=[params.cluster_size]),
                name="cluster_biases")
            tf.summary.histogram("cluster_biases", cluster_biases)
            activation += cluster_biases

        activation = self._act_fn(activation)
        tf.summary.histogram("cluster_output", activation)

        if params.use_context_gate_cluster_layer:
            pooling_method = None
            norm_args = dict(axis=bn_axis,
                             momentum=norm_momentum,
                             epsilon=norm_epsilon,
                             name="context_gate_bn")
            activation = utils.context_gate(
                activation,
                normalizer_fn=self._norm,
                normalizer_params=norm_args,
                pooling_method=pooling_method,
                hidden_layer_size=params.context_gate_cluster_bottleneck_size,
                kernel_regularizer=kernel_regularizer)
        activation = tf.reshape(activation,
                                [-1, num_frames, params.cluster_size])
        activation = utils.frame_pooling(activation, params.pooling_method)

        # activation = activation * hidden1_weights
        activation = layers.Dense(
            params.hidden_size,
            kernel_regularizer=kernel_regularizer,
            kernel_initializer=tf.random_normal_initializer(
                stddev=1 /
                tf.sqrt(tf.cast(params.cluster_size, tf.float32))))(activation)

        if params.add_batch_norm:
            activation = self._norm(axis=bn_axis,
                                    momentum=norm_momentum,
                                    epsilon=norm_epsilon,
                                    name="hidden1_bn")(activation)

        else:
            hidden1_biases = tf.Variable(tf.random_normal_initializer(
                stddev=0.01)(shape=[params.hidden_size]),
                                         name="hidden1_biases")

            tf.summary.histogram("hidden1_biases", hidden1_biases)
            activation += hidden1_biases

        activation = self._act_fn(activation)
        tf.summary.histogram("hidden1_output", activation)

        aggregated_model = getattr(yt8m_agg_models,
                                   params.yt8m_agg_classifier_model)
        norm_args = dict(axis=bn_axis,
                         momentum=norm_momentum,
                         epsilon=norm_epsilon)
        output = aggregated_model().create_model(
            model_input=activation,
            vocab_size=self._num_classes,
            num_mixtures=params.agg_model.num_mixtures,
            normalizer_fn=self._norm,
            normalizer_params=norm_args,
            l2_penalty=params.agg_model.l2_penalty)

        super().__init__(inputs=model_input,
                         outputs=output.get("predictions"),
                         **kwargs)
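
A hedged construction sketch. Neither the enclosing class name nor the config
import is shown in the snippet, so both names below are assumptions:

# Assumed: yt8m_cfg.DbofModel() supplies usable defaults (add_batch_norm,
# cluster_size, hidden_size, pooling_method, ...), and YT8MDbofModel is a
# placeholder for the unnamed class above.
params = yt8m_cfg.DbofModel()
model = YT8MDbofModel(params=params, num_frames=30, num_classes=3862)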
Example #9
  def __init__(
      self,
      level: Union[int, str],
      num_convs: int = 2,
      num_filters: int = 256,
      kernel_size: int = 3,
      use_depthwise_convolution: bool = False,
      upsample_factor: int = 1,
      low_level: Optional[List[int]] = None,
      low_level_num_filters: Optional[List[int]] = None,
      fusion_num_output_filters: int = 256,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a panoptic deeplab head.

    Args:
      level: An `int` or `str`, level to use to build head.
      num_convs: An `int` number of stacked convolutions before the last
        prediction layer.
      num_filters: An `int` number to specify the number of filters used.
        Default is 256.
      kernel_size: An `int` number to specify the kernel size of the
        stacked convolutions before the last prediction layer.
      use_depthwise_convolution: A `bool` to specify whether to use depthwise
        separable convolutions.
      upsample_factor: An `int` number to specify the upsampling factor to
        generate finer mask. Default 1 means no upsampling is applied.
      low_level: A list of `int`, the backbone levels to be used for feature
        fusion.
      low_level_num_filters: A list of `int`, the reduced numbers of filters
        for the low level features before fusing them with higher level
        features.
      fusion_num_output_filters: An `int` number to specify the number of
        filters used by output layer of fusion module. Default is 256.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(PanopticDeeplabHead, self).__init__(**kwargs)

    self._config_dict = {
        'level': level,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'kernel_size': kernel_size,
        'use_depthwise_convolution': use_depthwise_convolution,
        'upsample_factor': upsample_factor,
        'low_level': low_level,
        'low_level_num_filters': low_level_num_filters,
        'fusion_num_output_filters': fusion_num_output_filters,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer
    }
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)
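
A minimal construction sketch for the head above; list-valued low_level
arguments match the Optional[List[int]] annotations, and all values are
illustrative:

import tensorflow as tf

head = PanopticDeeplabHead(
    level=3,
    num_convs=2,
    low_level=[3, 2],                # illustrative backbone levels
    low_level_num_filters=[64, 32],
    kernel_regularizer=tf.keras.regularizers.l2(1e-4))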
Example #10
    def build(self, input_shape: Optional[Union[Sequence[int], tf.Tensor]]):
        """Build variables and child layers to prepare for calling."""
        conv2d_quantized = _quantize_wrapped_layer(
            tf.keras.layers.Conv2D,
            configs.Default8BitConvQuantizeConfig(['kernel'], ['activation'],
                                                  False))
        depthwise_conv2d_quantized = _quantize_wrapped_layer(
            tf.keras.layers.DepthwiseConv2D,
            configs.Default8BitConvQuantizeConfig(['depthwise_kernel'],
                                                  ['activation'], False))
        expand_filters = self._in_filters
        if self._expand_ratio > 1:
            # First 1x1 conv for channel expansion.
            expand_filters = nn_layers.make_divisible(
                self._in_filters * self._expand_ratio, self._divisible_by)

            expand_kernel = 1 if self._use_depthwise else self._kernel_size
            expand_stride = 1 if self._use_depthwise else self._strides

            self._conv0 = conv2d_quantized(
                filters=expand_filters,
                kernel_size=expand_kernel,
                strides=expand_stride,
                padding='same',
                use_bias=False,
                kernel_initializer=self._kernel_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer,
                activation=NoOpActivation())
            self._norm0 = self._norm_by_activation(self._activation)(
                axis=self._bn_axis,
                momentum=self._norm_momentum,
                epsilon=self._norm_epsilon)
            self._activation_layer = tfmot.quantization.keras.QuantizeWrapperV2(
                tf_utils.get_activation(self._activation,
                                        use_keras_layer=True),
                configs.Default8BitActivationQuantizeConfig())

        if self._use_depthwise:
            # Depthwise conv.
            self._conv1 = depthwise_conv2d_quantized(
                kernel_size=(self._kernel_size, self._kernel_size),
                strides=self._strides,
                padding='same',
                depth_multiplier=1,
                dilation_rate=self._dilation_rate,
                use_bias=False,
                depthwise_initializer=self._kernel_initializer,
                depthwise_regularizer=self._depthsize_regularizer,
                bias_regularizer=self._bias_regularizer,
                activation=NoOpActivation())
            self._norm1 = self._norm_by_activation(self._depthwise_activation)(
                axis=self._bn_axis,
                momentum=self._norm_momentum,
                epsilon=self._norm_epsilon)
            self._depthwise_activation_layer = (
                tfmot.quantization.keras.QuantizeWrapperV2(
                    tf_utils.get_activation(self._depthwise_activation,
                                            use_keras_layer=True),
                    configs.Default8BitActivationQuantizeConfig()))

        # Squeeze and excitation.
        if self._se_ratio and 0 < self._se_ratio <= 1:
            logging.info('Use Squeeze and excitation.')
            in_filters = self._in_filters
            if self._expand_se_in_filters:
                in_filters = expand_filters
            self._squeeze_excitation = qat_nn_layers.SqueezeExcitationQuantized(
                in_filters=in_filters,
                out_filters=expand_filters,
                se_ratio=self._se_ratio,
                divisible_by=self._divisible_by,
                round_down_protect=self._se_round_down_protect,
                kernel_initializer=self._kernel_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer,
                activation=self._se_inner_activation,
                gating_activation=self._se_gating_activation)
        else:
            self._squeeze_excitation = None

        # Last 1x1 conv.
        self._conv2 = conv2d_quantized(
            filters=self._out_filters,
            kernel_size=1,
            strides=1,
            padding='same',
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            activation=NoOpActivation())
        self._norm2 = self._norm_with_quantize(axis=self._bn_axis,
                                               momentum=self._norm_momentum,
                                               epsilon=self._norm_epsilon)

        if self._stochastic_depth_drop_rate:
            self._stochastic_depth = nn_layers.StochasticDepth(
                self._stochastic_depth_drop_rate)
        else:
            self._stochastic_depth = None
        self._add = tfmot.quantization.keras.QuantizeWrapperV2(
            tf.keras.layers.Add(),
            configs.Default8BitQuantizeConfig([], [], True))

        super(InvertedBottleneckBlockQuantized, self).build(input_shape)
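
Keras invokes build() automatically on the first call, so exercising the
block only requires constructing and calling it. The constructor argument
names below are assumptions inferred from the self._* attributes read in
build() (the __init__ is not shown):

import tensorflow as tf

block = InvertedBottleneckBlockQuantized(
    in_filters=32, out_filters=16, expand_ratio=6,
    strides=1, kernel_size=3)  # argument names assumed, not shown above
y = block(tf.ones([1, 56, 56, 32]))  # first call triggers build()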
Example #11
def pretrain_model(bert_config,
                   seq_length,
                   max_predictions_per_seq,
                   initializer=None,
                   use_next_sentence_label=True,
                   return_core_pretrainer_model=False):
  """Returns model to be used for pre-training.

  Args:
      bert_config: Configuration that defines the core BERT model.
      seq_length: Maximum sequence length of the training data.
      max_predictions_per_seq: Maximum number of tokens in sequence to mask out
        and use for pretraining.
      initializer: Initializer for weights in BertPretrainer.
      use_next_sentence_label: Whether to use the next sentence label.
      return_core_pretrainer_model: Whether to also return the `BertPretrainer`
        object.

  Returns:
      A Tuple of (1) Pretraining model, (2) core BERT submodel from which to
      save weights after pretraining, and (3) optional core `BertPretrainer`
      object if argument `return_core_pretrainer_model` is True.
  """
  input_word_ids = tf.keras.layers.Input(
      shape=(seq_length,), name='input_word_ids', dtype=tf.int32)
  input_mask = tf.keras.layers.Input(
      shape=(seq_length,), name='input_mask', dtype=tf.int32)
  input_type_ids = tf.keras.layers.Input(
      shape=(seq_length,), name='input_type_ids', dtype=tf.int32)
  masked_lm_positions = tf.keras.layers.Input(
      shape=(max_predictions_per_seq,),
      name='masked_lm_positions',
      dtype=tf.int32)
  masked_lm_ids = tf.keras.layers.Input(
      shape=(max_predictions_per_seq,), name='masked_lm_ids', dtype=tf.int32)
  masked_lm_weights = tf.keras.layers.Input(
      shape=(max_predictions_per_seq,),
      name='masked_lm_weights',
      dtype=tf.int32)

  if use_next_sentence_label:
    next_sentence_labels = tf.keras.layers.Input(
        shape=(1,), name='next_sentence_labels', dtype=tf.int32)
  else:
    next_sentence_labels = None

  transformer_encoder = get_transformer_encoder(bert_config, seq_length)
  if initializer is None:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)
  pretrainer_model = models.BertPretrainer(
      network=transformer_encoder,
      embedding_table=transformer_encoder.get_embedding_table(),
      num_classes=2,  # The next sentence prediction label has two classes.
      activation=tf_utils.get_activation(bert_config.hidden_act),
      num_token_predictions=max_predictions_per_seq,
      initializer=initializer,
      output='logits')

  outputs = pretrainer_model(
      [input_word_ids, input_mask, input_type_ids, masked_lm_positions])
  lm_output = outputs['masked_lm']
  sentence_output = outputs['classification']
  pretrain_loss_layer = BertPretrainLossAndMetricLayer(
      vocab_size=bert_config.vocab_size)
  output_loss = pretrain_loss_layer(lm_output, sentence_output, masked_lm_ids,
                                    masked_lm_weights, next_sentence_labels)
  inputs = {
      'input_word_ids': input_word_ids,
      'input_mask': input_mask,
      'input_type_ids': input_type_ids,
      'masked_lm_positions': masked_lm_positions,
      'masked_lm_ids': masked_lm_ids,
      'masked_lm_weights': masked_lm_weights,
  }
  if use_next_sentence_label:
    inputs['next_sentence_labels'] = next_sentence_labels

  keras_model = tf.keras.Model(inputs=inputs, outputs=output_loss)
  if return_core_pretrainer_model:
    return keras_model, transformer_encoder, pretrainer_model
  else:
    return keras_model, transformer_encoder
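
A wiring sketch for the function above; bert_config is assumed to be a
standard BERT config object exposing the fields the function reads
(initializer_range, hidden_act, vocab_size):

keras_model, encoder = pretrain_model(
    bert_config=bert_config,  # e.g. a BertConfig instance (assumed)
    seq_length=128,
    max_predictions_per_seq=20)
# `keras_model` outputs the combined pretraining loss; `encoder` holds the
# core BERT weights to export after pretraining.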
Example #12
    def build(self, input_shape: Optional[Union[Sequence[int], tf.Tensor]]):
        """Build variables and child layers to prepare for calling."""
        conv2d_quantized = _quantize_wrapped_layer(
            tf.keras.layers.Conv2D,
            configs.Default8BitConvQuantizeConfig(['kernel'], ['activation'],
                                                  False))
        if self._use_projection:
            if self._resnetd_shortcut:
                self._shortcut0 = tf.keras.layers.AveragePooling2D(
                    pool_size=2, strides=self._strides, padding='same')
                self._shortcut1 = conv2d_quantized(
                    filters=self._filters * 4,
                    kernel_size=1,
                    strides=1,
                    use_bias=False,
                    kernel_initializer=self._kernel_initializer,
                    kernel_regularizer=self._kernel_regularizer,
                    bias_regularizer=self._bias_regularizer,
                    activation=NoOpActivation())
            else:
                self._shortcut = conv2d_quantized(
                    filters=self._filters * 4,
                    kernel_size=1,
                    strides=self._strides,
                    use_bias=False,
                    kernel_initializer=self._kernel_initializer,
                    kernel_regularizer=self._kernel_regularizer,
                    bias_regularizer=self._bias_regularizer,
                    activation=NoOpActivation())

            self._norm0 = self._norm_with_quantize(
                axis=self._bn_axis,
                momentum=self._norm_momentum,
                epsilon=self._norm_epsilon,
                trainable=self._bn_trainable)

        self._conv1 = conv2d_quantized(
            filters=self._filters,
            kernel_size=1,
            strides=1,
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            activation=NoOpActivation())
        self._norm1 = self._norm(axis=self._bn_axis,
                                 momentum=self._norm_momentum,
                                 epsilon=self._norm_epsilon,
                                 trainable=self._bn_trainable)
        self._activation1 = tfmot.quantization.keras.QuantizeWrapperV2(
            tf_utils.get_activation(self._activation, use_keras_layer=True),
            configs.Default8BitActivationQuantizeConfig())

        self._conv2 = conv2d_quantized(
            filters=self._filters,
            kernel_size=3,
            strides=self._strides,
            dilation_rate=self._dilation_rate,
            padding='same',
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            activation=NoOpActivation())
        self._norm2 = self._norm(axis=self._bn_axis,
                                 momentum=self._norm_momentum,
                                 epsilon=self._norm_epsilon,
                                 trainable=self._bn_trainable)
        self._activation2 = tfmot.quantization.keras.QuantizeWrapperV2(
            tf_utils.get_activation(self._activation, use_keras_layer=True),
            configs.Default8BitActivationQuantizeConfig())

        self._conv3 = conv2d_quantized(
            filters=self._filters * 4,
            kernel_size=1,
            strides=1,
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            activation=NoOpActivation())
        self._norm3 = self._norm_with_quantize(axis=self._bn_axis,
                                               momentum=self._norm_momentum,
                                               epsilon=self._norm_epsilon,
                                               trainable=self._bn_trainable)
        self._activation3 = tfmot.quantization.keras.QuantizeWrapperV2(
            tf_utils.get_activation(self._activation, use_keras_layer=True),
            configs.Default8BitActivationQuantizeConfig())

        if self._se_ratio and 0 < self._se_ratio <= 1:
            self._squeeze_excitation = qat_nn_layers.SqueezeExcitationQuantized(
                in_filters=self._filters * 4,
                out_filters=self._filters * 4,
                se_ratio=self._se_ratio,
                kernel_initializer=self._kernel_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer)
        else:
            self._squeeze_excitation = None

        if self._stochastic_depth_drop_rate:
            self._stochastic_depth = nn_layers.StochasticDepth(
                self._stochastic_depth_drop_rate)
        else:
            self._stochastic_depth = None
        self._add = tfmot.quantization.keras.QuantizeWrapperV2(
            tf.keras.layers.Add(),
            configs.Default8BitQuantizeConfig([], [], True))

        super(BottleneckBlockQuantized, self).build(input_shape)
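
As with the inverted-bottleneck block, build() fires on the first call. The
constructor arguments below are assumptions inferred from the self._*
attributes used above:

import tensorflow as tf

block = BottleneckBlockQuantized(
    filters=64, strides=1, use_projection=True)  # names assumed from build()
y = block(tf.ones([1, 56, 56, 256]))  # output channel count: filters * 4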
Example #13
    def __init__(self,
                 num_classes: int,
                 level: Union[int, str],
                 num_convs: int = 2,
                 num_filters: int = 256,
                 use_depthwise_convolution: bool = False,
                 prediction_kernel_size: int = 1,
                 upsample_factor: int = 1,
                 feature_fusion: Optional[str] = None,
                 decoder_min_level: Optional[int] = None,
                 decoder_max_level: Optional[int] = None,
                 low_level: int = 2,
                 low_level_num_filters: int = 48,
                 num_decoder_filters: int = 256,
                 activation: str = 'relu',
                 use_sync_bn: bool = False,
                 norm_momentum: float = 0.99,
                 norm_epsilon: float = 0.001,
                 kernel_regularizer: Optional[
                     tf.keras.regularizers.Regularizer] = None,
                 bias_regularizer: Optional[
                     tf.keras.regularizers.Regularizer] = None,
                 **kwargs):
        """Initializes a segmentation head.

    Args:
      num_classes: An `int` number of mask classification categories. The
        number of classes does not include the background class.
      level: An `int` or `str`, level to use to build segmentation head.
      num_convs: An `int` number of stacked convolution before the last
        prediction layer.
      num_filters: An `int` number to specify the number of filters used.
        Default is 256.
      use_depthwise_convolution: A `bool` to specify whether to use depthwise
        separable convolutions.
      prediction_kernel_size: An `int` number to specify the kernel size of the
        prediction layer.
      upsample_factor: An `int` number to specify the upsampling factor to
        generate finer mask. Default 1 means no upsampling is applied.
      feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`,
        `panoptic_fpn_fusion`, or None. If `deeplabv3plus`, features from
        decoder_features[level] will be fused with low level feature maps from
        backbone. If `pyramid_fusion`, multiscale features will be resized and
        fused at the target level.
      decoder_min_level: An `int` of minimum level from decoder to use in
        feature fusion. It is only used when feature_fusion is set to
        `panoptic_fpn_fusion`.
      decoder_max_level: An `int` of maximum level from decoder to use in
        feature fusion. It is only used when feature_fusion is set to
        `panoptic_fpn_fusion`.
      low_level: An `int` of backbone level to be used for feature fusion. It is
        used when feature_fusion is set to `deeplabv3plus`.
      low_level_num_filters: An `int` of reduced number of filters for the low
        level features before fusing it with higher level features. It is only
        used when feature_fusion is set to `deeplabv3plus`.
      num_decoder_filters: An `int` of number of filters in the decoder outputs.
        It is only used when feature_fusion is set to `panoptic_fpn_fusion`.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
        super().__init__(**kwargs)

        self._config_dict = {
            'num_classes': num_classes,
            'level': level,
            'num_convs': num_convs,
            'num_filters': num_filters,
            'use_depthwise_convolution': use_depthwise_convolution,
            'prediction_kernel_size': prediction_kernel_size,
            'upsample_factor': upsample_factor,
            'feature_fusion': feature_fusion,
            'decoder_min_level': decoder_min_level,
            'decoder_max_level': decoder_max_level,
            'low_level': low_level,
            'low_level_num_filters': low_level_num_filters,
            'num_decoder_filters': num_decoder_filters,
            'activation': activation,
            'use_sync_bn': use_sync_bn,
            'norm_momentum': norm_momentum,
            'norm_epsilon': norm_epsilon,
            'kernel_regularizer': kernel_regularizer,
            'bias_regularizer': bias_regularizer,
        }
        if tf.keras.backend.image_data_format() == 'channels_last':
            self._bn_axis = -1
        else:
            self._bn_axis = 1
        self._activation_layer = tfmot.quantization.keras.QuantizeWrapperV2(
            tf_utils.get_activation(activation, use_keras_layer=True),
            configs.Default8BitActivationQuantizeConfig())
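
A minimal usage sketch for the head above. The enclosing class name is not
shown in this snippet, so `SegmentationHead` below is a hypothetical stand-in
for it; only the constructor arguments documented above are assumed.

# `SegmentationHead` is a hypothetical name for the class defined above.
head = SegmentationHead(
    num_classes=21,     # e.g. 21 foreground categories
    level=3,            # build the head on top of level-3 features
    num_convs=2,
    upsample_factor=2)  # predict a 2x finer mask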
Example #14
def get_mc_dropout_transformer_encoder(bert_config,
                                       use_mc_dropout_mha=False,
                                       use_mc_dropout_att=False,
                                       use_mc_dropout_ffn=False,
                                       channel_wise_dropout_mha=False,
                                       channel_wise_dropout_att=False,
                                       channel_wise_dropout_ffn=False):
    """Gets a DropoutTransformerEncoder from a bert_config object.

  Args:
    bert_config: A 'modeling.BertConfig' object.
    use_mc_dropout_mha: (bool) Whether to apply MC Dropout to the multi-head
      attention score layer.
    use_mc_dropout_att: (bool) Whether to apply MC Dropout to the attention
      output layer.
    use_mc_dropout_ffn: (bool) Whether to apply MC Dropout to the feedforward
      layer.
    channel_wise_dropout_mha: (bool) Whether to apply channel-wise MC Dropout
      to the multi-head attention score layer.
    channel_wise_dropout_att: (bool) Whether to apply channel-wise MC Dropout
      to the attention output layer.
    channel_wise_dropout_ffn: (bool) Whether to apply channel-wise MC Dropout
      to the feedforward layer.

  Returns:
    A DropoutTransformerEncoder object.
  """
    embedding_cfg = dict(
        vocab_size=bert_config.vocab_size,
        type_vocab_size=bert_config.type_vocab_size,
        hidden_size=bert_config.hidden_size,
        max_seq_length=bert_config.max_position_embeddings,
        initializer=tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range),
        dropout_rate=bert_config.hidden_dropout_prob,
    )
    hidden_cfg = dict(
        num_attention_heads=bert_config.num_attention_heads,
        intermediate_size=bert_config.intermediate_size,
        intermediate_activation=tf_utils.get_activation(
            bert_config.hidden_act),
        dropout_rate=bert_config.hidden_dropout_prob,
        attention_dropout_rate=bert_config.attention_probs_dropout_prob,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range),
    )
    kwargs = dict(
        embedding_cfg=embedding_cfg,
        num_hidden_instances=bert_config.num_hidden_layers,
        pooled_output_dim=bert_config.hidden_size,
        pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range))

    return DropoutTransformerEncoder(
        use_mc_dropout_mha=use_mc_dropout_mha,
        use_mc_dropout_att=use_mc_dropout_att,
        use_mc_dropout_ffn=use_mc_dropout_ffn,
        channel_wise_dropout_mha=channel_wise_dropout_mha,
        channel_wise_dropout_att=channel_wise_dropout_att,
        channel_wise_dropout_ffn=channel_wise_dropout_ffn,
        hidden_cfg=hidden_cfg,
        **kwargs)
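
A hedged usage sketch; it assumes `bert_config` comes from the standard BERT
config class referenced in the docstring (the official.nlp.bert.configs import
path is an assumption about the surrounding module layout).

from official.nlp.bert import configs as bert_configs  # assumed import path

bert_config = bert_configs.BertConfig(vocab_size=30522)
encoder = get_mc_dropout_transformer_encoder(
    bert_config,
    use_mc_dropout_mha=True,        # MC Dropout on the attention scores
    channel_wise_dropout_mha=True)  # drop whole channels rather than units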
Example #15
    def __init__(self,
                 model_id: int,
                 input_specs: tf.keras.layers.InputSpec = layers.InputSpec(
                     shape=[None, None, None, 3]),
                 depth_multiplier: float = 1.0,
                 stem_type: str = 'v0',
                 resnetd_shortcut: bool = False,
                 replace_stem_max_pool: bool = False,
                 se_ratio: Optional[float] = None,
                 init_stochastic_depth_rate: float = 0.0,
                 activation: str = 'relu',
                 use_sync_bn: bool = False,
                 norm_momentum: float = 0.99,
                 norm_epsilon: float = 0.001,
                 kernel_initializer: str = 'VarianceScaling',
                 kernel_regularizer: Optional[
                     tf.keras.regularizers.Regularizer] = None,
                 bias_regularizer: Optional[
                     tf.keras.regularizers.Regularizer] = None,
                 **kwargs):
        """Initializes a ResNet model.

    Args:
      model_id: An `int` of the depth of ResNet backbone model.
      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
      depth_multiplier: A `float` of the depth multiplier to uniformly scale up
        all layers in channel size. This argument is also referred to as
        `width_multiplier` in (https://arxiv.org/abs/2103.07579).
      stem_type: A `str` of stem type of ResNet. Default to `v0`. If set to
        `v1`, use ResNet-D type stem (https://arxiv.org/abs/1812.01187).
      resnetd_shortcut: A `bool` of whether to use ResNet-D shortcut in
        downsampling blocks.
      replace_stem_max_pool: A `bool` of whether to replace the max pool in stem
        with a stride-2 conv.
      se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
      init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
      activation: A `str` name of the activation function.
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A small `float` added to variance to avoid dividing by zero.
      kernel_initializer: A str for kernel initializer of convolutional layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
        Default to None.
      **kwargs: Additional keyword arguments to be passed.
    """
        self._model_id = model_id
        self._input_specs = input_specs
        self._depth_multiplier = depth_multiplier
        self._stem_type = stem_type
        self._resnetd_shortcut = resnetd_shortcut
        self._replace_stem_max_pool = replace_stem_max_pool
        self._se_ratio = se_ratio
        self._init_stochastic_depth_rate = init_stochastic_depth_rate
        self._use_sync_bn = use_sync_bn
        self._activation = activation
        self._norm_momentum = norm_momentum
        self._norm_epsilon = norm_epsilon
        if use_sync_bn:
            self._norm = layers.experimental.SyncBatchNormalization
        else:
            self._norm = layers.BatchNormalization
        self._kernel_initializer = kernel_initializer
        self._kernel_regularizer = kernel_regularizer
        self._bias_regularizer = bias_regularizer

        if tf.keras.backend.image_data_format() == 'channels_last':
            bn_axis = -1
        else:
            bn_axis = 1

        # Build ResNet.
        inputs = tf.keras.Input(shape=input_specs.shape[1:])

        if stem_type == 'v0':
            x = layers.Conv2D(filters=int(64 * self._depth_multiplier),
                              kernel_size=7,
                              strides=2,
                              use_bias=False,
                              padding='same',
                              kernel_initializer=self._kernel_initializer,
                              kernel_regularizer=self._kernel_regularizer,
                              bias_regularizer=self._bias_regularizer)(inputs)
            x = self._norm(axis=bn_axis,
                           momentum=norm_momentum,
                           epsilon=norm_epsilon)(x)
            x = tf_utils.get_activation(activation, use_keras_layer=True)(x)
        elif stem_type == 'v1':
            x = layers.Conv2D(filters=int(32 * self._depth_multiplier),
                              kernel_size=3,
                              strides=2,
                              use_bias=False,
                              padding='same',
                              kernel_initializer=self._kernel_initializer,
                              kernel_regularizer=self._kernel_regularizer,
                              bias_regularizer=self._bias_regularizer)(inputs)
            x = self._norm(axis=bn_axis,
                           momentum=norm_momentum,
                           epsilon=norm_epsilon)(x)
            x = tf_utils.get_activation(activation, use_keras_layer=True)(x)
            x = layers.Conv2D(filters=int(32 * self._depth_multiplier),
                              kernel_size=3,
                              strides=1,
                              use_bias=False,
                              padding='same',
                              kernel_initializer=self._kernel_initializer,
                              kernel_regularizer=self._kernel_regularizer,
                              bias_regularizer=self._bias_regularizer)(x)
            x = self._norm(axis=bn_axis,
                           momentum=norm_momentum,
                           epsilon=norm_epsilon)(x)
            x = tf_utils.get_activation(activation, use_keras_layer=True)(x)
            x = layers.Conv2D(filters=int(64 * self._depth_multiplier),
                              kernel_size=3,
                              strides=1,
                              use_bias=False,
                              padding='same',
                              kernel_initializer=self._kernel_initializer,
                              kernel_regularizer=self._kernel_regularizer,
                              bias_regularizer=self._bias_regularizer)(x)
            x = self._norm(axis=bn_axis,
                           momentum=norm_momentum,
                           epsilon=norm_epsilon)(x)
            x = tf_utils.get_activation(activation, use_keras_layer=True)(x)
        else:
            raise ValueError('Stem type {} not supported.'.format(stem_type))

        if replace_stem_max_pool:
            x = layers.Conv2D(filters=int(64 * self._depth_multiplier),
                              kernel_size=3,
                              strides=2,
                              use_bias=False,
                              padding='same',
                              kernel_initializer=self._kernel_initializer,
                              kernel_regularizer=self._kernel_regularizer,
                              bias_regularizer=self._bias_regularizer)(x)
            x = self._norm(axis=bn_axis,
                           momentum=norm_momentum,
                           epsilon=norm_epsilon)(x)
            x = tf_utils.get_activation(activation, use_keras_layer=True)(x)
        else:
            x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)

        endpoints = {}
        for i, spec in enumerate(RESNET_SPECS[model_id]):
            if spec[0] == 'residual':
                block_fn = nn_blocks.ResidualBlock
            elif spec[0] == 'bottleneck':
                block_fn = nn_blocks.BottleneckBlock
            else:
                raise ValueError('Block fn `{}` is not supported.'.format(
                    spec[0]))
            x = self._block_group(
                inputs=x,
                filters=int(spec[1] * self._depth_multiplier),
                strides=(1 if i == 0 else 2),
                block_fn=block_fn,
                block_repeats=spec[2],
                stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
                    self._init_stochastic_depth_rate, i + 2, 5),
                name='block_group_l{}'.format(i + 2))
            endpoints[str(i + 2)] = x

        self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}

        super(ResNet, self).__init__(inputs=inputs,
                                     outputs=endpoints,
                                     **kwargs)
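
A short usage sketch. Per the super() call this constructor belongs to a class
named ResNet; the sketch assumes that model_id 50 is a key in RESNET_SPECS, as
in the Model Garden backbones.

import tensorflow as tf

backbone = ResNet(model_id=50)  # assumes RESNET_SPECS defines a 50 entry
endpoints = backbone(tf.random.uniform([2, 224, 224, 3]))
for level, feats in endpoints.items():
    # Levels '2'..'5' correspond to strides 4..32,
    # e.g. '2' -> (2, 56, 56, 256) for a ResNet-50 bottleneck stack.
    print(level, feats.shape)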
Example #16
    def __init__(
            self,
            model_id: int,
            input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec(
                shape=[None, None, None, 3]),
            activation: str = 'relu',
            use_sync_bn: bool = False,
            norm_momentum: float = 0.99,
            norm_epsilon: float = 0.001,
            kernel_initializer: str = 'VarianceScaling',
            kernel_regularizer: Optional[
                tf.keras.regularizers.Regularizer] = None,
            **kwargs):
        """Initializes a RevNet model.

    Args:
      model_id: An `int` of depth/id of ResNet backbone model.
      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
      activation: A `str` name of the activation function.
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_initializer: A str for kernel initializer of convolutional layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      **kwargs: Additional keyword arguments to be passed.
    """
        self._model_id = model_id
        self._input_specs = input_specs
        self._use_sync_bn = use_sync_bn
        self._activation = activation
        self._norm_momentum = norm_momentum
        self._norm_epsilon = norm_epsilon
        self._kernel_initializer = kernel_initializer
        self._kernel_regularizer = kernel_regularizer
        if use_sync_bn:
            self._norm = tf.keras.layers.experimental.SyncBatchNormalization
        else:
            self._norm = tf.keras.layers.BatchNormalization

        axis = (-1 if tf.keras.backend.image_data_format() == 'channels_last'
                else 1)

        # Build RevNet.
        inputs = tf.keras.Input(shape=input_specs.shape[1:])

        x = tf.keras.layers.Conv2D(
            filters=REVNET_SPECS[model_id][0][1],
            kernel_size=7,
            strides=2,
            use_bias=False,
            padding='same',
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer)(inputs)
        x = self._norm(axis=axis, momentum=norm_momentum,
                       epsilon=norm_epsilon)(x)
        x = tf_utils.get_activation(activation)(x)
        x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2,
                                      padding='same')(x)

        endpoints = {}
        for i, spec in enumerate(REVNET_SPECS[model_id]):
            if spec[0] == 'residual':
                inner_block_fn = nn_blocks.ResidualInner
            elif spec[0] == 'bottleneck':
                inner_block_fn = nn_blocks.BottleneckResidualInner
            else:
                raise ValueError('Block fn `{}` is not supported.'.format(
                    spec[0]))

            if spec[1] % 2 != 0:
                raise ValueError(
                    'Number of output filters must be even to ensure '
                    'splitting in channel dimension for reversible blocks')

            x = self._block_group(
                inputs=x,
                filters=spec[1],
                strides=(1 if i == 0 else 2),
                inner_block_fn=inner_block_fn,
                block_repeats=spec[2],
                batch_norm_first=(i != 0),  # Only skip on first block
                name='revblock_group_{}'.format(i + 2))
            endpoints[str(i + 2)] = x

        self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}

        super(RevNet, self).__init__(inputs=inputs,
                                     outputs=endpoints,
                                     **kwargs)
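
A hedged usage sketch for the reversible backbone above; it assumes that 38
(a standard RevNet variant) is among the keys of REVNET_SPECS.

import tensorflow as tf

backbone = RevNet(model_id=38)  # assumes REVNET_SPECS defines a 38 entry
endpoints = backbone(tf.random.uniform([2, 224, 224, 3]))
print(sorted(endpoints.keys()))  # one feature level per block group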
Example #17
    def __init__(self,
                 min_level,
                 max_level,
                 num_classes,
                 num_anchors_per_location,
                 num_convs=4,
                 num_filters=256,
                 use_separable_conv=False,
                 activation='relu',
                 use_sync_bn=False,
                 norm_momentum=0.99,
                 norm_epsilon=0.001,
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 **kwargs):
        """Initializes a RetinaNet head.

    Args:
      min_level: An `int` number of minimum feature level.
      max_level: An `int` number of maximum feature level.
      num_classes: An `int` number of classes to predict.
      num_anchors_per_location: An `int` of the number of anchors per pixel
        location.
      num_convs: An `int` number that represents the number of the intermediate
        conv layers before the prediction.
      num_filters: An `int` number that represents the number of filters of the
        intermediate conv layers.
      use_separable_conv: A `bool` that indicates whether the separable
        convolution layers are used.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
        super(RetinaNetHead, self).__init__(**kwargs)
        self._config_dict = {
            'min_level': min_level,
            'max_level': max_level,
            'num_classes': num_classes,
            'num_anchors_per_location': num_anchors_per_location,
            'num_convs': num_convs,
            'num_filters': num_filters,
            'use_separable_conv': use_separable_conv,
            'activation': activation,
            'use_sync_bn': use_sync_bn,
            'norm_momentum': norm_momentum,
            'norm_epsilon': norm_epsilon,
            'kernel_regularizer': kernel_regularizer,
            'bias_regularizer': bias_regularizer,
        }

        if tf.keras.backend.image_data_format() == 'channels_last':
            self._bn_axis = -1
        else:
            self._bn_axis = 1
        self._activation = tf_utils.get_activation(activation)
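
A construction-only sketch with representative values (the head's call
signature is not shown in this snippet); num_classes=80 mirrors COCO-style
detection.

head = RetinaNetHead(
    min_level=3,
    max_level=7,
    num_classes=80,
    num_anchors_per_location=9)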
Example #18
    def __init__(self,
                 model_id,
                 input_specs=layers.InputSpec(shape=[None, None, None, 3]),
                 stem_type='v0',
                 se_ratio=None,
                 init_stochastic_depth_rate=0.0,
                 activation='relu',
                 use_sync_bn=False,
                 norm_momentum=0.99,
                 norm_epsilon=0.001,
                 kernel_initializer='VarianceScaling',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 **kwargs):
        """ResNet initialization function.

    Args:
      model_id: `int` depth of ResNet backbone model.
      input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
      stem_type: `str` stem type of ResNet. Default to `v0`. If set to `v1`,
        use ResNet-C type stem (https://arxiv.org/abs/1812.01187).
      se_ratio: `float` or None. Ratio of the Squeeze-and-Excitation layer.
      init_stochastic_depth_rate: `float` initial stochastic depth rate.
      activation: `str` name of the activation function.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
                          Default to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
                        Default to None.
      **kwargs: keyword arguments to be passed.
    """
        self._model_id = model_id
        self._input_specs = input_specs
        self._stem_type = stem_type
        self._se_ratio = se_ratio
        self._init_stochastic_depth_rate = init_stochastic_depth_rate
        self._use_sync_bn = use_sync_bn
        self._activation = activation
        self._norm_momentum = norm_momentum
        self._norm_epsilon = norm_epsilon
        if use_sync_bn:
            self._norm = layers.experimental.SyncBatchNormalization
        else:
            self._norm = layers.BatchNormalization
        self._kernel_initializer = kernel_initializer
        self._kernel_regularizer = kernel_regularizer
        self._bias_regularizer = bias_regularizer

        if tf.keras.backend.image_data_format() == 'channels_last':
            bn_axis = -1
        else:
            bn_axis = 1

        # Build ResNet.
        inputs = tf.keras.Input(shape=input_specs.shape[1:])

        if stem_type == 'v0':
            x = layers.Conv2D(filters=64,
                              kernel_size=7,
                              strides=2,
                              use_bias=False,
                              padding='same',
                              kernel_initializer=self._kernel_initializer,
                              kernel_regularizer=self._kernel_regularizer,
                              bias_regularizer=self._bias_regularizer)(inputs)
            x = self._norm(axis=bn_axis,
                           momentum=norm_momentum,
                           epsilon=norm_epsilon)(x)
            x = tf_utils.get_activation(activation)(x)
        elif stem_type == 'v1':
            x = layers.Conv2D(filters=32,
                              kernel_size=3,
                              strides=2,
                              use_bias=False,
                              padding='same',
                              kernel_initializer=self._kernel_initializer,
                              kernel_regularizer=self._kernel_regularizer,
                              bias_regularizer=self._bias_regularizer)(inputs)
            x = self._norm(axis=bn_axis,
                           momentum=norm_momentum,
                           epsilon=norm_epsilon)(x)
            x = tf_utils.get_activation(activation)(x)
            x = layers.Conv2D(filters=32,
                              kernel_size=3,
                              strides=1,
                              use_bias=False,
                              padding='same',
                              kernel_initializer=self._kernel_initializer,
                              kernel_regularizer=self._kernel_regularizer,
                              bias_regularizer=self._bias_regularizer)(x)
            x = self._norm(axis=bn_axis,
                           momentum=norm_momentum,
                           epsilon=norm_epsilon)(x)
            x = tf_utils.get_activation(activation)(x)
            x = layers.Conv2D(filters=64,
                              kernel_size=3,
                              strides=1,
                              use_bias=False,
                              padding='same',
                              kernel_initializer=self._kernel_initializer,
                              kernel_regularizer=self._kernel_regularizer,
                              bias_regularizer=self._bias_regularizer)(x)
            x = self._norm(axis=bn_axis,
                           momentum=norm_momentum,
                           epsilon=norm_epsilon)(x)
            x = tf_utils.get_activation(activation)(x)
        else:
            raise ValueError('Stem type {} not supported.'.format(stem_type))

        x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)

        endpoints = {}
        for i, spec in enumerate(RESNET_SPECS[model_id]):
            if spec[0] == 'residual':
                block_fn = nn_blocks.ResidualBlock
            elif spec[0] == 'bottleneck':
                block_fn = nn_blocks.BottleneckBlock
            else:
                raise ValueError('Block fn `{}` is not supported.'.format(
                    spec[0]))
            x = self._block_group(
                inputs=x,
                filters=spec[1],
                strides=(1 if i == 0 else 2),
                block_fn=block_fn,
                block_repeats=spec[2],
                stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
                    self._init_stochastic_depth_rate, i + 2, 5),
                name='block_group_l{}'.format(i + 2))
            endpoints[str(i + 2)] = x

        self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}

        super(ResNet, self).__init__(inputs=inputs,
                                     outputs=endpoints,
                                     **kwargs)
Example #19
def get_transformer_encoder(bert_config,
                            sequence_length,
                            transformer_encoder_cls=None,
                            output_range=None):
  """Gets a 'TransformerEncoder' object.

  Args:
    bert_config: A 'modeling.BertConfig' or 'modeling.AlbertConfig' object.
    sequence_length: Maximum sequence length of the training data.
    transformer_encoder_cls: An EncoderScaffold class. If it is None, uses the
      default BERT encoder implementation.
    output_range: the sequence output range, [0, output_range). Default setting
      is to return the entire sequence output.

  Returns:
    A networks.TransformerEncoder object.
  """
  if transformer_encoder_cls is not None:
    # TODO(hongkuny): evaluate if it is better to put cfg definition in gin.
    embedding_cfg = dict(
        vocab_size=bert_config.vocab_size,
        type_vocab_size=bert_config.type_vocab_size,
        hidden_size=bert_config.hidden_size,
        seq_length=sequence_length,
        max_seq_length=bert_config.max_position_embeddings,
        initializer=tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range),
        dropout_rate=bert_config.hidden_dropout_prob,
    )
    hidden_cfg = dict(
        num_attention_heads=bert_config.num_attention_heads,
        intermediate_size=bert_config.intermediate_size,
        intermediate_activation=tf_utils.get_activation(bert_config.hidden_act),
        dropout_rate=bert_config.hidden_dropout_prob,
        attention_dropout_rate=bert_config.attention_probs_dropout_prob,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range),
    )
    kwargs = dict(
        embedding_cfg=embedding_cfg,
        hidden_cfg=hidden_cfg,
        num_hidden_instances=bert_config.num_hidden_layers,
        pooled_output_dim=bert_config.hidden_size,
        pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range))

    # Relies on gin configuration to define the Transformer encoder arguments.
    return transformer_encoder_cls(**kwargs)

  kwargs = dict(
      vocab_size=bert_config.vocab_size,
      hidden_size=bert_config.hidden_size,
      num_layers=bert_config.num_hidden_layers,
      num_attention_heads=bert_config.num_attention_heads,
      intermediate_size=bert_config.intermediate_size,
      activation=tf_utils.get_activation(bert_config.hidden_act),
      dropout_rate=bert_config.hidden_dropout_prob,
      attention_dropout_rate=bert_config.attention_probs_dropout_prob,
      sequence_length=sequence_length,
      max_sequence_length=bert_config.max_position_embeddings,
      type_vocab_size=bert_config.type_vocab_size,
      embedding_width=bert_config.embedding_size,
      initializer=tf.keras.initializers.TruncatedNormal(
          stddev=bert_config.initializer_range))
  if isinstance(bert_config, albert_configs.AlbertConfig):
    return networks.AlbertTransformerEncoder(**kwargs)
  else:
    assert isinstance(bert_config, configs.BertConfig)
    kwargs['output_range'] = output_range
    return networks.TransformerEncoder(**kwargs)
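
A hedged usage sketch that reuses the `configs` module this function itself
references (`configs.BertConfig` appears in the assert above):

bert_config = configs.BertConfig(vocab_size=30522)
encoder = get_transformer_encoder(bert_config, sequence_length=128)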
Example #20
def mb_conv_block(inputs: tf.Tensor,
                  block: BlockConfig,
                  config: ModelConfig,
                  prefix: Optional[Text] = None):
    """Mobile Inverted Residual Bottleneck.

  Args:
    inputs: the Keras input to the block
    block: BlockConfig, arguments to create a Block
    config: ModelConfig, a set of model parameters
    prefix: prefix for naming all layers

  Returns:
    the output of the block
  """
    use_se = config.use_se
    activation = tf_utils.get_activation(config.activation)
    drop_connect_rate = config.drop_connect_rate
    data_format = tf.keras.backend.image_data_format()
    use_depthwise = block.conv_type != 'no_depthwise'
    prefix = prefix or ''

    filters = block.input_filters * block.expand_ratio

    x = inputs

    if block.fused_conv:
        # If we use fused mbconv, skip expansion and use regular conv.
        x = conv2d_block(x,
                         filters,
                         config,
                         kernel_size=block.kernel_size,
                         strides=block.strides,
                         activation=activation,
                         name=prefix + 'fused')
    else:
        if block.expand_ratio != 1:
            # Expansion phase
            kernel_size = (1, 1) if use_depthwise else (3, 3)
            x = conv2d_block(x,
                             filters,
                             config,
                             kernel_size=kernel_size,
                             activation=activation,
                             name=prefix + 'expand')

        # Depthwise Convolution
        if use_depthwise:
            x = conv2d_block(x,
                             conv_filters=None,
                             config=config,
                             kernel_size=block.kernel_size,
                             strides=block.strides,
                             activation=activation,
                             depthwise=True,
                             name=prefix + 'depthwise')

    # Squeeze and Excitation phase
    if use_se:
        assert block.se_ratio is not None
        assert 0 < block.se_ratio <= 1
        num_reduced_filters = max(1, int(block.input_filters * block.se_ratio))

        if data_format == 'channels_first':
            se_shape = (filters, 1, 1)
        else:
            se_shape = (1, 1, filters)

        se = tf.keras.layers.GlobalAveragePooling2D(
            name=prefix + 'se_squeeze')(x)
        se = tf.keras.layers.Reshape(se_shape, name=prefix + 'se_reshape')(se)

        se = conv2d_block(se,
                          num_reduced_filters,
                          config,
                          use_bias=True,
                          use_batch_norm=False,
                          activation=activation,
                          name=prefix + 'se_reduce')
        se = conv2d_block(se,
                          filters,
                          config,
                          use_bias=True,
                          use_batch_norm=False,
                          activation='sigmoid',
                          name=prefix + 'se_expand')
        x = tf.keras.layers.multiply([x, se], name=prefix + 'se_excite')

    # Output phase
    x = conv2d_block(x,
                     block.output_filters,
                     config,
                     activation=None,
                     name=prefix + 'project')

    # Add identity so that quantization-aware training can insert quantization
    # ops correctly.
    x = tf.keras.layers.Activation(tf_utils.get_activation('identity'),
                                   name=prefix + 'id')(x)

    if (block.id_skip and all(s == 1 for s in block.strides)
            and block.input_filters == block.output_filters):
        if drop_connect_rate and drop_connect_rate > 0:
            # Apply dropconnect
            # The only difference between dropout and dropconnect in TF is scaling by
            # drop_connect_rate during training. See:
            # https://github.com/keras-team/keras/pull/9898#issuecomment-380577612
            x = tf.keras.layers.Dropout(drop_connect_rate,
                                        noise_shape=(None, 1, 1, 1),
                                        name=prefix + 'drop')(x)

        x = tf.keras.layers.add([x, inputs], name=prefix + 'add')

    return x
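
The drop-connect comment above is the key detail: with
noise_shape=(None, 1, 1, 1), Dropout keeps or zeroes entire examples instead
of individual units. A self-contained sketch of that behavior:

import tensorflow as tf

x = tf.ones([4, 2, 2, 3])
drop = tf.keras.layers.Dropout(rate=0.5, noise_shape=(None, 1, 1, 1))
y = drop(x, training=True)
# Each of the 4 examples is either all zeros or all 2.0: kept examples are
# rescaled by 1 / (1 - rate), which makes this per-example stochastic depth
# rather than ordinary unit-wise dropout.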
Example #21
    def _make_block(self,
                    input_tensor,
                    first_block=True,
                    filters=64,
                    stride=2,
                    radix=1,
                    avd=False,
                    avd_first=False,
                    is_first=False):
        x = input_tensor
        inplanes = input_tensor.shape[-1]
        if stride != 1 or inplanes != filters * self.block_expansion:
            short_cut = input_tensor
            if self.avg_down:
                if self.dilation == 1:
                    short_cut = AveragePooling2D(
                        pool_size=stride,
                        strides=stride,
                        padding="same",
                        data_format="channels_last")(short_cut)
                else:
                    short_cut = AveragePooling2D(
                        pool_size=1,
                        strides=1,
                        padding="same",
                        data_format="channels_last")(short_cut)
                short_cut = Conv2D(filters * self.block_expansion,
                                   kernel_size=1,
                                   strides=1,
                                   padding="same",
                                   kernel_initializer="he_normal",
                                   use_bias=False,
                                   data_format="channels_last")(short_cut)
            else:
                short_cut = Conv2D(filters * self.block_expansion,
                                   kernel_size=1,
                                   strides=stride,
                                   padding="same",
                                   kernel_initializer="he_normal",
                                   use_bias=False,
                                   data_format="channels_last")(short_cut)

            short_cut = BatchNormalization(axis=self.channel_axis,
                                           epsilon=1.001e-5)(short_cut)
        else:
            short_cut = input_tensor
        # should the above be in make layer?
        # see https://github.com/zhanghang1989/ResNeSt/blob/master/resnest/torch/resnet.py

        group_width = int(filters *
                          (self.bottleneck_width / 64.0)) * self.cardinality
        x = Conv2D(group_width,
                   kernel_size=1,
                   strides=1,
                   padding="same",
                   kernel_initializer="he_normal",
                   use_bias=False,
                   data_format="channels_last")(x)
        x = BatchNormalization(axis=self.channel_axis, epsilon=1.001e-5)(x)
        x = tf_utils.get_activation(self.activation)(x)

        avd = avd and (stride > 1 or is_first)

        if avd:
            avd_layer = AveragePooling2D(pool_size=3,
                                         strides=stride,
                                         padding="same",
                                         data_format="channels_last")
            stride = 1

        if avd and avd_first:
            x = avd_layer(x)

        if radix >= 1:
            x = self._SplAtConv2d(x,
                                  filters=group_width,
                                  kernel_size=3,
                                  stride=stride,
                                  dilation=self.dilation,
                                  groups=self.cardinality,
                                  radix=radix)
        else:
            x = Conv2D(group_width,
                       kernel_size=3,
                       strides=stride,
                       padding="same",
                       kernel_initializer="he_normal",
                       dilation_rate=self.dilation,
                       use_bias=False,
                       data_format="channels_last")(x)
            x = BatchNormalization(axis=self.channel_axis, epsilon=1.001e-5)(x)
            x = tf_utils.get_activation(self.activation)(x)

        if avd and not avd_first:
            x = avd_layer(x)
        x = Conv2D(filters * self.block_expansion,
                   kernel_size=1,
                   strides=1,
                   padding="same",
                   kernel_initializer="he_normal",
                   dilation_rate=self.dilation,
                   use_bias=False,
                   data_format="channels_last")(x)
        x = BatchNormalization(axis=self.channel_axis, epsilon=1.001e-5)(x)

        m2 = Add()([x, short_cut])
        m2 = tf_utils.get_activation(self.activation)(m2)
        return m2
Example #22
def efficientnet(image_input: tf.keras.layers.Input, config: ModelConfig):  # pytype: disable=invalid-annotation  # typed-keras
    """Creates an EfficientNet graph given the model parameters.

  This function is wrapped by the `EfficientNet` class to make a tf.keras.Model.

  Args:
    image_input: the input batch of images
    config: the model config

  Returns:
    the output of efficientnet
  """
    depth_coefficient = config.depth_coefficient
    blocks = config.blocks
    stem_base_filters = config.stem_base_filters
    top_base_filters = config.top_base_filters
    activation = tf_utils.get_activation(config.activation)
    dropout_rate = config.dropout_rate
    drop_connect_rate = config.drop_connect_rate
    num_classes = config.num_classes
    input_channels = config.input_channels
    rescale_input = config.rescale_input
    data_format = tf.keras.backend.image_data_format()
    dtype = config.dtype
    weight_decay = config.weight_decay

    x = image_input
    if data_format == 'channels_first':
        # Happens on GPU/TPU if available.
        x = tf.keras.layers.Permute((3, 1, 2))(x)
    if rescale_input:
        x = preprocessing.normalize_images(x,
                                           num_channels=input_channels,
                                           dtype=dtype,
                                           data_format=data_format)

    # Build stem
    x = conv2d_block(x,
                     round_filters(stem_base_filters, config),
                     config,
                     kernel_size=[3, 3],
                     strides=[2, 2],
                     activation=activation,
                     name='stem')

    # Build blocks
    num_blocks_total = sum(
        round_repeats(block.num_repeat, depth_coefficient) for block in blocks)
    block_num = 0

    for stack_idx, block in enumerate(blocks):
        assert block.num_repeat > 0
        # Update block input and output filters based on depth multiplier
        block = block.replace(
            input_filters=round_filters(block.input_filters, config),
            output_filters=round_filters(block.output_filters, config),
            num_repeat=round_repeats(block.num_repeat, depth_coefficient))

        # The first block needs to take care of stride and filter size increase
        drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
        config = config.replace(drop_connect_rate=drop_rate)
        block_prefix = 'stack_{}/block_0/'.format(stack_idx)
        x = mb_conv_block(x, block, config, block_prefix)
        block_num += 1
        if block.num_repeat > 1:
            block = block.replace(input_filters=block.output_filters,
                                  strides=[1, 1])

            for block_idx in range(block.num_repeat - 1):
                drop_rate = drop_connect_rate * float(
                    block_num) / num_blocks_total
                config = config.replace(drop_connect_rate=drop_rate)
                block_prefix = 'stack_{}/block_{}/'.format(
                    stack_idx, block_idx + 1)
                x = mb_conv_block(x, block, config, prefix=block_prefix)
                block_num += 1

    # Build top
    x = conv2d_block(x,
                     round_filters(top_base_filters, config),
                     config,
                     activation=activation,
                     name='top')

    # Build classifier
    x = tf.keras.layers.GlobalAveragePooling2D(name='top_pool')(x)
    if dropout_rate and dropout_rate > 0:
        x = tf.keras.layers.Dropout(dropout_rate, name='top_dropout')(x)
    x = tf.keras.layers.Dense(
        num_classes,
        kernel_initializer=DENSE_KERNEL_INITIALIZER,
        kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
        bias_regularizer=tf.keras.regularizers.l2(weight_decay),
        name='logits')(x)
    x = tf.keras.layers.Activation('softmax', name='probs')(x)

    return x
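
A hedged wrapper sketch: assuming a ModelConfig constructed with its default
fields describes a valid model (e.g. an EfficientNet-B0-style configuration),
the function can be turned into a Keras model directly.

import tensorflow as tf

image_input = tf.keras.layers.Input(shape=(224, 224, 3))
probs = efficientnet(image_input, ModelConfig())  # defaults assumed valid
model = tf.keras.Model(inputs=image_input, outputs=probs)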
Example #23
def build_encoder(config: EncoderConfig,
                  embedding_layer: Optional[tf.keras.layers.Layer] = None,
                  encoder_cls=None,
                  bypass_config: bool = False):
  """Instantiate a Transformer encoder network from EncoderConfig.

  Args:
    config: the one-of encoder config, which provides encoder parameters of a
      chosen encoder.
    embedding_layer: an external embedding layer passed to the encoder.
    encoder_cls: an external encoder cls not included in the supported encoders,
      usually used by gin.configurable.
    bypass_config: whether to ignore config instance to create the object with
      `encoder_cls`.

  Returns:
    An encoder instance.
  """
  if bypass_config:
    return encoder_cls()
  encoder_type = config.type
  encoder_cfg = config.get()
  if encoder_cls and encoder_cls.__name__ == "EncoderScaffold":
    embedding_cfg = dict(
        vocab_size=encoder_cfg.vocab_size,
        type_vocab_size=encoder_cfg.type_vocab_size,
        hidden_size=encoder_cfg.hidden_size,
        max_seq_length=encoder_cfg.max_position_embeddings,
        initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
        dropout_rate=encoder_cfg.dropout_rate,
    )
    hidden_cfg = dict(
        num_attention_heads=encoder_cfg.num_attention_heads,
        intermediate_size=encoder_cfg.intermediate_size,
        intermediate_activation=tf_utils.get_activation(
            encoder_cfg.hidden_activation),
        dropout_rate=encoder_cfg.dropout_rate,
        attention_dropout_rate=encoder_cfg.attention_dropout_rate,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
    )
    kwargs = dict(
        embedding_cfg=embedding_cfg,
        hidden_cfg=hidden_cfg,
        num_hidden_instances=encoder_cfg.num_layers,
        pooled_output_dim=encoder_cfg.hidden_size,
        pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
        return_all_layer_outputs=encoder_cfg.return_all_encoder_outputs,
        dict_outputs=True)
    return encoder_cls(**kwargs)

  if encoder_type == "any":
    encoder = encoder_cfg.BUILDER(encoder_cfg)
    if not isinstance(encoder,
                      (tf.Module, tf.keras.Model, tf.keras.layers.Layer)):
      raise ValueError("The BUILDER returns an unexpected instance. The "
                       "`build_encoder` should returns a tf.Module, "
                       "tf.keras.Model or tf.keras.layers.Layer. However, "
                       f"we get {encoder.__class__}")
    return encoder

  if encoder_type == "mobilebert":
    return networks.MobileBERTEncoder(
        word_vocab_size=encoder_cfg.word_vocab_size,
        word_embed_size=encoder_cfg.word_embed_size,
        type_vocab_size=encoder_cfg.type_vocab_size,
        max_sequence_length=encoder_cfg.max_sequence_length,
        num_blocks=encoder_cfg.num_blocks,
        hidden_size=encoder_cfg.hidden_size,
        num_attention_heads=encoder_cfg.num_attention_heads,
        intermediate_size=encoder_cfg.intermediate_size,
        intermediate_act_fn=encoder_cfg.hidden_activation,
        hidden_dropout_prob=encoder_cfg.hidden_dropout_prob,
        attention_probs_dropout_prob=encoder_cfg.attention_probs_dropout_prob,
        intra_bottleneck_size=encoder_cfg.intra_bottleneck_size,
        initializer_range=encoder_cfg.initializer_range,
        use_bottleneck_attention=encoder_cfg.use_bottleneck_attention,
        key_query_shared_bottleneck=encoder_cfg.key_query_shared_bottleneck,
        num_feedforward_networks=encoder_cfg.num_feedforward_networks,
        normalization_type=encoder_cfg.normalization_type,
        classifier_activation=encoder_cfg.classifier_activation,
        input_mask_dtype=encoder_cfg.input_mask_dtype)

  if encoder_type == "albert":
    return networks.AlbertEncoder(
        vocab_size=encoder_cfg.vocab_size,
        embedding_width=encoder_cfg.embedding_width,
        hidden_size=encoder_cfg.hidden_size,
        num_layers=encoder_cfg.num_layers,
        num_attention_heads=encoder_cfg.num_attention_heads,
        max_sequence_length=encoder_cfg.max_position_embeddings,
        type_vocab_size=encoder_cfg.type_vocab_size,
        intermediate_size=encoder_cfg.intermediate_size,
        activation=tf_utils.get_activation(encoder_cfg.hidden_activation),
        dropout_rate=encoder_cfg.dropout_rate,
        attention_dropout_rate=encoder_cfg.attention_dropout_rate,
        initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
        dict_outputs=True)

  if encoder_type == "bigbird":
    # TODO(frederickliu): Support use_gradient_checkpointing and update
    # experiments to use the EncoderScaffold only.
    if encoder_cfg.use_gradient_checkpointing:
      return bigbird_encoder.BigBirdEncoder(
          vocab_size=encoder_cfg.vocab_size,
          hidden_size=encoder_cfg.hidden_size,
          num_layers=encoder_cfg.num_layers,
          num_attention_heads=encoder_cfg.num_attention_heads,
          intermediate_size=encoder_cfg.intermediate_size,
          activation=tf_utils.get_activation(encoder_cfg.hidden_activation),
          dropout_rate=encoder_cfg.dropout_rate,
          attention_dropout_rate=encoder_cfg.attention_dropout_rate,
          num_rand_blocks=encoder_cfg.num_rand_blocks,
          block_size=encoder_cfg.block_size,
          max_position_embeddings=encoder_cfg.max_position_embeddings,
          type_vocab_size=encoder_cfg.type_vocab_size,
          initializer=tf.keras.initializers.TruncatedNormal(
              stddev=encoder_cfg.initializer_range),
          embedding_width=encoder_cfg.embedding_width,
          use_gradient_checkpointing=encoder_cfg.use_gradient_checkpointing)
    embedding_cfg = dict(
        vocab_size=encoder_cfg.vocab_size,
        type_vocab_size=encoder_cfg.type_vocab_size,
        hidden_size=encoder_cfg.hidden_size,
        max_seq_length=encoder_cfg.max_position_embeddings,
        initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
        dropout_rate=encoder_cfg.dropout_rate)
    attention_cfg = dict(
        num_heads=encoder_cfg.num_attention_heads,
        key_dim=int(encoder_cfg.hidden_size // encoder_cfg.num_attention_heads),
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
        max_rand_mask_length=encoder_cfg.max_position_embeddings,
        num_rand_blocks=encoder_cfg.num_rand_blocks,
        from_block_size=encoder_cfg.block_size,
        to_block_size=encoder_cfg.block_size,
        )
    hidden_cfg = dict(
        num_attention_heads=encoder_cfg.num_attention_heads,
        intermediate_size=encoder_cfg.intermediate_size,
        intermediate_activation=tf_utils.get_activation(
            encoder_cfg.hidden_activation),
        dropout_rate=encoder_cfg.dropout_rate,
        attention_dropout_rate=encoder_cfg.attention_dropout_rate,
        norm_first=encoder_cfg.norm_first,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
        attention_cls=layers.BigBirdAttention,
        attention_cfg=attention_cfg)
    kwargs = dict(
        embedding_cfg=embedding_cfg,
        hidden_cls=layers.TransformerScaffold,
        hidden_cfg=hidden_cfg,
        num_hidden_instances=encoder_cfg.num_layers,
        mask_cls=layers.BigBirdMasks,
        mask_cfg=dict(block_size=encoder_cfg.block_size),
        pooled_output_dim=encoder_cfg.hidden_size,
        pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
        return_all_layer_outputs=False,
        dict_outputs=True,
        layer_idx_as_attention_seed=True)
    return networks.EncoderScaffold(**kwargs)

  if encoder_type == "kernel":
    embedding_cfg = dict(
        vocab_size=encoder_cfg.vocab_size,
        type_vocab_size=encoder_cfg.type_vocab_size,
        hidden_size=encoder_cfg.hidden_size,
        max_seq_length=encoder_cfg.max_position_embeddings,
        initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
        dropout_rate=encoder_cfg.dropout_rate)
    attention_cfg = dict(
        num_heads=encoder_cfg.num_attention_heads,
        key_dim=int(encoder_cfg.hidden_size // encoder_cfg.num_attention_heads),
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
        feature_transform=encoder_cfg.feature_transform,
        num_random_features=encoder_cfg.num_random_features,
        redraw=encoder_cfg.redraw,
        is_short_seq=encoder_cfg.is_short_seq,
        begin_kernel=encoder_cfg.begin_kernel,
        scale=encoder_cfg.scale,
        )
    hidden_cfg = dict(
        num_attention_heads=encoder_cfg.num_attention_heads,
        intermediate_size=encoder_cfg.intermediate_size,
        intermediate_activation=tf_utils.get_activation(
            encoder_cfg.hidden_activation),
        dropout_rate=encoder_cfg.dropout_rate,
        attention_dropout_rate=encoder_cfg.attention_dropout_rate,
        norm_first=encoder_cfg.norm_first,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
        attention_cls=layers.KernelAttention,
        attention_cfg=attention_cfg)
    kwargs = dict(
        embedding_cfg=embedding_cfg,
        hidden_cls=layers.TransformerScaffold,
        hidden_cfg=hidden_cfg,
        num_hidden_instances=encoder_cfg.num_layers,
        mask_cls=layers.KernelMask,
        pooled_output_dim=encoder_cfg.hidden_size,
        pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
        return_all_layer_outputs=False,
        dict_outputs=True,
        layer_idx_as_attention_seed=True)
    return networks.EncoderScaffold(**kwargs)

  if encoder_type == "xlnet":
    return networks.XLNetBase(
        vocab_size=encoder_cfg.vocab_size,
        num_layers=encoder_cfg.num_layers,
        hidden_size=encoder_cfg.hidden_size,
        num_attention_heads=encoder_cfg.num_attention_heads,
        head_size=encoder_cfg.head_size,
        inner_size=encoder_cfg.inner_size,
        dropout_rate=encoder_cfg.dropout_rate,
        attention_dropout_rate=encoder_cfg.attention_dropout_rate,
        attention_type=encoder_cfg.attention_type,
        bi_data=encoder_cfg.bi_data,
        two_stream=encoder_cfg.two_stream,
        tie_attention_biases=encoder_cfg.tie_attention_biases,
        memory_length=encoder_cfg.memory_length,
        clamp_length=encoder_cfg.clamp_length,
        reuse_length=encoder_cfg.reuse_length,
        inner_activation=encoder_cfg.inner_activation,
        use_cls_mask=encoder_cfg.use_cls_mask,
        embedding_width=encoder_cfg.embedding_width,
        initializer=tf.keras.initializers.RandomNormal(
            stddev=encoder_cfg.initializer_range))

  if encoder_type == "reuse":
    embedding_cfg = dict(
        vocab_size=encoder_cfg.vocab_size,
        type_vocab_size=encoder_cfg.type_vocab_size,
        hidden_size=encoder_cfg.hidden_size,
        max_seq_length=encoder_cfg.max_position_embeddings,
        initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
        dropout_rate=encoder_cfg.dropout_rate)
    hidden_cfg = dict(
        num_attention_heads=encoder_cfg.num_attention_heads,
        inner_dim=encoder_cfg.intermediate_size,
        inner_activation=tf_utils.get_activation(
            encoder_cfg.hidden_activation),
        output_dropout=encoder_cfg.dropout_rate,
        attention_dropout=encoder_cfg.attention_dropout_rate,
        norm_first=encoder_cfg.norm_first,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
        reuse_attention=encoder_cfg.reuse_attention,
        use_relative_pe=encoder_cfg.use_relative_pe,
        pe_max_seq_length=encoder_cfg.pe_max_seq_length,
        max_reuse_layer_idx=encoder_cfg.max_reuse_layer_idx)
    kwargs = dict(
        embedding_cfg=embedding_cfg,
        hidden_cls=layers.ReuseTransformer,
        hidden_cfg=hidden_cfg,
        num_hidden_instances=encoder_cfg.num_layers,
        pooled_output_dim=encoder_cfg.hidden_size,
        pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range),
        return_all_layer_outputs=False,
        dict_outputs=True,
        feed_layer_idx=True,
        recursive=True)
    return networks.EncoderScaffold(**kwargs)

  bert_encoder_cls = networks.BertEncoder
  if encoder_type == "bert_v2":
    bert_encoder_cls = networks.BertEncoderV2

  # Uses the default BertEncoder configuration schema to create the encoder.
  # If it does not match, add a switch branch keyed on the encoder type.
  return bert_encoder_cls(
      vocab_size=encoder_cfg.vocab_size,
      hidden_size=encoder_cfg.hidden_size,
      num_layers=encoder_cfg.num_layers,
      num_attention_heads=encoder_cfg.num_attention_heads,
      intermediate_size=encoder_cfg.intermediate_size,
      activation=tf_utils.get_activation(encoder_cfg.hidden_activation),
      dropout_rate=encoder_cfg.dropout_rate,
      attention_dropout_rate=encoder_cfg.attention_dropout_rate,
      max_sequence_length=encoder_cfg.max_position_embeddings,
      type_vocab_size=encoder_cfg.type_vocab_size,
      initializer=tf.keras.initializers.TruncatedNormal(
          stddev=encoder_cfg.initializer_range),
      output_range=encoder_cfg.output_range,
      embedding_width=encoder_cfg.embedding_size,
      embedding_layer=embedding_layer,
      return_all_encoder_outputs=encoder_cfg.return_all_encoder_outputs,
      dict_outputs=True,
      norm_first=encoder_cfg.norm_first)
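
A minimal usage sketch (not part of the original listing; it assumes this is the `build_encoder` from TF Model Garden's `official.nlp.configs.encoders`, where `EncoderConfig` defaults to the `bert` one-of type):

config = EncoderConfig(type="bert")
encoder = build_encoder(config)  # falls through to the default BertEncoder branch above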
Example #24
    def __init__(self,
                 min_level: int,
                 max_level: int,
                 num_classes: int,
                 num_anchors_per_location: int,
                 num_convs: int = 4,
                 num_filters: int = 256,
                 attribute_heads: Optional[List[Dict[str, Any]]] = None,
                 use_separable_conv: bool = False,
                 activation: str = 'relu',
                 use_sync_bn: bool = False,
                 norm_momentum: float = 0.99,
                 norm_epsilon: float = 0.001,
                 kernel_regularizer: Optional[
                     tf.keras.regularizers.Regularizer] = None,
                 bias_regularizer: Optional[
                     tf.keras.regularizers.Regularizer] = None,
                 num_params_per_anchor: int = 4,
                 **kwargs):
        """Initializes a RetinaNet head.

    Args:
      min_level: An `int` number of minimum feature level.
      max_level: An `int` number of maximum feature level.
      num_classes: An `int` number of classes to predict.
      num_anchors_per_location: An `int` number of anchors per pixel
        location.
      num_convs: An `int` number that represents the number of the intermediate
        conv layers before the prediction.
      num_filters: An `int` number that represents the number of filters of the
        intermediate conv layers.
      attribute_heads: If not None, a list that contains a dict for each
        additional attribute head. Each dict consists of 3 key-value pairs:
        `name`, `type` ('regression' or 'classification'), and `size` (number
        of predicted values for each instance).
      use_separable_conv: A `bool` that indicates whether separable
        convolution layers are used.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      num_params_per_anchor: Number of parameters required to specify an anchor
        box. For example, `num_params_per_anchor` would be 4 for axis-aligned
        anchor boxes specified by their y-centers, x-centers, heights, and
        widths.
      **kwargs: Additional keyword arguments to be passed.
    """
        super(RetinaNetHead, self).__init__(**kwargs)
        self._config_dict = {
            'min_level': min_level,
            'max_level': max_level,
            'num_classes': num_classes,
            'num_anchors_per_location': num_anchors_per_location,
            'num_convs': num_convs,
            'num_filters': num_filters,
            'attribute_heads': attribute_heads,
            'use_separable_conv': use_separable_conv,
            'activation': activation,
            'use_sync_bn': use_sync_bn,
            'norm_momentum': norm_momentum,
            'norm_epsilon': norm_epsilon,
            'kernel_regularizer': kernel_regularizer,
            'bias_regularizer': bias_regularizer,
            'num_params_per_anchor': num_params_per_anchor,
        }

        if tf.keras.backend.image_data_format() == 'channels_last':
            self._bn_axis = -1
        else:
            self._bn_axis = 1
        self._activation = tf_utils.get_activation(activation)
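
A brief instantiation sketch (hypothetical values, not from the original listing; `attribute_heads` follows the `name`/`type`/`size` contract described in the docstring):

head = RetinaNetHead(
    min_level=3,
    max_level=7,
    num_classes=80,
    num_anchors_per_location=9,
    attribute_heads=[{'name': 'depth', 'type': 'regression', 'size': 1}])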
Example #25
    def _resample_with_alpha(self,
                             inputs,
                             input_width,
                             input_block_fn,
                             target_width,
                             target_num_filters,
                             target_block_fn,
                             alpha=0.5):
        """Matches resolution and feature dimension."""
        _, _, _, input_num_filters = inputs.get_shape().as_list()
        if input_block_fn == 'bottleneck':
            input_num_filters /= 4
        new_num_filters = int(input_num_filters * alpha)

        x = layers.Conv2D(filters=new_num_filters,
                          kernel_size=1,
                          strides=1,
                          use_bias=False,
                          kernel_initializer=self._kernel_initializer,
                          kernel_regularizer=self._kernel_regularizer,
                          bias_regularizer=self._bias_regularizer)(inputs)
        x = self._norm(axis=self._bn_axis,
                       momentum=self._norm_momentum,
                       epsilon=self._norm_epsilon)(x)
        x = tf_utils.get_activation(self._activation_fn)(x)

        # Spatial resampling.
        if input_width > target_width:
            x = layers.Conv2D(filters=new_num_filters,
                              kernel_size=3,
                              strides=2,
                              padding='SAME',
                              use_bias=False,
                              kernel_initializer=self._kernel_initializer,
                              kernel_regularizer=self._kernel_regularizer,
                              bias_regularizer=self._bias_regularizer)(x)
            x = self._norm(axis=self._bn_axis,
                           momentum=self._norm_momentum,
                           epsilon=self._norm_epsilon)(x)
            x = tf_utils.get_activation(self._activation_fn)(x)
            input_width /= 2
            while input_width > target_width:
                x = layers.MaxPool2D(pool_size=3, strides=2, padding='SAME')(x)
                input_width /= 2
        elif input_width < target_width:
            scale = target_width // input_width
            x = spatial_transform_ops.nearest_upsampling(x, scale=scale)

        # Last 1x1 conv to match filter size.
        if target_block_fn == 'bottleneck':
            target_num_filters *= 4
        x = layers.Conv2D(filters=target_num_filters,
                          kernel_size=1,
                          strides=1,
                          use_bias=False,
                          kernel_initializer=self._kernel_initializer,
                          kernel_regularizer=self._kernel_regularizer,
                          bias_regularizer=self._bias_regularizer)(x)
        x = self._norm(axis=self._bn_axis,
                       momentum=self._norm_momentum,
                       epsilon=self._norm_epsilon)(x)
        return x
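
A small sketch of the spatial-resampling arithmetic above, written as a hypothetical helper that mirrors the strided conv plus MaxPool2D loop:

def num_pool_steps(input_width, target_width):
    """Counts the MaxPool2D halvings needed after the strided 3x3 conv."""
    steps = 0
    width = input_width // 2  # the strided conv performs the first halving
    while width > target_width:
        steps += 1
        width //= 2
    return steps

assert num_pool_steps(64, 16) == 1  # 64 -> conv -> 32 -> pool -> 16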
Example #26
  def __init__(self,
               model_id,
               output_stride,
               input_specs=layers.InputSpec(shape=[None, None, None, 3]),
               stem_type='v0',
               se_ratio=None,
               init_stochastic_depth_rate=0.0,
               multigrid=None,
               last_stage_repeats=1,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               **kwargs):
    """Initializes a ResNet model with DeepLab modification.

    Args:
      model_id: An `int` specifies depth of ResNet backbone model.
      output_stride: An `int` of output stride, ratio of input to output
        resolution.
      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
      stem_type: A `str` of stem type. Can be `v0` or `v1`. `v1` replaces 7x7
        conv by 3 3x3 convs.
      se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
      init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
      multigrid: A tuple of the same length as the number of blocks in the last
        resnet stage.
      last_stage_repeats: An `int` that specifies how many times last stage is
        repeated.
      activation: A `str` name of the activation function.
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_initializer: A str for kernel initializer of convolutional layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
        Default to None.
      **kwargs: Additional keyword arguments to be passed.
    """
    self._model_id = model_id
    self._output_stride = output_stride
    self._input_specs = input_specs
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    if use_sync_bn:
      self._norm = layers.experimental.SyncBatchNormalization
    else:
      self._norm = layers.BatchNormalization
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._stem_type = stem_type
    self._se_ratio = se_ratio
    self._init_stochastic_depth_rate = init_stochastic_depth_rate

    if tf.keras.backend.image_data_format() == 'channels_last':
      bn_axis = -1
    else:
      bn_axis = 1

    # Build ResNet.
    inputs = tf.keras.Input(shape=input_specs.shape[1:])

    if stem_type == 'v0':
      x = layers.Conv2D(
          filters=64,
          kernel_size=7,
          strides=2,
          use_bias=False,
          padding='same',
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)(
              inputs)
      x = self._norm(
          axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
              x)
      x = tf_utils.get_activation(activation)(x)
    elif stem_type == 'v1':
      x = layers.Conv2D(
          filters=64,
          kernel_size=3,
          strides=2,
          use_bias=False,
          padding='same',
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)(
              inputs)
      x = self._norm(
          axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
              x)
      x = tf_utils.get_activation(activation)(x)
      x = layers.Conv2D(
          filters=64,
          kernel_size=3,
          strides=1,
          use_bias=False,
          padding='same',
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)(
              x)
      x = self._norm(
          axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
              x)
      x = tf_utils.get_activation(activation)(x)
      x = layers.Conv2D(
          filters=128,
          kernel_size=3,
          strides=1,
          use_bias=False,
          padding='same',
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)(
              x)
      x = self._norm(
          axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
              x)
      x = tf_utils.get_activation(activation)(x)
    else:
      raise ValueError('Stem type {} not supported.'.format(stem_type))

    x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)

    # `math.log2` replaces the deprecated `np.math.log2` alias.
    normal_resnet_stage = int(math.log2(self._output_stride)) - 2

    endpoints = {}
    for i in range(normal_resnet_stage + 1):
      spec = RESNET_SPECS[model_id][i]
      if spec[0] == 'bottleneck':
        block_fn = nn_blocks.BottleneckBlock
      else:
        raise ValueError('Block fn `{}` is not supported.'.format(spec[0]))
      x = self._block_group(
          inputs=x,
          filters=spec[1],
          strides=(1 if i == 0 else 2),
          dilation_rate=1,
          block_fn=block_fn,
          block_repeats=spec[2],
          stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
              self._init_stochastic_depth_rate, i + 2, 4 + last_stage_repeats),
          name='block_group_l{}'.format(i + 2))
      endpoints[str(i + 2)] = x

    dilation_rate = 2
    for i in range(normal_resnet_stage + 1, 3 + last_stage_repeats):
      spec = RESNET_SPECS[model_id][i] if i < 3 else RESNET_SPECS[model_id][-1]
      if spec[0] == 'bottleneck':
        block_fn = nn_blocks.BottleneckBlock
      else:
        raise ValueError('Block fn `{}` is not supported.'.format(spec[0]))
      x = self._block_group(
          inputs=x,
          filters=spec[1],
          strides=1,
          dilation_rate=dilation_rate,
          block_fn=block_fn,
          block_repeats=spec[2],
          stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
              self._init_stochastic_depth_rate, i + 2, 4 + last_stage_repeats),
          multigrid=multigrid if i >= 3 else None,
          name='block_group_l{}'.format(i + 2))
      dilation_rate *= 2

    endpoints[str(normal_resnet_stage + 2)] = x

    self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}

    super(DilatedResNet, self).__init__(
        inputs=inputs, outputs=endpoints, **kwargs)
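
A minimal instantiation sketch (hypothetical values; assumes `RESNET_SPECS` defines an entry for `model_id=50`):

backbone = DilatedResNet(model_id=50, output_stride=16)
# output_stride=16 keeps features at 1/16 of the input resolution; later
# stages switch from striding to dilated convolutions, as in the loop above.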
Example #27
    def __init__(
            self,
            model_id: int,
            temporal_strides: List[int],
            temporal_kernel_sizes: List[Tuple[int]],
            use_self_gating: Optional[List[bool]] = None,
            input_specs=layers.InputSpec(shape=[None, None, None, None, 3]),
            stem_conv_temporal_kernel_size=5,
            stem_conv_temporal_stride=2,
            stem_pool_temporal_stride=2,
            activation='relu',
            use_sync_bn=False,
            norm_momentum=0.99,
            norm_epsilon=0.001,
            kernel_initializer='VarianceScaling',
            kernel_regularizer=None,
            bias_regularizer=None,
            **kwargs):
        """ResNet3D initialization function.

    Args:
      model_id: `int` depth of ResNet backbone model.
      temporal_strides: a list of integers that specifies the temporal strides
        for all 3d blocks.
      temporal_kernel_sizes: a list of tuples that specifies the temporal kernel
        sizes for all 3d blocks in different block groups.
      use_self_gating: a list of booleans specifying whether to apply a
        self-gating module in each block group. If None, self-gating is not
        applied.
      input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
      stem_conv_temporal_kernel_size: `int` temporal kernel size for the first
        conv layer.
      stem_conv_temporal_stride: `int` temporal stride for the first conv layer.
      stem_pool_temporal_stride: `int` temporal stride for the first pool layer.
      activation: `str` name of the activation function.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      **kwargs: keyword arguments to be passed.
    """
        self._model_id = model_id
        self._temporal_strides = temporal_strides
        self._temporal_kernel_sizes = temporal_kernel_sizes
        self._input_specs = input_specs
        self._stem_conv_temporal_kernel_size = stem_conv_temporal_kernel_size
        self._stem_conv_temporal_stride = stem_conv_temporal_stride
        self._stem_pool_temporal_stride = stem_pool_temporal_stride
        self._use_self_gating = use_self_gating
        self._use_sync_bn = use_sync_bn
        self._activation = activation
        self._norm_momentum = norm_momentum
        self._norm_epsilon = norm_epsilon
        if use_sync_bn:
            self._norm = layers.experimental.SyncBatchNormalization
        else:
            self._norm = layers.BatchNormalization
        self._kernel_initializer = kernel_initializer
        self._kernel_regularizer = kernel_regularizer
        self._bias_regularizer = bias_regularizer
        if tf.keras.backend.image_data_format() == 'channels_last':
            bn_axis = -1
        else:
            bn_axis = 1

        # Build ResNet3D backbone.
        inputs = tf.keras.Input(shape=input_specs.shape[1:])

        # Build stem.
        x = layers.Conv3D(filters=64,
                          kernel_size=[stem_conv_temporal_kernel_size, 7, 7],
                          strides=[stem_conv_temporal_stride, 2, 2],
                          use_bias=False,
                          padding='same',
                          kernel_initializer=self._kernel_initializer,
                          kernel_regularizer=self._kernel_regularizer,
                          bias_regularizer=self._bias_regularizer)(inputs)
        x = self._norm(axis=bn_axis,
                       momentum=norm_momentum,
                       epsilon=norm_epsilon)(x)
        x = tf_utils.get_activation(activation)(x)

        temporal_kernel_size = 1 if stem_pool_temporal_stride == 1 else 3
        x = layers.MaxPool3D(pool_size=[temporal_kernel_size, 3, 3],
                             strides=[stem_pool_temporal_stride, 2, 2],
                             padding='same')(x)

        # Build intermediate blocks and endpoints.
        resnet_specs = RESNET_SPECS[model_id]
        if len(temporal_strides) != len(resnet_specs) or len(
                temporal_kernel_sizes) != len(resnet_specs):
            raise ValueError(
                'temporal_strides and temporal_kernel_sizes must each have '
                'one entry per block group in resnet_specs.')

        endpoints = {}
        for i, resnet_spec in enumerate(resnet_specs):
            if resnet_spec[0] == 'bottleneck3d':
                block_fn = nn_blocks_3d.BottleneckBlock3D
            else:
                raise ValueError('Block fn `{}` is not supported.'.format(
                    resnet_spec[0]))

            x = self._block_group(
                inputs=x,
                filters=resnet_spec[1],
                temporal_kernel_sizes=temporal_kernel_sizes[i],
                temporal_strides=temporal_strides[i],
                spatial_strides=(1 if i == 0 else 2),
                block_fn=block_fn,
                block_repeats=resnet_spec[2],
                use_self_gating=use_self_gating[i]
                if use_self_gating else False,
                name='block_group_l{}'.format(i + 2))
            endpoints[str(i + 2)] = x

        self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}

        super(ResNet3D, self).__init__(inputs=inputs,
                                       outputs=endpoints,
                                       **kwargs)
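
A minimal instantiation sketch (hypothetical values; assumes `RESNET_SPECS[50]` has four block groups with 3, 4, 6 and 3 blocks, so each tuple carries one temporal kernel size per block):

backbone = ResNet3D(
    model_id=50,
    temporal_strides=[1, 1, 1, 1],
    temporal_kernel_sizes=[(3, 3, 3), (3, 1, 3, 1), (3, 1, 3, 1, 3, 1),
                           (1, 3, 1)])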
Example #28
    def _build_scale_permuted_network(self,
                                      net,
                                      input_width,
                                      weighted_fusion=False):
        """Builds scale-permuted network."""
        net_sizes = [
            int(math.ceil(input_width / 2)),
            int(math.ceil(input_width / 2**2))
        ]
        num_outgoing_connections = [0] * len(net)

        endpoints = {}
        for i, block_spec in enumerate(self._block_specs):
            # Update block level if it is larger than max_level to avoid building
            # blocks smaller than requested.
            block_spec.level = min(block_spec.level, self._max_level)
            # Find out specs for the target block.
            target_width = int(math.ceil(input_width / 2**block_spec.level))
            target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] *
                                     self._filter_size_scale)

            # Resample then merge input0 and input1.
            parents = []
            input0 = block_spec.input_offsets[0]
            input1 = block_spec.input_offsets[1]

            x0 = self._resample_with_sepconv(
                inputs=net[input0],
                input_width=net_sizes[input0],
                target_width=target_width,
                target_num_filters=target_num_filters)
            parents.append(x0)
            num_outgoing_connections[input0] += 1

            x1 = self._resample_with_sepconv(
                inputs=net[input1],
                input_width=net_sizes[input1],
                target_width=target_width,
                target_num_filters=target_num_filters)
            parents.append(x1)
            num_outgoing_connections[input1] += 1

            # Merge 0 outdegree blocks to the output block.
            if block_spec.is_output:
                for j, (j_feat, j_connections) in enumerate(
                        zip(net, num_outgoing_connections)):
                    if j_connections == 0 and (j_feat.shape[2] == target_width
                                               and j_feat.shape[3]
                                               == x0.shape[3]):
                        parents.append(j_feat)
                        num_outgoing_connections[j] += 1

            if weighted_fusion:
                dtype = parents[0].dtype
                parent_weights = [
                    tf.nn.relu(
                        tf.cast(tf.Variable(1.0,
                                            name='block{}_fusion{}'.format(
                                                i, j)),
                                dtype=dtype)) for j in range(len(parents))
                ]
                weights_sum = parent_weights[0]
                for adder in parent_weights[1:]:
                    weights_sum = layers.Add()([weights_sum, adder])

                parents = [
                    parents[i] * parent_weights[i] / (weights_sum + 0.0001)
                    for i in range(len(parents))
                ]

            # Fuse all parent nodes then build a new block.
            x = parents[0]
            for adder in parents[1:]:
                x = layers.Add()([x, adder])
            x = tf_utils.get_activation(self._activation,
                                        use_keras_layer=True)(x)
            x = self._block_group(
                inputs=x,
                in_filters=target_num_filters,
                out_filters=target_num_filters,
                strides=1,
                se_ratio=self._se_ratio,
                expand_ratio=self._expand_ratio,
                block_repeats=self._block_repeats,
                stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
                    self._init_stochastic_depth_rate, i + 1,
                    len(self._block_specs)),
                name='scale_permuted_block_{}'.format(i + 1))

            net.append(x)
            net_sizes.append(target_width)
            num_outgoing_connections.append(0)

            # Save output feats.
            if block_spec.is_output:
                if block_spec.level in endpoints:
                    raise ValueError(
                        'Duplicate feats found for output level {}.'.format(
                            block_spec.level))
                if (block_spec.level < self._min_level
                        or block_spec.level > self._max_level):
                    logging.warning(
                        'SpineNet output level out of range [min_level, max_level] = [%s, %s]; it will not be used for further processing.',
                        self._min_level, self._max_level)
                endpoints[str(block_spec.level)] = x

        return endpoints
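
A self-contained sketch of the weighted-fusion rule above (hypothetical tensors): each parent is scaled by relu(w_j) / (sum_k relu(w_k) + 1e-4) before the additive merge:

import tensorflow as tf

p0 = tf.ones([1, 8, 8, 4])
p1 = 2.0 * tf.ones([1, 8, 8, 4])
w = [tf.nn.relu(tf.constant(1.0)), tf.nn.relu(tf.constant(3.0))]
total = w[0] + w[1]
fused = p0 * w[0] / (total + 0.0001) + p1 * w[1] / (total + 0.0001)
# fused is approximately 0.25 * p0 + 0.75 * p1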
Example #29
    def __init__(self,
                 input_specs: Mapping[str, tf.TensorShape],
                 min_level: int = 3,
                 max_level: int = 7,
                 num_filters: int = 256,
                 fusion_type: str = 'sum',
                 use_separable_conv: bool = False,
                 activation: str = 'relu',
                 use_sync_bn: bool = False,
                 norm_momentum: float = 0.99,
                 norm_epsilon: float = 0.001,
                 kernel_initializer: str = 'VarianceScaling',
                 kernel_regularizer: Optional[
                     tf.keras.regularizers.Regularizer] = None,
                 bias_regularizer: Optional[
                     tf.keras.regularizers.Regularizer] = None,
                 **kwargs):
        """Initializes a Feature Pyramid Network (FPN).

    Args:
      input_specs: A `dict` of input specifications. A dictionary consists of
        {level: TensorShape} from a backbone.
      min_level: An `int` of minimum level in FPN output feature maps.
      max_level: An `int` of maximum level in FPN output feature maps.
      num_filters: An `int` number of filters in FPN layers.
      fusion_type: A `str` of `sum` or `concat`. Whether performing sum or
        concat for feature fusion.
      use_separable_conv: A `bool`. If True, use separable convolution for
        the convolutions in FPN layers.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_initializer: A `str` name of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
        self._config_dict = {
            'input_specs': input_specs,
            'min_level': min_level,
            'max_level': max_level,
            'num_filters': num_filters,
            'fusion_type': fusion_type,
            'use_separable_conv': use_separable_conv,
            'activation': activation,
            'use_sync_bn': use_sync_bn,
            'norm_momentum': norm_momentum,
            'norm_epsilon': norm_epsilon,
            'kernel_initializer': kernel_initializer,
            'kernel_regularizer': kernel_regularizer,
            'bias_regularizer': bias_regularizer,
        }
        if use_separable_conv:
            conv2d = tf.keras.layers.SeparableConv2D
        else:
            conv2d = tf.keras.layers.Conv2D
        if use_sync_bn:
            norm = tf.keras.layers.experimental.SyncBatchNormalization
        else:
            norm = tf.keras.layers.BatchNormalization
        activation_fn = tf.keras.layers.Activation(
            tf_utils.get_activation(activation))

        # Build input feature pyramid.
        if tf.keras.backend.image_data_format() == 'channels_last':
            bn_axis = -1
        else:
            bn_axis = 1

        # Get input feature pyramid from backbone.
        logging.info('FPN input_specs: %s', input_specs)
        inputs = self._build_input_pyramid(input_specs, min_level)
        backbone_max_level = min(int(max(inputs.keys())), max_level)

        # Build lateral connections.
        feats_lateral = {}
        for level in range(min_level, backbone_max_level + 1):
            feats_lateral[str(level)] = conv2d(
                filters=num_filters,
                kernel_size=1,
                padding='same',
                kernel_initializer=kernel_initializer,
                kernel_regularizer=kernel_regularizer,
                bias_regularizer=bias_regularizer)(inputs[str(level)])

        # Build top-down path.
        feats = {
            str(backbone_max_level): feats_lateral[str(backbone_max_level)]
        }
        for level in range(backbone_max_level - 1, min_level - 1, -1):
            feat_a = spatial_transform_ops.nearest_upsampling(
                feats[str(level + 1)], 2)
            feat_b = feats_lateral[str(level)]

            if fusion_type == 'sum':
                feats[str(level)] = feat_a + feat_b
            elif fusion_type == 'concat':
                feats[str(level)] = tf.concat([feat_a, feat_b], axis=-1)
            else:
                raise ValueError(
                    'Fusion type {} not supported.'.format(fusion_type))

        # TODO(xianzhi): consider removing the bias in conv2d.
        # Build post-hoc 3x3 convolution kernel.
        for level in range(min_level, backbone_max_level + 1):
            feats[str(level)] = conv2d(filters=num_filters,
                                       strides=1,
                                       kernel_size=3,
                                       padding='same',
                                       kernel_initializer=kernel_initializer,
                                       kernel_regularizer=kernel_regularizer,
                                       bias_regularizer=bias_regularizer)(
                                           feats[str(level)])

        # TODO(xianzhi): consider removing the bias in conv2d.
        # Build coarser FPN levels introduced for RetinaNet.
        for level in range(backbone_max_level + 1, max_level + 1):
            feats_in = feats[str(level - 1)]
            if level > backbone_max_level + 1:
                feats_in = activation_fn(feats_in)
            feats[str(level)] = conv2d(
                filters=num_filters,
                strides=2,
                kernel_size=3,
                padding='same',
                kernel_initializer=kernel_initializer,
                kernel_regularizer=kernel_regularizer,
                bias_regularizer=bias_regularizer)(feats_in)

        # Apply batch norm layers.
        for level in range(min_level, max_level + 1):
            feats[str(level)] = norm(axis=bn_axis,
                                     momentum=norm_momentum,
                                     epsilon=norm_epsilon)(feats[str(level)])

        self._output_specs = {
            str(level): feats[str(level)].get_shape()
            for level in range(min_level, max_level + 1)
        }

        super(FPN, self).__init__(inputs=inputs, outputs=feats, **kwargs)
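
A minimal instantiation sketch (hypothetical backbone shapes following the `{level: TensorShape}` contract in the docstring; the `_build_input_pyramid` helper used above is assumed but not shown):

input_specs = {
    '3': tf.TensorShape([None, 64, 64, 256]),
    '4': tf.TensorShape([None, 32, 32, 512]),
    '5': tf.TensorShape([None, 16, 16, 1024]),
}
fpn = FPN(input_specs=input_specs, min_level=3, max_level=7)
# Levels 6 and 7 come from the strided coarser-level convs built above.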
Example #30
  def __init__(self,
               num_classes,
               upsample_factor=2,
               num_convs=4,
               num_filters=256,
               use_separable_conv=False,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               kernel_regularizer=None,
               bias_regularizer=None,
               class_agnostic=False,
               **kwargs):
    """Initialize params to build the mask head.

    Args:
      num_classes: `int`, the number of classes.
      upsample_factor: `int`, >= 1, the upsample factor to generate the
        final predicted masks.
      num_convs: `int` number that represents the number of the intermediate
        conv layers before the mask prediction layers.
      num_filters: `int` number that represents the number of filters of the
        intermediate conv layers.
      use_separable_conv: `bool`, indicating whether separable conv layers
        are used.
      activation: `string`, indicating which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: `bool`, whether to use synchronized batch normalization
        across different replicas.
      norm_momentum: `float`, the momentum parameter of the normalization
        layers.
      norm_epsilon: `float`, the epsilon parameter of the normalization layers.
      kernel_regularizer: `tf.keras.regularizers.Regularizer` object for layer
        kernel.
      bias_regularizer: `tf.keras.regularizers.Regularizer` object for bias.
      class_agnostic: `bool`, if set, we use a single channel mask head that
        is shared between all classes.
      **kwargs: other keyword arguments passed to Layer.
    """
    super(MaskHead, self).__init__(**kwargs)
    self._config_dict = {
        'num_classes': num_classes,
        'upsample_factor': upsample_factor,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
        'class_agnostic': class_agnostic
    }

    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)
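
A brief instantiation sketch (hypothetical values, not from the original listing):

mask_head = MaskHead(num_classes=91, upsample_factor=2, class_agnostic=False)
# With class_agnostic=True the head would predict one mask channel shared
# across all classes instead of one per class.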