Example #1
def _maybe_squeeze_and_excite(
    params,
    input_filters_or_mask,
    inner_activation,
    gating_activation,
    enabled
):
  """Generate a squeeze-and-excite layer or identity function."""
  def make_squeeze_and_excite():
    return _squeeze_and_excite(
        params=params,
        input_filters_or_mask=input_filters_or_mask,
        inner_activation=inner_activation,
        gating_activation=gating_activation)

  # We use explicit boolean comparisons (rather than relying on truthiness)
  # to make sure a user doesn't pass in a bad configuration like
  # enabled=OneOf([False, True, 42]).
  if isinstance(enabled, bool):
    return make_squeeze_and_excite() if enabled else layers.Identity()
  elif isinstance(enabled, schema.OneOf):
    options = []
    for choice in enabled.choices:
      if choice is True:
        options.append(make_squeeze_and_excite())
      elif choice is False:
        options.append(layers.Identity())
      else:
        raise ValueError('Unsupported choice in "enabled": {}'.format(choice))
    return layers.maybe_switch_v2(enabled.mask, options)
  else:
    raise ValueError('Unsupported value for "enabled": {}'.format(enabled))
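
The OneOf branch defers the choice to layers.maybe_switch_v2, which selects among the options using enabled.mask. A minimal sketch of that kind of mask-weighted selection, assuming a one-hot float mask (switch_by_mask is a hypothetical stand-in for illustration, not the library's API):

import tensorflow as tf

def switch_by_mask(mask, branch_outputs):
  # Weighted sum over stacked branch outputs; with a one-hot mask this
  # selects exactly one branch while keeping the graph differentiable.
  stacked = tf.stack(branch_outputs, axis=0)        # [num_branches, ...]
  weights = tf.reshape(mask, [-1] + [1] * (stacked.shape.rank - 1))
  return tf.reduce_sum(weights * stacked, axis=0)

x = tf.ones([1, 8, 8, 16])
mask = tf.constant([0.0, 1.0])                      # one-hot: pick branch 1
out = switch_by_mask(mask, [tf.zeros_like(x), x])   # equals x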
Example #2
def _build_residual_spec(params, layer_spec, input_filters, output_filters,
                         filters_base):
    """Builds a layers.Layer implementation for ResidualSpec specification."""
    def can_optimize_residual_spec(layer):
        """Returns true if we can replace residual layer with an identity."""
        can_optimize_oneof = (isinstance(layer, schema.OneOf)
                              and len(layer.choices) == 1 and isinstance(
                                  layer.choices[0], basic_specs.ZeroSpec))
        can_optimize_zerospec = isinstance(layer, basic_specs.ZeroSpec)
        return can_optimize_oneof or can_optimize_zerospec

    if can_optimize_residual_spec(layer_spec.layer):
        return layers.Identity()
    layer = _build_layer(params, layer_spec.layer, input_filters,
                         output_filters, filters_base)
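    # ParallelSum applies each branch to the same input and sums the results,
    # i.e. layer(x) + x: the Identity branch is the residual connection.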
    return layers.ParallelSum([layer, layers.Identity()])
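
A quick sanity check of the rewrite rule above, in plain NumPy (residual and zero_branch are illustrative names, not part of the codebase): a residual block computes branch(x) + x, so a branch that is guaranteed to be a ZeroSpec reduces the whole block to an identity.

import numpy as np

def residual(branch, x):
  # A residual block: elementwise sum of the branch output and its input.
  return branch(x) + x

x = np.arange(4.0)
zero_branch = lambda t: np.zeros_like(t)  # stand-in for a ZeroSpec layer
assert np.array_equal(residual(zero_branch, x), x)  # zero branch == identity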
Example #3
def _squeeze_and_excite(params,
                        input_filters_or_mask,
                        inner_activation,
                        gating_activation):
  """Generate a squeeze-and-excite layer."""
  # We provide two code paths:
  # 1. For the case where the number of input filters is known at graph
  #    construction time, and input_filters_or_mask is an int. This typically
  #    happens during stand-alone model training.
  # 2. For the case where the number of input filters is not known until
  #    runtime, and input_filters_or_mask is a 1D float tensor. This often
  #    happens during an architecture search.
  if isinstance(input_filters_or_mask, int):
    input_filters = input_filters_or_mask
    hidden_filters = search_space_utils.make_divisible(
        input_filters * _SQUEEZE_AND_EXCITE_RATIO,
        divisor=params['filters_base'])

    return layers.ParallelProduct([
        layers.Identity(),
        layers.Sequential([
            layers.GlobalAveragePool(keepdims=True),
            layers.Conv2D(
                filters=hidden_filters,
                kernel_size=(1, 1),
                kernel_initializer=params['kernel_initializer'],
                kernel_regularizer=params['kernel_regularizer'],
                use_bias=True),
            inner_activation,
            layers.Conv2D(
                filters=input_filters,
                kernel_size=(1, 1),
                kernel_initializer=params['kernel_initializer'],
                kernel_regularizer=params['kernel_regularizer'],
                use_bias=True),
            gating_activation,
        ]),
    ])
  else:
    input_mask = input_filters_or_mask
    input_filters = tf.reduce_sum(input_mask)
    hidden_filters = search_space_utils.tf_make_divisible(
        input_filters * _SQUEEZE_AND_EXCITE_RATIO,
        divisor=params['filters_base'])

    max_input_filters = int(input_mask.shape[0])
    max_hidden_filters = search_space_utils.make_divisible(
        max_input_filters * _SQUEEZE_AND_EXCITE_RATIO,
        divisor=params['filters_base'])

    hidden_mask = tf.sequence_mask(
        hidden_filters, max_hidden_filters, dtype=tf.float32)

    return layers.ParallelProduct([
        layers.Identity(),
        layers.Sequential([
            layers.GlobalAveragePool(keepdims=True),
            layers.MaskedConv2D(
                input_mask=input_mask,
                output_mask=hidden_mask,
                kernel_size=(1, 1),
                kernel_initializer=params['kernel_initializer'],
                kernel_regularizer=params['kernel_regularizer'],
                use_bias=True),
            inner_activation,
            layers.MaskedConv2D(
                input_mask=hidden_mask,
                output_mask=input_mask,
                kernel_size=(1, 1),
                kernel_initializer=params['kernel_initializer'],
                kernel_regularizer=params['kernel_regularizer'],
                use_bias=True),
            gating_activation,
        ])
    ])
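
Both paths round the squeezed filter count to a multiple of params['filters_base']. A sketch of that rounding plus the masked path, assuming the canonical MobileNet-style make_divisible and illustrative values _SQUEEZE_AND_EXCITE_RATIO = 0.25 and filters_base = 8 (the real search_space_utils helpers may differ in details):

import tensorflow as tf

def make_divisible(value, divisor, min_value=None):
  # Round to the nearest multiple of divisor, never dropping below
  # min_value and never rounding down by more than 10%.
  if min_value is None:
    min_value = divisor
  new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
  if new_value < 0.9 * value:
    new_value += divisor
  return new_value

input_mask = tf.sequence_mask(24, 64, dtype=tf.float32)    # 24 of 64 active
input_filters = tf.reduce_sum(input_mask)                  # 24.0 at runtime
hidden_filters = make_divisible(24 * 0.25, divisor=8)      # -> 8
max_hidden_filters = make_divisible(64 * 0.25, divisor=8)  # -> 16
hidden_mask = tf.sequence_mask(
    hidden_filters, max_hidden_filters, dtype=tf.float32)  # first 8 of 16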