Example No. 1
def ssim(self, x, y):
    """Computes a differentiable structured image similarity measure."""
    c1 = 0.01**2
    c2 = 0.03**2
    mu_x = slim.avg_pool2d(x, 3, 1, 'VALID')
    mu_y = slim.avg_pool2d(y, 3, 1, 'VALID')
    sigma_x = slim.avg_pool2d(x**2, 3, 1, 'VALID') - mu_x**2
    sigma_y = slim.avg_pool2d(y**2, 3, 1, 'VALID') - mu_y**2
    sigma_xy = slim.avg_pool2d(x * y, 3, 1, 'VALID') - mu_x * mu_y
    ssim_n = (2 * mu_x * mu_y + c1) * (2 * sigma_xy + c2)
    ssim_d = (mu_x**2 + mu_y**2 + c1) * (sigma_x + sigma_y + c2)
    ssim = ssim_n / ssim_d
    return tf.clip_by_value((1 - ssim) / 2, 0, 1)
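A quick sanity check, as a hedged sketch: the function above was lifted from a class and never reads `self`, so it can be exercised directly. Identical inputs give SSIM = 1 everywhere and therefore a loss of 0. Assumes graph-mode TF1 via `tensorflow.compat.v1` and the `tf_slim` package bound to `slim`.

import numpy as np
import tensorflow.compat.v1 as tf
import tf_slim as slim

tf.disable_eager_execution()

x = tf.constant(np.random.rand(2, 32, 32, 3).astype(np.float32))
loss_map = ssim(None, x, x)  # `self` is unused, so None is fine
with tf.Session() as sess:
    # Identical images -> SSIM == 1 -> loss == (1 - 1) / 2 == 0.
    print(sess.run(tf.reduce_mean(loss_map)))  # ~0.0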
Example No. 2
def _build_nas_aux_head(inputs, dimension, data_format):
    """Builds the auxiliary head described in the NAS paper."""
    shape = inputs.shape
    if shape.rank < 4:
        return None
    shape = shape.as_list()
    shape = shape[1:3] if data_format == "NHWC" else shape[2:4]
    if np.any(np.array(shape) < np.array([5, 5])):
        return None

    with tf.compat.v1.variable_scope("aux_logits"):
        with arg_scope(DATA_FORMAT_OPS, data_format=data_format):
            aux_logits = tf_slim.avg_pool2d(inputs, [5, 5],
                                            stride=3,
                                            padding="SAME")
            aux_logits = tf_slim.conv2d(aux_logits, 128, [1, 1], scope="proj")
            aux_logits = tf_slim.batch_norm(aux_logits, scope="aux_bn0")
            aux_logits = tf.nn.relu6(aux_logits)
            # Shape of feature map before the final layer.
            shape = aux_logits.shape
            shape = shape[1:3] if data_format == "NHWC" else shape[2:4]
            aux_logits = tf_slim.conv2d(aux_logits,
                                        768,
                                        shape,
                                        padding="VALID")
            aux_logits = tf_slim.batch_norm(aux_logits, scope="aux_bn1")
            aux_logits = tf.nn.relu6(aux_logits)
            aux_logits = tf.keras.layers.Flatten()(aux_logits)
            aux_logits = tf_slim.fully_connected(aux_logits, dimension)

    return aux_logits
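A hedged call sketch: because the helper returns None for feature maps smaller than 5x5, the aux head can be attached conditionally. `features` is a hypothetical NHWC activation tensor, and the module's `DATA_FORMAT_OPS` arg-scope list is assumed to be in scope.

features = tf.compat.v1.placeholder(tf.float32, [None, 14, 14, 256])
aux = _build_nas_aux_head(features, dimension=1000, data_format="NHWC")
if aux is not None:
    print(aux.shape)  # (?, 1000)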
Example No. 3
def block_inception_c(inputs, scope=None, reuse=None):
  """Builds Inception-C block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(
        scope, 'BlockInceptionC', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = tf.concat(axis=3, values=[
            slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'),
            slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')])
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
        branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1')
        branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3')
        branch_2 = tf.concat(axis=3, values=[
            slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'),
            slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')])
      with tf.variable_scope('Branch_3'):
        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1')
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
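A hedged shape sketch: every branch runs at stride 1 with SAME padding, so the 8x8 grid is preserved and the channel concat yields 256 + 512 + 512 + 256 = 1536 outputs.

inputs = tf.placeholder(tf.float32, [None, 8, 8, 1536])
net = block_inception_c(inputs)  # (?, 8, 8, 1536)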
Example No. 4
def model(
    inputs,
    is_training = True,
    dropout_keep_prob = 0.8,
    reuse = None,
    scope = 'InceptionV4',
    bottleneck_dim = 512,
):
    # inputs = tf.image.grayscale_to_rgb(inputs)
    with tf.variable_scope(
        scope, 'InceptionV4', [inputs], reuse = reuse
    ) as scope:
        with slim.arg_scope(
            [slim.batch_norm, slim.dropout], is_training = is_training
        ):
            net, end_points = inception_v4_base(inputs, scope = scope)
            print(net.shape)

            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                stride = 1,
                padding = 'SAME',
            ):
                with tf.variable_scope('Logits'):
                    # 8 x 8 x 1536
                    kernel_size = net.get_shape()[1:3]
                    print(kernel_size)
                    if kernel_size.is_fully_defined():
                        net = slim.avg_pool2d(
                            net,
                            kernel_size,
                            padding = 'VALID',
                            scope = 'AvgPool_1a',
                        )
                    else:
                        net = tf.reduce_mean(
                            input_tensor = net,
                            axis = [1, 2],
                            keepdims = True,
                            name = 'global_pool',
                        )
                    end_points['global_pool'] = net
                    # 1 x 1 x 1536
                    net = slim.dropout(
                        net, dropout_keep_prob, scope = 'Dropout_1b'
                    )
                    net = slim.flatten(net, scope = 'PreLogitsFlatten')
                    end_points['PreLogitsFlatten'] = net

                    bottleneck = slim.fully_connected(
                        net, bottleneck_dim, scope = 'bottleneck'
                    )
                    logits = slim.fully_connected(
                        bottleneck,
                        2,
                        activation_fn = None,
                        scope = 'Logits_vad',
                    )
                    return logits
Example No. 5
    def SSIM(self, x, y):
        C1 = 0.01 ** 2
        C2 = 0.03 ** 2

        mu_x = slim.avg_pool2d(x, 3, 1, 'VALID')
        mu_y = slim.avg_pool2d(y, 3, 1, 'VALID')

        sigma_x  = slim.avg_pool2d(x ** 2, 3, 1, 'VALID') - mu_x ** 2
        sigma_y  = slim.avg_pool2d(y ** 2, 3, 1, 'VALID') - mu_y ** 2
        sigma_xy = slim.avg_pool2d(x * y , 3, 1, 'VALID') - mu_x * mu_y

        SSIM_n = (2 * mu_x * mu_y + C1) * (2 * sigma_xy + C2)
        SSIM_d = (mu_x ** 2 + mu_y ** 2 + C1) * (sigma_x + sigma_y + C2)

        SSIM = SSIM_n / SSIM_d

        return tf.clip_by_value((1 - SSIM) / 2, 0, 1)
Example No. 6
def inference(images,
              keep_probability,
              phase_train=True,
              bottleneck_layer_size=128,
              weight_decay=0.0,
              reuse=None):
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.995,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # force in-place updates of mean and variance estimates
        'updates_collections': None,
        # Moving averages end up in the trainable variables collection
        'variables_collections': [tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES],
    }
    with slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
            weights_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1.0, mode="fan_avg", distribution="uniform"),
            weights_regularizer=tf.keras.regularizers.l2(0.5 * (weight_decay)),
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):
        with tf.compat.v1.variable_scope('squeezenet', values=[images], reuse=reuse):
            with slim.arg_scope([slim.batch_norm, slim.dropout],
                                is_training=phase_train):
                net = slim.conv2d(images, 96, [7, 7], stride=2, scope='conv1')
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='maxpool1')
                net = fire_module(net, 16, 64, scope='fire2')
                net = fire_module(net, 16, 64, scope='fire3')
                net = fire_module(net, 32, 128, scope='fire4')
                net = slim.max_pool2d(net, [2, 2], stride=2, scope='maxpool4')
                net = fire_module(net, 32, 128, scope='fire5')
                net = fire_module(net, 48, 192, scope='fire6')
                net = fire_module(net, 48, 192, scope='fire7')
                net = fire_module(net, 64, 256, scope='fire8')
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='maxpool8')
                net = fire_module(net, 64, 256, scope='fire9')
                net = slim.dropout(net, keep_probability)
                net = slim.conv2d(net,
                                  1000, [1, 1],
                                  activation_fn=None,
                                  normalizer_fn=None,
                                  scope='conv10')
                net = slim.avg_pool2d(net,
                                      net.get_shape()[1:3],
                                      scope='avgpool10')
                net = tf.squeeze(net, [1, 2], name='logits')
                net = slim.fully_connected(net,
                                           bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck',
                                           reuse=False)
    return net, None
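A hedged usage sketch for the SqueezeNet `inference` above (assumes the module's `fire_module` helper is defined); it returns a per-image 128-d bottleneck embedding, with None in place of end_points.

images = tf.compat.v1.placeholder(tf.float32, [None, 224, 224, 3])
prelogits, _ = inference(images,
                         keep_probability=0.8,
                         phase_train=True,
                         bottleneck_layer_size=128,
                         weight_decay=5e-4)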
Example No. 7
def _squeeze_and_excite(h, hidden_dim, activation_fn=tf.nn.relu6):
  with tf.variable_scope(None, default_name='SqueezeExcite'):
    height, width = h.shape[1], h.shape[2]
    u = slim.avg_pool2d(h, [height, width], stride=1, padding='VALID')
    u = _conv(u, hidden_dim, 1,
              normalizer_fn=None, activation_fn=activation_fn)
    u = _conv(u, h.shape[-1], 1,
              normalizer_fn=None, activation_fn=tf.nn.sigmoid)
    return u * h
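The squeeze-and-excite block pools `h` down to 1x1, runs the result through a two-layer 1x1-conv bottleneck, and rescales every channel of `h` with the resulting sigmoid gate, so the output shape matches the input. A hedged sketch (assumes the module's `_conv` helper):

h = tf.placeholder(tf.float32, [None, 28, 28, 96])
out = _squeeze_and_excite(h, hidden_dim=24)  # (?, 28, 28, 96), same as h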
Example No. 8
def model(
    inputs,
    is_training=True,
    dropout_keep_prob=0.8,
    reuse=None,
    scope='InceptionV4',
    create_aux_logits=True,
    num_classes=2,
):
    with tf.variable_scope(scope, 'InceptionV4', [inputs],
                           reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = inception_v4_base(inputs, scope=scope)
            print(net.shape)

            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1,
                    padding='SAME',
            ):

                # Final pooling and prediction
                # TODO(sguada,arnoegw): Consider adding a parameter global_pool which
                # can be set to False to disable pooling here (as in resnet_*()).
                with tf.variable_scope('Logits'):
                    # 8 x 8 x 1536
                    kernel_size = net.get_shape()[1:3]
                    print(kernel_size)
                    if kernel_size.is_fully_defined():
                        net = slim.avg_pool2d(
                            net,
                            kernel_size,
                            padding='VALID',
                            scope='AvgPool_1a',
                        )
                    else:
                        net = tf.reduce_mean(
                            input_tensor=net,
                            axis=[1, 2],
                            keepdims=True,
                            name='global_pool',
                        )
                    end_points['global_pool'] = net
                    # 1 x 1 x 1536
                    net = slim.dropout(net,
                                       dropout_keep_prob,
                                       scope='Dropout_1b')
                    net = slim.flatten(net, scope='PreLogitsFlatten')
                    end_points['PreLogitsFlatten'] = net
                    # 1536
                    logits = slim.fully_connected(net,
                                                  num_classes,
                                                  activation_fn=None,
                                                  scope='Logits')
                    return logits
Example No. 9
def _pooling(net, stride, operation):
    """Parses operation and performs the correct pooling operation on net."""
    padding = 'SAME'
    pooling_type, pooling_shape = _operation_to_pooling_info(operation)
    if pooling_type == 'avg':
        net = slim.avg_pool2d(net,
                              pooling_shape,
                              stride=stride,
                              padding=padding)
    elif pooling_type == 'max':
        net = slim.max_pool2d(net,
                              pooling_shape,
                              stride=stride,
                              padding=padding)
    else:
        raise NotImplementedError('Unimplemented pooling type: %s' % pooling_type)
    return net
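A hedged example, assuming `_operation_to_pooling_info` parses NASNet-style operation strings such as 'avg_pool_3x3' into a ('avg', [3, 3]) pair:

net = tf.placeholder(tf.float32, [None, 32, 32, 64])
net = _pooling(net, stride=2, operation='avg_pool_3x3')  # (?, 16, 16, 64)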
Example No. 10
def _transition_block(inputs,
                      num_filters,
                      compression=1.0,
                      scope=None,
                      outputs_collections=None):

    num_filters = int(num_filters * compression)
    with tf.variable_scope(scope, 'transition_blockx', [inputs]) as sc:
        net = inputs
        net = _conv(net, num_filters, 1, scope='blk')

        net = slim.avg_pool2d(net, 2)

        net = slim.utils.collect_named_outputs(outputs_collections, sc.name,
                                               net)

    return net, num_filters
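A hedged shape sketch (assumes the module's `_conv` helper): with compression=0.5 the DenseNet transition halves the channel count via the 1x1 conv, and the default 2x2 average pool (stride 2) halves the spatial resolution.

net = tf.placeholder(tf.float32, [None, 56, 56, 256])
net, num_filters = _transition_block(net, 256, compression=0.5)
# net: (?, 28, 28, 128), num_filters: 128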
Example No. 11
def _build_aux_head(net, end_points, num_classes, hparams, scope):
    """Auxiliary head used for all models across all datasets."""
    with tf.compat.v1.variable_scope(scope):
        aux_logits = tf.identity(net)
        with tf.compat.v1.variable_scope('aux_logits'):
            aux_logits = slim.avg_pool2d(aux_logits, [5, 5],
                                         stride=3,
                                         padding='VALID')
            aux_logits = slim.conv2d(aux_logits, 128, [1, 1], scope='proj')
            aux_logits = slim.batch_norm(aux_logits, scope='aux_bn0')
            aux_logits = tf.nn.relu(aux_logits)
            # Shape of feature map before the final layer.
            shape = aux_logits.shape
            if hparams.data_format == 'NHWC':
                shape = shape[1:3]
            else:
                shape = shape[2:4]
            aux_logits = slim.conv2d(aux_logits, 768, shape, padding='VALID')
            aux_logits = slim.batch_norm(aux_logits, scope='aux_bn1')
            aux_logits = tf.nn.relu(aux_logits)
            aux_logits = slim.flatten(aux_logits)
            aux_logits = slim.fully_connected(aux_logits, num_classes)
            end_points['AuxLogits'] = aux_logits
Example No. 12
def inception_v4(inputs,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 reuse=None,
                 scope='InceptionV4'):
    """Creates the Inception V4 model.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      num_classes: number of predicted classes. If 0 or None, the logits layer
        is omitted and the input features to the logits layer (before dropout)
        are returned instead.
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.
      create_aux_logits: Whether to include the auxiliary logits.

    Returns:
      net: a Tensor with the logits (pre-softmax activations) if num_classes
        is a non-zero integer, or the non-dropped input to the logits layer
        if num_classes is 0 or None.
      end_points: the set of end_points from the inception model.
    """
    with tf.variable_scope(scope, 'InceptionV4', [inputs],
                           reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = inception_v4_base(inputs, scope=scope)

            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1,
                    padding='SAME'):
                # Final pooling and prediction
                # TODO(sguada,arnoegw): Consider adding a parameter global_pool which
                # can be set to False to disable pooling here (as in resnet_*()).
                with tf.variable_scope('Embeddings'):
                    # 8 x 8 x 1536
                    kernel_size = net.get_shape()[1:3]
                    if kernel_size.is_fully_defined():
                        net = slim.avg_pool2d(net,
                                              kernel_size,
                                              padding='VALID',
                                              scope='AvgPool_1a')
                    else:
                        net = tf.reduce_mean(input_tensor=net,
                                             axis=[1, 2],
                                             keepdims=True,
                                             name='global_pool')
                    # 1 x 1 x 1536
                    net = slim.dropout(net,
                                       dropout_keep_prob,
                                       scope='Dropout_1b')
                    net = slim.flatten(net, scope='PreEmbeddingsFlatten')
                    # 1536
                    net = slim.fully_connected(net,
                                               512,
                                               activation_fn=None,
                                               scope='Embeddings')
                    net = (tf.math.tanh(net) + 1) / 2
        return net
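A hedged call sketch (assumes the module's `inception_v4_base` and a 299x299 input, the size the Inception V4 stem expects); the final (tanh + 1) / 2 squashes each of the 512 embedding coordinates into (0, 1).

images = tf.placeholder(tf.float32, [None, 299, 299, 3])
embeddings = inception_v4(images, is_training=False)  # (?, 512)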
Example No. 13
def inception_resnet_v2(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV2'):
    """Creates the Inception Resnet V2 model.
    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      num_classes: number of predicted classes.
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.
    Returns:
      logits: the logits outputs of the model.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1,
                    padding='SAME'):

                # 149 x 149 x 32
                net = slim.conv2d(inputs,
                                  32,
                                  3,
                                  stride=2,
                                  padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                net = slim.conv2d(net,
                                  32,
                                  3,
                                  padding='VALID',
                                  scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net,
                                      3,
                                      stride=2,
                                      padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net,
                                  80,
                                  1,
                                  padding='VALID',
                                  scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                net = slim.conv2d(net,
                                  192,
                                  3,
                                  padding='VALID',
                                  scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 192
                net = slim.max_pool2d(net,
                                      3,
                                      stride=2,
                                      padding='VALID',
                                      scope='MaxPool_5a_3x3')
                end_points['MaxPool_5a_3x3'] = net

                # 35 x 35 x 320
                with tf.variable_scope('Mixed_5b'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net,
                                                 96,
                                                 1,
                                                 scope='Conv2d_1x1')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net,
                                                    48,
                                                    1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0,
                                                    64,
                                                    5,
                                                    scope='Conv2d_0b_5x5')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2_0 = slim.conv2d(net,
                                                    64,
                                                    1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2_0,
                                                    96,
                                                    3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1,
                                                    96,
                                                    3,
                                                    scope='Conv2d_0c_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.avg_pool2d(net,
                                                     3,
                                                     stride=1,
                                                     padding='SAME',
                                                     scope='AvgPool_0a_3x3')
                        tower_pool_1 = slim.conv2d(tower_pool,
                                                   64,
                                                   1,
                                                   scope='Conv2d_0b_1x1')
                    net = tf.concat([
                        tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1
                    ], 3)

                end_points['Mixed_5b'] = net
                net = slim.repeat(net, 10, block35, scale=0.17)

                # 17 x 17 x 1088
                with tf.variable_scope('Mixed_6a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net,
                                                 384,
                                                 3,
                                                 stride=2,
                                                 padding='VALID',
                                                 scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net,
                                                    256,
                                                    1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0,
                                                    256,
                                                    3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv1_2 = slim.conv2d(tower_conv1_1,
                                                    384,
                                                    3,
                                                    stride=2,
                                                    padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_pool = slim.max_pool2d(net,
                                                     3,
                                                     stride=2,
                                                     padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)

                end_points['Mixed_6a'] = net
                net = slim.repeat(net, 20, block17, scale=0.10)

                with tf.variable_scope('Mixed_7a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net,
                                                 256,
                                                 1,
                                                 scope='Conv2d_0a_1x1')
                        tower_conv_1 = slim.conv2d(tower_conv,
                                                   384,
                                                   3,
                                                   stride=2,
                                                   padding='VALID',
                                                   scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1 = slim.conv2d(net,
                                                  256,
                                                  1,
                                                  scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1,
                                                    288,
                                                    3,
                                                    stride=2,
                                                    padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2 = slim.conv2d(net,
                                                  256,
                                                  1,
                                                  scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2,
                                                    288,
                                                    3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1,
                                                    320,
                                                    3,
                                                    stride=2,
                                                    padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.max_pool2d(net,
                                                     3,
                                                     stride=2,
                                                     padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([
                        tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool
                    ], 3)

                end_points['Mixed_7a'] = net

                net = slim.repeat(net, 9, block8, scale=0.20)
                net = block8(net, activation_fn=None)

                net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
                end_points['Conv2d_7b_1x1'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    #pylint: disable=no-member
                    net = slim.avg_pool2d(net,
                                          net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)

                    net = slim.dropout(net,
                                       dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')

                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(net,
                                           bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck',
                                           reuse=False)

    return net, end_points
Example No. 14
def mobilenet_v1(inputs,
                 num_classes=1000,
                 dropout_keep_prob=0.999,
                 is_training=True,
                 min_depth=8,
                 depth_multiplier=1.0,
                 conv_defs=None,
                 prediction_fn=tf.nn.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='MobilenetV1'):
    """Mobilenet v1 model for classification.

    Args:
      inputs: a tensor of shape [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      dropout_keep_prob: the percentage of activation values that are retained.
      is_training: whether the model is being trained.
      min_depth: Minimum depth value (number of channels) for all convolution ops.
        Enforced when depth_multiplier < 1, and not an active constraint when
        depth_multiplier >= 1.
      depth_multiplier: Float multiplier for the depth (number of channels)
        for all convolution ops. The value must be greater than zero. Typical
        usage will be to set this value in (0, 1) to reduce the number of
        parameters or computation cost of the model.
      conv_defs: A list of ConvDef namedtuples specifying the net architecture.
      prediction_fn: a function to get predictions out of logits.
      spatial_squeeze: if True, logits is of shape [B, C]; if False, logits is
          of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      logits: the pre-softmax activations, a tensor of size
        [batch_size, num_classes]
      end_points: a dictionary from components of the network to the corresponding
        activation.

    Raises:
      ValueError: Input rank is invalid.
    """
    input_shape = inputs.get_shape().as_list()
    if len(input_shape) != 4:
        raise ValueError('Invalid input tensor rank, expected 4, was: %d' %
                         len(input_shape))

    with tf.compat.v1.variable_scope(scope,
                                     'MobilenetV1', [inputs, num_classes],
                                     reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = mobilenet_v1_base(
                inputs,
                scope=scope,
                min_depth=min_depth,
                depth_multiplier=depth_multiplier,
                conv_defs=conv_defs)
            with tf.compat.v1.variable_scope('Logits'):
                kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7])
                net = slim.avg_pool2d(net,
                                      kernel_size,
                                      padding='VALID',
                                      scope='AvgPool_1a')
                end_points['AvgPool_1a'] = net
                # 1 x 1 x 1024
                net = slim.dropout(net,
                                   keep_prob=dropout_keep_prob,
                                   scope='Dropout_1b')
                logits = slim.conv2d(net,
                                     num_classes, [1, 1],
                                     activation_fn=None,
                                     normalizer_fn=None,
                                     scope='Conv2d_1c_1x1')
                if spatial_squeeze:
                    logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
            end_points['Logits'] = logits
            if prediction_fn:
                end_points['Predictions'] = prediction_fn(logits,
                                                          scope='Predictions')
    return logits, end_points
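A hedged usage sketch (assumes the module's `mobilenet_v1_base` and `_reduced_kernel_size_for_small_input` helpers are defined):

images = tf.compat.v1.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = mobilenet_v1(images, num_classes=1000, is_training=False)
probs = end_points['Predictions']  # softmax over the 1000 classes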
Example No. 15
def inception_v1(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV1',
                 global_pool=False):
    """Defines the Inception V1 architecture.

  This architecture is defined in:

    Going deeper with convolutions
    Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
    Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
    http://arxiv.org/pdf/1409.4842v1.pdf.

  The default image size used to train this network is 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    is_training: whether the model is being trained.
    dropout_keep_prob: the percentage of activation values that are retained.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape [B, C]; if False, logits is of
        shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    global_pool: Optional boolean flag to control the avgpooling before the
      logits layer. If false or unset, pooling is done with a fixed window
      that reduces default-sized inputs to 1x1, while larger inputs lead to
      larger outputs. If true, any input size is pooled down to 1x1.

  Returns:
    net: a Tensor with the logits (pre-softmax activations) if num_classes
      is a non-zero integer, or the non-dropped-out input to the logits layer
      if num_classes is 0 or None.
    end_points: a dictionary from components of the network to the corresponding
      activation.
  """
    # Final pooling and prediction
    with tf.compat.v1.variable_scope(scope,
                                     'InceptionV1', [inputs],
                                     reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = inception_v1_base(inputs, scope=scope)
            with tf.compat.v1.variable_scope('Logits'):
                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(input_tensor=net,
                                         axis=[1, 2],
                                         keepdims=True,
                                         name='global_pool')
                    end_points['global_pool'] = net
                else:
                    # Pooling with a fixed kernel size.
                    net = slim.avg_pool2d(net, [7, 7],
                                          stride=1,
                                          scope='AvgPool_0a_7x7')
                    end_points['AvgPool_0a_7x7'] = net
                if not num_classes:
                    return net, end_points
                net = slim.dropout(net, dropout_keep_prob, scope='Dropout_0b')
                logits = slim.conv2d(net,
                                     num_classes, [1, 1],
                                     activation_fn=None,
                                     normalizer_fn=None,
                                     scope='Conv2d_0c_1x1')
                if spatial_squeeze:
                    logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')

                end_points['Logits'] = logits
                end_points['Predictions'] = prediction_fn(logits,
                                                          scope='Predictions')
    return logits, end_points
Example No. 16
def inception_v3(inputs,
                 num_classes=1000,
                 is_training=True,
                 drop_out_keep_prob=0.8,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV3'):
    with tf.compat.v1.variable_scope(scope,
                                     'InceptionV3', [inputs, num_classes],
                                     reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = inception_v3_base(inputs, scope=scope)

        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1,
                            padding='SAME'):
            aux_logits = end_points['Mixed_6e']
            with tf.compat.v1.variable_scope('AuxLogits'):
                aux_logits = slim.avg_pool2d(aux_logits, [5, 5],
                                             stride=3,
                                             padding='VALID',
                                             scope='AvgPool_1a_5x5')
                aux_logits = slim.conv2d(aux_logits,
                                         128, [1, 1],
                                         scope='Conv2d_1b_1x1')
                aux_logits = slim.conv2d(
                    aux_logits,
                    768, [5, 5],
                    weights_initializer=trunc_normal(0.01),
                    padding='VALID',
                    scope='Conv2d_1c_5x5')
                aux_logits = slim.conv2d(
                    aux_logits,
                    num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    weights_initializer=trunc_normal(0.001),
                    scope='Conv2d_1d_1x1')
                if spatial_squeeze:
                    aux_logits = tf.squeeze(aux_logits, [1, 2],
                                            name='SpatialSqueeze')

                end_points['AuxLogits'] = aux_logits
            with tf.compat.v1.variable_scope('logits'):
                net = slim.avg_pool2d(net, [8, 8],
                                      padding='VALID',
                                      scope='AvgPool_1a_8x8')
                net = slim.dropout(net,
                                   keep_prob=drop_out_keep_prob,
                                   # This call sits outside the is_training
                                   # arg_scope above, so pass it explicitly.
                                   is_training=is_training,
                                   scope='Dropout_1b')
                end_points['PreLogits'] = net
                logits = slim.conv2d(net,
                                     num_classes, [1, 1],
                                     activation_fn=None,
                                     normalizer_fn=None,
                                     scope='Conv2d_1e_1x1')
                if spatial_squeeze:
                    logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')

                end_points['Logits'] = logits
                end_points['Predictions'] = prediction_fn(logits,
                                                          scope='Predictions')

            return logits, end_points
Example No. 17
def inception_resnet_v2(inputs,
                        num_classes=1001,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        reuse=None,
                        scope='InceptionResnetV2',
                        create_aux_logits=True,
                        activation_fn=tf.nn.relu):
    """Creates the Inception Resnet V2 model.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
      Dimension batch_size may be undefined. If create_aux_logits is false,
      also height and width may be undefined.
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    is_training: whether the model is being trained.
    dropout_keep_prob: float, the fraction to keep before final layer.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    create_aux_logits: Whether to include the auxiliary logits.
    activation_fn: Activation function for conv2d.

  Returns:
    net: the output of the logits layer (if num_classes is a non-zero integer),
      or the non-dropped-out input to the logits layer (if num_classes is 0 or
      None).
    end_points: the set of end_points from the inception model.
  """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs],
                           reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):

            net, end_points = inception_resnet_v2_base(
                inputs, scope=scope, activation_fn=activation_fn)

            if create_aux_logits and num_classes:
                with tf.variable_scope('AuxLogits'):
                    aux = end_points['PreAuxLogits']
                    aux = slim.avg_pool2d(aux,
                                          5,
                                          stride=3,
                                          padding='VALID',
                                          scope='Conv2d_1a_3x3')
                    aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1')
                    aux = slim.conv2d(aux,
                                      768,
                                      aux.get_shape()[1:3],
                                      padding='VALID',
                                      scope='Conv2d_2a_5x5')
                    aux = slim.flatten(aux)
                    aux = slim.fully_connected(aux,
                                               num_classes,
                                               activation_fn=None,
                                               scope='Logits')
                    end_points['AuxLogits'] = aux

            with tf.variable_scope('Logits'):
                # TODO(sguada,arnoegw): Consider adding a parameter global_pool which
                # can be set to False to disable pooling here (as in resnet_*()).
                kernel_size = net.get_shape()[1:3]
                if kernel_size.is_fully_defined():
                    net = slim.avg_pool2d(net,
                                          kernel_size,
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                else:
                    net = tf.reduce_mean(net, [1, 2],
                                         keepdims=True,
                                         name='global_pool')
                end_points['global_pool'] = net
                if not num_classes:
                    return net, end_points
                net = slim.flatten(net)
                net = slim.dropout(net,
                                   dropout_keep_prob,
                                   is_training=is_training,
                                   scope='Dropout')
                end_points['PreLogitsFlatten'] = net
                logits = slim.fully_connected(net,
                                              num_classes,
                                              activation_fn=None,
                                              scope='Logits')
                end_points['Logits'] = logits
                end_points['Predictions'] = tf.nn.softmax(logits,
                                                          name='Predictions')

        return logits, end_points
Example No. 18
def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
                                  min_depth, insert_1x1_conv, image_features,
                                  pool_residual=False):
  """Generates multi resolution feature maps from input image features.

  Generates multi-scale feature maps for detection as in the SSD papers by
  Liu et al: https://arxiv.org/pdf/1512.02325v2.pdf, See Sec 2.1.

  More specifically, it performs the following two tasks:
  1) If a layer name is provided in the configuration, returns that layer as a
     feature map.
  2) If a layer name is left as an empty string, constructs a new feature map
     based on the spatial shape and depth configuration. Note that the current
     implementation only supports generating new layers using convolution of
     stride 2 resulting in a spatial resolution reduction by a factor of 2.
     By default convolution kernel size is set to 3, and it can be customized
     by caller.

  An example of the configuration for Inception V3:
  {
    'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
    'layer_depth': [-1, -1, -1, 512, 256, 128]
  }

  Args:
    feature_map_layout: Dictionary of specifications for the feature map
      layouts in the following format (Inception V2/V3 respectively):
      {
        'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
        'layer_depth': [-1, -1, -1, 512, 256, 128]
      }
      or
      {
        'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
        'layer_depth': [-1, -1, -1, 512, 256, 128]
      }
      If 'from_layer' is specified, the specified feature map is directly used
      as a box predictor layer, and the layer_depth is directly inferred from the
      feature map (instead of using the provided 'layer_depth' parameter). In
      this case, our convention is to set 'layer_depth' to -1 for clarity.
      Otherwise, if 'from_layer' is an empty string, then the box predictor
      layer will be built from the previous layer using convolution operations.
      Note that the current implementation only supports generating new layers
      using convolutions of stride 2 (resulting in a spatial resolution
      reduction by a factor of 2), and will be extended to a more flexible
      design. Convolution kernel size is set to 3 by default, and can be
      customized by the 'conv_kernel_size' parameter (similarly, 'conv_kernel_size'
      should be set to -1 if 'from_layer' is specified). The created convolution
      operation will be a normal 2D convolution by default, and a depthwise
      convolution followed by 1x1 convolution if 'use_depthwise' is set to True.
    depth_multiplier: Depth multiplier for convolutional layers.
    min_depth: Minimum depth for convolutional layers.
    insert_1x1_conv: A boolean indicating whether an additional 1x1 convolution
      should be inserted before shrinking the feature map.
    image_features: A dictionary of handles to activation tensors from the
      base feature extractor.
    pool_residual: Whether to add an average pooling layer followed by a
      residual connection between subsequent feature maps when the channel
      depths match. For example, with option 'layer_depth': [-1, 512, 256, 256],
      a pooling and residual layer is added between the third and fourth feature
      map. This option is better used with Weight Shared Convolution Box
      Predictor when all feature maps have the same channel depth to encourage
      more consistent features across multi-scale feature maps.

  Returns:
    feature_maps: an OrderedDict mapping keys (feature map names) to
      tensors where each tensor has shape [batch, height_i, width_i, depth_i].

  Raises:
    ValueError: if the number of entries in 'from_layer' and
      'layer_depth' do not match.
    ValueError: if the generated layer does not have the same resolution
      as specified.
  """
  depth_fn = get_depth_fn(depth_multiplier, min_depth)

  feature_map_keys = []
  feature_maps = []
  base_from_layer = ''
  use_explicit_padding = False
  if 'use_explicit_padding' in feature_map_layout:
    use_explicit_padding = feature_map_layout['use_explicit_padding']
  use_depthwise = False
  if 'use_depthwise' in feature_map_layout:
    use_depthwise = feature_map_layout['use_depthwise']
  for index, from_layer in enumerate(feature_map_layout['from_layer']):
    layer_depth = feature_map_layout['layer_depth'][index]
    conv_kernel_size = 3
    if 'conv_kernel_size' in feature_map_layout:
      conv_kernel_size = feature_map_layout['conv_kernel_size'][index]
    if from_layer:
      feature_map = image_features[from_layer]
      base_from_layer = from_layer
      feature_map_keys.append(from_layer)
    else:
      pre_layer = feature_maps[-1]
      pre_layer_depth = pre_layer.get_shape().as_list()[3]
      intermediate_layer = pre_layer
      if insert_1x1_conv:
        layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
            base_from_layer, index, depth_fn(layer_depth // 2))
        intermediate_layer = slim.conv2d(
            pre_layer,
            depth_fn(layer_depth // 2), [1, 1],
            padding='SAME',
            stride=1,
            scope=layer_name)
      layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format(
          base_from_layer, index, conv_kernel_size, conv_kernel_size,
          depth_fn(layer_depth))
      stride = 2
      padding = 'SAME'
      if use_explicit_padding:
        padding = 'VALID'
        intermediate_layer = ops.fixed_padding(
            intermediate_layer, conv_kernel_size)
      if use_depthwise:
        feature_map = slim.separable_conv2d(
            intermediate_layer,
            None, [conv_kernel_size, conv_kernel_size],
            depth_multiplier=1,
            padding=padding,
            stride=stride,
            scope=layer_name + '_depthwise')
        feature_map = slim.conv2d(
            feature_map,
            depth_fn(layer_depth), [1, 1],
            padding='SAME',
            stride=1,
            scope=layer_name)
        if pool_residual and pre_layer_depth == depth_fn(layer_depth):
          feature_map += slim.avg_pool2d(
              pre_layer, [3, 3],
              padding='SAME',
              stride=2,
              scope=layer_name + '_pool')
      else:
        feature_map = slim.conv2d(
            intermediate_layer,
            depth_fn(layer_depth), [conv_kernel_size, conv_kernel_size],
            padding=padding,
            stride=stride,
            scope=layer_name)
      feature_map_keys.append(layer_name)
    feature_maps.append(feature_map)
  return collections.OrderedDict(
      [(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
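A hedged configuration sketch, using the Inception V2 layout quoted in the docstring; `image_features` is assumed to be the endpoint dictionary returned by the base feature extractor.

feature_map_layout = {
    'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
    'layer_depth': [-1, -1, -1, 512, 256, 128],
}
feature_maps = multi_resolution_feature_maps(
    feature_map_layout=feature_map_layout,
    depth_multiplier=1.0,
    min_depth=16,
    insert_1x1_conv=True,
    image_features=image_features)
# -> OrderedDict keyed by 'Mixed_3c', 'Mixed_4c', 'Mixed_5c', and three
#    generated stride-2 conv layer names.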
Example No. 19
def attention_inception_v3_base(inputs,
                                final_endpoint='Mixed_7c',
                                min_depth=16,
                                depth_multiplier=1.0,
                                scope=None,
                                attention_module='',
                                attention_position='all'):
    """Inception model from http://arxiv.org/abs/1512.00567.

  Constructs an Inception v3 network from inputs to the given final endpoint.
  This method can construct the network up to the final inception block
  Mixed_7c.

  Note that the names of the layers in the paper do not correspond to the names
  of the endpoints registered by this function, although they build the same
  network.

  Here is a mapping from the old_names to the new names:
  Old name          | New name
  =======================================
  conv0             | Conv2d_1a_3x3
  conv1             | Conv2d_2a_3x3
  conv2             | Conv2d_2b_3x3
  pool1             | MaxPool_3a_3x3
  conv3             | Conv2d_3b_1x1
  conv4             | Conv2d_4a_3x3
  pool2             | MaxPool_5a_3x3
  mixed_35x35x256a  | Mixed_5b
  mixed_35x35x288a  | Mixed_5c
  mixed_35x35x288b  | Mixed_5d
  mixed_17x17x768a  | Mixed_6a
  mixed_17x17x768b  | Mixed_6b
  mixed_17x17x768c  | Mixed_6c
  mixed_17x17x768d  | Mixed_6d
  mixed_17x17x768e  | Mixed_6e
  mixed_8x8x1280a   | Mixed_7a
  mixed_8x8x2048a   | Mixed_7b
  mixed_8x8x2048b   | Mixed_7c

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
      'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c',
      'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'].
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    scope: Optional variable_scope.
    attention_module: Optional attention_module. Accepted values are '' or
      'se_block'.
    attention_position: Optional attention_position. Default is 'all'. Accepted
      values are 'head', 'extractor', and 'all'.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a set of activations for external use, for example summaries or
                losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
                or depth_multiplier <= 0
  """
    # end_points will collect relevant activations for external use, for example
    # summaries or losses.
    end_points = {}

    def add_and_check_final(name, net):
        end_points[name] = net
        return name == final_endpoint

    def add_attention_layer(attention_module, attention_position, net,
                            end_point):
        if attention_module:
            if attention_position == 'extractor' or attention_position == 'all':
                end_point, net = att.attach_attention_module(
                    net, attention_module, end_point)
        return net, end_point

    if depth_multiplier <= 0:
        raise ValueError('depth_multiplier is not greater than zero.')
    depth = lambda d: max(int(d * depth_multiplier), min_depth)

    with tf.variable_scope(scope, 'InceptionV3', [inputs]):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1,
                            padding='VALID'):
            # 299 x 299 x 3
            end_point = 'Input'
            if attention_module:
                if attention_position == 'head' or attention_position == 'all':
                    end_point, net = att.attach_attention_module(
                        inputs, attention_module, end_point)
                    if add_and_check_final(end_point, net):
                        return net, end_points

                else:
                    net = inputs
            else:
                net = inputs

            end_point = 'Conv2d_1a_3x3'
            net = slim.conv2d(net,
                              depth(32), [3, 3],
                              stride=2,
                              scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 149 x 149 x 32
            end_point = 'Conv2d_2a_3x3'
            net = slim.conv2d(net, depth(32), [3, 3], scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 147 x 147 x 32
            end_point = 'Conv2d_2b_3x3'
            net = slim.conv2d(net,
                              depth(64), [3, 3],
                              padding='SAME',
                              scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 147 x 147 x 64
            end_point = 'MaxPool_3a_3x3'
            net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 73 x 73 x 64
            end_point = 'Conv2d_3b_1x1'
            net = slim.conv2d(net, depth(80), [1, 1], scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 73 x 73 x 80.
            end_point = 'Conv2d_4a_3x3'
            net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 71 x 71 x 192.
            end_point = 'MaxPool_5a_3x3'
            net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 35 x 35 x 192.

        # Inception blocks
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1,
                            padding='SAME'):
            # mixed: 35 x 35 x 256.
            end_point = 'Mixed_5b'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(48), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(64), [5, 5],
                                           scope='Conv2d_0b_5x5')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0c_3x3')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(32), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_5b
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_1: 35 x 35 x 288.
            end_point = 'Mixed_5c'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(48), [1, 1],
                                           scope='Conv2d_0b_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(64), [5, 5],
                                           scope='Conv_1_0c_5x5')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0c_3x3')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_5c
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_2: 35 x 35 x 288.
            end_point = 'Mixed_5d'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(48), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(64), [5, 5],
                                           scope='Conv2d_0b_5x5')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0c_3x3')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_5d
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_3: 17 x 17 x 768.
            end_point = 'Mixed_6a'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(384), [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(96), [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_1x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.max_pool2d(net, [3, 3],
                                               stride=2,
                                               padding='VALID',
                                               scope='MaxPool_1a_3x3')
                net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_6a
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed4: 17 x 17 x 768.
            end_point = 'Mixed_6b'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(128), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(128), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(128), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(128), [7, 1],
                                           scope='Conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(128), [1, 7],
                                           scope='Conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(128), [7, 1],
                                           scope='Conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_6b
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_5: 17 x 17 x 768.
            end_point = 'Mixed_6c'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(160), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(160), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(160), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [7, 1],
                                           scope='Conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [1, 7],
                                           scope='Conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [7, 1],
                                           scope='Conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_6c
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_6: 17 x 17 x 768.
            end_point = 'Mixed_6d'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(160), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(160), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(160), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [7, 1],
                                           scope='Conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [1, 7],
                                           scope='Conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [7, 1],
                                           scope='Conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_6d
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_7: 17 x 17 x 768.
            end_point = 'Mixed_6e'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_6e
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_8: 8 x 8 x 1280.
            end_point = 'Mixed_7a'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_0 = slim.conv2d(branch_0,
                                           depth(320), [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.max_pool2d(net, [3, 3],
                                               stride=2,
                                               padding='VALID',
                                               scope='MaxPool_1a_3x3')
                net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_7a
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_9: 8 x 8 x 2048.
            end_point = 'Mixed_7b'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(320), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(384), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_1,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0b_1x3'),
                                             slim.conv2d(branch_1,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0b_3x1')
                                         ])
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(448), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(384), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_2,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0c_1x3'),
                                             slim.conv2d(branch_2,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0d_3x1')
                                         ])
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_7b
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_10: 8 x 8 x 2048.
            end_point = 'Mixed_7c'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(320), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(384), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_1,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0b_1x3'),
                                             slim.conv2d(branch_1,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0c_3x1')
                                         ])
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(448), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(384), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_2,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0c_1x3'),
                                             slim.conv2d(branch_2,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0d_3x1')
                                         ])
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
        raise ValueError('Unknown final endpoint %s' % final_endpoint)
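
A minimal usage sketch of the function above. This is a sketch only: it
assumes the surrounding function is importable as inception_v3_base, that its
signature matches the Args documented above, and that the att attention
helpers are available.

# Sketch only: build the network up to Mixed_6e and read an intermediate
# activation back out of end_points.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # the snippet above is graph-mode (tf.compat.v1) code

images = tf.placeholder(tf.float32, [None, 299, 299, 3])
net, end_points = inception_v3_base(
    images,
    final_endpoint='Mixed_6e',       # stop after the last 17x17 block
    min_depth=16,
    depth_multiplier=0.5,            # thin every conv, clamped by min_depth
    attention_module='se_block',     # '' disables attention entirely
    attention_position='extractor')  # attach attention after each mixed block
mixed_5d = end_points['Mixed_5d']    # e.g. for summaries or auxiliary losses
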
Example No. 20
def inception_v2_base(inputs,
                      final_endpoint='Mixed_5c',
                      min_depth=16,
                      depth_multiplier=1.0,
                      use_separable_conv=True,
                      data_format='NHWC',
                      scope=None):
  """Inception v2 (6a2).

  Constructs an Inception v2 network from inputs to the given final endpoint.
  This method can construct the network up to the layer inception(5b) as
  described in http://arxiv.org/abs/1502.03167.

  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a',
      'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b',
      'Mixed_5c'].
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    use_separable_conv: Use a separable convolution for the first layer
      Conv2d_1a_7x7. If this is False, use a normal convolution instead.
    data_format: Data format of the activations ('NHWC' or 'NCHW').
    scope: Optional variable_scope.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a set of activations for external use, for example summaries or
                losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
                or depth_multiplier <= 0
  """

  # end_points will collect relevant activations for external use, for example
  # summaries or losses.
  end_points = {}

  # Used to find thinned depths for each layer.
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')
  depth = lambda d: max(int(d * depth_multiplier), min_depth)

  if data_format != 'NHWC' and data_format != 'NCHW':
    raise ValueError('data_format must be either NHWC or NCHW.')
  if data_format == 'NCHW' and use_separable_conv:
    raise ValueError(
        'separable convolution only supports NHWC layout. NCHW data format can'
        ' only be used when use_separable_conv is False.'
    )

  # Channel axis for tf.concat below: 3 (last) for NHWC, 1 for NCHW.
  concat_dim = 3 if data_format == 'NHWC' else 1
  with tf.compat.v1.variable_scope(scope, 'InceptionV2', [inputs]):
    with slim.arg_scope(
        [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
        stride=1,
        padding='SAME',
        data_format=data_format):

      # Note that sizes in the comments below assume an input spatial size of
      # 224x224; however, the inputs can be of any size greater than 32x32.

      # 224 x 224 x 3
      end_point = 'Conv2d_1a_7x7'

      if use_separable_conv:
        # depthwise_multiplier here is different from depth_multiplier.
        # depthwise_multiplier determines the output channels of the initial
        # depthwise conv (see docs for tf.nn.separable_conv2d), while
        # depth_multiplier controls the # channels of the subsequent 1x1
        # convolution. Must have
        #   in_channels * depthwise_multiplier <= out_channels
        # so that the separable convolution is not overparameterized.
        depthwise_multiplier = min(int(depth(64) / 3), 8)
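        # For example, with depth_multiplier=1.0: depth(64) = 64,
        # int(64 / 3) = 21, and min(21, 8) = 8; with 3 input channels,
        # 3 * 8 = 24 <= 64, satisfying the constraint above.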
        net = slim.separable_conv2d(
            inputs, depth(64), [7, 7],
            depth_multiplier=depthwise_multiplier,
            stride=2,
            padding='SAME',
            weights_initializer=trunc_normal(1.0),
            scope=end_point)
      else:
        # Use a normal convolution instead of a separable convolution.
        net = slim.conv2d(
            inputs,
            depth(64), [7, 7],
            stride=2,
            weights_initializer=trunc_normal(1.0),
            scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 112 x 112 x 64
      end_point = 'MaxPool_2a_3x3'
      net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 56 x 56 x 64
      end_point = 'Conv2d_2b_1x1'
      net = slim.conv2d(net, depth(64), [1, 1], scope=end_point,
                        weights_initializer=trunc_normal(0.1))
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 56 x 56 x 64
      end_point = 'Conv2d_2c_3x3'
      net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 56 x 56 x 192
      end_point = 'MaxPool_3a_3x3'
      net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 28 x 28 x 192
      # Inception module.
      end_point = 'Mixed_3b'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(32), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 28 x 28 x 256
      end_point = 'Mixed_3c'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 28 x 28 x 320
      end_point = 'Mixed_4a'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, depth(160), [3, 3], stride=2,
                                 scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(
              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
          branch_1 = slim.conv2d(
              branch_1, depth(96), [3, 3], stride=2, scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(
              net, [3, 3], stride=2, scope='MaxPool_1a_3x3')
        net = tf.concat(axis=concat_dim, values=[branch_0, branch_1, branch_2])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4b'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(224), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(
              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4c'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(128), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4d'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(160), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(160), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4e'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(96), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(160), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(192), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_5a'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2,
                                 scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(256), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2,
                                 scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
                                     scope='MaxPool_1a_3x3')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 7 x 7 x 1024
      end_point = 'Mixed_5b'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(160), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 7 x 7 x 1024
      end_point = 'Mixed_5c'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
    raise ValueError('Unknown final endpoint %s' % final_endpoint)
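
As with the v3 variant above, a minimal usage sketch (a sketch only, assuming
inception_v2_base is importable with the signature documented in its Args):

# Sketch only: InceptionV2 trunk up to Mixed_4e with thinned channels.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
net, end_points = inception_v2_base(
    images,
    final_endpoint='Mixed_4e',
    min_depth=16,
    depth_multiplier=0.5,       # depth(d) = max(int(d * 0.5), 16)
    use_separable_conv=False,   # must be False when data_format='NCHW'
    data_format='NHWC')
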
Example No. 21
    def _extract_box_classifier_features(self, proposal_feature_maps, scope):
        """Extracts second stage box classifier features.

        Args:
          proposal_feature_maps: A 4-D float tensor with shape
            [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
            representing the feature map cropped to each proposal.
          scope: A scope name (unused).

        Returns:
          proposal_classifier_features: A 4-D float tensor with shape
            [batch_size * self.max_num_proposals, height, width, depth]
            representing box classifier features for each proposal.
        """
        net = proposal_feature_maps

        depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
        trunc_normal = lambda stddev: tf.truncated_normal_initializer(
            0.0, stddev)

        data_format = 'NHWC'
        concat_dim = 3 if data_format == 'NHWC' else 1

        with tf.variable_scope('InceptionV2', reuse=self._reuse_weights):
            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1,
                    padding='SAME',
                    data_format=data_format):
                with _batch_norm_arg_scope(
                    [slim.conv2d, slim.separable_conv2d],
                        batch_norm_scale=True,
                        train_batch_norm=self._train_batch_norm):

                    with tf.variable_scope('Mixed_5a'):
                        with tf.variable_scope('Branch_0'):
                            branch_0 = slim.conv2d(
                                net,
                                depth(128), [1, 1],
                                weights_initializer=trunc_normal(0.09),
                                scope='Conv2d_0a_1x1')
                            branch_0 = slim.conv2d(branch_0,
                                                   depth(192), [3, 3],
                                                   stride=2,
                                                   scope='Conv2d_1a_3x3')
                        with tf.variable_scope('Branch_1'):
                            branch_1 = slim.conv2d(
                                net,
                                depth(192), [1, 1],
                                weights_initializer=trunc_normal(0.09),
                                scope='Conv2d_0a_1x1')
                            branch_1 = slim.conv2d(branch_1,
                                                   depth(256), [3, 3],
                                                   scope='Conv2d_0b_3x3')
                            branch_1 = slim.conv2d(branch_1,
                                                   depth(256), [3, 3],
                                                   stride=2,
                                                   scope='Conv2d_1a_3x3')
                        with tf.variable_scope('Branch_2'):
                            branch_2 = slim.max_pool2d(net, [3, 3],
                                                       stride=2,
                                                       scope='MaxPool_1a_3x3')
                        net = tf.concat([branch_0, branch_1, branch_2],
                                        concat_dim)

                    with tf.variable_scope('Mixed_5b'):
                        with tf.variable_scope('Branch_0'):
                            branch_0 = slim.conv2d(net,
                                                   depth(352), [1, 1],
                                                   scope='Conv2d_0a_1x1')
                        with tf.variable_scope('Branch_1'):
                            branch_1 = slim.conv2d(
                                net,
                                depth(192), [1, 1],
                                weights_initializer=trunc_normal(0.09),
                                scope='Conv2d_0a_1x1')
                            branch_1 = slim.conv2d(branch_1,
                                                   depth(320), [3, 3],
                                                   scope='Conv2d_0b_3x3')
                        with tf.variable_scope('Branch_2'):
                            branch_2 = slim.conv2d(
                                net,
                                depth(160), [1, 1],
                                weights_initializer=trunc_normal(0.09),
                                scope='Conv2d_0a_1x1')
                            branch_2 = slim.conv2d(branch_2,
                                                   depth(224), [3, 3],
                                                   scope='Conv2d_0b_3x3')
                            branch_2 = slim.conv2d(branch_2,
                                                   depth(224), [3, 3],
                                                   scope='Conv2d_0c_3x3')
                        with tf.variable_scope('Branch_3'):
                            branch_3 = slim.avg_pool2d(net, [3, 3],
                                                       scope='AvgPool_0a_3x3')
                            branch_3 = slim.conv2d(
                                branch_3,
                                depth(128), [1, 1],
                                weights_initializer=trunc_normal(0.1),
                                scope='Conv2d_0b_1x1')
                        net = tf.concat(
                            [branch_0, branch_1, branch_2, branch_3],
                            concat_dim)

                    with tf.variable_scope('Mixed_5c'):
                        with tf.variable_scope('Branch_0'):
                            branch_0 = slim.conv2d(net,
                                                   depth(352), [1, 1],
                                                   scope='Conv2d_0a_1x1')
                        with tf.variable_scope('Branch_1'):
                            branch_1 = slim.conv2d(
                                net,
                                depth(192), [1, 1],
                                weights_initializer=trunc_normal(0.09),
                                scope='Conv2d_0a_1x1')
                            branch_1 = slim.conv2d(branch_1,
                                                   depth(320), [3, 3],
                                                   scope='Conv2d_0b_3x3')
                        with tf.variable_scope('Branch_2'):
                            branch_2 = slim.conv2d(
                                net,
                                depth(192), [1, 1],
                                weights_initializer=trunc_normal(0.09),
                                scope='Conv2d_0a_1x1')
                            branch_2 = slim.conv2d(branch_2,
                                                   depth(224), [3, 3],
                                                   scope='Conv2d_0b_3x3')
                            branch_2 = slim.conv2d(branch_2,
                                                   depth(224), [3, 3],
                                                   scope='Conv2d_0c_3x3')
                        with tf.variable_scope('Branch_3'):
                            branch_3 = slim.max_pool2d(net, [3, 3],
                                                       scope='MaxPool_0a_3x3')
                            branch_3 = slim.conv2d(
                                branch_3,
                                depth(128), [1, 1],
                                weights_initializer=trunc_normal(0.1),
                                scope='Conv2d_0b_1x1')
                        proposal_classifier_features = tf.concat(
                            [branch_0, branch_1, branch_2, branch_3],
                            concat_dim)

        return proposal_classifier_features
Exemplo n.º 22
0
def inception_v2(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 min_depth=16,
                 depth_multiplier=1.0,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV2',
                 global_pool=False):
  """Inception v2 model for classification.

  Constructs an Inception v2 network for classification as described in
  http://arxiv.org/abs/1502.03167.

  The default image size used to train this network is 224x224.

  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    is_training: whether is training or not.
    dropout_keep_prob: the percentage of activation values that are retained.
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape [B, C]; if False, logits is of
        shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    global_pool: Optional boolean flag to control the avgpooling before the
      logits layer. If false or unset, pooling is done with a fixed window
      that reduces default-sized inputs to 1x1, while larger inputs lead to
      larger outputs. If true, any input size is pooled down to 1x1.

  Returns:
    net: a Tensor with the logits (pre-softmax activations) if num_classes
      is a non-zero integer, or the non-dropped-out input to the logits layer
      if num_classes is 0 or None.
    end_points: a dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: if depth_multiplier <= 0.
  """
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')

  with tf.compat.v1.variable_scope(
      scope, 'InceptionV2', [inputs], reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v2_base(
          inputs, scope=scope, min_depth=min_depth,
          depth_multiplier=depth_multiplier)
      # Final pooling and prediction.
      with tf.compat.v1.variable_scope('Logits'):
        if global_pool:
          # Global average pooling.
          net = tf.reduce_mean(net, [1, 2], keepdims=True, name='global_pool')
          end_points['global_pool'] = net
        else:
          # Pooling with a fixed kernel size.
          kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7])
          net = slim.avg_pool2d(net, kernel_size, padding='VALID',
                                scope='AvgPool_1a_{}x{}'.format(*kernel_size))
          end_points['AvgPool_1a'] = net
        if not num_classes:
          return net, end_points
        # 1 x 1 x 1024
        net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
        logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                             normalizer_fn=None, scope='Conv2d_1c_1x1')
        if spatial_squeeze:
          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
      end_points['Logits'] = logits
      end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points
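
# A minimal usage sketch, not part of the original example: it assumes the
# snippet's usual imports (`import tensorflow as tf`, `import tf_slim as slim`)
# and that `inception_v2_base` and `_reduced_kernel_size_for_small_input` are
# defined elsewhere in the same file.
def _inception_v2_smoke_test():
  graph = tf.Graph()
  with graph.as_default():
    images = tf.compat.v1.placeholder(tf.float32, [2, 224, 224, 3])
    logits, end_points = inception_v2(images, num_classes=10,
                                      is_training=False)
    # Spatial squeeze leaves [batch_size, num_classes].
    assert logits.shape.as_list() == [2, 10]
    assert 'Predictions' in end_points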
Exemplo n.º 23
0
def inception_v3_base(inputs, scope=None):
    end_points = {}

    with tf.compat.v1.variable_scope(scope, 'InceptionV3', [inputs]):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1,
                            padding='VALID'):
            net = slim.conv2d(inputs,
                              32, [3, 3],
                              stride=2,
                              scope='Conv2d_1a_3x3')
            net = slim.conv2d(net, 32, [3, 3], scope='Conv2d_2a_3x3')
            net = slim.conv2d(net,
                              64, [3, 3],
                              padding='SAME',
                              scope='Conv2d_2b_3x3')
            net = slim.max_pool2d(net, [3, 3],
                                  stride=2,
                                  scope='MaxPool_3a_3x3')
            net = slim.conv2d(net, 80, [1, 1], scope='Conv2d_3b_1x1')
            net = slim.conv2d(net, 192, [3, 3], scope='Conv2d_4a_3x3')
            net = slim.max_pool2d(net, [3, 3],
                                  stride=2,
                                  scope='MaxPool_5a_3x3')

        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1,
                            padding='SAME'):
            with tf.compat.v1.variable_scope('Mixed_5b'):
                with tf.compat.v1.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           64, [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.compat.v1.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           48, [1, 1],
                                           scope='Conv2d_1a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           64, [5, 5],
                                           scope='Conv2d_1b_5x5')
                with tf.compat.v1.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           64, [1, 1],
                                           scope='Conv2d_2a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           96, [3, 3],
                                           scope='Conv2d_2b_3x3')
                    branch_2 = slim.conv2d(branch_2,
                                           96, [3, 3],
                                           scope='Conv2d_2c_3x3')
                with tf.compat.v1.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_3a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           32, [1, 1],
                                           scope='Conv2d_3b_1x1')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

            with tf.compat.v1.variable_scope('Mixed_5c'):
                with tf.compat.v1.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           64, [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.compat.v1.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           48, [1, 1],
                                           scope='Conv2d_1a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           64, [5, 5],
                                           scope='Conv2d_1b_5x5')
                with tf.compat.v1.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           64, [1, 1],
                                           scope='Conv2d_2a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           96, [3, 3],
                                           scope='Conv2d_2b_3x3')
                    branch_2 = slim.conv2d(branch_2,
                                           96, [3, 3],
                                           scope='Conv2d_2c_3x3')
                with tf.compat.v1.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_3a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           64, [1, 1],
                                           scope='Conv2d_3b_1x1')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

            with tf.compat.v1.variable_scope('Mixed_5d'):
                with tf.compat.v1.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           64, [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.compat.v1.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           48, [1, 1],
                                           scope='Conv2d_1a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           64, [5, 5],
                                           scope='Conv2d_1b_5x5')
                with tf.compat.v1.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           64, [1, 1],
                                           scope='Conv2d_2a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           96, [3, 3],
                                           scope='Conv2d_2b_3x3')
                    branch_2 = slim.conv2d(branch_2,
                                           96, [3, 3],
                                           scope='Conv2d_2c_3x3')
                with tf.compat.v1.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_3a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           64, [1, 1],
                                           scope='Conv2d_3b_1x1')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

            with tf.compat.v1.variable_scope('Mixed_6a'):
                with tf.compat.v1.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           384, [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_0a_3x3')
                with tf.compat.v1.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           64, [1, 1],
                                           scope='Conv2d_1a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           96, [3, 3],
                                           scope='Conv2d_1b_3x3')
                    branch_1 = slim.conv2d(branch_1,
                                           96, [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1c_3x3')
                with tf.compat.v1.variable_scope('Branch_2'):
                    branch_2 = slim.max_pool2d(net, [3, 3],
                                               stride=2,
                                               padding='VALID',
                                               scope='MaxPool_2a_3x3')
                net = tf.concat([branch_0, branch_1, branch_2], 3)

            with tf.compat.v1.variable_scope('Mixed_6b'):
                with tf.compat.v1.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           192, [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.compat.v1.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           128, [1, 1],
                                           scope='Conv2d_1a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           128, [1, 7],
                                           scope='Conv2d_1b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           192, [7, 1],
                                           scope='Conv2d_1c_7x1')
                with tf.compat.v1.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           128, [1, 1],
                                           scope='Conv2d_2a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           128, [7, 1],
                                           scope='Conv2d_2b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           128, [1, 7],
                                           scope='Conv2d_2c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           128, [7, 1],
                                           scope='Conv2d_2d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           192, [1, 7],
                                           scope='Conv2d_2e_1x7')
                with tf.compat.v1.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_3a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           192, [1, 1],
                                           scope='Conv2d_3b_1x1')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

            with tf.compat.v1.variable_scope('Mixed_6c'):
                with tf.compat.v1.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           192, [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.compat.v1.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           160, [1, 1],
                                           scope='Conv2d_1a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           160, [1, 7],
                                           scope='Conv2d_1b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           192, [7, 1],
                                           scope='Conv2d_1c_7x1')
                with tf.compat.v1.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           160, [1, 1],
                                           scope='Conv2d_2a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           160, [7, 1],
                                           scope='Conv2d_2b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           160, [1, 7],
                                           scope='Conv2d_2c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           160, [7, 1],
                                           scope='Conv2d_2d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           192, [1, 7],
                                           scope='Conv2d_2e_1x7')
                with tf.compat.v1.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_3a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           192, [1, 1],
                                           scope='Conv2d_3b_1x1')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

            with tf.compat.v1.variable_scope('Mixed_6d'):
                with tf.compat.v1.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           192, [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.compat.v1.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           160, [1, 1],
                                           scope='Conv2d_1a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           160, [1, 7],
                                           scope='Conv2d_1b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           192, [7, 1],
                                           scope='Conv2d_1c_7x1')
                with tf.compat.v1.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           160, [1, 1],
                                           scope='Conv2d_2a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           160, [7, 1],
                                           scope='Conv2d_2b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           160, [1, 7],
                                           scope='Conv2d_2c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           160, [7, 1],
                                           scope='Conv2d_2d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           192, [1, 7],
                                           scope='Conv2d_2e_1x7')
                with tf.compat.v1.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_3a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           192, [1, 1],
                                           scope='Conv2d_3b_1x1')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

            with tf.compat.v1.variable_scope('Mixed_6e'):
                with tf.compat.v1.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           192, [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.compat.v1.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           192, [1, 1],
                                           scope='Conv2d_1a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           192, [1, 7],
                                           scope='Conv2d_1b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           192, [7, 1],
                                           scope='Conv2d_1c_7x1')
                with tf.compat.v1.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           192, [1, 1],
                                           scope='Conv2d_2a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           192, [7, 1],
                                           scope='Conv2d_2b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           192, [1, 7],
                                           scope='Conv2d_2c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           192, [7, 1],
                                           scope='Conv2d_2d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           192, [1, 7],
                                           scope='Conv2d_2e_1x7')
                with tf.compat.v1.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_3a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           192, [1, 1],
                                           scope='Conv2d_3b_1x1')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

            end_points['Mixed_6e'] = net

            with tf.compat.v1.variable_scope('Mixed_7a'):
                with tf.compat.v1.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           192, [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_0 = slim.conv2d(branch_0,
                                           320, [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_0b_3x3')
                with tf.compat.v1.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           192, [1, 1],
                                           scope='Conv2d_1a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           192, [1, 7],
                                           scope='Conv2d_1b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           192, [7, 1],
                                           scope='Conv2d_1c_7x1')
                    branch_1 = slim.conv2d(branch_1,
                                           192, [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1d_3x3')
                with tf.compat.v1.variable_scope('Branch_2'):
                    branch_2 = slim.max_pool2d(net, [3, 3],
                                               stride=2,
                                               padding='VALID',
                                               scope='MaxPool_2a_3x3')
                net = tf.concat([branch_0, branch_1, branch_2], 3)

            with tf.compat.v1.variable_scope('Mixed_7b'):
                with tf.compat.v1.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           320, [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.compat.v1.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           384, [1, 1],
                                           scope='Conv2d_1a_1x1')
                    branch_1 = tf.concat([
                        slim.conv2d(
                            branch_1, 384, [1, 3], scope='Conv2d_1b_1x3'),
                        slim.conv2d(
                            branch_1, 384, [3, 1], scope='Conv2d_1c_3x1')
                    ], 3)
                with tf.compat.v1.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           448, [1, 1],
                                           scope='Conv2d_2a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           384, [3, 3],
                                           scope='Conv2d_2b_3x3')
                    branch_2 = tf.concat([
                        slim.conv2d(
                            branch_2, 384, [1, 3], scope='Conv2d_2c_1x3'),
                        slim.conv2d(
                            branch_2, 384, [3, 1], scope='Conv2d_2d_3x1')
                    ], 3)
                with tf.compat.v1.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_3a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           192, [1, 1],
                                           scope='Conv2d_3b_1x1')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

            with tf.compat.v1.variable_scope('Mixed_7c'):
                with tf.compat.v1.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           320, [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.compat.v1.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           384, [1, 1],
                                           scope='Conv2d_1a_1x1')
                    branch_1 = tf.concat([
                        slim.conv2d(
                            branch_1, 384, [1, 3], scope='Conv2d_1b_1x3'),
                        slim.conv2d(
                            branch_1, 384, [3, 1], scope='Conv2d_1c_3x1')
                    ], 3)
                with tf.compat.v1.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           448, [1, 1],
                                           scope='Conv2d_2a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           384, [3, 3],
                                           scope='Conv2d_2b_3x3')
                    branch_2 = tf.concat([
                        slim.conv2d(
                            branch_2, 384, [1, 3], scope='Conv2d_2c_1x3'),
                        slim.conv2d(
                            branch_2, 384, [3, 1], scope='Conv2d_2d_3x1')
                    ], 3)
                with tf.compat.v1.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_3a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           192, [1, 1],
                                           scope='Conv2d_3b_1x1')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

            return net, end_points
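
# A minimal shape-check sketch (an assumption, not part of the original
# snippet): with the padding scheme above, a 299x299 input reaches Mixed_7c
# at 8x8 spatial resolution with 320 + 768 + 768 + 192 = 2048 channels.
def _inception_v3_base_smoke_test():
    graph = tf.Graph()
    with graph.as_default():
        images = tf.compat.v1.placeholder(tf.float32, [1, 299, 299, 3])
        net, end_points = inception_v3_base(images)
        assert net.shape.as_list() == [1, 8, 8, 2048]
        assert 'Mixed_6e' in end_points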
Exemplo n.º 24
0
    def build_loss(self):
        """Adds ops for computing loss."""
        with tf.compat.v1.name_scope('compute_loss'):
            self.reconstr_loss = 0
            self.smooth_loss = 0
            self.ssim_loss = 0
            self.icp_transform_loss = 0
            self.icp_residual_loss = 0

            # self.images is organized by ...[scale][B, h, w, seq_len * 3].
            self.images = [{} for _ in range(NUM_SCALES)]
            # Following nested lists are organized by ...[scale][source-target].
            self.warped_image = [{} for _ in range(NUM_SCALES)]
            self.warp_mask = [{} for _ in range(NUM_SCALES)]
            self.warp_error = [{} for _ in range(NUM_SCALES)]
            self.ssim_error = [{} for _ in range(NUM_SCALES)]
            self.icp_transform = [{} for _ in range(NUM_SCALES)]
            self.icp_residual = [{} for _ in range(NUM_SCALES)]

            self.middle_frame_index = util.get_seq_middle(self.seq_length)

            # Compute losses at each scale.
            for s in range(NUM_SCALES):
                # Scale image stack.
                height_s = int(self.img_height / (2**s))
                width_s = int(self.img_width / (2**s))
                self.images[s] = tf.image.resize(
                    self.image_stack, [height_s, width_s],
                    method=tf.image.ResizeMethod.AREA)

                # Smoothness.
                if self.smooth_weight > 0:
                    for i in range(self.seq_length):
                        # In legacy mode, use the depth map from the middle frame only.
                        if not self.legacy_mode or i == self.middle_frame_index:
                            self.smooth_loss += 1.0 / (
                                2**s) * self.depth_smoothness(
                                    self.disp[i][s],
                                    self.images[s][:, :, :, 3 * i:3 * (i + 1)])

                for i in range(self.seq_length):
                    for j in range(self.seq_length):
                        # Only consider adjacent frames.
                        if i == j or abs(i - j) != 1:
                            continue
                        # In legacy mode, only consider the middle frame as target.
                        if self.legacy_mode and j != self.middle_frame_index:
                            continue
                        source = self.images[s][:, :, :, 3 * i:3 * (i + 1)]
                        target = self.images[s][:, :, :, 3 * j:3 * (j + 1)]
                        target_depth = self.depth[j][s]
                        key = '%d-%d' % (i, j)

                        # Extract ego-motion from i to j
                        egomotion_index = min(i, j)
                        egomotion_mult = 1
                        if i > j:
                            # Need to invert the egomotion when going back in
                            # the sequence.
                            egomotion_mult *= -1
                        # For compatibility with SfMLearner, interpret all
                        # egomotion vectors as pointing toward the middle
                        # frame.  Note that unlike SfMLearner, each vector
                        # captures the motion to/from its next frame, not the
                        # center frame.  With seq_length == 3, however, there
                        # is no difference.
                        if self.legacy_mode:
                            if egomotion_index >= self.middle_frame_index:
                                egomotion_mult *= -1
                        egomotion = (egomotion_mult *
                                     self.egomotion[:, egomotion_index, :])

                        # Inverse warp the source image to the target image frame for
                        # photometric consistency loss.
                        self.warped_image[s][key], self.warp_mask[s][key] = (
                            project.inverse_warp(
                                source, target_depth, egomotion,
                                self.intrinsic_mat[:, s, :, :],
                                self.intrinsic_mat_inv[:, s, :, :]))

                        # Reconstruction loss.
                        self.warp_error[s][key] = tf.abs(
                            self.warped_image[s][key] - target)
                        self.reconstr_loss += tf.reduce_mean(
                            input_tensor=self.warp_error[s][key] *
                            self.warp_mask[s][key])
                        # SSIM.
                        if self.ssim_weight > 0:
                            self.ssim_error[s][key] = self.ssim(
                                self.warped_image[s][key], target)
                            # TODO(rezama): This should be min_pool2d().
                            ssim_mask = slim.avg_pool2d(
                                self.warp_mask[s][key], 3, 1, 'VALID')
                            self.ssim_loss += tf.reduce_mean(
                                input_tensor=self.ssim_error[s][key] *
                                ssim_mask)
                        # 3D loss.
                        if self.icp_weight > 0:
                            cloud_a = self.cloud[j][s]
                            cloud_b = self.cloud[i][s]
                            self.icp_transform[s][key], self.icp_residual[s][
                                key] = icp(cloud_a, egomotion, cloud_b)
                            self.icp_transform_loss += 1.0 / (
                                2**s) * tf.reduce_mean(input_tensor=tf.abs(
                                    self.icp_transform[s][key]))
                            self.icp_residual_loss += 1.0 / (
                                2**s) * tf.reduce_mean(input_tensor=tf.abs(
                                    self.icp_residual[s][key]))

            self.total_loss = self.reconstr_weight * self.reconstr_loss
            if self.smooth_weight > 0:
                self.total_loss += self.smooth_weight * self.smooth_loss
            if self.ssim_weight > 0:
                self.total_loss += self.ssim_weight * self.ssim_loss
            if self.icp_weight > 0:
                self.total_loss += self.icp_weight * (self.icp_transform_loss +
                                                      self.icp_residual_loss)
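
# A toy NumPy illustration (an assumption, separate from the class above) of
# the 1.0 / 2**s per-scale weighting that build_loss applies to the
# smoothness and ICP terms: coarser scales contribute exponentially less.
def _scale_weighted_sum(per_scale_losses):
    import numpy as np
    weights = 1.0 / 2.0 ** np.arange(len(per_scale_losses))
    return float(np.sum(weights * np.asarray(per_scale_losses)))

# _scale_weighted_sum([0.4, 0.3, 0.2, 0.1]) == 0.4 + 0.15 + 0.05 + 0.0125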
Exemplo n.º 25
0
def mobilenet_v1(inputs,
                 num_classes=1000,
                 dropout_keep_prob=0.999,
                 is_training=True,
                 min_depth=8,
                 depth_multiplier=1.0,
                 conv_defs=None,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='MobilenetV1',
                 global_pool=False):
    """Mobilenet v1 model for classification.

  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    dropout_keep_prob: the percentage of activation values that are retained.
    is_training: whether is training or not.
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    conv_defs: A list of ConvDef namedtuples specifying the net architecture.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape [B, C]; if False, logits is
        of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    global_pool: Optional boolean flag to control the avgpooling before the
      logits layer. If false or unset, pooling is done with a fixed window
      that reduces default-sized inputs to 1x1, while larger inputs lead to
      larger outputs. If true, any input size is pooled down to 1x1.

  Returns:
    net: a 2D Tensor with the logits (pre-softmax activations) if num_classes
      is a non-zero integer, or the non-dropped-out input to the logits layer
      if num_classes is 0 or None.
    end_points: a dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: Input rank is invalid.
  """
    input_shape = inputs.get_shape().as_list()
    if len(input_shape) != 4:
        raise ValueError('Invalid input tensor rank, expected 4, was: %d' %
                         len(input_shape))

    with tf.compat.v1.variable_scope(scope,
                                     'MobilenetV1', [inputs],
                                     reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = mobilenet_v1_base(
                inputs,
                scope=scope,
                min_depth=min_depth,
                depth_multiplier=depth_multiplier,
                conv_defs=conv_defs)
            with tf.compat.v1.variable_scope('Logits'):
                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(input_tensor=net,
                                         axis=[1, 2],
                                         keepdims=True,
                                         name='global_pool')
                    end_points['global_pool'] = net
                else:
                    # Pooling with a fixed kernel size.
                    kernel_size = _reduced_kernel_size_for_small_input(
                        net, [7, 7])
                    net = slim.avg_pool2d(net,
                                          kernel_size,
                                          padding='VALID',
                                          scope='AvgPool_1a')
                    end_points['AvgPool_1a'] = net
                if not num_classes:
                    return net, end_points
                # 1 x 1 x 1024
                net = slim.dropout(net,
                                   keep_prob=dropout_keep_prob,
                                   scope='Dropout_1b')
                logits = slim.conv2d(net,
                                     num_classes, [1, 1],
                                     activation_fn=None,
                                     normalizer_fn=None,
                                     scope='Conv2d_1c_1x1')
                if spatial_squeeze:
                    logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
            end_points['Logits'] = logits
            if prediction_fn:
                end_points['Predictions'] = prediction_fn(logits,
                                                          scope='Predictions')
    return logits, end_points
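
# A minimal usage sketch (an assumption: `mobilenet_v1_base`,
# `_reduced_kernel_size_for_small_input`, and the usual tf / tf_slim imports
# are defined above). depth_multiplier=0.5 halves every layer's channel count.
def _mobilenet_v1_smoke_test():
    graph = tf.Graph()
    with graph.as_default():
        images = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3])
        logits, end_points = mobilenet_v1(images, num_classes=1001,
                                          depth_multiplier=0.5)
        assert logits.shape.as_list() == [1, 1001]
        assert 'Predictions' in end_points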
Exemplo n.º 26
0
def inception_v3(images,
                 trainable=True,
                 is_training=True,
                 weight_decay=0.00004,
                 stddev=0.1,
                 dropout_keep_prob=0.8,
                 use_batch_norm=True,
                 batch_norm_params=None,
                 add_summaries=True,
                 scope="InceptionV3"):
    """Builds an Inception V3 subgraph for image embeddings.

    Args:
      images: A float32 Tensor of shape [batch, height, width, channels].
      trainable: Whether the inception submodel should be trainable or not.
      is_training: Boolean indicating training mode or not.
      weight_decay: Coefficient for weight regularization.
      stddev: The standard deviation of the truncated normal weight initializer.
      dropout_keep_prob: Dropout keep probability.
      use_batch_norm: Whether to use batch normalization.
      batch_norm_params: Parameters for batch normalization. See
        slim.batch_norm for details.
      add_summaries: Whether to add activation summaries.
      scope: Optional Variable scope.

    Returns:
      end_points: A dictionary of activations from inception_v3 layers.
    """
    # Only consider the inception model to be in training mode if it's trainable.
    is_inception_model_training = trainable and is_training

    if use_batch_norm:
        # Default parameters for batch normalization.
        if not batch_norm_params:
            batch_norm_params = {
                "is_training": is_inception_model_training,
                "trainable": trainable,
                # Decay for the moving averages.
                "decay": 0.9997,
                # Epsilon to prevent 0s in variance.
                "epsilon": 0.001,
                # Collection containing the moving mean and moving variance.
                "variables_collections": {
                    "beta": None,
                    "gamma": None,
                    "moving_mean": ["moving_vars"],
                    "moving_variance": ["moving_vars"],
                }
            }
    else:
        batch_norm_params = None

    if trainable:
        weights_regularizer = slim.l2_regularizer(weight_decay)
    else:
        weights_regularizer = None

    with tf.compat.v1.variable_scope(scope, "InceptionV3", [images]) as scope:
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            weights_regularizer=weights_regularizer,
                            trainable=trainable):
            with slim.arg_scope([slim.conv2d],
                                weights_initializer=tf.compat.v1.
                                truncated_normal_initializer(stddev=stddev),
                                activation_fn=tf.nn.relu,
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=batch_norm_params):
                net, end_points = inception_v3_base(images, scope=scope)
                with tf.compat.v1.variable_scope("logits"):
                    shape = net.get_shape()
                    net = slim.avg_pool2d(net,
                                          shape[1:3],
                                          padding="VALID",
                                          scope="pool")
                    net = slim.dropout(net,
                                       keep_prob=dropout_keep_prob,
                                       is_training=is_inception_model_training,
                                       scope="dropout")
                    net = slim.flatten(net, scope="flatten")

    # Add summaries.
    if add_summaries:
        for v in end_points.values():
            slim.summarize_activation(v)

    return net
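
# A minimal usage sketch (assumptions: graph mode and the `inception_v3_base`
# and slim/tf imports defined above). The function returns the flattened
# pre-logits embedding rather than class logits.
def _inception_v3_embedding_smoke_test():
    graph = tf.Graph()
    with graph.as_default():
        images = tf.compat.v1.placeholder(tf.float32, [4, 299, 299, 3])
        embeddings = inception_v3(images, trainable=False, is_training=False,
                                  add_summaries=False)
        # 2048-d features from the 8x8x2048 InceptionV3 base after pooling.
        assert embeddings.shape.as_list() == [4, 2048]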
Exemplo n.º 27
0
def inception_resnet_v1(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
    """Creates the Inception Resnet V1 model.
    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      bottleneck_layer_size: number of units in the final bottleneck layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.
    Returns:
      net: the bottleneck layer activations of the model.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1,
                    padding='SAME'):
                # 149 x 149 x 32
                net = slim.conv2d(inputs,
                                  32,
                                  3,
                                  stride=2,
                                  padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                net = slim.conv2d(net,
                                  32,
                                  3,
                                  padding='VALID',
                                  scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net,
                                      3,
                                      stride=2,
                                      padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net,
                                  80,
                                  1,
                                  padding='VALID',
                                  scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                net = slim.conv2d(net,
                                  192,
                                  3,
                                  padding='VALID',
                                  scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 256
                net = slim.conv2d(net,
                                  256,
                                  3,
                                  stride=2,
                                  padding='VALID',
                                  scope='Conv2d_4b_3x3')
                end_points['Conv2d_4b_3x3'] = net

                # 5 x Inception-resnet-A
                net = slim.repeat(net, 5, block35, scale=0.17)

                # Reduction-A
                with tf.variable_scope('Mixed_6a'):
                    net = reduction_a(net, 192, 192, 256, 384)
                end_points['Mixed_6a'] = net

                # 10 x Inception-Resnet-B
                net = slim.repeat(net, 10, block17, scale=0.10)

                # Reduction-B
                with tf.variable_scope('Mixed_7a'):
                    net = reduction_b(net)
                end_points['Mixed_7a'] = net

                # 5 x Inception-Resnet-C
                net = slim.repeat(net, 5, block8, scale=0.20)
                net = block8(net, activation_fn=None)

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # pylint: disable=no-member
                    net = slim.avg_pool2d(net,
                                          net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)

                    net = slim.dropout(net,
                                       dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')

                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(net,
                                           bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck',
                                           reuse=False)

    return net, end_points
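
# A minimal usage sketch (an assumption: a TF1-style import such as
# `import tensorflow.compat.v1 as tf`, plus the block35 / block17 / block8 /
# reduction_a / reduction_b helpers referenced above). The returned net is
# the bottleneck embedding, here 128-d.
def _inception_resnet_v1_smoke_test():
    graph = tf.Graph()
    with graph.as_default():
        images = tf.placeholder(tf.float32, [2, 160, 160, 3])
        embeddings, end_points = inception_resnet_v1(
            images, is_training=False, bottleneck_layer_size=128)
        assert embeddings.shape.as_list() == [2, 128]
        assert 'PrePool' in end_points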
Exemplo n.º 28
0
def inception_resnet_v2_base(inputs,
                             final_endpoint='Conv2d_7b_1x1',
                             output_stride=16,
                             align_feature_maps=False,
                             scope=None,
                             activation_fn=tf.nn.relu):
    """Inception model from  http://arxiv.org/abs/1602.07261.

  Constructs an Inception Resnet v2 network from inputs to the given final
  endpoint. This method can construct the network up to the final inception
  block Conv2d_7b_1x1.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
      'Mixed_5b', 'Mixed_6a', 'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1']
    output_stride: A scalar that specifies the requested ratio of input to
      output spatial resolution. Only supports 8 and 16.
    align_feature_maps: When true, changes all the VALID paddings in the network
      to SAME padding so that the feature maps are aligned.
    scope: Optional variable_scope.
    activation_fn: Activation function for block scopes.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a set of activations for external use, for example summaries or
                losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
      or if the output_stride is not 8 or 16, or if the output_stride is 8 and
      we request an end point after 'PreAuxLogits'.
  """
    if output_stride != 8 and output_stride != 16:
        raise ValueError('output_stride must be 8 or 16.')

    padding = 'SAME' if align_feature_maps else 'VALID'

    end_points = {}

    def add_and_check_final(name, net):
        end_points[name] = net
        return name == final_endpoint

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs]):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1,
                            padding='SAME'):
            # 149 x 149 x 32
            net = slim.conv2d(inputs,
                              32,
                              3,
                              stride=2,
                              padding=padding,
                              scope='Conv2d_1a_3x3')
            if add_and_check_final('Conv2d_1a_3x3', net):
                return net, end_points

            # 147 x 147 x 32
            net = slim.conv2d(net,
                              32,
                              3,
                              padding=padding,
                              scope='Conv2d_2a_3x3')
            if add_and_check_final('Conv2d_2a_3x3', net):
                return net, end_points
            # 147 x 147 x 64
            net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
            if add_and_check_final('Conv2d_2b_3x3', net):
                return net, end_points
            # 73 x 73 x 64
            net = slim.max_pool2d(net,
                                  3,
                                  stride=2,
                                  padding=padding,
                                  scope='MaxPool_3a_3x3')
            if add_and_check_final('MaxPool_3a_3x3', net):
                return net, end_points
            # 73 x 73 x 80
            net = slim.conv2d(net,
                              80,
                              1,
                              padding=padding,
                              scope='Conv2d_3b_1x1')
            if add_and_check_final('Conv2d_3b_1x1', net):
                return net, end_points
            # 71 x 71 x 192
            net = slim.conv2d(net,
                              192,
                              3,
                              padding=padding,
                              scope='Conv2d_4a_3x3')
            if add_and_check_final('Conv2d_4a_3x3', net):
                return net, end_points
            # 35 x 35 x 192
            net = slim.max_pool2d(net,
                                  3,
                                  stride=2,
                                  padding=padding,
                                  scope='MaxPool_5a_3x3')
            if add_and_check_final('MaxPool_5a_3x3', net):
                return net, end_points

            # 35 x 35 x 320
            with tf.variable_scope('Mixed_5b'):
                with tf.variable_scope('Branch_0'):
                    tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
                with tf.variable_scope('Branch_1'):
                    tower_conv1_0 = slim.conv2d(net,
                                                48,
                                                1,
                                                scope='Conv2d_0a_1x1')
                    tower_conv1_1 = slim.conv2d(tower_conv1_0,
                                                64,
                                                5,
                                                scope='Conv2d_0b_5x5')
                with tf.variable_scope('Branch_2'):
                    tower_conv2_0 = slim.conv2d(net,
                                                64,
                                                1,
                                                scope='Conv2d_0a_1x1')
                    tower_conv2_1 = slim.conv2d(tower_conv2_0,
                                                96,
                                                3,
                                                scope='Conv2d_0b_3x3')
                    tower_conv2_2 = slim.conv2d(tower_conv2_1,
                                                96,
                                                3,
                                                scope='Conv2d_0c_3x3')
                with tf.variable_scope('Branch_3'):
                    tower_pool = slim.avg_pool2d(net,
                                                 3,
                                                 stride=1,
                                                 padding='SAME',
                                                 scope='AvgPool_0a_3x3')
                    tower_pool_1 = slim.conv2d(tower_pool,
                                               64,
                                               1,
                                               scope='Conv2d_0b_1x1')
                net = tf.concat(
                    [tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1],
                    3)

            if add_and_check_final('Mixed_5b', net): return net, end_points
            # TODO(alemi): Register intermediate endpoints
            net = slim.repeat(net,
                              10,
                              block35,
                              scale=0.17,
                              activation_fn=activation_fn)

            # 33 x 33 x 1088 if output_stride == 8,
            # 17 x 17 x 1088 if output_stride == 16
            use_atrous = output_stride == 8

            with tf.variable_scope('Mixed_6a'):
                with tf.variable_scope('Branch_0'):
                    tower_conv = slim.conv2d(net,
                                             384,
                                             3,
                                             stride=1 if use_atrous else 2,
                                             padding=padding,
                                             scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_1'):
                    tower_conv1_0 = slim.conv2d(net,
                                                256,
                                                1,
                                                scope='Conv2d_0a_1x1')
                    tower_conv1_1 = slim.conv2d(tower_conv1_0,
                                                256,
                                                3,
                                                scope='Conv2d_0b_3x3')
                    tower_conv1_2 = slim.conv2d(tower_conv1_1,
                                                384,
                                                3,
                                                stride=1 if use_atrous else 2,
                                                padding=padding,
                                                scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_2'):
                    tower_pool = slim.max_pool2d(net,
                                                 3,
                                                 stride=1 if use_atrous else 2,
                                                 padding=padding,
                                                 scope='MaxPool_1a_3x3')
                net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)

            if add_and_check_final('Mixed_6a', net): return net, end_points

            # TODO(alemi): register intermediate endpoints
            with slim.arg_scope([slim.conv2d], rate=2 if use_atrous else 1):
                net = slim.repeat(net,
                                  20,
                                  block17,
                                  scale=0.10,
                                  activation_fn=activation_fn)
            if add_and_check_final('PreAuxLogits', net): return net, end_points

            if output_stride == 8:
                # TODO(gpapan): Properly support output_stride for the rest of the net.
                raise ValueError(
                    'output_stride==8 is only supported up to the '
                    'PreAuxLogits end_point for now.')

            # 8 x 8 x 2080
            with tf.variable_scope('Mixed_7a'):
                with tf.variable_scope('Branch_0'):
                    tower_conv = slim.conv2d(net,
                                             256,
                                             1,
                                             scope='Conv2d_0a_1x1')
                    tower_conv_1 = slim.conv2d(tower_conv,
                                               384,
                                               3,
                                               stride=2,
                                               padding=padding,
                                               scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_1'):
                    tower_conv1 = slim.conv2d(net,
                                              256,
                                              1,
                                              scope='Conv2d_0a_1x1')
                    tower_conv1_1 = slim.conv2d(tower_conv1,
                                                288,
                                                3,
                                                stride=2,
                                                padding=padding,
                                                scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_2'):
                    tower_conv2 = slim.conv2d(net,
                                              256,
                                              1,
                                              scope='Conv2d_0a_1x1')
                    tower_conv2_1 = slim.conv2d(tower_conv2,
                                                288,
                                                3,
                                                scope='Conv2d_0b_3x3')
                    tower_conv2_2 = slim.conv2d(tower_conv2_1,
                                                320,
                                                3,
                                                stride=2,
                                                padding=padding,
                                                scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_3'):
                    tower_pool = slim.max_pool2d(net,
                                                 3,
                                                 stride=2,
                                                 padding=padding,
                                                 scope='MaxPool_1a_3x3')
                net = tf.concat(
                    [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool],
                    3)

            if add_and_check_final('Mixed_7a', net): return net, end_points

            # TODO(alemi): register intermediate endpoints
            net = slim.repeat(net,
                              9,
                              block8,
                              scale=0.20,
                              activation_fn=activation_fn)
            net = block8(net, activation_fn=None)

            # 8 x 8 x 1536
            net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
            if add_and_check_final('Conv2d_7b_1x1', net):
                return net, end_points

        raise ValueError('final_endpoint (%s) not recognized' % final_endpoint)
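
A minimal usage sketch for the base above. Hedged: the enclosing function's
signature is not visible in this excerpt; the name inception_resnet_v2_base
and its keyword arguments follow the TF-slim convention and are assumptions.

# Hedged sketch, assuming the base above is exposed as
# inception_resnet_v2_base(inputs, final_endpoint=..., output_stride=...).
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()
images = tf.placeholder(tf.float32, [None, 299, 299, 3])
# Dense 33x33 features: stop at PreAuxLogits with atrous convolutions.
net, end_points = inception_resnet_v2_base(
    images, final_endpoint='PreAuxLogits', output_stride=8)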
Example no. 29
def attention_inception_v3(inputs,
                           num_classes=1000,
                           is_training=True,
                           dropout_keep_prob=0.8,
                           min_depth=16,
                           depth_multiplier=1.0,
                           prediction_fn=slim.softmax,
                           spatial_squeeze=True,
                           reuse=None,
                           create_aux_logits=True,
                           scope='InceptionV3',
                           global_pool=False,
                           attention_module='',
                           attention_position='all'):
    """Inception model from http://arxiv.org/abs/1512.00567.

  "Rethinking the Inception Architecture for Computer Vision"

  Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
  Zbigniew Wojna.

  With the default arguments this method constructs the exact model defined in
  the paper. However, one can experiment with variations of the inception_v3
  network by changing arguments dropout_keep_prob, min_depth and
  depth_multiplier.

  The default image size used to train this network is 299x299.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    is_training: whether the network is being trained.
    dropout_keep_prob: the fraction of activation values that are retained.
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape [B, C]; if False, logits is of
        shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse them, 'scope' must be given.
    create_aux_logits: Whether to create the auxiliary logits.
    scope: Optional variable_scope.
    global_pool: Optional boolean flag to control the avgpooling before the
      logits layer. If false or unset, pooling is done with a fixed window
      that reduces default-sized inputs to 1x1, while larger inputs lead to
      larger outputs. If true, any input size is pooled down to 1x1.
    attention_module: Optional attention module to insert. Accepted values are
      '' (no attention) or 'se_block'.
    attention_position: Where to insert the attention module. Default is
      'all'. Accepted values are 'head', 'extractor', and 'all'.

  Returns:
    net: a Tensor with the logits (pre-softmax activations) if num_classes
      is a non-zero integer, or the non-dropped-out input to the logits layer
      if num_classes is 0 or None.
    end_points: a dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: if 'depth_multiplier' is less than or equal to zero.
  """
    if depth_multiplier <= 0:
        raise ValueError('depth_multiplier must be greater than zero.')
    depth = lambda d: max(int(d * depth_multiplier), min_depth)
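    # Worked example: with depth_multiplier=0.5 and min_depth=16,
    # depth(128) -> max(int(128 * 0.5), 16) = 64, while
    # depth(24) -> max(int(24 * 0.5), 16) = 16 (min_depth takes over).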

    with tf.variable_scope(scope, 'InceptionV3', [inputs],
                           reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = attention_inception_v3_base(
                inputs,
                scope=scope,
                min_depth=min_depth,
                depth_multiplier=depth_multiplier,
                attention_module=attention_module,
                attention_position=attention_position)

            # Auxiliary Head logits
            if create_aux_logits and num_classes:
                with slim.arg_scope(
                    [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1,
                        padding='SAME'):
                    aux_logits = end_points['Mixed_6e']
                    with tf.variable_scope('AuxLogits'):
                        aux_logits = slim.avg_pool2d(aux_logits, [5, 5],
                                                     stride=3,
                                                     padding='VALID',
                                                     scope='AvgPool_1a_5x5')
                        aux_logits = slim.conv2d(aux_logits,
                                                 depth(128), [1, 1],
                                                 scope='Conv2d_1b_1x1')

                        # Shape of feature map before the final layer.
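                        # With the default 299x299 input, Mixed_6e is 17x17,
                        # the 5x5/stride-3 pool above gives 5x5, and
                        # kernel_size stays [5, 5]; smaller inputs shrink it
                        # to cover the whole map.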
                        kernel_size = _reduced_kernel_size_for_small_input(
                            aux_logits, [5, 5])
                        aux_logits = slim.conv2d(
                            aux_logits,
                            depth(768),
                            kernel_size,
                            weights_initializer=trunc_normal(0.01),
                            padding='VALID',
                            scope='Conv2d_2a_{}x{}'.format(*kernel_size))
                        aux_logits = slim.conv2d(
                            aux_logits,
                            num_classes, [1, 1],
                            activation_fn=None,
                            normalizer_fn=None,
                            weights_initializer=trunc_normal(0.001),
                            scope='Conv2d_2b_1x1')
                        if spatial_squeeze:
                            aux_logits = tf.squeeze(aux_logits, [1, 2],
                                                    name='SpatialSqueeze')
                        end_points['AuxLogits'] = aux_logits

            # Final pooling and prediction
            with tf.variable_scope('Logits'):
                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(input_tensor=net,
                                         axis=[1, 2],
                                         keepdims=True,
                                         name='GlobalPool')
                    end_points['global_pool'] = net
                else:
                    # Pooling with a fixed kernel size.
                    kernel_size = _reduced_kernel_size_for_small_input(
                        net, [8, 8])
                    net = slim.avg_pool2d(
                        net,
                        kernel_size,
                        padding='VALID',
                        scope='AvgPool_1a_{}x{}'.format(*kernel_size))
                    end_points['AvgPool_1a'] = net
                if not num_classes:
                    return net, end_points
                # 1 x 1 x 2048
                net = slim.dropout(net,
                                   keep_prob=dropout_keep_prob,
                                   scope='Dropout_1b')
                end_points['PreLogits'] = net
                # 2048
                logits = slim.conv2d(net,
                                     num_classes, [1, 1],
                                     activation_fn=None,
                                     normalizer_fn=None,
                                     scope='Conv2d_1c_1x1')
                if spatial_squeeze:
                    logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
                # 1000
            end_points['Logits'] = logits
            end_points['Predictions'] = prediction_fn(logits,
                                                      scope='Predictions')
    return logits, end_points
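
A hedged usage sketch for attention_inception_v3 above. The arg-scope helper
inception_v3_arg_scope is the standard TF-slim one and is an assumption here;
any arg scope that sets the slim batch-norm and weight defaults will do.

import tensorflow.compat.v1 as tf
import tf_slim as slim

tf.disable_eager_execution()
images = tf.placeholder(tf.float32, [None, 299, 299, 3])
with slim.arg_scope(inception_v3_arg_scope()):  # assumed helper
    logits, end_points = attention_inception_v3(
        images,
        num_classes=1000,
        attention_module='se_block',  # '' disables the attention blocks
        attention_position='all')
probs = end_points['Predictions']  # post-softmax class probabilities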
Example no. 30
def inception_v3_head(
    net,
    end_points,
    final_endpoint='Mixed_7c',
    min_depth=16,
    depth_multiplier=1.0,
    scope=None,
):
    if depth_multiplier <= 0:
        raise ValueError('depth_multiplier must be greater than zero.')
    depth = lambda d: max(int(d * depth_multiplier), min_depth)

    with tf_v1.variable_scope(scope,
                              'InceptionV3', [net],
                              reuse=tf_v1.AUTO_REUSE):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1,
                            padding='SAME'):
            # mixed_8: 8 x 8 x 1280.
            end_point = 'Mixed_7a'
            with tf_v1.variable_scope(end_point):
                with tf_v1.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_0 = slim.conv2d(branch_0,
                                           depth(320), [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_3x3')
                with tf_v1.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_3x3')
                with tf_v1.variable_scope('Branch_2'):
                    branch_2 = slim.max_pool2d(net, [3, 3],
                                               stride=2,
                                               padding='VALID',
                                               scope='MaxPool_1a_3x3')
                net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # mixed_9: 8 x 8 x 2048.
            end_point = 'Mixed_7b'
            with tf_v1.variable_scope(end_point):
                with tf_v1.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(320), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf_v1.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(384), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_1,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0b_1x3'),
                                             slim.conv2d(branch_1,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0b_3x1')
                                         ])
                with tf_v1.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(448), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(384), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_2,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0c_1x3'),
                                             slim.conv2d(branch_2,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0d_3x1')
                                         ])
                with tf_v1.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # mixed_10: 8 x 8 x 2048.
            end_point = 'Mixed_7c'
            with tf_v1.variable_scope(end_point):
                with tf_v1.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(320), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    end_points[end_point + '_b0'] = branch_0
                with tf_v1.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(384), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_1,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0b_1x3'),
                                             slim.conv2d(branch_1,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0c_3x1')
                                         ])
                with tf_v1.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(448), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(384), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_2,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0c_1x3'),
                                             slim.conv2d(branch_2,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0d_3x1')
                                         ])
                with tf_v1.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
        raise ValueError('Unknown final endpoint %s' % final_endpoint)
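
A hedged sketch of driving inception_v3_head above from a mid-network feature
map. The 17x17x768 shape matches the Mixed_6e activation of a default
Inception V3 base and is an assumption about the caller.

import tensorflow.compat.v1 as tf_v1

tf_v1.disable_eager_execution()
# Mixed_6e-like features from an Inception V3 base (default multiplier).
features = tf_v1.placeholder(tf_v1.float32, [None, 17, 17, 768])
net, end_points = inception_v3_head(features, end_points={},
                                    final_endpoint='Mixed_7c')
# Mixed_7a halves 17x17 to 8x8; Mixed_7c yields 8 x 8 x 2048.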