Example No. 1
def mobilenet_v2_net(inputs, is_training=False):
    # Look up the conv definition for the configured model size
    # (e.g. V2_DEF_tiny, V2_DEF_large).
    conv_def = globals()["V2_DEF_" + config.model_name]
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
        logits, end_points = mobilenet_v2.mobilenet_base(
            inputs, num_classes=0, conv_defs=conv_def, is_training=is_training)

        # Pick the endpoints that feed the decoder, one per scale.
        if conv_def == V2_DEF_tiny:
            layers = [5, 8, 11, 12]
        elif conv_def == V2_DEF_small:
            layers = [6, 10, 14, 16]
        elif conv_def == V2_DEF_medium:
            layers = [5, 8, 11, 13, 14]
        elif conv_def == V2_DEF_large:
            layers = [6, 10, 14, 17, 19]
        elif conv_def == V2_DEF_very_large:
            layers = [5, 8, 11, 13, 15, 16]

        # With an initial stride of 2, also expose the early stride-2 layer.
        if config.strides[0] == 2:
            layers.insert(0, 2)

        end_point_map = []
        for layer_no in layers:
            end_point_map.append(end_points["layer_{}".format(layer_no)])

    # Deepest feature map first.
    end_point_map.reverse()
    return logits, end_point_map
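A minimal usage sketch (the placeholder shape is illustrative; the `config` object and `V2_DEF_*` definitions referenced above must already be in scope):

inputs = tf.placeholder(tf.float32, [1, 320, 320, 3])
logits, feature_maps = mobilenet_v2_net(inputs, is_training=True)
# feature_maps is ordered deepest-first because of the reverse() above.
for fmap in feature_maps:
    print(fmap.get_shape().as_list())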
Example No. 2
def testMobilenetBase(self):
    tf.reset_default_graph()
    # Verifies that mobilenet_base returns the pre-pooling layer.
    with slim.arg_scope((mobilenet.depth_multiplier,), min_depth=32):
        net, _ = mobilenet_v2.mobilenet_base(
            tf.placeholder(tf.float32, (10, 224, 224, 16)),
            conv_defs=mobilenet_v2.V2_DEF,
            depth_multiplier=0.1)
        self.assertEqual(net.get_shape().as_list(), [10, 7, 7, 128])
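Where the asserted shape comes from (plain arithmetic, not part of the test): five stride-2 stages downsample 224x224 to 7x7, and the final 1280-wide conv is scaled by the 0.1 multiplier, while min_depth=32 only floors the thinner intermediate layers.

print(224 // 32)        # 7   -- spatial size of the pre-pooling layer
print(int(1280 * 0.1))  # 128 -- final conv width after the depth multiplier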
Example No. 3
def _mobilenet_v2(net,
                  depth_multiplier,
                  output_stride,
                  divisible_by=None,
                  reuse=None,
                  scope=None,
                  final_endpoint=None):
    """Auxiliary function to add support for 'reuse' to mobilenet_v2.

  Args:
    net: Input tensor of shape [batch_size, height, width, channels].
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    output_stride: An integer that specifies the requested ratio of input to
      output spatial resolution. If not None, then we invoke atrous convolution
      if necessary to prevent the network from reducing the spatial resolution
      of the activation maps. Allowed values are 8 (accurate fully convolutional
      mode), 16 (fast fully convolutional mode), 32 (classification mode).
    divisible_by: None (use default setting) or an integer that ensures all
      layers # channels will be divisible by this number. Used in MobileNet.
    reuse: Reuse model variables.
    scope: Optional variable scope.
    final_endpoint: The endpoint to construct the network up to.

  Returns:
    Features extracted by MobileNetv2.
  """
    if divisible_by is None:
        divisible_by = 8 if depth_multiplier == 1.0 else 1
    with tf.variable_scope(scope, 'MobilenetV2', [net], reuse=reuse) as scope:
        return mobilenet_v2.mobilenet_base(
            net,
            conv_defs=mobilenet_v2.V2_DEF,
            depth_multiplier=depth_multiplier,
            min_depth=8 if depth_multiplier == 1.0 else 1,
            divisible_by=divisible_by,
            final_endpoint=final_endpoint or _MOBILENET_V2_FINAL_ENDPOINT,
            output_stride=output_stride,
            scope=scope)
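A minimal sketch of what 'reuse' buys (the placeholder names and shapes are illustrative, and _MOBILENET_V2_FINAL_ENDPOINT is assumed to be defined elsewhere in the module): the second call attaches to the variables the first call created instead of allocating a second backbone.

images_a = tf.placeholder(tf.float32, [1, 513, 513, 3])
images_b = tf.placeholder(tf.float32, [1, 513, 513, 3])
# First call creates the MobilenetV2 variables.
feats_a, _ = _mobilenet_v2(images_a, depth_multiplier=1.0, output_stride=16,
                           scope='MobilenetV2')
# Second call reuses them via reuse=True.
feats_b, _ = _mobilenet_v2(images_b, depth_multiplier=1.0, output_stride=16,
                           reuse=True, scope='MobilenetV2')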
Example No. 4
    def testMultiplier(self):
        op = mobilenet.op
        new_def = copy.deepcopy(mobilenet_v2.V2_DEF)

        def inverse_multiplier(output_params, multiplier):
            # Keep num_outputs an integer under true division (Python 3).
            output_params['num_outputs'] = int(
                output_params['num_outputs'] / multiplier)

        new_def['spec'][0] = op(slim.conv2d,
                                kernel_size=(3, 3),
                                multiplier_func=inverse_multiplier,
                                num_outputs=16)
        _ = mobilenet_v2.mobilenet_base(tf.placeholder(tf.float32,
                                                       (10, 224, 224, 16)),
                                        conv_defs=new_def,
                                        depth_multiplier=0.1)
        s = [
            op.outputs[0].get_shape().as_list()[-1]
            for op in find_ops('Conv2D')
        ]
        # Expect the first layer to be 160 (16 / 0.1); the remaining layers
        # get max(original size * 0.1, 8).
        self.assertEqual([160, 8, 48, 8, 48], s[:5])
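The expected widths can be checked by hand. A rough sketch of the default multiplier rule (the exact rounding lives in mobilenet.depth_multiplier, so treat this as an approximation):

def approx_width(channels, multiplier=0.1, min_depth=8):
    # Approximate default rule: scale by the multiplier, floor at min_depth.
    return max(int(channels * multiplier), min_depth)

print(int(16 / 0.1))     # 160 -- the first conv uses inverse_multiplier instead
print(approx_width(32))  # 8   -- 32 * 0.1 falls below the min_depth floor
print(6 * 8)             # 48  -- expansion convs widen the scaled input 6x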
Example No. 5
def testWithOutputStride8(self):
    tf.reset_default_graph()
    out, _ = mobilenet_v2.mobilenet_base(
        tf.placeholder(tf.float32, (10, 224, 224, 16)),
        conv_defs=mobilenet_v2.V2_DEF,
        output_stride=8)
    # With output_stride=8, a 224x224 input yields 224/8 = 28x28 features.
    self.assertEqual(out.get_shape().as_list()[1:3], [28, 28])
Example No. 6
def testWithOutputStride16AndExplicitPadding(self):
    tf.reset_default_graph()
    out, _ = mobilenet_v2.mobilenet_base(
        tf.placeholder(tf.float32, (10, 224, 224, 16)),
        conv_defs=mobilenet_v2.V2_DEF,
        output_stride=16,
        use_explicit_padding=True,
        depth_multiplier=0.1)
    # With output_stride=16, a 224x224 input yields 224/16 = 14x14 features.
    self.assertEqual(out.get_shape().as_list()[1:3], [14, 14])
Example No. 7
def testMobilenetBase(self):
    tf.reset_default_graph()
    # Verifies that mobilenet_base returns the pre-pooling layer.
    out, _ = mobilenet_v2.mobilenet_base(
        tf.placeholder(tf.float32, (10, 224, 224, 16)),
        conv_defs=mobilenet_v2.V2_DEF,
        depth_multiplier=0.1,
        min_depth=32)
    self.assertEqual(out.get_shape().as_list(), [10, 7, 7, 128])
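The spatial sizes asserted in the last three tests are just the input size divided by the output stride (32 is the default when none is requested); checked directly:

for stride in (8, 16, 32):
    print(stride, 224 // stride)  # 8 -> 28, 16 -> 14, 32 -> 7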
Example No. 8
    def _build_graph(self):
        _, endpoints = mobilenet_base(self.images,
                                      num_classes=self.num_classes)
        # Stop gradient doesn't work for unclear reasons. I confirmed that the Mobilenet
        # weights still changed.
        #endpoints['Stage2'] = tf.stop_gradient(endpoints['Stage2'])
        #endpoints['Stage3'] = tf.stop_gradient(endpoints['Stage3'])
        #endpoints['Stage4'] = tf.stop_gradient(endpoints['Stage4'])

        with tf.variable_scope('fcos'):
            with tf.variable_scope('pyramid'):
                c3 = self._bn_activation_conv(endpoints['Stage2'], 256, 1, 1)
                c4 = self._bn_activation_conv(endpoints['Stage3'], 256, 1, 1)
                c5 = self._bn_activation_conv(endpoints['Stage4'], 256, 1, 1)

                #_get_pyramid(feat, feature_size, top_feat=None)
                p5 = self._get_pyramid(c5, 256)
                p4, top_down = self._get_pyramid(c4, 256, p5)
                p3, _ = self._get_pyramid(c3, 256, top_down)
                p6 = self._bn_activation_conv(p5, 256, 3, 2)
                p7 = self._bn_activation_conv(p6, 256, 3, 2)
            with tf.variable_scope('head'):
                # The FCOS paper shares the heads across pyramid levels;
                # that sharing does not appear to be implemented here.
                p3conf, p3reg, p3center = self._detect_head(p3)
                p4conf, p4reg, p4center = self._detect_head(p4)
                p5conf, p5reg, p5center = self._detect_head(p5)
                p6conf, p6reg, p6center = self._detect_head(p6)
                p7conf, p7reg, p7center = self._detect_head(p7)
                if self.data_format == 'channels_first':
                    p3conf = tf.transpose(p3conf, [0, 2, 3, 1])
                    p3reg = tf.transpose(p3reg, [0, 2, 3, 1])
                    p3center = tf.transpose(p3center, [0, 2, 3, 1])
                    p4conf = tf.transpose(p4conf, [0, 2, 3, 1])
                    p4reg = tf.transpose(p4reg, [0, 2, 3, 1])
                    p4center = tf.transpose(p4center, [0, 2, 3, 1])
                    p5conf = tf.transpose(p5conf, [0, 2, 3, 1])
                    p5reg = tf.transpose(p5reg, [0, 2, 3, 1])
                    p5center = tf.transpose(p5center, [0, 2, 3, 1])
                    p6conf = tf.transpose(p6conf, [0, 2, 3, 1])
                    p6reg = tf.transpose(p6reg, [0, 2, 3, 1])
                    p6center = tf.transpose(p6center, [0, 2, 3, 1])
                    p7conf = tf.transpose(p7conf, [0, 2, 3, 1])
                    p7reg = tf.transpose(p7reg, [0, 2, 3, 1])
                    p7center = tf.transpose(p7center, [0, 2, 3, 1])

                p3shape = [tf.shape(p3center)[1], tf.shape(p3center)[2]]
                p4shape = [tf.shape(p4center)[1], tf.shape(p4center)[2]]
                p5shape = [tf.shape(p5center)[1], tf.shape(p5center)[2]]
                p6shape = [tf.shape(p6center)[1], tf.shape(p6center)[2]]
                p7shape = [tf.shape(p7center)[1], tf.shape(p7center)[2]]
                h3 = tf.range(0.,
                              tf.cast(p3shape[0], tf.float32),
                              dtype=tf.float32)
                w3 = tf.range(0.,
                              tf.cast(p3shape[1], tf.float32),
                              dtype=tf.float32)
                h4 = tf.range(0.,
                              tf.cast(p4shape[0], tf.float32),
                              dtype=tf.float32)
                w4 = tf.range(0.,
                              tf.cast(p4shape[1], tf.float32),
                              dtype=tf.float32)
                h5 = tf.range(0.,
                              tf.cast(p5shape[0], tf.float32),
                              dtype=tf.float32)
                w5 = tf.range(0.,
                              tf.cast(p5shape[1], tf.float32),
                              dtype=tf.float32)
                h6 = tf.range(0.,
                              tf.cast(p6shape[0], tf.float32),
                              dtype=tf.float32)
                w6 = tf.range(0.,
                              tf.cast(p6shape[1], tf.float32),
                              dtype=tf.float32)
                h7 = tf.range(0.,
                              tf.cast(p7shape[0], tf.float32),
                              dtype=tf.float32)
                w7 = tf.range(0.,
                              tf.cast(p7shape[1], tf.float32),
                              dtype=tf.float32)
                [grid_x3, grid_y3] = tf.meshgrid(w3, h3)
                [grid_x4, grid_y4] = tf.meshgrid(w4, h4)
                [grid_x5, grid_y5] = tf.meshgrid(w5, h5)
                [grid_x6, grid_y6] = tf.meshgrid(w6, h6)
                [grid_x7, grid_y7] = tf.meshgrid(w7, h7)

                stride_3, stride_4, stride_5, stride_6, stride_7 = 8, 16, 32, 64, 128
                if self.mode == 'train' or self.mode == 'val':
                    total_loss = []
                    for i in range(self.batch_size):
                        gt_i = self.ground_truth[i, ...]
                        slice_index = tf.argmin(gt_i, axis=0)[0]
                        gt_i = tf.gather(
                            gt_i, tf.range(0, slice_index, dtype=tf.int64))
                        gt_size = tf.sqrt(gt_i[..., 2] * gt_i[..., 3])
                        # Following the Feature Pyramid Networks paper, each
                        # pyramid level is trained only on objects of a
                        # matching spatial scale. The breakpoints are hardcoded:
                        #pyramid_sizes = [64., 128., 256., 512.]
                        pyramid_sizes = [32., 64., 128., 256.]
                        g3 = tf.boolean_mask(gt_i, gt_size <= pyramid_sizes[0])
                        g4 = tf.boolean_mask(
                            gt_i,
                            tf.cast(gt_size >= pyramid_sizes[0], tf.float32) *
                            tf.cast(gt_size <= pyramid_sizes[1], tf.float32) >
                            0.)
                        g5 = tf.boolean_mask(
                            gt_i,
                            tf.cast(gt_size >= pyramid_sizes[1], tf.float32) *
                            tf.cast(gt_size <= pyramid_sizes[2], tf.float32) >
                            0.)
                        g6 = tf.boolean_mask(
                            gt_i,
                            tf.cast(gt_size >= pyramid_sizes[2], tf.float32) *
                            tf.cast(gt_size <= pyramid_sizes[3], tf.float32) >
                            0.)
                        g7 = tf.boolean_mask(gt_i, gt_size >= pyramid_sizes[3])
                        # If a pyramid level has at least one training sample,
                        # compute its loss; otherwise it contributes 0.
                        #_compute_one_image_loss(heatmap_pred, dist_pred, center_pred, ground_truth, grid_y, grid_x, stride, pshape):
                        loss3 = tf.cond(
                            tf.shape(g3)[0] > 0,
                            lambda: self._compute_one_image_loss(
                                p3conf[i, ...], p3reg[i, ...], p3center[
                                    i, ...], g3, grid_y3, grid_x3, stride_3,
                                p3shape), lambda: 0.)
                        loss4 = tf.cond(
                            tf.shape(g4)[0] > 0,
                            lambda: self._compute_one_image_loss(
                                p4conf[i, ...], p4reg[i, ...], p4center[
                                    i, ...], g4, grid_y4, grid_x4, stride_4,
                                p4shape), lambda: 0.)
                        loss5 = tf.cond(
                            tf.shape(g5)[0] > 0,
                            lambda: self._compute_one_image_loss(
                                p5conf[i, ...], p5reg[i, ...], p5center[
                                    i, ...], g5, grid_y5, grid_x5, stride_5,
                                p5shape), lambda: 0.)
                        loss6 = tf.cond(
                            tf.shape(g6)[0] > 0,
                            lambda: self._compute_one_image_loss(
                                p6conf[i, ...], p6reg[i, ...], p6center[
                                    i, ...], g6, grid_y6, grid_x6, stride_6,
                                p6shape), lambda: 0.)
                        loss7 = tf.cond(
                            tf.shape(g7)[0] > 0,
                            lambda: self._compute_one_image_loss(
                                p7conf[i, ...], p7reg[i, ...], p7center[
                                    i, ...], g7, grid_y7, grid_x7, stride_7,
                                p7shape), lambda: 0.)
                        total_loss.append(loss3 + loss4 + loss5 + loss6 +
                                          loss7)
                    self.loss = tf.reduce_mean(
                        total_loss) + self.weight_decay * tf.add_n([
                            tf.nn.l2_loss(var)
                            for var in tf.trainable_variables()
                        ])

                    optimizer = tf.train.AdamOptimizer(self.lr)
                    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                    train_vars = tf.get_collection(
                        tf.GraphKeys.TRAINABLE_VARIABLES, "fcos")
                    train_op = optimizer.minimize(self.loss,
                                                  global_step=self.global_step,
                                                  var_list=train_vars)
                    self.train_op = tf.group([update_ops, train_op])
                else:
                    # Test mode
                    p3conf = tf.reshape(
                        tf.sigmoid(p3conf[0, ...]) *
                        tf.sigmoid(p3center[0, ...]), [-1, self.num_classes])
                    p4conf = tf.reshape(
                        tf.sigmoid(p4conf[0, ...]) *
                        tf.sigmoid(p4center[0, ...]), [-1, self.num_classes])
                    p5conf = tf.reshape(
                        tf.sigmoid(p5conf[0, ...]) *
                        tf.sigmoid(p5center[0, ...]), [-1, self.num_classes])
                    p6conf = tf.reshape(
                        tf.sigmoid(p6conf[0, ...]) *
                        tf.sigmoid(p6center[0, ...]), [-1, self.num_classes])
                    p7conf = tf.reshape(
                        tf.sigmoid(p7conf[0, ...]) *
                        tf.sigmoid(p7center[0, ...]), [-1, self.num_classes])
                    pconf = tf.concat([p3conf, p4conf, p5conf, p6conf, p7conf],
                                      axis=0)

                    p3reg = p3reg[0, ...]
                    p4reg = p4reg[0, ...]
                    p5reg = p5reg[0, ...]
                    p6reg = p6reg[0, ...]
                    p7reg = p7reg[0, ...]
                    grid_y3 = tf.expand_dims(grid_y3, axis=-1)
                    grid_x3 = tf.expand_dims(grid_x3, axis=-1)
                    grid_y4 = tf.expand_dims(grid_y4, axis=-1)
                    grid_x4 = tf.expand_dims(grid_x4, axis=-1)
                    grid_y5 = tf.expand_dims(grid_y5, axis=-1)
                    grid_x5 = tf.expand_dims(grid_x5, axis=-1)
                    grid_y6 = tf.expand_dims(grid_y6, axis=-1)
                    grid_x6 = tf.expand_dims(grid_x6, axis=-1)
                    grid_y7 = tf.expand_dims(grid_y7, axis=-1)
                    grid_x7 = tf.expand_dims(grid_x7, axis=-1)

                    p3_y1 = grid_y3 - p3reg[..., 2:3]
                    p3_y2 = grid_y3 + p3reg[..., 3:4]
                    p3_x1 = grid_x3 - p3reg[..., 0:1]
                    p3_x2 = grid_x3 + p3reg[..., 1:2]
                    p4_y1 = grid_y4 - p4reg[..., 2:3]
                    p4_y2 = grid_y4 + p4reg[..., 3:4]
                    p4_x1 = grid_x4 - p4reg[..., 0:1]
                    p4_x2 = grid_x4 + p4reg[..., 1:2]
                    p5_y1 = grid_y5 - p5reg[..., 2:3]
                    p5_y2 = grid_y5 + p5reg[..., 3:4]
                    p5_x1 = grid_x5 - p5reg[..., 0:1]
                    p5_x2 = grid_x5 + p5reg[..., 1:2]
                    p6_y1 = grid_y6 - p6reg[..., 2:3]
                    p6_y2 = grid_y6 + p6reg[..., 3:4]
                    p6_x1 = grid_x6 - p6reg[..., 0:1]
                    p6_x2 = grid_x6 + p6reg[..., 1:2]
                    p7_y1 = grid_y7 - p7reg[..., 2:3]
                    p7_y2 = grid_y7 + p7reg[..., 3:4]
                    p7_x1 = grid_x7 - p7reg[..., 0:1]
                    p7_x2 = grid_x7 + p7reg[..., 1:2]

                    p3bbox = tf.reshape(
                        tf.concat([p3_y1, p3_x1, p3_y2, p3_x2], axis=-1),
                        [-1, 4]) * stride_3
                    p4bbox = tf.reshape(
                        tf.concat([p4_y1, p4_x1, p4_y2, p4_x2], axis=-1),
                        [-1, 4]) * stride_4
                    p5bbox = tf.reshape(
                        tf.concat([p5_y1, p5_x1, p5_y2, p5_x2], axis=-1),
                        [-1, 4]) * stride_5
                    p6bbox = tf.reshape(
                        tf.concat([p6_y1, p6_x1, p6_y2, p6_x2], axis=-1),
                        [-1, 4]) * stride_6
                    p7bbox = tf.reshape(
                        tf.concat([p7_y1, p7_x1, p7_y2, p7_x2], axis=-1),
                        [-1, 4]) * stride_7
                    pbbox = tf.concat([p3bbox, p4bbox, p5bbox, p6bbox, p7bbox],
                                      axis=0)

                    filter_mask = tf.greater_equal(pconf,
                                                   self.nms_score_threshold)
                    scores = []
                    class_id = []
                    bbox = []
                    for i in range(self.num_classes - 1):
                        scoresi = tf.boolean_mask(pconf[:, i], filter_mask[:,
                                                                           i])
                        bboxi = tf.boolean_mask(pbbox, filter_mask[:, i])
                        selected_indices = tf.image.non_max_suppression(
                            bboxi,
                            scoresi,
                            self.nms_max_boxes,
                            self.nms_iou_threshold,
                        )
                        scores.append(tf.gather(scoresi, selected_indices))
                        bbox.append(tf.gather(bboxi, selected_indices))
                        class_id.append(
                            tf.ones_like(tf.gather(scoresi, selected_indices),
                                         tf.int32) * i)
                    bbox = tf.concat(bbox, axis=0)
                    scores = tf.concat(scores, axis=0)
                    class_id = tf.concat(class_id, axis=0)
                    self.detection_pred = [scores, bbox, class_id]
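The per-level ground-truth split above can be sanity-checked in isolation. A plain-NumPy sketch with made-up box sizes (note that both bounds in the TF code are inclusive, so a box exactly on a breakpoint trains two adjacent levels):

import numpy as np

pyramid_sizes = [32., 64., 128., 256.]  # the hardcoded breakpoints
gt_sizes = np.array([20., 50., 100., 200., 400.])  # sqrt(w * h) per box
# sqrt-size <= 32 trains P3, 32..64 trains P4, ..., >= 256 trains P7.
print(3 + np.searchsorted(pyramid_sizes, gt_sizes))  # [3 4 5 6 7]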
Example No. 9
import tensorflow as tf

from mobilenet_v2 import mobilenet_base

# Load a test image and add a batch dimension.
image = tf.image.decode_jpeg(tf.read_file("test.jpg"))
images = tf.expand_dims(image, 0)

# Standard MobileNet preprocessing: scale pixel values to [-1, 1].
images = tf.cast(images, tf.float32) / 128. - 1
images.set_shape((None, None, None, 3))
images = tf.image.resize_images(images, (128, 64))

net, end = mobilenet_base(images)
print(end)
print("========== end ============")
for part in end:
    print(part)
# Caution: if this mobilenet_base builds only the backbone (as in the slim
# reference implementation), there is no "Predictions" endpoint and the
# lookup below raises a KeyError; that key comes from the classification head.
x = end["Predictions"]
print(x)
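The script above only prints graph tensors. To get concrete feature values under TF1 semantics, run the graph in a session (a minimal sketch; the exact output shape depends on the local mobilenet_v2 module):

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    features = sess.run(net)
    print(features.shape)  # e.g. (1, 4, 2, 1280) for a 128x64 input at stride 32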