Example #1
def resnet_v1_50(inputs,
                 num_classes=None,
                 is_training=True,
                 global_pool=True,
                 output_stride=None,
                 include_root_block=True,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='resnet_v1_50'):
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]

    return resnet_v1(inputs,
                     blocks,
                     num_classes=num_classes,
                     is_training=is_training,
                     global_pool=global_pool,
                     output_stride=output_stride,
                     include_root_block=include_root_block,
                     spatial_squeeze=spatial_squeeze,
                     reuse=reuse,
                     scope=scope)
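
For context, a minimal usage sketch for the function above (the input tensor is a hypothetical placeholder; assumes TF 1.x with tf.contrib.slim and the surrounding resnet_v1/resnet_utils modules importable):

import tensorflow as tf
slim = tf.contrib.slim

# Hypothetical NHWC image batch.
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
    # net: logits of shape [batch, num_classes] after the spatial squeeze;
    # end_points: dict of intermediate activations keyed by scope name.
    net, end_points = resnet_v1_50(images, num_classes=1000, is_training=False)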
Example #2
    def testEndPointsV2(self):
        """Test the end points of a tiny v2 bottleneck network."""
        bottleneck = resnet_v2.bottleneck
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
            resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])
        ]
        inputs = create_test_input(2, 32, 16, 3)
        with slim.arg_scope(resnet_utils.resnet_arg_scope()):
            _, end_points = self._resnet_plain(inputs, blocks, scope='tiny')
        expected = [
            'tiny/block1/unit_1/bottleneck_v2/shortcut',
            'tiny/block1/unit_1/bottleneck_v2/conv1',
            'tiny/block1/unit_1/bottleneck_v2/conv2',
            'tiny/block1/unit_1/bottleneck_v2/conv3',
            'tiny/block1/unit_2/bottleneck_v2/conv1',
            'tiny/block1/unit_2/bottleneck_v2/conv2',
            'tiny/block1/unit_2/bottleneck_v2/conv3',
            'tiny/block2/unit_1/bottleneck_v2/shortcut',
            'tiny/block2/unit_1/bottleneck_v2/conv1',
            'tiny/block2/unit_1/bottleneck_v2/conv2',
            'tiny/block2/unit_1/bottleneck_v2/conv3',
            'tiny/block2/unit_2/bottleneck_v2/conv1',
            'tiny/block2/unit_2/bottleneck_v2/conv2',
            'tiny/block2/unit_2/bottleneck_v2/conv3'
        ]
        self.assertItemsEqual(expected, end_points)
Example #3
def resnet_v1_200(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  spatial_squeeze=True,
                  reuse=None,
                  scope='resnet_v1_200'):
    """ResNet-200 model of [2]. See resnet_v1() for arg and return description."""
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 23 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    return resnet_v1(inputs,
                     blocks,
                     num_classes,
                     is_training,
                     global_pool=global_pool,
                     output_stride=output_stride,
                     include_root_block=True,
                     spatial_squeeze=spatial_squeeze,
                     reuse=reuse,
                     scope=scope)
Example #4
    def _resnet_small(self,
                      inputs,
                      num_classes=None,
                      is_training=True,
                      global_pool=True,
                      output_stride=None,
                      include_root_block=True,
                      reuse=None,
                      scope='resnet_v2_small'):
        """A shallow and thin ResNet v2 for faster tests."""
        bottleneck = resnet_v2.bottleneck
        blocks = [
            resnet_utils.Block('block1', bottleneck,
                               [(4, 1, 1)] * 2 + [(4, 1, 2)]),
            resnet_utils.Block('block2', bottleneck,
                               [(8, 2, 1)] * 2 + [(8, 2, 2)]),
            resnet_utils.Block('block3', bottleneck,
                               [(16, 4, 1)] * 2 + [(16, 4, 2)]),
            resnet_utils.Block('block4', bottleneck, [(32, 8, 1)] * 2)
        ]
        return resnet_v2.resnet_v2(inputs,
                                   blocks,
                                   num_classes,
                                   is_training=is_training,
                                   global_pool=global_pool,
                                   output_stride=output_stride,
                                   include_root_block=include_root_block,
                                   reuse=reuse,
                                   scope=scope)
Example #5
def resnet_v1_50(inputs,
                 num_classes=None,
                 is_training=True,
                 global_pool=True,
                 output_stride=None,
                 reuse=None,
                 scope='resnet_v1_50'):
    """ResNet-50 model of [1]. See resnet_v1() for arg and return description.
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385

  """
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    return resnet_v1(inputs,
                     blocks,
                     num_classes,
                     is_training,
                     global_pool=global_pool,
                     output_stride=output_stride,
                     include_root_block=True,
                     reuse=reuse,
                     scope=scope)
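
A hedged sketch of restoring pretrained ImageNet weights into the graph built by this function ('resnet_v1_50.ckpt' is a placeholder path; slim.assign_from_checkpoint_fn and slim.get_model_variables are standard tf.contrib.slim helpers):

# Restore all model variables under the 'resnet_v1_50' scope from a
# checkpoint; the path below is a placeholder, not part of this code.
init_fn = slim.assign_from_checkpoint_fn(
    'resnet_v1_50.ckpt',
    slim.get_model_variables('resnet_v1_50'),
    ignore_missing_vars=True)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)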
Example #6
def resnet_frcnn(inputs,
                 rois=None,
                 global_pool=True,
                 reuse=None,
                 fc_layers=True,
                 scope='resnet_v1_50'):
    blocks = [
        resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 3),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 2)] + [(512, 128, 1)] * 3),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 2)] + [(1024, 256, 1)] * 5),
        resnet_utils.Block('block4', bottleneck,
                           [(2048, 512, 1)] + [(2048, 512, 1)] * 2)
    ]

    if rois is None:
        log.warning("No RoI transmitted, recreating normal ResNet")
        if not fc_layers:
            blocks = blocks[:-1]
            global_pool = False
        else:
            blocks = blocks[:-1] + [
                resnet_utils.Block('block4', bottleneck,
                                   [(2048, 512, 2)] + [(2048, 512, 1)] * 2)
            ]
        net, endpoints = resnet_v1.resnet_v1(inputs,
                                             blocks,
                                             global_pool=global_pool,
                                             reuse=reuse,
                                             scope=scope)
    else:
        if not fc_layers:
            raise NotImplementedError
        net = inputs
        net, ep1 = resnet_v1.resnet_v1(net,
                                       blocks[:-1],
                                       global_pool=False,
                                       reuse=reuse,
                                       scope=scope)

        z = tf.zeros(tf.stack([tf.shape(rois)[0]]), dtype=tf.int32)
        net = tf.image.crop_and_resize(net,
                                       rois,
                                       z, [7, 7],
                                       name="roi_warping")

        net, ep2 = resnet_v1.resnet_v1(net,
                                       blocks[-1:],
                                       global_pool=global_pool,
                                       include_root_block=False,
                                       reuse=reuse,
                                       scope=scope)
        if global_pool:
            net = slim.flatten(net)
        endpoints = ep1.copy()
        endpoints.update(ep2)
        # endpoints = {**ep1, **ep2}  # python3.5, fix it when we ditch fedora

    return net, endpoints
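
For illustration, a hedged call sketch: tf.image.crop_and_resize expects the RoIs as normalized [y1, x1, y2, x2] boxes, so a plausible invocation (all tensors here are hypothetical) looks like:

images = tf.placeholder(tf.float32, [1, 600, 800, 3])
# Two hypothetical RoIs in normalized [y1, x1, y2, x2] coordinates.
rois = tf.constant([[0.0, 0.0, 0.5, 0.5],
                    [0.25, 0.25, 1.0, 1.0]], dtype=tf.float32)
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
    # net holds one block4-encoded, globally pooled and flattened feature
    # vector per RoI (the 7x7 crops act as a mini-batch for block4).
    net, endpoints = resnet_frcnn(images, rois=rois, fc_layers=True)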
Example #7
def resnet_v1_152(inputs,
                  num_classes=None,
                  global_pool=True,
                  output_stride=None,
                  reuse=None,
                  scope='resnet_v1_152'):
  """ResNet-152 model of [1]. See resnet_v1() for arg and return description."""
  blocks = [
      resnet_utils.Block('block1', bottleneck,
                         [(256, 64, 1)] * 2 + [(256, 64, 2)]),
      resnet_utils.Block('block2', bottleneck,
                         [(512, 128, 1)] * 7 + [(512, 128, 2)]),
      resnet_utils.Block('block3', bottleneck,
                         [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
      resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
  ]
  return resnet_v1(
      inputs,
      blocks,
      num_classes,
      global_pool,
      output_stride,
      include_root_block=True,
      reuse=reuse,
      scope=scope)
Example #8
def resnet_v1_101(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=False,
                  output_stride=None,
                  reuse=None,
                  scope='resnet_v1_101'):
    """ResNet-101 model of [1]."""
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    return resnet_v1(inputs,
                     blocks,
                     num_classes,
                     is_training,
                     global_pool=global_pool,
                     output_stride=output_stride,
                     include_root_block=True,
                     reuse=reuse,
                     scope=scope)
Example #9
def resnet_v1_101(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  reuse=None,
                  noise_fn=None,
                  scope='resnet_v1_101'):
    """ResNet-101 model of [1]. See resnet_v1() for arg and return description."""
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    return resnet_v1(inputs,
                     blocks,
                     num_classes,
                     is_training,
                     global_pool=global_pool,
                     include_root_block=True,
                     reuse=reuse,
                     noise_fn=noise_fn,
                     scope=scope)
Example #10
    def create_trunk(self, images):
        red, green, blue = tf.split(images * 255, 3, axis=3)
        images = tf.concat([blue, green, red], 3) - MEAN_COLOR

        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(is_training=self.training,
                                           weight_decay=self.weight_decay,
                                           batch_norm_decay=args.bn_decay)):
            blocks = [
                resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 3),
                resnet_utils.Block('block2', bottleneck,
                                   [(512, 128, 2)] + [(512, 128, 1)] * 3),
                resnet_utils.Block('block3', bottleneck, [(1024, 256, 2)] +
                                   [(1024, 256, 1)] * self.num_block3),
                resnet_utils.Block('block4', bottleneck,
                                   [(2048, 512, 2)] + [(2048, 512, 1)] * 2)
            ]

            net, endpoints = resnet_v1.resnet_v1(images,
                                                 blocks,
                                                 global_pool=False,
                                                 reuse=self.reuse,
                                                 scope=self.scope)
            self.outputs = endpoints
        self.add_extra_layers(net)
Example #11
def resnet_v2_26_2(inputs,
                   num_classes=None,
                   is_training=True,
                   global_pool=True,
                   output_stride=None,
                   reuse=None,
                   scope='resnet_v2_26_2'):
    """ResNet-50 model of [1]. See resnet_v2() for arg and return description."""
    k = 2
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256 * k, 64 * k, 1)] + [(256 * k, 64 * k, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512 * k, 128 * k, 1)] + [(512 * k, 128 * k, 2)]),
        resnet_utils.Block('block3', bottleneck, [(1024 * k, 256 * k, 1)] +
                           [(1024 * k, 256 * k, 2)]),
        resnet_utils.Block('block4', bottleneck, [(2048 * k, 512 * k, 1)] * 2)
    ]
    return resnet_v2(inputs,
                     blocks,
                     num_classes,
                     is_training=is_training,
                     global_pool=global_pool,
                     output_stride=output_stride,
                     include_root_block=True,
                     reuse=reuse,
                     scope=scope)
Example #12
    def _atrousValues(self, bottleneck):
        """Verify the values of dense feature extraction by atrous convolution.

        Make sure that dense feature extraction by stack_blocks_dense()
        followed by subsampling gives identical results to feature extraction
        at the nominal network output stride using the simple
        self._stack_blocks_nondense() above.

        Args:
          bottleneck: The bottleneck function.
        """
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
            resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]),
            resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]),
            resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)])
        ]
        nominal_stride = 8

        # Test both odd and even input dimensions.
        height = 30
        width = 31
        with slim.arg_scope(resnet_utils.resnet_arg_scope()):
            with slim.arg_scope([slim.batch_norm], is_training=False):
                for output_stride in [1, 2, 4, 8, None]:
                    with tf.Graph().as_default():
                        with self.test_session() as sess:
                            tf.set_random_seed(0)
                            inputs = create_test_input(1, height, width, 3)
                            # Dense feature extraction followed by subsampling.
                            output = resnet_utils.stack_blocks_dense(
                                inputs, blocks, output_stride)
                            if output_stride is None:
                                factor = 1
                            else:
                                factor = nominal_stride // output_stride

                            output = resnet_utils.subsample(output, factor)
                            # Make the two networks use the same weights.
                            tf.get_variable_scope().reuse_variables()
                            # Feature extraction at the nominal network rate.
                            expected = self._stack_blocks_nondense(
                                inputs, blocks)
                            sess.run(tf.global_variables_initializer())
                            output, expected = sess.run([output, expected])
                            self.assertAllClose(output,
                                                expected,
                                                atol=1e-4,
                                                rtol=1e-4)
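
As a numeric aside on the bookkeeping above: with nominal_stride = 8, dense extraction at output_stride = 2 is subsampled by factor = 8 // 2 = 4 before being compared against the nondense baseline. A tiny sketch of that subsample step (assumes the same resnet_utils and create_test_input as in this test file):

# resnet_utils.subsample keeps every `factor`-th spatial location via a
# 1x1 max-pool with the given stride, aligning the dense feature grid
# with the nominal stride-8 grid.
x = create_test_input(1, 8, 8, 1)
aligned = resnet_utils.subsample(x, 4)  # spatial size 8x8 -> 2x2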
Example #13
def resnet_v1_block(scope, base_depth, num_units, stride):
    """Helper function for creating a resnet_v1 bottleneck block.
       帮助函数用于创建一个resnet_v1的瓶颈块
    Args:
      scope: The scope of the block.
             块的scope
      base_depth: The depth of the bottleneck layer for each unit.
                  每个单元瓶颈层的深度
      num_units: The number of units in the block.
                 块中单元的数量
      stride: The stride of the block, implemented as a stride in the last unit. All other units have stride=1.
              块的步长,被用于在最后一个单元。其它所有单元的的stride=1

    Returns:
      A resnet_v1 bottleneck block.
      一个resnet_v1残差快
    """
    return resnet_utils.Block(scope, bottleneck, [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': 1
    }] * (num_units - 1) + [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': stride
    }])
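
To make the expansion concrete: under this helper, a three-unit block with stride 2 unrolls into two stride-1 units followed by one stride-2 unit, i.e. resnet_v1_block('block1', base_depth=64, num_units=3, stride=2) is equivalent to:

resnet_utils.Block('block1', bottleneck, [
    {'depth': 256, 'depth_bottleneck': 64, 'stride': 1},
    {'depth': 256, 'depth_bottleneck': 64, 'stride': 1},
    {'depth': 256, 'depth_bottleneck': 64, 'stride': 2},
])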
Example #14
def resnet_v1_block(scope, base_depth, num_units, stride):
    return utils.Block(scope, bottleneck, [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': 1
    }] * (num_units - 1) + [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': stride
    }])
Example #15
    def resnext_v2_block(self, scope, base_depth, num_units, stride):
        return resnet_utils.Block(
            scope, self.bottle_x_neck, [{
                'depth': base_depth * 2,
                'depth_bottleneck': base_depth,
                'stride': 1
            }] * (num_units - 1) + [{
                'depth': base_depth * 2,
                'depth_bottleneck': base_depth,
                'stride': stride
            }])
Example #16
    def resnet_v2_block(self, scope, base_depth, num_units, stride):
        """Helper function for creating a resnet_v2 bottleneck block.

        Args:
            scope: The scope of the block.
            base_depth: The depth of the bottleneck layer for each unit.
            num_units: The number of units in the block.
            stride: The stride of the block, implemented as a stride in the last unit.
                All other units have stride=1.

        Returns:
            A resnet_v2 bottleneck block.
        """

        if 'block4' in scope:
            return resnet_utils.Block(
                scope, self.bottleneck,
                [{
                    'depth': base_depth * 4,
                    'depth_bottleneck': base_depth,
                    'stride': 1,
                    'rate': 1,
                    'deformable': self.deformable,
                    'attention_option': self.attention_option
                }] * num_units)
        else:
            return resnet_utils.Block(
                scope, self.bottleneck, [{
                    'depth': base_depth * 4,
                    'depth_bottleneck': base_depth,
                    'stride': 1
                }] * (num_units - 1) + [{
                    'depth': base_depth * 4,
                    'depth_bottleneck': base_depth,
                    'stride': stride
                }])
Example #17
def resnet_v2_block(scope, base_depth, num_units, stride):
    """Helper function for creating a resnet_v2 bottleneck block.

        Args:
            scope: The scope of the block.
            base_depth: The depth of the bottleneck layer for each unit.
            num_units: The number of units in the block.
            stride: The stride of the block, implemented as a stride in the last unit.
            All other units have stride=1.

        Returns:
            A resnet_v2 bottleneck block.
    """
    # Block is a namedtuple, so once scope/unit_fn are passed by keyword,
    # the unit list must also be passed by keyword (a positional argument
    # after keyword arguments is a SyntaxError).
    return resnet_utils.Block(
        scope=scope,
        unit_fn=bottleneck,
        args=[{'depth': base_depth * 4, 'depth_bottleneck': base_depth, 'stride': 1}] * (num_units - 1) +
             [{'depth': base_depth * 4, 'depth_bottleneck': base_depth, 'stride': stride}])
Example #18
def resnet_v2_block(scope, base_depth, num_planes, stride):
    """
    Args:
        scope: The scope of the block
        base_depth: The depth of bottleneck layer for each unit
        num_planes: the number of planes in the block
        stride: The stride of the block, implemented as a stride in the last unit
          All other stride is 1

    Returns:
        A resnet_v2 bottleneck block object
    """
    return ru.Block(scope, bottleneck, [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': 1
    }] * (num_planes - 1) + [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': stride
    }])
Example #19
    def add_extra_layers(self, net):
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(is_training=self.training,
                                           weight_decay=self.weight_decay,
                                           batch_norm_decay=args.bn_decay)):
            block_depth = 2
            num_fm = 2048
            '''
            blocks = [
                resnet_utils.Block(
                    'block5', bottleneck, [(num_fm, num_fm//4, 2)] + [(num_fm, num_fm//4, 1)] * (block_depth-1)),
                resnet_utils.Block(
                    'block6', bottleneck, [(num_fm, num_fm//4, 2)] + [(num_fm, num_fm//4, 1)] * (block_depth-1)),
                resnet_utils.Block(
                    'block7', bottleneck, [(num_fm, num_fm//4, 2)] + [(num_fm, num_fm//4, 1)] * (block_depth-1)),
            ]
            '''
            blocks = [
                resnet_utils.Block(
                    'block5', bottleneck, [(num_fm // 2, num_fm // 2, 2)] +
                    [(num_fm // 2, num_fm // 2, 1)] * (block_depth - 1)),
                resnet_utils.Block(
                    'block6', bottleneck, [(num_fm // 2, num_fm // 2, 2)] +
                    [(num_fm // 2, num_fm // 2, 1)] * (block_depth - 1)),
                resnet_utils.Block(
                    'block7', bottleneck, [(num_fm // 2, num_fm // 2, 1)] +
                    [(num_fm // 2, num_fm // 2, 1)] * (block_depth - 1)),
            ]
            if args.image_size == 512:
                blocks += [
                    resnet_utils.Block(
                        'block8', bottleneck, [(num_fm, num_fm // 4, 2)] +
                        [(num_fm, num_fm // 4, 1)] * (block_depth - 1)),
                ]

            net, endpoints = resnet_v1.resnet_v1(net,
                                                 blocks,
                                                 global_pool=False,
                                                 include_root_block=False,
                                                 reuse=self.reuse,
                                                 scope=DEFAULT_SSD_SCOPE)
            self.outputs.update(endpoints)
            with tf.variable_scope(DEFAULT_SSD_SCOPE + "_back",
                                   reuse=self.reuse):
                end_points_collection = "reverse_ssd_end_points"
                #with slim.arg_scope([slim.conv2d, attention],
                #with slim.arg_scope([slim.conv2d, sub_pixel_skip],
                #with slim.arg_scope([slim.conv2d, noconcat],
                #with slim.arg_scope([slim.conv2d, bottleneck_skip],
                with slim.arg_scope([slim.conv2d, tail_att],
                                    outputs_collections=end_points_collection):
                    top_fm = args.top_fm
                    int_fm = top_fm // 4
                    if args.image_size == 512:
                        # as long as the number of pooling layers is bigger due to
                        # the higher resolution, an extra layer is appended
                        #net = attention(net, self.outputs[DEFAULT_SSD_SCOPE + '/block7'],
                        #                     top_fm, int_fm, scope='block_rev7')
                        #net = sub_pixel_skip(net, self.outputs[DEFAULT_SSD_SCOPE + '/block7'],
                        #                      top_fm, int_fm, scope='block_rev7')
                        #net = noconcat(net, self.outputs[DEFAULT_SSD_SCOPE+'/block7'],
                        #                      top_fm, int_fm, scope='block_rev7')
                        #net = bottleneck_skip(net, self.outputs[DEFAULT_SSD_SCOPE+'/block7'],
                        #                      top_fm, int_fm, scope='block_rev7')
                        net = tail_att(net,
                                       self.outputs[DEFAULT_SSD_SCOPE +
                                                    '/block7'],
                                       top_fm,
                                       int_fm,
                                       scope='block_rev7')
                    '''
                    net = attention(net, self.outputs[DEFAULT_SSD_SCOPE + '/block6'],
                                         top_fm, int_fm, scope='block_rev6')
                    net = attention(net, self.outputs[DEFAULT_SSD_SCOPE + '/block5'],
                                         top_fm, int_fm, scope='block_rev5')
                    net = attention(net, self.outputs[self.scope + '/block4'],
                                         top_fm, int_fm, scope='block_rev4')
                    net = attention(net, self.outputs[self.scope + '/block3'],
                                         top_fm, int_fm, scope='block_rev3')
                    net = attention(net, self.outputs[self.scope + '/block2'],
                                         top_fm, int_fm, scope='block_rev2')
                    '''
                    '''
                    net = sub_pixel_skip(net, self.outputs[DEFAULT_SSD_SCOPE + '/block6'],
                                          top_fm, int_fm, scope='block_rev6')
                    net = sub_pixel_skip(net, self.outputs[DEFAULT_SSD_SCOPE + '/block5'],
                                          top_fm, int_fm, scope='block_rev5')
                    net = sub_pixel_skip(net, self.outputs[self.scope + '/block4'],
                                          top_fm, int_fm, scope='block_rev4')
                    net = sub_pixel_skip(net, self.outputs[self.scope + '/block3'],
                                          top_fm, int_fm, scope='block_rev3')
                    net = sub_pixel_skip(net, self.outputs[self.scope + '/block2'],
                                          top_fm, int_fm, scope='block_rev2')
                    '''
                    '''
                    net = noconcat(net, self.outputs[DEFAULT_SSD_SCOPE + '/block6'],
                                          top_fm, int_fm, scope='block_rev6')
                    net = noconcat(net, self.outputs[DEFAULT_SSD_SCOPE + '/block5'],
                                          top_fm, int_fm, scope='block_rev5')
                    net = noconcat(net, self.outputs[self.scope + '/block4'],
                                          top_fm, int_fm, scope='block_rev4')
                    net = noconcat(net, self.outputs[self.scope + '/block3'],
                                          top_fm, int_fm, scope='block_rev3')
                    net = noconcat(net, self.outputs[self.scope + '/block2'],
                                          top_fm, int_fm, scope='block_rev2')
                    '''
                    '''
                    net = bottleneck_skip(net, self.outputs[DEFAULT_SSD_SCOPE+'/block6'],
                                          top_fm, int_fm, scope='block_rev6')
                    net = bottleneck_skip(net, self.outputs[DEFAULT_SSD_SCOPE+'/block5'],
                                          top_fm, int_fm, scope='block_rev5')
                    net = bottleneck_skip(net, self.outputs[self.scope+'/block4'],
                                          top_fm, int_fm, scope='block_rev4')
                    net = bottleneck_skip(net, self.outputs[self.scope+'/block3'],
                                          top_fm, int_fm, scope='block_rev3')
                    net = bottleneck_skip(net, self.outputs[self.scope+'/block2'],
                                          top_fm, int_fm, scope='block_rev2')
                    '''
                    '''
                    net = tail_att(net, self.outputs[DEFAULT_SSD_SCOPE + '/block6'],
                                          top_fm, int_fm, scope='block_rev6')
                    net = tail_att(net, self.outputs[DEFAULT_SSD_SCOPE + '/block5'],
                                          top_fm, int_fm, scope='block_rev5')
                    net = tail_att(net, self.outputs[self.scope + '/block4'],
                                          top_fm, int_fm, scope='block_rev4')
                    net = tail_att(net, self.outputs[self.scope + '/block3'],
                                          top_fm, int_fm, scope='block_rev3')
                    net = tail_att(net, self.outputs[self.scope + '/block2'],
                                          top_fm, int_fm, scope='block_rev2')
                    '''
                    net = tail_att(net,
                                   self.outputs[DEFAULT_SSD_SCOPE + '/block6'],
                                   top_fm,
                                   top_fm,
                                   scope='block_rev6')
                    net = tail_att(net,
                                   self.outputs[DEFAULT_SSD_SCOPE + '/block5'],
                                   top_fm,
                                   top_fm,
                                   scope='block_rev5')
                    net = tail_att(net,
                                   self.outputs[self.scope + '/block4'],
                                   top_fm,
                                   top_fm,
                                   scope='block_rev4')
                    net = tail_att(net,
                                   self.outputs[self.scope + '/block3'],
                                   top_fm,
                                   top_fm,
                                   scope='block_rev3')
                    net = tail_att(net,
                                   self.outputs[self.scope + '/block2'],
                                   top_fm,
                                   top_fm,
                                   scope='block_rev2')
                    if args.x4:
                        # To provide stride 4 we add one more layer with upsampling
                        #net = sub_pixel_skip(net, self.outputs[self.scope + '/block1'],
                        #                     top_fm, int_fm, scope='block_rev1')
                        #net = sub_pixel_skip(net, self.outputs[self.scope + '/block1'],
                        #                      top_fm, int_fm, scope='block_rev1')
                        #net = noconcat(net, self.outputs[self.scope+'/block1'],
                        #                      top_fm, int_fm, scope='block_rev1')
                        #net = bottleneck_skip(net, self.outputs[self.scope+'/block1'],
                        #                      top_fm, int_fm, scope='block_rev1')
                        net = tail_att(net,
                                       self.outputs[self.scope + '/block1'],
                                       top_fm,
                                       int_fm,
                                       scope='block_rev1')
                endpoints = slim.utils.convert_collection_to_dict(
                    end_points_collection)
            self.outputs.update(endpoints)

            # Creating an output of spatial resolution 1x1 with conventional name 'pool6'
            if args.image_size == 512:
                self.outputs[DEFAULT_SSD_SCOPE+'/pool6'] =\
                        tf.reduce_mean(self.outputs['ssd_back/block_rev7/shortcut'],
                                       [1, 2], name='pool6', keep_dims=True)
            else:
                self.outputs[DEFAULT_SSD_SCOPE+'/pool6'] =\
                        tf.reduce_mean(self.outputs['ssd_back/block_rev6/shortcut'],
                                       [1, 2], name='pool6', keep_dims=True)
Example #20
    def inference(self, mode, inputs):
        is_training = mode == 'TRAIN'

        ### decode the inputs
        image = inputs[0]
        im_info = inputs[1]
        gt_boxes = inputs[2]
        gt_masks = inputs[3]
        seg_loss_gate = inputs[4]
        iter = inputs[5]
        image.set_shape([1, None, None, 3])
        im_info.set_shape([1, 3])
        if mode == 'TRAIN':
            gt_boxes.set_shape([None, 5])
        ## end of decode

        num_anchors = len(cfg.anchor_scales) * len(cfg.anchor_ratios)
        bottleneck = resnet_v1.bottleneck
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

        blocks = [
            resnet_utils.Block('block1', bottleneck,
                               [(256, 64, 1, 1)] * 2 + [(256, 64, 2, 1)]),
            resnet_utils.Block('block2', bottleneck,
                               [(512, 128, 1, 1)] * 3 + [(512, 128, 2, 1)]),
            resnet_utils.Block('block3', bottleneck,
                               [(1024, 256, 1, 1)] * 5 + [(1024, 256, 2, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1, 1)] * 3)
        ]

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            with tf.variable_scope('resnet_v1_50', 'resnet_v1_50'):
                net = resnet_utils.conv2d_same(image,
                                               64,
                                               7,
                                               stride=2,
                                               scope='conv1')
                net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
                net = slim.max_pool2d(net, [3, 3],
                                      stride=2,
                                      padding='VALID',
                                      scope='pool1')
            net, _ = resnet_v1.resnet_v1(net,
                                         blocks[0:1],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope='resnet_v1_50')

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net2, _ = resnet_v1.resnet_v1(net,
                                          blocks[1:2],
                                          global_pool=False,
                                          include_root_block=False,
                                          scope='resnet_v1_50')
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net3, _ = resnet_v1.resnet_v1(net2,
                                          blocks[2:3],
                                          global_pool=False,
                                          include_root_block=False,
                                          scope='resnet_v1_50')
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net4, _ = resnet_v1.resnet_v1(net3,
                                          blocks[3:4],
                                          global_pool=False,
                                          include_root_block=False,
                                          scope='resnet_v1_50')

        namescope = tf.no_op(name='.').name[:-1]
        resnet_features_name = [
            namescope + 'resnet_v1_50_1/block1/unit_2/bottleneck_v1/Relu:0',
            namescope + 'resnet_v1_50_2/block2/unit_3/bottleneck_v1/Relu:0',
            namescope + 'resnet_v1_50_3/block3/unit_5/bottleneck_v1/Relu:0',
            namescope + 'resnet_v1_50_4/block4/unit_3/bottleneck_v1/Relu:0'
        ]

        resnet_features = []
        for i in range(len(resnet_features_name)):
            resnet_features.append(tf.get_default_graph().get_tensor_by_name(
                resnet_features_name[i]))

        mid_channels = 256

        with tf.variable_scope('resnet_v1_50',
                               'resnet_v1_50',
                               regularizer=tf.contrib.layers.l2_regularizer(
                                   cfg.TRAIN.WEIGHT_DECAY)):
            finer = slim.conv2d(resnet_features[-1],
                                mid_channels, [1, 1],
                                trainable=is_training,
                                weights_initializer=initializer,
                                activation_fn=None,
                                scope='pyramid/res5')
            pyramid_features = [finer]
            for i in range(4, 1, -1):
                lateral = slim.conv2d(resnet_features[i - 2],
                                      mid_channels, [1, 1],
                                      trainable=is_training,
                                      weights_initializer=initializer,
                                      activation_fn=None,
                                      scope='lateral/res{}'.format(i))
                upsample = tf.image.resize_bilinear(
                    finer, (tf.shape(lateral)[1], tf.shape(lateral)[2]),
                    name='upsample/res{}'.format(i))
                finer = upsample + lateral
                pyramid = slim.conv2d(finer,
                                      mid_channels, [3, 3],
                                      trainable=is_training,
                                      weights_initializer=initializer,
                                      activation_fn=None,
                                      scope='pyramid/res{}'.format(i))
                pyramid_features.append(pyramid)
            pyramid_features.reverse()
            pyramid = slim.avg_pool2d(pyramid_features[-1], [2, 2],
                                      stride=2,
                                      padding='SAME',
                                      scope='pyramid/res6')
            pyramid_features.append(pyramid)
        # pyramid_features downsampling rate:   4, 8, 16, 32, 64

        allowed_borders = [16, 32, 64, 128, 256]
        feat_strides = np.array([4, 8, 16, 32, 64])
        anchor_scaleses = np.array([[1], [2], [4], [8], [16]])

        with tf.variable_scope('resnet_v1_50',
                               'resnet_v1_50',
                               regularizer=tf.contrib.layers.l2_regularizer(
                                   cfg.TRAIN.WEIGHT_DECAY)) as scope:
            num_anchors = len(cfg.anchor_ratios)
            rpn_cls_prob_pyramid = []
            rpn_bbox_pred_pyramid = []
            anchors_pyramid = []
            rpn_cls_score_reshape_pyramid = []

            rpn_label_pyramid = []
            labels_cat_pyramid = []
            rpn_bbox_targets_pyramid = []
            rpn_bbox_inside_weights_pyramid = []
            rpn_bbox_outside_weights_pyramid = []

            with tf.variable_scope('resnet_v1_50_rpn',
                                   'resnet_v1_50_rpn') as scope:
                for i, pyramid_feature in enumerate(pyramid_features):
                    with tf.variable_scope('anchor/res{}'.format(i + 2)):
                        shape = tf.shape(pyramid_feature)
                        height, width = shape[1], shape[2]
                        anchors, _ = tf.py_func(generate_anchors_pre, [
                            height, width, feat_strides[i], anchor_scaleses[i],
                            cfg.anchor_ratios
                        ], [tf.float32, tf.int32])

                    # rpn
                    rpn = slim.conv2d(pyramid_feature,
                                      512, [3, 3],
                                      trainable=is_training,
                                      weights_initializer=initializer,
                                      activation_fn=nn_ops.relu,
                                      scope='rpn_conv')
                    # head
                    rpn_cls_score = slim.conv2d(
                        rpn,
                        num_anchors * 2, [3, 3],
                        trainable=is_training,
                        weights_initializer=initializer,
                        activation_fn=None,
                        scope='rpn_cls_score')
                    rpn_cls_score_reshape = tf.reshape(
                        rpn_cls_score, [-1, 2],
                        name='rpn_cls_score_reshape/res{}'.format(i + 2))
                    rpn_cls_prob = tf.nn.softmax(
                        rpn_cls_score_reshape,
                        name="rpn_cls_prob_reshape/res{}".format(i + 2))
                    rpn_bbox_pred = slim.conv2d(
                        rpn,
                        num_anchors * 4, [3, 3],
                        trainable=is_training,
                        weights_initializer=initializer,
                        activation_fn=None,
                        scope='rpn_bbox_pred')
                    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, [-1, 4])

                    # share rpn
                    scope.reuse_variables()

                    rpn_cls_prob_pyramid.append(rpn_cls_prob)
                    rpn_bbox_pred_pyramid.append(rpn_bbox_pred)
                    anchors_pyramid.append(anchors)
                    rpn_cls_score_reshape_pyramid.append(rpn_cls_score_reshape)

                    if is_training:
                        with tf.variable_scope(
                                'anchors_targets/res{}'.format(i + 2)):
                            rpn_labels, rpn_bbox_targets, \
                            rpn_bbox_inside_weights, rpn_bbox_outside_weights, labels_cat, gt_id = \
                                tf.py_func(
                                    anchor_target_layer,
                                    [rpn_cls_score, gt_boxes, im_info,
                                     feat_strides[i], anchors, num_anchors, gt_masks],
                                    [tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.int64])
                            rpn_labels = tf.to_int32(
                                rpn_labels, name="to_int32")  # (1, H, W, A)
                            labels_cat = tf.to_int32(
                                labels_cat, name="to_int32")  # (1, H, W, A)

                            rpn_labels = tf.reshape(rpn_labels, [-1])
                            labels_cat = tf.reshape(labels_cat, [-1])
                            rpn_bbox_targets = tf.reshape(
                                rpn_bbox_targets, [-1, 4])
                            rpn_bbox_inside_weights = tf.reshape(
                                rpn_bbox_inside_weights, [-1, 4])
                            rpn_bbox_outside_weights = tf.reshape(
                                rpn_bbox_outside_weights, [-1, 4])

                        rpn_label_pyramid.append(rpn_labels)
                        labels_cat_pyramid.append(labels_cat)
                        rpn_bbox_targets_pyramid.append(rpn_bbox_targets)
                        rpn_bbox_inside_weights_pyramid.append(
                            rpn_bbox_inside_weights)
                        rpn_bbox_outside_weights_pyramid.append(
                            rpn_bbox_outside_weights)

            rpn_cls_prob_pyramid = tf.concat(axis=0,
                                             values=rpn_cls_prob_pyramid)
            rpn_bbox_pred_pyramid = tf.concat(axis=0,
                                              values=rpn_bbox_pred_pyramid)
            anchors_pyramid = tf.concat(axis=0, values=anchors_pyramid)
            rpn_cls_score_reshape_pyramid = tf.concat(
                axis=0, values=rpn_cls_score_reshape_pyramid)

        with tf.variable_scope('rois') as scope:
            rpn_cls_prob_bg = rpn_cls_prob_pyramid[:, 0]
            rpn_cls_prob_fg = 1 - rpn_cls_prob_bg

            rpn_proposals, rpn_proposal_scores, \
            rpn_proposals_addone, keep_pre = tf.py_func(
                proposal_without_nms_layer,
                [rpn_cls_prob_fg, rpn_bbox_pred_pyramid,
                 im_info, anchors_pyramid],
                [tf.float32, tf.float32, tf.float32, tf.int64])

            rpn_cls_prob_pyramid = tf.gather(rpn_cls_prob_pyramid, keep_pre)

            keep = tf.image.non_max_suppression(
                rpn_proposals_addone,
                rpn_proposal_scores,
                cfg.TRAIN.RPN_POST_NMS_TOP_N,
                iou_threshold=cfg.TRAIN.RPN_NMS_THRESH)
            bbox_pred = tf.gather(rpn_proposals, keep)
            roi_scores = tf.gather(rpn_proposal_scores, keep)
            anchors_pyramid = tf.gather(anchors_pyramid, keep)
            rpn_cls_prob_pyramid = tf.gather(rpn_cls_prob_pyramid, keep)

        with tf.variable_scope('seg',
                               'seg',
                               regularizer=tf.contrib.layers.l2_regularizer(
                                   cfg.TRAIN.WEIGHT_DECAY)):
            x = pyramid_features[1]
            seg_pred = slim.conv2d(x,
                                   128, [3, 3],
                                   trainable=is_training,
                                   weights_initializer=initializer,
                                   scope="pixel_seg_conv_1")
            # br = slim.conv2d(
            #     x, 256, [3, 3], trainable=is_training,
            #     weights_initializer=initializer, scope="pixel_seg_conv_1")
            # br = slim.conv2d(
            #     br, 256, [3, 3], trainable=is_training,
            #     weights_initializer=initializer, scope="pixel_seg_conv_2")
            # x += br
            # br = slim.conv2d(
            #     x, 256, [3, 3], trainable=is_training,
            #     weights_initializer=initializer, scope="pixel_seg_conv_3")
            # br = slim.conv2d(
            #     br, 256, [3, 3], trainable=is_training,
            #     weights_initializer=initializer, scope="pixel_seg_conv_4")
            # x += br
            # br = slim.conv2d(
            #     x, 256, [3, 3], trainable=is_training,
            #     weights_initializer=initializer, scope="pixel_seg_conv_5")
            # br = slim.conv2d(
            #     br, 256, [3, 3], trainable=is_training,
            #     weights_initializer=initializer, scope="pixel_seg_conv_6")
            # x += br

            # x = slim.conv2d_transpose(x, 256, [3, 3], [2, 2], "SAME", scope="pixel_seg_deconv_1")

            if is_training:
                # bbox_pred_seg = tf.concat([bbox_pred, gt_boxes[:, :4]], axis=0)
                bbox_pred_seg = gt_boxes[:, :4]
            else:
                bbox_pred_seg = bbox_pred
            num_proposals = tf.shape(bbox_pred_seg)[0]
            num_proposals = tf.stack([num_proposals])
            one = tf.constant([1], dtype=tf.int32)

            seg_pred_pyramid = tf.tile(
                seg_pred, tf.concat([num_proposals, one, one, one], axis=0))
            masks, bimasks = tf.py_func(generate_bimasks, [bbox_pred_seg],
                                        [tf.float32, tf.float32])
            masks.set_shape([None, None, None, None])
            masks = tf.stop_gradient(masks)
            bimasks.set_shape([None, None, None, None])
            bimasks = tf.stop_gradient(bimasks)

            seg_pred_pyramid = seg_pred_pyramid * bimasks
            x = seg_pred_pyramid

            x = tf.nn.max_pool(x,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 1, 1, 1],
                               padding='SAME')

            br = slim.conv2d(x,
                             128, [3, 3],
                             trainable=is_training,
                             weights_initializer=initializer,
                             scope="final_conv_1")
            br = slim.conv2d(br,
                             128, [3, 3],
                             trainable=is_training,
                             weights_initializer=initializer,
                             scope="final_conv_2")
            x += br

            x = tf.nn.max_pool(x,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 1, 1, 1],
                               padding='SAME')

            x = slim.conv2d(x,
                            64, [3, 3],
                            rate=2,
                            trainable=is_training,
                            weights_initializer=initializer,
                            scope="final_conv_3")

            br = slim.conv2d(x,
                             64, [3, 3],
                             trainable=is_training,
                             weights_initializer=initializer,
                             scope="final_conv_4")
            br = slim.conv2d(br,
                             64, [3, 3],
                             trainable=is_training,
                             weights_initializer=initializer,
                             scope="final_conv_5")
            x += br

            # x = tf.image.resize_bilinear(x, (40, 40))

            seg_pred_pyramid = slim.conv2d(x,
                                           2, [3, 3],
                                           trainable=is_training,
                                           weights_initializer=initializer,
                                           scope="final_conv_6")

            if is_training:
                labels_seg, = tf.py_func(generate_seg_gt,
                                         [bbox_pred_seg, gt_boxes, gt_masks],
                                         [tf.int32])

        if is_training:
            rpn_label_pyramid = tf.concat(axis=0, values=rpn_label_pyramid)
            labels_cat_pyramid = tf.concat(axis=0, values=labels_cat_pyramid)
            rpn_bbox_targets_pyramid = tf.concat(
                axis=0, values=rpn_bbox_targets_pyramid)
            rpn_bbox_inside_weights_pyramid = tf.concat(
                axis=0, values=rpn_bbox_inside_weights_pyramid)
            rpn_bbox_outside_weights_pyramid = tf.concat(
                axis=0, values=rpn_bbox_outside_weights_pyramid)

        ##############add prediction#####################
        tf.add_to_collection("rpn_cls_prob", rpn_cls_prob_pyramid)
        tf.add_to_collection("rpn_bbox_pred", bbox_pred)
        tf.add_to_collection("anchors", anchors_pyramid)
        tf.add_to_collection("seg_pred_pyramid", seg_pred_pyramid)

        if is_training:
            with tf.variable_scope('loss') as scope:
                #############rpn loss################
                rpn_cls_score = rpn_cls_score_reshape_pyramid
                rpn_label = rpn_label_pyramid
                rpn_select = tf.where(tf.not_equal(rpn_label, -1))
                rpn_cls_score = tf.reshape(
                    tf.gather(rpn_cls_score, rpn_select), [-1, 2])
                labels_cat = labels_cat_pyramid
                labels_cat = tf.reshape(tf.gather(labels_cat, rpn_select),
                                        [-1])

                inds_pos = tf.where(tf.not_equal(labels_cat, 0))
                inds_neg = tf.where(tf.equal(labels_cat, 0))

                rpn_cls_score_pos = tf.reshape(
                    tf.gather(rpn_cls_score, inds_pos), [-1, 2])
                rpn_cls_score_neg = tf.reshape(
                    tf.gather(rpn_cls_score, inds_neg), [-1, 2])
                labels_cat_pos = tf.reshape(tf.gather(labels_cat, inds_pos),
                                            [-1])
                labels_cat_neg = tf.reshape(tf.gather(labels_cat, inds_neg),
                                            [-1])

                rpn_cross_entropy_pos = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=rpn_cls_score_pos, labels=labels_cat_pos))
                rpn_cross_entropy_neg = softmax_loss_ohem(
                    rpn_cls_score_neg, labels_cat_neg, 256)

                rpn_cross_entropy_pos *= 0.3
                rpn_cross_entropy_neg *= 0.3

                bPos = tf.shape(inds_pos)[0] > 0
                zero = tf.constant(0.)
                rpn_cross_entropy_pos = tf.cond(bPos,
                                                lambda: rpn_cross_entropy_pos,
                                                lambda: zero)

                masks = masks[:, :, :, 0]

                seg_loss = tf.nn.softmax_cross_entropy_with_logits(
                    logits=seg_pred_pyramid, labels=labels_seg)

                seg_loss *= masks
                sum_mask = tf.reduce_sum(masks)
                bPos = sum_mask > 1

                seg_loss = tf.reduce_sum(seg_loss) / sum_mask
                # seg_loss = tf.cond(bPos, lambda: seg_loss, lambda: zero)
                # seg_loss *= seg_loss_gate

                rpn_cross_entropy = rpn_cross_entropy_pos + rpn_cross_entropy_neg

                rpn_loss_box = smooth_l1_loss_valid(
                    rpn_bbox_pred_pyramid,
                    rpn_bbox_targets_pyramid,
                    rpn_bbox_inside_weights_pyramid,
                    rpn_bbox_outside_weights_pyramid,
                    labels_cat_pyramid,
                    sigma=cfg.simga_rpn,
                    dim=[0])

                loss_wd = sum(
                    tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

                loss = rpn_cross_entropy + rpn_loss_box + seg_loss + loss_wd

                tf.add_to_collection('rpn_cross_entropy_pos',
                                     rpn_cross_entropy_pos)
                tf.add_to_collection('rpn_cross_entropy_neg',
                                     rpn_cross_entropy_neg)
                tf.add_to_collection('rpn_cross_entropy', rpn_cross_entropy)
                tf.add_to_collection('rpn_loss_box', rpn_loss_box)
                tf.add_to_collection('rpn_loss_seg', seg_loss)
                tf.add_to_collection('loss_wd', loss_wd)
                tf.add_to_collection('total_loss', loss)

            return loss
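
Note that only the TRAIN path returns a value; in inference mode the predictions are meant to be fetched from the graph collections populated above. A hedged retrieval sketch (assumes the network was built in the current default graph):

# After inference('TEST', inputs) has built the graph, the prediction
# tensors registered via tf.add_to_collection can be recovered by name.
rpn_cls_prob = tf.get_collection('rpn_cls_prob')[0]
rpn_bbox_pred = tf.get_collection('rpn_bbox_pred')[0]
anchors = tf.get_collection('anchors')[0]
seg_pred = tf.get_collection('seg_pred_pyramid')[0]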