def resnet_v1_50(inputs, num_classes=None, is_training=True, global_pool=True,
                 output_stride=None, include_root_block=True,
                 spatial_squeeze=True, reuse=None, scope='resnet_v1_50'):
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    return resnet_v1(inputs, blocks, num_classes=num_classes,
                     is_training=is_training, global_pool=global_pool,
                     output_stride=output_stride,
                     include_root_block=include_root_block,
                     spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope)
def testEndPointsV2(self):
    """Test the end points of a tiny v2 bottleneck network."""
    bottleneck = resnet_v2.bottleneck
    blocks = [
        resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
        resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])
    ]
    inputs = create_test_input(2, 32, 16, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        _, end_points = self._resnet_plain(inputs, blocks, scope='tiny')
    expected = [
        'tiny/block1/unit_1/bottleneck_v2/shortcut',
        'tiny/block1/unit_1/bottleneck_v2/conv1',
        'tiny/block1/unit_1/bottleneck_v2/conv2',
        'tiny/block1/unit_1/bottleneck_v2/conv3',
        'tiny/block1/unit_2/bottleneck_v2/conv1',
        'tiny/block1/unit_2/bottleneck_v2/conv2',
        'tiny/block1/unit_2/bottleneck_v2/conv3',
        'tiny/block2/unit_1/bottleneck_v2/shortcut',
        'tiny/block2/unit_1/bottleneck_v2/conv1',
        'tiny/block2/unit_1/bottleneck_v2/conv2',
        'tiny/block2/unit_1/bottleneck_v2/conv3',
        'tiny/block2/unit_2/bottleneck_v2/conv1',
        'tiny/block2/unit_2/bottleneck_v2/conv2',
        'tiny/block2/unit_2/bottleneck_v2/conv3'
    ]
    self.assertItemsEqual(expected, end_points)
def resnet_v1_200(inputs, num_classes=None, is_training=True, global_pool=True,
                  output_stride=None, spatial_squeeze=True, reuse=None,
                  scope='resnet_v1_200'):
    """ResNet-200 model of [2]. See resnet_v1() for arg and return description."""
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 23 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    return resnet_v1(inputs, blocks, num_classes, is_training,
                     global_pool=global_pool, output_stride=output_stride,
                     include_root_block=True, spatial_squeeze=spatial_squeeze,
                     reuse=reuse, scope=scope)
def _resnet_small(self, inputs, num_classes=None, is_training=True,
                  global_pool=True, output_stride=None,
                  include_root_block=True, reuse=None,
                  scope='resnet_v2_small'):
    """A shallow and thin ResNet v2 for faster tests."""
    bottleneck = resnet_v2.bottleneck
    blocks = [
        resnet_utils.Block('block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]),
        resnet_utils.Block('block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(16, 4, 1)] * 2 + [(16, 4, 2)]),
        resnet_utils.Block('block4', bottleneck, [(32, 8, 1)] * 2)
    ]
    return resnet_v2.resnet_v2(inputs, blocks, num_classes,
                               is_training=is_training,
                               global_pool=global_pool,
                               output_stride=output_stride,
                               include_root_block=include_root_block,
                               reuse=reuse, scope=scope)
def resnet_v1_50(inputs, num_classes=None, is_training=True, global_pool=True,
                 output_stride=None, reuse=None, scope='resnet_v1_50'):
    """ResNet-50 model of [1]. See resnet_v1() for arg and return description.

    [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
        Deep Residual Learning for Image Recognition. arXiv:1512.03385
    """
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    return resnet_v1(inputs, blocks, num_classes, is_training,
                     global_pool=global_pool, output_stride=output_stride,
                     include_root_block=True, reuse=reuse, scope=scope)
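# Usage sketch (an illustration, not part of the original file): driving
# resnet_v1_50 under the slim arg scope, as in the TF-slim reference code.
# Assumes `slim` and `resnet_utils` are imported as in the snippets above and
# that `images` is an NHWC float tensor.
def build_resnet50_classifier(images, num_classes=1000, is_training=False):
    # images: e.g. [batch, 224, 224, 3].
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        logits, end_points = resnet_v1_50(images, num_classes=num_classes,
                                          is_training=is_training)
    # end_points maps scope names like 'resnet_v1_50/block4' to activations.
    return logits, end_points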
def resnet_frcnn(inputs, rois=None, global_pool=True, reuse=None,
                 fc_layers=True, scope='resnet_v1_50'):
    blocks = [
        resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 3),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 2)] + [(512, 128, 1)] * 3),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 2)] + [(1024, 256, 1)] * 5),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    if rois is None:
        log.warning("No RoIs provided, building a plain ResNet")
        if not fc_layers:
            blocks = blocks[:-1]
            global_pool = False
        else:
            blocks = blocks[:-1] + [
                resnet_utils.Block('block4', bottleneck,
                                   [(2048, 512, 2)] + [(2048, 512, 1)] * 2)
            ]
        net, endpoints = resnet_v1.resnet_v1(inputs, blocks,
                                             global_pool=global_pool,
                                             reuse=reuse, scope=scope)
    else:
        if not fc_layers:
            raise NotImplementedError
        # Run the trunk up to block3, warp each RoI onto a fixed 7x7 grid,
        # then finish with block4 on the warped crops.
        net, ep1 = resnet_v1.resnet_v1(inputs, blocks[:-1], global_pool=False,
                                       reuse=reuse, scope=scope)
        z = tf.zeros(tf.stack([tf.shape(rois)[0]]), dtype=tf.int32)
        net = tf.image.crop_and_resize(net, rois, z, [7, 7],
                                       name="roi_warping")
        net, ep2 = resnet_v1.resnet_v1(net, blocks[-1:],
                                       global_pool=global_pool,
                                       include_root_block=False,
                                       reuse=reuse, scope=scope)
        if global_pool:
            net = slim.flatten(net)
        endpoints = ep1.copy()
        endpoints.update(ep2)
        # endpoints = {**ep1, **ep2}  # python3.5, fix it when we ditch fedora
    return net, endpoints
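# Note on the RoI format (a sketch under assumptions, not from this file):
# tf.image.crop_and_resize expects boxes as normalized [y1, x1, y2, x2] in
# [0, 1] plus one batch index per box. A hypothetical helper for converting
# absolute pixel boxes into that layout:
def normalize_rois(boxes_abs, im_height, im_width):
    # boxes_abs: float tensor [N, 4] holding [y1, x1, y2, x2] in pixels.
    y1, x1, y2, x2 = tf.unstack(boxes_abs, axis=1)
    return tf.stack([y1 / im_height, x1 / im_width,
                     y2 / im_height, x2 / im_width], axis=1)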
def resnet_v1_152(inputs, num_classes=None, global_pool=True,
                  output_stride=None, reuse=None, scope='resnet_v1_152'):
    """ResNet-152 model of [1]. See resnet_v1() for arg and return description."""
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 7 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    return resnet_v1(inputs, blocks, num_classes, global_pool, output_stride,
                     include_root_block=True, reuse=reuse, scope=scope)
def resnet_v1_101(inputs, num_classes=None, is_training=True,
                  global_pool=False, output_stride=None, reuse=None,
                  scope='resnet_v1_101'):
    """ResNet-101 model of [1]."""
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    return resnet_v1(inputs, blocks, num_classes, is_training,
                     global_pool=global_pool, output_stride=output_stride,
                     include_root_block=True, reuse=reuse, scope=scope)
def resnet_v1_101(inputs, num_classes=None, is_training=True, global_pool=True,
                  reuse=None, noise_fn=None, scope='resnet_v1_101'):
    """ResNet-101 model of [1]. See resnet_v1() for arg and return description."""
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    return resnet_v1(inputs, blocks, num_classes, is_training,
                     global_pool=global_pool, include_root_block=True,
                     reuse=reuse, noise_fn=noise_fn, scope=scope)
def create_trunk(self, images):
    # Convert RGB in [0, 1] to BGR in [0, 255] and subtract the channel means,
    # the usual Caffe-style preprocessing for ported ResNet weights.
    red, green, blue = tf.split(images * 255, 3, axis=3)
    images = tf.concat([blue, green, red], 3) - MEAN_COLOR
    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(is_training=self.training,
                                       weight_decay=self.weight_decay,
                                       batch_norm_decay=args.bn_decay)):
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 3),
            resnet_utils.Block('block2', bottleneck,
                               [(512, 128, 2)] + [(512, 128, 1)] * 3),
            resnet_utils.Block('block3', bottleneck,
                               [(1024, 256, 2)] +
                               [(1024, 256, 1)] * self.num_block3),
            resnet_utils.Block('block4', bottleneck,
                               [(2048, 512, 2)] + [(2048, 512, 1)] * 2)
        ]
        net, endpoints = resnet_v1.resnet_v1(images, blocks, global_pool=False,
                                             reuse=self.reuse,
                                             scope=self.scope)
        self.outputs = endpoints
        self.add_extra_layers(net)
def resnet_v2_26_2(inputs, num_classes=None, is_training=True,
                   global_pool=True, output_stride=None, reuse=None,
                   scope='resnet_v2_26_2'):
    """Width-2 ResNet-26 model. See resnet_v2() for arg and return description."""
    k = 2  # width multiplier
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256 * k, 64 * k, 1)] + [(256 * k, 64 * k, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512 * k, 128 * k, 1)] + [(512 * k, 128 * k, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(1024 * k, 256 * k, 1)] +
                           [(1024 * k, 256 * k, 2)]),
        resnet_utils.Block('block4', bottleneck, [(2048 * k, 512 * k, 1)] * 2)
    ]
    return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
                     global_pool=global_pool, output_stride=output_stride,
                     include_root_block=True, reuse=reuse, scope=scope)
def _atrousValues(self, bottleneck):
    """Verify the values of dense feature extraction by atrous convolution.

    Make sure that dense feature extraction by stack_blocks_dense() followed
    by subsampling gives identical results to feature extraction at the
    nominal network output stride using the simple
    self._stack_blocks_nondense() above.

    Args:
      bottleneck: The bottleneck function.
    """
    blocks = [
        resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
        resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]),
        resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]),
        resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)])
    ]
    nominal_stride = 8
    # Test both odd and even input dimensions.
    height = 30
    width = 31
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        with slim.arg_scope([slim.batch_norm], is_training=False):
            for output_stride in [1, 2, 4, 8, None]:
                with tf.Graph().as_default():
                    with self.test_session() as sess:
                        tf.set_random_seed(0)
                        inputs = create_test_input(1, height, width, 3)
                        # Dense feature extraction followed by subsampling.
                        output = resnet_utils.stack_blocks_dense(
                            inputs, blocks, output_stride)
                        if output_stride is None:
                            factor = 1
                        else:
                            factor = nominal_stride // output_stride
                        output = resnet_utils.subsample(output, factor)
                        # Make the two networks use the same weights.
                        tf.get_variable_scope().reuse_variables()
                        # Feature extraction at the nominal network rate.
                        expected = self._stack_blocks_nondense(inputs, blocks)
                        sess.run(tf.global_variables_initializer())
                        output, expected = sess.run([output, expected])
                        self.assertAllClose(output, expected,
                                            atol=1e-4, rtol=1e-4)
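# For reference, the resnet_utils.subsample used above is (paraphrased from
# the slim reference implementation) a strided identity, so dense extraction
# at output_stride followed by subsampling with
# factor = nominal_stride // output_stride should match the strided network:
def subsample(inputs, factor, scope=None):
    if factor == 1:
        return inputs
    # A 1x1 max pool with stride `factor` just picks every factor-th pixel.
    return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)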
def resnet_v1_block(scope, base_depth, num_units, stride):
    """Helper function for creating a resnet_v1 bottleneck block.

    Args:
      scope: The scope of the block.
      base_depth: The depth of the bottleneck layer for each unit.
      num_units: The number of units in the block.
      stride: The stride of the block, implemented as a stride in the last
        unit. All other units have stride=1.

    Returns:
      A resnet_v1 bottleneck block.
    """
    return resnet_utils.Block(scope, bottleneck, [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': 1
    }] * (num_units - 1) + [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': stride
    }])
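# Usage sketch: the standard ResNet-50 stages expressed with this helper
# (unit counts 3/4/6/3, as in the slim reference; shown for illustration).
blocks = [
    resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
    resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
    resnet_v1_block('block3', base_depth=256, num_units=6, stride=2),
    resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
]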
def resnet_v1_block(scope, base_depth, num_units, stride):
    return utils.Block(scope, bottleneck, [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': 1
    }] * (num_units - 1) + [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': stride
    }])
def resnext_v2_block(self, scope, base_depth, num_units, stride):
    return resnet_utils.Block(scope, self.bottle_x_neck, [{
        'depth': base_depth * 2,
        'depth_bottleneck': base_depth,
        'stride': 1
    }] * (num_units - 1) + [{
        'depth': base_depth * 2,
        'depth_bottleneck': base_depth,
        'stride': stride
    }])
def resnet_v2_block(self, scope, base_depth, num_units, stride):
    """Helper function for creating a resnet_v2 bottleneck block.

    Args:
      scope: The scope of the block.
      base_depth: The depth of the bottleneck layer for each unit.
      num_units: The number of units in the block.
      stride: The stride of the block, implemented as a stride in the last
        unit. All other units have stride=1.

    Returns:
      A resnet_v2 bottleneck block.
    """
    if 'block4' in scope:
        # block4 keeps stride 1 in every unit and enables the deformable and
        # attention options.
        return resnet_utils.Block(
            scope, self.bottleneck,
            [{'depth': base_depth * 4,
              'depth_bottleneck': base_depth,
              'stride': 1,
              'rate': 1,
              'deformable': self.deformable,
              'attention_option': self.attention_option}] * num_units)
    else:
        return resnet_utils.Block(
            scope, self.bottleneck,
            [{'depth': base_depth * 4,
              'depth_bottleneck': base_depth,
              'stride': 1}] * (num_units - 1) +
            [{'depth': base_depth * 4,
              'depth_bottleneck': base_depth,
              'stride': stride}])
def resnet_v2_block(scope, base_depth, num_units, stride):
    """Helper function for creating a resnet_v2 bottleneck block.

    Args:
      scope: The scope of the block.
      base_depth: The depth of the bottleneck layer for each unit.
      num_units: The number of units in the block.
      stride: The stride of the block, implemented as a stride in the last
        unit. All other units have stride=1.

    Returns:
      A resnet_v2 bottleneck block.
    """
    # The unit list must also be passed by keyword: a positional argument
    # after scope=/unit_fn= would be a SyntaxError.
    return resnet_utils.Block(
        scope=scope,
        unit_fn=bottleneck,
        args=[{'depth': base_depth * 4,
               'depth_bottleneck': base_depth,
               'stride': 1}] * (num_units - 1) +
             [{'depth': base_depth * 4,
               'depth_bottleneck': base_depth,
               'stride': stride}])
def resnet_v2_block(scope, base_depth, num_planes, stride):
    """Helper function for creating a resnet_v2 bottleneck block.

    Args:
      scope: The scope of the block.
      base_depth: The depth of the bottleneck layer for each unit.
      num_planes: The number of units in the block.
      stride: The stride of the block, implemented as a stride in the last
        unit. All other units have stride=1.

    Returns:
      A resnet_v2 bottleneck block object.
    """
    return ru.Block(scope, bottleneck, [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': 1
    }] * (num_planes - 1) + [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': stride
    }])
def add_extra_layers(self, net):
    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(is_training=self.training,
                                       weight_decay=self.weight_decay,
                                       batch_norm_decay=args.bn_decay)):
        block_depth = 2
        num_fm = 2048
        # An earlier variant defined block5-7 with (num_fm, num_fm // 4)
        # bottleneck units; the num_fm // 2 configuration below is active.
        blocks = [
            resnet_utils.Block(
                'block5', bottleneck,
                [(num_fm // 2, num_fm // 2, 2)] +
                [(num_fm // 2, num_fm // 2, 1)] * (block_depth - 1)),
            resnet_utils.Block(
                'block6', bottleneck,
                [(num_fm // 2, num_fm // 2, 2)] +
                [(num_fm // 2, num_fm // 2, 1)] * (block_depth - 1)),
            resnet_utils.Block(
                'block7', bottleneck,
                [(num_fm // 2, num_fm // 2, 1)] +
                [(num_fm // 2, num_fm // 2, 1)] * (block_depth - 1)),
        ]
        if args.image_size == 512:
            blocks += [
                resnet_utils.Block(
                    'block8', bottleneck,
                    [(num_fm, num_fm // 4, 2)] +
                    [(num_fm, num_fm // 4, 1)] * (block_depth - 1)),
            ]
        net, endpoints = resnet_v1.resnet_v1(net, blocks, global_pool=False,
                                             include_root_block=False,
                                             reuse=self.reuse,
                                             scope=DEFAULT_SSD_SCOPE)
        self.outputs.update(endpoints)

    with tf.variable_scope(DEFAULT_SSD_SCOPE + "_back", reuse=self.reuse):
        end_points_collection = "reverse_ssd_end_points"
        # Alternative skip modules were tried here, all with the signature
        # fn(net, skip, top_fm, int_fm, scope=...): attention,
        # sub_pixel_skip, noconcat, bottleneck_skip. tail_att is the one
        # currently active.
        with slim.arg_scope([slim.conv2d, tail_att],
                            outputs_collections=end_points_collection):
            top_fm = args.top_fm
            int_fm = top_fm // 4
            if args.image_size == 512:
                # The higher resolution adds one more pooling stage, so an
                # extra reverse layer is appended.
                net = tail_att(net,
                               self.outputs[DEFAULT_SSD_SCOPE + '/block7'],
                               top_fm, int_fm, scope='block_rev7')
            net = tail_att(net, self.outputs[DEFAULT_SSD_SCOPE + '/block6'],
                           top_fm, top_fm, scope='block_rev6')
            net = tail_att(net, self.outputs[DEFAULT_SSD_SCOPE + '/block5'],
                           top_fm, top_fm, scope='block_rev5')
            net = tail_att(net, self.outputs[self.scope + '/block4'],
                           top_fm, top_fm, scope='block_rev4')
            net = tail_att(net, self.outputs[self.scope + '/block3'],
                           top_fm, top_fm, scope='block_rev3')
            net = tail_att(net, self.outputs[self.scope + '/block2'],
                           top_fm, top_fm, scope='block_rev2')
            if args.x4:
                # One more upsampling layer provides stride 4 (the same
                # alternative skip modules were tried here as well).
                net = tail_att(net, self.outputs[self.scope + '/block1'],
                               top_fm, int_fm, scope='block_rev1')
            endpoints = slim.utils.convert_collection_to_dict(
                end_points_collection)
            self.outputs.update(endpoints)
        # Create an output of spatial resolution 1x1 with the conventional
        # name 'pool6'.
        if args.image_size == 512:
            self.outputs[DEFAULT_SSD_SCOPE + '/pool6'] = tf.reduce_mean(
                self.outputs['ssd_back/block_rev7/shortcut'], [1, 2],
                name='pool6', keep_dims=True)
        else:
            self.outputs[DEFAULT_SSD_SCOPE + '/pool6'] = tf.reduce_mean(
                self.outputs['ssd_back/block_rev6/shortcut'], [1, 2],
                name='pool6', keep_dims=True)
def inference(self, mode, inputs):
    is_training = mode == 'TRAIN'
    # Decode the inputs.
    image = inputs[0]
    im_info = inputs[1]
    gt_boxes = inputs[2]
    gt_masks = inputs[3]
    seg_loss_gate = inputs[4]
    iter = inputs[5]
    image.set_shape([1, None, None, 3])
    im_info.set_shape([1, 3])
    if mode == 'TRAIN':
        gt_boxes.set_shape([None, 5])
    # End of decode.
    num_anchors = len(cfg.anchor_scales) * len(cfg.anchor_ratios)
    bottleneck = resnet_v1.bottleneck
    initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
    initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1, 1)] * 2 + [(256, 64, 2, 1)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1, 1)] * 3 + [(512, 128, 2, 1)]),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1, 1)] * 5 + [(1024, 256, 2, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1, 1)] * 3)
    ]
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        with tf.variable_scope('resnet_v1_50', 'resnet_v1_50'):
            net = resnet_utils.conv2d_same(image, 64, 7, stride=2,
                                           scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                  scope='pool1')
        net, _ = resnet_v1.resnet_v1(net, blocks[0:1], global_pool=False,
                                     include_root_block=False,
                                     scope='resnet_v1_50')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net2, _ = resnet_v1.resnet_v1(net, blocks[1:2], global_pool=False,
                                      include_root_block=False,
                                      scope='resnet_v1_50')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net3, _ = resnet_v1.resnet_v1(net2, blocks[2:3], global_pool=False,
                                      include_root_block=False,
                                      scope='resnet_v1_50')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net4, _ = resnet_v1.resnet_v1(net3, blocks[3:4], global_pool=False,
                                      include_root_block=False,
                                      scope='resnet_v1_50')
    namescope = tf.no_op(name='.').name[:-1]
    resnet_features_name = [
        namescope + 'resnet_v1_50_1/block1/unit_2/bottleneck_v1/Relu:0',
        namescope + 'resnet_v1_50_2/block2/unit_3/bottleneck_v1/Relu:0',
        namescope + 'resnet_v1_50_3/block3/unit_5/bottleneck_v1/Relu:0',
        namescope + 'resnet_v1_50_4/block4/unit_3/bottleneck_v1/Relu:0'
    ]
    resnet_features = []
    for i in range(len(resnet_features_name)):
        resnet_features.append(
            tf.get_default_graph().get_tensor_by_name(
                resnet_features_name[i]))
    mid_channels = 256
    # Build the FPN top-down pathway: a 1x1 lateral projection per level,
    # merged with the bilinearly upsampled coarser level and smoothed by a
    # 3x3 convolution.
    with tf.variable_scope('resnet_v1_50', 'resnet_v1_50',
                           regularizer=tf.contrib.layers.l2_regularizer(
                               cfg.TRAIN.WEIGHT_DECAY)):
        finer = slim.conv2d(resnet_features[-1], mid_channels, [1, 1],
                            trainable=is_training,
                            weights_initializer=initializer,
                            activation_fn=None, scope='pyramid/res5')
        pyramid_features = [finer]
        for i in range(4, 1, -1):
            lateral = slim.conv2d(resnet_features[i - 2], mid_channels,
                                  [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  activation_fn=None,
                                  scope='lateral/res{}'.format(i))
            upsample = tf.image.resize_bilinear(
                finer, (tf.shape(lateral)[1], tf.shape(lateral)[2]),
                name='upsample/res{}'.format(i))
            finer = upsample + lateral
            pyramid = slim.conv2d(finer, mid_channels, [3, 3],
                                  trainable=is_training,
                                  weights_initializer=initializer,
                                  activation_fn=None,
                                  scope='pyramid/res{}'.format(i))
            pyramid_features.append(pyramid)
        pyramid_features.reverse()
        pyramid = slim.avg_pool2d(pyramid_features[-1], [2, 2], stride=2,
                                  padding='SAME', scope='pyramid/res6')
        pyramid_features.append(pyramid)
    # pyramid_features downsampling rates: 4, 8, 16, 32, 64.
    allowed_borders = [16, 32, 64, 128, 256]
    feat_strides = np.array([4, 8, 16, 32, 64])
    anchor_scaleses = np.array([[1], [2], [4], [8], [16]])
    with tf.variable_scope('resnet_v1_50', 'resnet_v1_50',
                           regularizer=tf.contrib.layers.l2_regularizer(
                               cfg.TRAIN.WEIGHT_DECAY)) as scope:
        num_anchors = len(cfg.anchor_ratios)
        rpn_cls_prob_pyramid = []
        rpn_bbox_pred_pyramid = []
        anchors_pyramid = []
        rpn_cls_score_reshape_pyramid = []
        rpn_label_pyramid = []
        labels_cat_pyramid = []
        rpn_bbox_targets_pyramid = []
        rpn_bbox_inside_weights_pyramid = []
        rpn_bbox_outside_weights_pyramid = []
        with tf.variable_scope('resnet_v1_50_rpn',
                               'resnet_v1_50_rpn') as scope:
            for i, pyramid_feature in enumerate(pyramid_features):
                with tf.variable_scope('anchor/res{}'.format(i + 2)):
                    shape = tf.shape(pyramid_feature)
                    height, width = shape[1], shape[2]
                    anchors, _ = tf.py_func(
                        generate_anchors_pre,
                        [height, width, feat_strides[i], anchor_scaleses[i],
                         cfg.anchor_ratios],
                        [tf.float32, tf.int32])
                # rpn
                rpn = slim.conv2d(pyramid_feature, 512, [3, 3],
                                  trainable=is_training,
                                  weights_initializer=initializer,
                                  activation_fn=nn_ops.relu, scope='rpn_conv')
                # head
                rpn_cls_score = slim.conv2d(rpn, num_anchors * 2, [3, 3],
                                            trainable=is_training,
                                            weights_initializer=initializer,
                                            activation_fn=None,
                                            scope='rpn_cls_score')
                rpn_cls_score_reshape = tf.reshape(
                    rpn_cls_score, [-1, 2],
                    name='rpn_cls_score_reshape/res{}'.format(i + 2))
                rpn_cls_prob = tf.nn.softmax(
                    rpn_cls_score_reshape,
                    name="rpn_cls_prob_reshape/res{}".format(i + 2))
                rpn_bbox_pred = slim.conv2d(rpn, num_anchors * 4, [3, 3],
                                            trainable=is_training,
                                            weights_initializer=initializer,
                                            activation_fn=None,
                                            scope='rpn_bbox_pred')
                rpn_bbox_pred = tf.reshape(rpn_bbox_pred, [-1, 4])
                # share rpn
                scope.reuse_variables()
                rpn_cls_prob_pyramid.append(rpn_cls_prob)
                rpn_bbox_pred_pyramid.append(rpn_bbox_pred)
                anchors_pyramid.append(anchors)
                rpn_cls_score_reshape_pyramid.append(rpn_cls_score_reshape)
                if is_training:
                    with tf.variable_scope(
                            'anchors_targets/res{}'.format(i + 2)):
                        (rpn_labels, rpn_bbox_targets,
                         rpn_bbox_inside_weights, rpn_bbox_outside_weights,
                         labels_cat, gt_id) = tf.py_func(
                             anchor_target_layer,
                             [rpn_cls_score, gt_boxes, im_info,
                              feat_strides[i], anchors, num_anchors,
                              gt_masks],
                             [tf.float32, tf.float32, tf.float32, tf.float32,
                              tf.float32, tf.int64])
                        rpn_labels = tf.to_int32(
                            rpn_labels, name="to_int32")  # (1, H, W, A)
                        labels_cat = tf.to_int32(
                            labels_cat, name="to_int32")  # (1, H, W, A)
                        rpn_labels = tf.reshape(rpn_labels, [-1])
                        labels_cat = tf.reshape(labels_cat, [-1])
                        rpn_bbox_targets = tf.reshape(rpn_bbox_targets,
                                                      [-1, 4])
                        rpn_bbox_inside_weights = tf.reshape(
                            rpn_bbox_inside_weights, [-1, 4])
                        rpn_bbox_outside_weights = tf.reshape(
                            rpn_bbox_outside_weights, [-1, 4])
                        rpn_label_pyramid.append(rpn_labels)
                        labels_cat_pyramid.append(labels_cat)
                        rpn_bbox_targets_pyramid.append(rpn_bbox_targets)
                        rpn_bbox_inside_weights_pyramid.append(
                            rpn_bbox_inside_weights)
                        rpn_bbox_outside_weights_pyramid.append(
                            rpn_bbox_outside_weights)
            rpn_cls_prob_pyramid = tf.concat(axis=0,
                                             values=rpn_cls_prob_pyramid)
            rpn_bbox_pred_pyramid = tf.concat(axis=0,
                                              values=rpn_bbox_pred_pyramid)
            anchors_pyramid = tf.concat(axis=0, values=anchors_pyramid)
            rpn_cls_score_reshape_pyramid = tf.concat(
                axis=0, values=rpn_cls_score_reshape_pyramid)
        with tf.variable_scope('rois') as scope:
            rpn_cls_prob_bg = rpn_cls_prob_pyramid[:, 0]
            rpn_cls_prob_fg = 1 - rpn_cls_prob_bg
            (rpn_proposals, rpn_proposal_scores,
             rpn_proposals_addone, keep_pre) = tf.py_func(
                 proposal_without_nms_layer,
                 [rpn_cls_prob_fg, rpn_bbox_pred_pyramid, im_info,
                  anchors_pyramid],
                 [tf.float32, tf.float32, tf.float32, tf.int64])
            rpn_cls_prob_pyramid = tf.gather(rpn_cls_prob_pyramid, keep_pre)
            keep = tf.image.non_max_suppression(
                rpn_proposals_addone, rpn_proposal_scores,
                cfg.TRAIN.RPN_POST_NMS_TOP_N,
                iou_threshold=cfg.TRAIN.RPN_NMS_THRESH)
            bbox_pred = tf.gather(rpn_proposals, keep)
            roi_scores = tf.gather(rpn_proposal_scores, keep)
            anchors_pyramid = tf.gather(anchors_pyramid, keep)
            rpn_cls_prob_pyramid = tf.gather(rpn_cls_prob_pyramid, keep)
    with tf.variable_scope('seg', 'seg',
                           regularizer=tf.contrib.layers.l2_regularizer(
                               cfg.TRAIN.WEIGHT_DECAY)):
        x = pyramid_features[1]
        seg_pred = slim.conv2d(x, 128, [3, 3], trainable=is_training,
                               weights_initializer=initializer,
                               scope="pixel_seg_conv_1")
        # (Several commented-out experiments lived here: three residual
        # branches of paired 3x3 convs, scopes pixel_seg_conv_1..6, plus a
        # conv2d_transpose upsampling variant, pixel_seg_deconv_1.)
        if is_training:
            # bbox_pred_seg = tf.concat([bbox_pred, gt_boxes[:, :4]], axis=0)
            bbox_pred_seg = gt_boxes[:, :4]
        else:
            bbox_pred_seg = bbox_pred
        num_proposals = tf.shape(bbox_pred_seg)[0]
        num_proposals = tf.stack([num_proposals])
        one = tf.constant([1], dtype=tf.int32)
        seg_pred_pyramid = tf.tile(
            seg_pred, tf.concat([num_proposals, one, one, one], axis=0))
        masks, bimasks = tf.py_func(generate_bimasks, [bbox_pred_seg],
                                    [tf.float32, tf.float32])
        masks.set_shape([None, None, None, None])
        masks = tf.stop_gradient(masks)
        bimasks.set_shape([None, None, None, None])
        bimasks = tf.stop_gradient(bimasks)
        seg_pred_pyramid = seg_pred_pyramid * bimasks
        x = seg_pred_pyramid
        x = tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1],
                           padding='SAME')
        br = slim.conv2d(x, 128, [3, 3], trainable=is_training,
                         weights_initializer=initializer,
                         scope="final_conv_1")
        br = slim.conv2d(br, 128, [3, 3], trainable=is_training,
                         weights_initializer=initializer,
                         scope="final_conv_2")
        x += br
        x = tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1],
                           padding='SAME')
        x = slim.conv2d(x, 64, [3, 3], rate=2, trainable=is_training,
                        weights_initializer=initializer,
                        scope="final_conv_3")
        br = slim.conv2d(x, 64, [3, 3], trainable=is_training,
                         weights_initializer=initializer,
                         scope="final_conv_4")
        br = slim.conv2d(br, 64, [3, 3], trainable=is_training,
                         weights_initializer=initializer,
                         scope="final_conv_5")
        x += br
        # x = tf.image.resize_bilinear(x, (40, 40))
        seg_pred_pyramid = slim.conv2d(x, 2, [3, 3], trainable=is_training,
                                       weights_initializer=initializer,
                                       scope="final_conv_6")
        if is_training:
            labels_seg, = tf.py_func(generate_seg_gt,
                                     [bbox_pred_seg, gt_boxes, gt_masks],
                                     [tf.int32])
    if is_training:
        rpn_label_pyramid = tf.concat(axis=0, values=rpn_label_pyramid)
        labels_cat_pyramid = tf.concat(axis=0, values=labels_cat_pyramid)
        rpn_bbox_targets_pyramid = tf.concat(
            axis=0, values=rpn_bbox_targets_pyramid)
        rpn_bbox_inside_weights_pyramid = tf.concat(
            axis=0, values=rpn_bbox_inside_weights_pyramid)
        rpn_bbox_outside_weights_pyramid = tf.concat(
            axis=0, values=rpn_bbox_outside_weights_pyramid)

    # Export predictions.
    tf.add_to_collection("rpn_cls_prob", rpn_cls_prob_pyramid)
    tf.add_to_collection("rpn_bbox_pred", bbox_pred)
    tf.add_to_collection("anchors", anchors_pyramid)
    tf.add_to_collection("seg_pred_pyramid", seg_pred_pyramid)
    if is_training:
        with tf.variable_scope('loss') as scope:
            # RPN loss.
            rpn_cls_score = rpn_cls_score_reshape_pyramid
            rpn_label = rpn_label_pyramid
            rpn_select = tf.where(tf.not_equal(rpn_label, -1))
            rpn_cls_score = tf.reshape(
                tf.gather(rpn_cls_score, rpn_select), [-1, 2])
            labels_cat = labels_cat_pyramid
            labels_cat = tf.reshape(tf.gather(labels_cat, rpn_select), [-1])
            inds_pos = tf.where(tf.not_equal(labels_cat, 0))
            inds_neg = tf.where(tf.equal(labels_cat, 0))
            rpn_cls_score_pos = tf.reshape(
                tf.gather(rpn_cls_score, inds_pos), [-1, 2])
            rpn_cls_score_neg = tf.reshape(
                tf.gather(rpn_cls_score, inds_neg), [-1, 2])
            labels_cat_pos = tf.reshape(tf.gather(labels_cat, inds_pos), [-1])
            labels_cat_neg = tf.reshape(tf.gather(labels_cat, inds_neg), [-1])
            rpn_cross_entropy_pos = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=rpn_cls_score_pos, labels=labels_cat_pos))
            rpn_cross_entropy_neg = softmax_loss_ohem(rpn_cls_score_neg,
                                                      labels_cat_neg, 256)
            rpn_cross_entropy_pos *= 0.3
            rpn_cross_entropy_neg *= 0.3
            bPos = tf.shape(inds_pos)[0] > 0
            zero = tf.constant(0.)
            rpn_cross_entropy_pos = tf.cond(bPos,
                                            lambda: rpn_cross_entropy_pos,
                                            lambda: zero)
            masks = masks[:, :, :, 0]
            seg_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=seg_pred_pyramid, labels=labels_seg)
            seg_loss *= masks
            sum_mask = tf.reduce_sum(masks)
            bPos = sum_mask > 1
            seg_loss = tf.reduce_sum(seg_loss) / sum_mask
            # seg_loss = tf.cond(bPos, lambda: seg_loss, lambda: zero)
            # seg_loss *= seg_loss_gate
            rpn_cross_entropy = rpn_cross_entropy_pos + rpn_cross_entropy_neg
            rpn_loss_box = smooth_l1_loss_valid(
                rpn_bbox_pred_pyramid, rpn_bbox_targets_pyramid,
                rpn_bbox_inside_weights_pyramid,
                rpn_bbox_outside_weights_pyramid, labels_cat_pyramid,
                sigma=cfg.simga_rpn, dim=[0])
            loss_wd = sum(
                tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
            loss = rpn_cross_entropy + rpn_loss_box + seg_loss + loss_wd
            tf.add_to_collection('rpn_cross_entropy_pos',
                                 rpn_cross_entropy_pos)
            tf.add_to_collection('rpn_cross_entropy_neg',
                                 rpn_cross_entropy_neg)
            tf.add_to_collection('rpn_cross_entropy', rpn_cross_entropy)
            tf.add_to_collection('rpn_loss_box', rpn_loss_box)
            tf.add_to_collection('rpn_loss_seg', seg_loss)
            tf.add_to_collection('loss_wd', loss_wd)
            tf.add_to_collection('total_loss', loss)
        return loss