def _image_to_head(self, is_training, reuse=None):
    """Build the network from the base up to the 'head' feature map.

    The base and the first ``FIXED_BLOCKS`` blocks are built under an arg
    scope with ``is_training=False``; blocks ``FIXED_BLOCKS:divider`` are
    built under an arg scope that follows the caller's ``is_training``.

    Args:
        is_training: Training flag used for the non-fixed blocks' arg scope.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The output of the last stage that was built; the same value is
        stored in ``self._layers['head']``.
    """
    # NOTE(review): FIXED_BLOCKS and divider are not defined inside this
    # function; presumably module-level names -- confirm.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        features = self._build_base()

    # Keyword arguments shared by both resnet_v1 stages.
    stage_kwargs = dict(global_pool=False,
                        include_root_block=False,
                        reuse=reuse,
                        scope=self._scope)

    if FIXED_BLOCKS > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            features, _ = resnet_v1.resnet_v1(
                features, self._blocks[0:FIXED_BLOCKS], **stage_kwargs)

    if FIXED_BLOCKS < 3:
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            features, _ = resnet_v1.resnet_v1(
                features, self._blocks[FIXED_BLOCKS:divider], **stage_kwargs)

    self._layers['head'] = features
    return features
def _image_to_head(self, is_training, reuse=None):
    """Build the ResNet trunk and record the per-block end points.

    The base is built with ``is_training=False``. The first
    ``cfg.RESNET.FIXED_BLOCKS`` blocks are built frozen, and all remaining
    blocks are built with the caller's ``is_training``. End points returned
    by both ``resnet_v1`` calls are merged so that block1..block4 can be
    copied into ``self.end_points``.

    Args:
        is_training: Training flag for the non-fixed blocks' arg scope.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The output of the last stage that was built; also stored in
        ``self._layers['head']`` and appended to ``self._act_summaries``.

    Raises:
        KeyError: If one of block1..block4 is missing from the merged
            end points.
    """
    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3)
    # Now the base is always fixed during training
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net_conv = self._build_base()

    end_points = {}
    if cfg.RESNET.FIXED_BLOCKS > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net_conv, end_point = resnet_v1.resnet_v1(
                net_conv,
                self._blocks[0:cfg.RESNET.FIXED_BLOCKS],
                global_pool=False,
                include_root_block=False,
                reuse=reuse,
                scope=self._scope)
        end_points.update(end_point)

    if cfg.RESNET.FIXED_BLOCKS < 3:
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            # Bind the stage result to `end_point` (singular). Rebinding
            # `end_points` here discarded the accumulator and left
            # `end_point` undefined when FIXED_BLOCKS == 0 (NameError).
            net_conv, end_point = resnet_v1.resnet_v1(
                net_conv,
                self._blocks[cfg.RESNET.FIXED_BLOCKS:],
                global_pool=False,
                include_root_block=False,
                reuse=reuse,
                scope=self._scope)
        end_points.update(end_point)

    # NOTE(review): with FIXED_BLOCKS == 3 the second stage is skipped, so
    # block4 is presumably absent and the lookup below raises KeyError --
    # confirm whether that configuration is meant to be supported.
    for block_name in ('block1', 'block2', 'block3', 'block4'):
        self.end_points[block_name] = end_points[self._scope + '/' + block_name]

    self._act_summaries.append(net_conv)
    self._layers['head'] = net_conv
    return net_conv
def _image_to_head(self, is_training, reuse=None):
    """Build the network from the base up to the 'head' feature map.

    ``cfg.RESNET.FIXED_BLOCKS`` (asserted to lie in 0..3) selects how many
    leading blocks are built under an ``is_training=False`` arg scope. The
    remaining blocks, excluding the last entry of ``self._blocks``, are
    built with the caller's ``is_training``.

    Args:
        is_training: Training flag for the non-fixed blocks' arg scope.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The output of the last stage that was built; also appended to
        ``self._act_summaries`` and stored in ``self._layers['head']``.
    """
    num_fixed = cfg.RESNET.FIXED_BLOCKS
    assert (0 <= num_fixed <= 3)

    # The base is built manually and always with is_training=False.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        head = self._build_base()

    def run_stage(inputs, blocks):
        """Apply `blocks` to `inputs` with the shared resnet_v1 settings."""
        outputs, _ = resnet_v1.resnet_v1(inputs,
                                         blocks,
                                         global_pool=False,
                                         include_root_block=False,
                                         reuse=reuse,
                                         scope=self._scope)
        return outputs

    # Leading blocks that stay fixed during training.
    if num_fixed > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            head = run_stage(head, self._blocks[0:num_fixed])

    # Remaining blocks (the final block is left out of the head).
    if num_fixed < 3:
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            head = run_stage(head, self._blocks[num_fixed:-1])

    # Record the head output for activation summaries.
    self._act_summaries.append(head)
    self._layers['head'] = head
    return head
def res5(self, pool5_H, pool5_O, name):
    """Run the two tail blocks and average each result over axes 1 and 2.

    ``pool5_H`` goes through the second-to-last block of ``self.blocks``
    and ``pool5_O`` through the last block. Each block output is then
    reduced with ``tf.reduce_mean`` over axes ``[1, 2]``.

    Args:
        pool5_H: Input tensor for the second-to-last block.
        pool5_O: Input tensor for the last block.
        name: Unused by this method.

    Returns:
        Tuple ``(fc7_H, fc7_O)`` of the averaged block outputs.
    """
    with slim.arg_scope(resnet_arg_scope(is_training=self.train)):
        fc7_H, _ = resnet_v1.resnet_v1(pool5_H,
                                       self.blocks[-2:-1],
                                       global_pool=False,
                                       include_root_block=False,
                                       reuse=False,
                                       scope=self.scope)
        # Average the block OUTPUT. The previous code averaged pool5_H,
        # which discarded the result of the block that was just built.
        # The original author's note expects shape [None, 2048] here --
        # TODO confirm.
        fc7_H = tf.reduce_mean(fc7_H, axis=[1, 2])

        fc7_O, _ = resnet_v1.resnet_v1(pool5_O,
                                       self.blocks[-1:],
                                       global_pool=False,
                                       include_root_block=False,
                                       reuse=False,
                                       scope=self.scope)
        # Same fix as above: reduce fc7_O, not the input pool5_O.
        fc7_O = tf.reduce_mean(fc7_O, axis=[1, 2])

    return fc7_H, fc7_O
def _get_block_2_conv_net(self, is_training, reuse=True):
    """Build the base plus blocks up to index 2 of ``self._blocks``.

    The base is built with ``reuse=True`` and ``is_training=False``. The
    first ``cfg.RESNET.FIXED_BLOCKS`` blocks are built frozen; blocks
    ``FIXED_BLOCKS:2`` follow the caller's ``is_training``.

    Args:
        is_training: Training flag for the non-fixed blocks' arg scope.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The output of the last stage that was built; also appended to
        ``self._act_summaries``.
    """
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        features = self._build_base(reuse=True)

    num_fixed = cfg.RESNET.FIXED_BLOCKS
    shared = dict(global_pool=False,
                  include_root_block=False,
                  reuse=reuse,
                  scope=self._scope)

    if num_fixed > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            features, _ = resnet_v1.resnet_v1(
                features, self._blocks[0:num_fixed], **shared)

    if num_fixed < 3:
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            features, _ = resnet_v1.resnet_v1(
                features, self._blocks[num_fixed:2], **shared)

    self._act_summaries.append(features)
    return features
def _image_to_head(self, is_training, reuse=None):
    """Build the network from the base up to the 'head' feature map.

    Args:
        is_training: Training flag for the blocks after the first
            ``cfg.RESNET.FIXED_BLOCKS``; the base and the fixed blocks are
            always built with ``is_training=False``.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The head tensor; also appended to ``self._act_summaries`` and
        stored in ``self._layers['head']``.
    """
    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3)
    fixed = cfg.RESNET.FIXED_BLOCKS

    # The base never trains.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net = self._build_base()

    common = {
        'global_pool': False,
        'include_root_block': False,
        'reuse': reuse,
        'scope': self._scope,
    }

    if fixed > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net, _ = resnet_v1.resnet_v1(net, self._blocks[0:fixed], **common)

    if fixed < 3:
        # Everything after the fixed blocks except the final block.
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net, _ = resnet_v1.resnet_v1(net, self._blocks[fixed:-1], **common)

    self._act_summaries.append(net)
    self._layers['head'] = net
    return net
def res5_part(self, pool5_H, pool5_O, pool_part, is_training, name):
    """Run the tail blocks on three inputs and average each over axes 1, 2.

    ``pool5_H`` and ``pool_part`` both go through the second-to-last block
    of ``self.blocks`` (the latter with ``reuse=True``); ``pool5_O`` goes
    through the last block.

    Args:
        pool5_H: Input for the second-to-last block (``reuse=False``).
        pool5_O: Input for the last block (``reuse=False``).
        pool_part: Input for the second-to-last block (``reuse=True``).
        is_training: Training flag for the arg scope.
        name: Unused by this method.

    Returns:
        Tuple ``(fc7_H, fc7_O, pool_part)`` of the averaged outputs.
    """
    second_last = self.blocks[-2:-1]
    last = self.blocks[-1:]

    def apply_blocks(inputs, blocks, reuse):
        """Call resnet_v1 with the settings shared by all three branches."""
        outputs, _ = resnet_v1.resnet_v1(inputs,
                                         blocks,
                                         global_pool=False,
                                         include_root_block=False,
                                         reuse=reuse,
                                         scope=self.scope)
        return outputs

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        # Op creation order is kept as in the original graph.
        human = apply_blocks(pool5_H, second_last, False)
        human = tf.reduce_mean(human, axis=[1, 2])

        obj = apply_blocks(pool5_O, last, False)

        part = apply_blocks(pool_part, second_last, True)
        part = tf.reduce_mean(part, axis=[1, 2])

        obj = tf.reduce_mean(obj, axis=[1, 2])

    return human, obj, part
def _image_to_head(self, is_training, reuse=None):
    """Build the network from the base up to the 'head' feature map.

    Args:
        is_training: Training flag for the blocks after the first
            ``cfg.RESNET.FIXED_BLOCKS``.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The head tensor; also appended to ``self._act_summaries`` and
        stored in ``self._layers['head']``.
    """
    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3)
    n_fixed = cfg.RESNET.FIXED_BLOCKS

    # The base is always fixed during training.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        head = self._build_base()

    resnet_kwargs = dict(global_pool=False,
                         include_root_block=False,
                         reuse=reuse,
                         scope=self._scope)

    if n_fixed > 0:
        with arg_scope(resnet_arg_scope(is_training=False)):
            head, _ = resnet_v1.resnet_v1(
                head, self._blocks[0:n_fixed], **resnet_kwargs)

    if n_fixed < 3:
        # The later part may be set to train.
        with arg_scope(resnet_arg_scope(is_training=is_training)):
            head, _ = resnet_v1.resnet_v1(
                head, self._blocks[n_fixed:-1], **resnet_kwargs)

    self._act_summaries.append(head)
    self._layers['head'] = head
    return head
def res5(self, pool5_H, pool5_O, sp, is_training, name):
    """Run the tail blocks on whichever of the two inputs is given.

    ``pool5_H`` goes through the second-to-last block of ``self.blocks``
    and ``pool5_O`` through the last block, both with
    ``reuse=tf.AUTO_REUSE``. No averaging is applied to the outputs.

    Args:
        pool5_H: Input for the second-to-last block, or None to skip it.
        pool5_O: Input for the last block, or None to skip it.
        sp: Unused by this method.
        is_training: Training flag for the arg scope.
        name: Unused by this method.

    Returns:
        Tuple ``(fc7_H, fc7_O)``; an entry is None when its input was None.
    """
    fc7_H = None
    fc7_O = None
    shared = dict(global_pool=False,
                  include_root_block=False,
                  reuse=tf.AUTO_REUSE,
                  scope=self.scope)

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        if pool5_H is not None:
            fc7_H, _ = resnet_v1.resnet_v1(
                pool5_H, self.blocks[-2:-1], **shared)
        if pool5_O is not None:
            fc7_O, _ = resnet_v1.resnet_v1(
                pool5_O, self.blocks[-1:], **shared)

    return fc7_H, fc7_O
def _image_to_head(self, is_training, reuse=False):
    """Build the network from the base up to the 'head' feature map.

    Every stage here is built under an arg scope with
    ``is_training=False``, including the blocks after the fixed ones.

    Args:
        is_training: Not read by this method; all arg scopes are created
            with ``is_training=False``.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The head tensor; also appended to ``self._act_summaries`` and
        stored in ``self._layers['head']``.
    """
    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3)
    fixed_count = cfg.RESNET.FIXED_BLOCKS

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        features = self._build_base()

    call_kwargs = dict(global_pool=False,
                       include_root_block=False,
                       reuse=reuse,
                       scope=self._scope)

    if fixed_count > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            features, _ = resnet_v1.resnet_v1(
                features, self._blocks[0:fixed_count], **call_kwargs)

    if fixed_count < 3:
        # This stage is also frozen: its scope is hard-coded to False.
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            features, _ = resnet_v1.resnet_v1(
                features, self._blocks[fixed_count:-1], **call_kwargs)

    self._act_summaries.append(features)
    self._layers['head'] = features
    return features
def _image_to_head(self, is_training, reuse=None):
    """Build the network from the base up to the 'head' feature map.

    Steps: build the base (always frozen), then the first
    ``cfg.RESNET.FIXED_BLOCKS`` blocks (frozen), then the remaining blocks
    except the last one using the caller's ``is_training``.

    Args:
        is_training: Training flag for the non-fixed blocks' arg scope.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The head tensor; also appended to ``self._act_summaries`` and
        stored in ``self._layers['head']``.
    """
    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3)
    split = cfg.RESNET.FIXED_BLOCKS

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net = self._build_base()

    # (should this stage run?, blocks to apply, is_training for its scope)
    stages = (
        (split > 0, self._blocks[0:split], False),
        (split < 3, self._blocks[split:-1], is_training),
    )
    for enabled, stage_blocks, stage_training in stages:
        if not enabled:
            continue
        with slim.arg_scope(resnet_arg_scope(is_training=stage_training)):
            net, _ = resnet_v1.resnet_v1(net,
                                         stage_blocks,
                                         global_pool=False,
                                         include_root_block=False,
                                         reuse=reuse,
                                         scope=self._scope)

    self._act_summaries.append(net)
    self._layers['head'] = net
    return net
def _head_to_tail(self,
                  pool5,
                  pw_pool5,
                  is_training,
                  name="",
                  reuse=None,
                  ent_features_size=512,
                  rel_features_size=512):
    """Build two tail branches, each: last block -> 1x1 conv -> mean.

    Both branches run the last block of ``self._blocks`` under separate
    scopes (``self._scope + "__new"`` and ``self._scope + '_pw'``), apply a
    kernel-size-1 ``tf.layers.conv2d``, and average over axes ``[1, 2]``.

    Args:
        pool5: Input of the first branch.
        pw_pool5: Input of the second branch.
        is_training: Training flag for the arg scope.
        name: Unused by this method.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` and ``tf.layers.conv2d``.
        ent_features_size: Filter count of the first branch's conv.
        rel_features_size: Filter count of the second branch's conv.

    Returns:
        Tuple ``(fc7, pw_fc7)`` of the averaged branch outputs.
    """
    last_block = self._blocks[-1:]

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        ent, _ = resnet_v1.resnet_v1(pool5,
                                     last_block,
                                     global_pool=False,
                                     include_root_block=False,
                                     reuse=reuse,
                                     scope=self._scope + "__new")
        ent = tf.layers.conv2d(ent, ent_features_size, 1, reuse=reuse)

        rel, _ = resnet_v1.resnet_v1(pw_pool5,
                                     last_block,
                                     global_pool=False,
                                     include_root_block=False,
                                     reuse=reuse,
                                     scope=self._scope + '_pw')
        rel = tf.layers.conv2d(rel, rel_features_size, 1, reuse=reuse,
                               name="pw")

        # reduce_mean over axes 1 and 2 serves as the average pooling.
        ent = tf.reduce_mean(ent, axis=[1, 2])
        rel = tf.reduce_mean(rel, axis=[1, 2])

    return ent, rel
def build(self, inputs, input_pixel_size, is_training, scope='resnet_v1_101'):
    """Build a ResNet-v1-101 style feature extractor.

    The base and the first ``self.config.fixed_block`` blocks are built
    with ``is_training=False``; the remaining blocks follow the caller's
    ``is_training``.

    Args:
        inputs: Input tensor passed to ``self._build_base``. The original
            docstring describes it as [batch_size, height, width,
            channels].
        input_pixel_size: Unused by this method.
        is_training: Training flag for the non-fixed blocks' arg scope.
        scope: Variable scope name passed to ``resnet_v1.resnet_v1``; it is
            also stored in ``self._scope``.

    Returns:
        Tuple ``(feature_maps_out, end_points)``: the output of the second
        ``resnet_v1`` call and the end-points dict from that same call.
    """
    # A leftover `import pudb; pudb.set_trace()` breakpoint was removed
    # here; it stopped execution on every call.
    fixed_block = self.config.fixed_block
    self._scope = scope

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net_base = self._build_base(inputs)

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3', base_depth=256, num_units=23, stride=1),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
    ]

    # Frozen stage. No `reuse` argument is passed in either call.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net_conv1, _ = resnet_v1.resnet_v1(net_base,
                                           blocks[0:fixed_block],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._scope)

    # Remaining blocks follow the caller's training flag.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv2, net_dict2 = resnet_v1.resnet_v1(net_conv1,
                                                   blocks[fixed_block:],
                                                   global_pool=False,
                                                   include_root_block=False,
                                                   scope=self._scope)

    feature_maps_out = net_conv2
    return feature_maps_out, net_dict2
def decoder(self, p_x):
    """Apply the 'valid' and then the 'same' decoder block groups.

    Args:
        p_x: Input tensor of the decoder.

    Returns:
        The output of ``self._blocks_decoder_same`` applied after
        ``self._blocks_decoder_valid``.
    """
    # (enclosing variable scope name, blocks) in application order.
    stages = (
        ('valid', self._blocks_decoder_valid),
        ('same', self._blocks_decoder_same),
    )
    decoded = p_x
    for scope_name, stage_blocks in stages:
        with tf.variable_scope(scope_name):
            decoded, _ = resnet_v1.resnet_v1(decoded,
                                             stage_blocks,
                                             global_pool=False,
                                             include_root_block=False,
                                             scope=self._resnet_scope)
    return decoded
def image_to_head(self, is_training):
    """Build the base and the blocks up to the head.

    The base and the first ``cfg.RESNET.FIXED_BLOCKS`` blocks are built
    with ``is_training=False``; blocks ``FIXED_BLOCKS:-2`` of
    ``self.blocks`` follow the caller's ``is_training``.

    Args:
        is_training: Training flag for the non-fixed blocks' arg scope.

    Returns:
        The output of the second ``resnet_v1`` call.
    """
    num_fixed = cfg.RESNET.FIXED_BLOCKS

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        frozen = self.build_base()
        frozen, _ = resnet_v1.resnet_v1(frozen,
                                        self.blocks[0:num_fixed],
                                        global_pool=False,
                                        include_root_block=False,
                                        scope=self.scope)

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        # The last two entries of self.blocks are left out of the head.
        head, _ = resnet_v1.resnet_v1(frozen,
                                      self.blocks[num_fixed:-2],
                                      global_pool=False,
                                      include_root_block=False,
                                      scope=self.scope)
    return head
def resnet_v1_50(inputs,
                 num_classes=None,
                 is_training=True,
                 global_pool=True,
                 output_stride=None,
                 include_root_block=True,
                 reuse=None,
                 scope='resnet_v1_50'):
    """ResNet-50 variant; see ``resnet_v1()`` for args and return value.

    According to the original author, this matches the slim library model
    except that the stride-1 block is moved to suit a U-Net model. Here
    block1 uses stride 1 and block2..block4 use stride 2.
    """
    make_block = resnet_v1.resnet_v1_block
    # (scope name, base_depth, num_units, stride) for each block.
    layout = (
        ('block1', 64, 3, 1),
        ('block2', 128, 4, 2),
        ('block3', 256, 6, 2),
        ('block4', 512, 3, 2),
    )
    blocks = [
        make_block(block_name, base_depth=depth, num_units=units,
                   stride=stride)
        for block_name, depth, units, stride in layout
    ]
    return resnet_v1.resnet_v1(inputs,
                               blocks,
                               num_classes,
                               is_training=is_training,
                               global_pool=global_pool,
                               output_stride=output_stride,
                               include_root_block=include_root_block,
                               reuse=reuse,
                               scope=scope)
def res5(self, pool5, is_training, reuse):
    """Run the second-to-last block and average over axes 1 and 2.

    Args:
        pool5: Input tensor for the block.
        is_training: Training flag for the arg scope.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The block output reduced with ``tf.reduce_mean`` over ``[1, 2]``.
    """
    tail_blocks = self.blocks[-2:-1]
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        block_out, _ = resnet_v1.resnet_v1(pool5,
                                           tail_blocks,
                                           global_pool=False,
                                           include_root_block=False,
                                           reuse=reuse,
                                           scope=self.scope)
        return tf.reduce_mean(block_out, axis=[1, 2])
def _head_to_tail(self, pool5, is_training, reuse=None):
    """Run the last block of ``self._blocks`` and average over axes 1, 2.

    Args:
        pool5: Input tensor for the block.
        is_training: Training flag for the arg scope.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The block output reduced with ``tf.reduce_mean`` over ``[1, 2]``.
    """
    with arg_scope(resnet_arg_scope(is_training=is_training)):
        tail, _ = resnet_v1.resnet_v1(pool5,
                                      self._blocks[-1:],
                                      global_pool=False,
                                      include_root_block=False,
                                      reuse=reuse,
                                      scope=self._scope)
        pooled = tf.reduce_mean(tail, axis=[1, 2])
    return pooled
def res5_ho(self, pool5_HO, is_training, name):
    """Run the second-to-last block of ``self.blocks`` on ``pool5_HO``.

    Args:
        pool5_HO: Input tensor for the block.
        is_training: Training flag for the arg scope.
        name: Unused by this method.

    Returns:
        The block output; no averaging is applied.
    """
    block_kwargs = dict(global_pool=False,
                        include_root_block=False,
                        reuse=tf.AUTO_REUSE,
                        scope=self.scope)
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        block_out, _ = resnet_v1.resnet_v1(
            pool5_HO, self.blocks[-2:-1], **block_kwargs)
    return block_out
def resNet(images, is_training=True, reuse=False, scope=None):
    """Construct a network based on resnet_v1_50 and return two feature maps.

    The network is built with ``global_pool=False`` and the root block
    included. The block1 end point and the final output each go through a
    1x1 convolution with no activation.

    Args:
        images: Input tensor. The original docstring describes it as
            [batch, height, width, channels].
        is_training: Whether or not in training mode.
        reuse: Whether or not the layers and their variables are reused.
        scope: Scope passed to ``resnet_v1.resnet_v1``. May be None, in
            which case the block1 end point is found by its key suffix.

    Returns:
        Dict with keys ``'x60'`` (projected block1 end point, 64 outputs)
        and ``'x30'`` (projected network output, 512 outputs).

    Raises:
        KeyError: If no unique ``.../block1`` end point can be found.
    """
    # Construct Resnet50 features.
    # NOTE(review): the extent of this arg scope is ambiguous in the
    # original source; the two projection convs are assumed to be inside
    # it -- confirm.
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=0.0001)):
        block = resnet_v1.resnet_v1_block
        blocks = [
            block('block1', base_depth=64, num_units=3, stride=2),
            block('block2', base_depth=128, num_units=4, stride=2),
            block('block3', base_depth=256, num_units=6, stride=1),
            block('block4', base_depth=512, num_units=3, stride=1),
        ]
        x30, end_points = resnet_v1.resnet_v1(images,
                                              blocks,
                                              is_training=is_training,
                                              global_pool=False,
                                              reuse=reuse,
                                              scope=scope,
                                              include_root_block=True)

        # `scope + '/block1'` raised TypeError for the default scope=None.
        # Prefer the exact key when it exists, otherwise fall back to the
        # single end point whose key ends in '/block1'.
        block1_key = scope + '/block1' if isinstance(scope, str) else None
        if block1_key not in end_points:
            matches = [key for key in end_points if key.endswith('/block1')]
            if len(matches) != 1:
                raise KeyError(
                    "expected exactly one '/block1' end point, found %r"
                    % (matches,))
            block1_key = matches[0]
        x60 = end_points[block1_key]

        x60 = slim.conv2d(x60, 64, [1, 1], 1,
                          padding='SAME',
                          activation_fn=None,
                          reuse=reuse,
                          scope='conv2d_final_x60')
        x30 = slim.conv2d(x30, 512, [1, 1], 1,
                          padding='SAME',
                          activation_fn=None,
                          reuse=reuse,
                          scope='conv2d_final_x30')

    # Only the two projected feature maps are returned.
    end_points_ = {}
    end_points_['x60'] = x60
    end_points_['x30'] = x30
    return end_points_
def _image_to_head(self, is_training, reuse=None):
    """Build two parallel trunks up to their 'head' feature maps.

    ``self._build_base()`` returns two tensors. The first runs through
    ``self._blocks`` under ``self._scope`` and the second through
    ``self._blocks2`` under ``self._prev_scope``.

    Args:
        is_training: Training flag for the blocks after the first
            ``cfg.RESNET.FIXED_BLOCKS``.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        Tuple ``(head, head_prev)``; also stored in ``self._layers`` under
        ``'head'`` and ``'head_prev'``.
    """
    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3)
    fixed = cfg.RESNET.FIXED_BLOCKS

    # The base is always fixed during training.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        cur, prev = self._build_base()

    def run(inputs, blocks, scope):
        """Apply `blocks` under `scope` with the shared settings."""
        outputs, _ = resnet_v1.resnet_v1(inputs,
                                         blocks,
                                         global_pool=False,
                                         include_root_block=False,
                                         reuse=reuse,
                                         scope=scope)
        return outputs

    if fixed > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            cur = run(cur, self._blocks[0:fixed], self._scope)
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            prev = run(prev, self._blocks2[0:fixed], self._prev_scope)

    if fixed < 3:
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            cur = run(cur, self._blocks[fixed:-1], self._scope)
            prev = run(prev, self._blocks2[fixed:-1], self._prev_scope)

    self._layers['head'] = cur
    self._layers['head_prev'] = prev
    return cur, prev
def build_network(self):
    """Build the base and blocks 1..3 and return the resulting feature map.

    ``cfg.RESNET.FIXED_BLOCKS`` (asserted to lie in 0..3) selects how many
    leading blocks are built under ``resnet_arg_scope(trainable=False)``.
    The remaining blocks, excluding block4, use the default
    ``resnet_arg_scope()``.

    Returns:
        The output of the last ``resnet_v1`` call that was made.
    """
    make_block = resnet_utils.Block
    unit_fn = resnet_v1.bottleneck
    blocks = [
        make_block('block1', unit_fn, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        make_block('block2', unit_fn, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        make_block('block3', unit_fn, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
        make_block('block4', unit_fn, [(2048, 512, 1)] * 3),
    ]

    assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
    num_fixed = cfg.RESNET.FIXED_BLOCKS
    shared = dict(global_pool=False,
                  include_root_block=False,
                  scope=self._resnet_scope)

    if num_fixed == 3:
        # Everything up to and including block3 is non-trainable.
        with slim.arg_scope(resnet_arg_scope(trainable=False)):
            base = self.build_base()
            conv4, _ = resnet_v1.resnet_v1(base, blocks[0:num_fixed], **shared)
    elif num_fixed > 0:
        # Non-trainable prefix followed by a trainable remainder.
        with slim.arg_scope(resnet_arg_scope(trainable=False)):
            base = self.build_base()
            base, _ = resnet_v1.resnet_v1(base, blocks[0:num_fixed], **shared)
        with slim.arg_scope(resnet_arg_scope()):
            conv4, _ = resnet_v1.resnet_v1(base, blocks[num_fixed:-1], **shared)
    else:
        # Nothing is fixed: base and blocks use the default arg scope.
        with slim.arg_scope(resnet_arg_scope()):
            base = self.build_base()
            conv4, _ = resnet_v1.resnet_v1(base, blocks[0:-1], **shared)

    return conv4
def _head_to_tail(self, pool5, is_training, reuse=None):
    """Run the last block of ``self._blocks`` and average over axes 1, 2.

    Args:
        pool5: Input tensor for the block.
        is_training: Training flag for the arg scope.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The block output reduced with ``tf.reduce_mean`` over ``[1, 2]``.
    """
    final_block = self._blocks[-1:]
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        features, _ = resnet_v1.resnet_v1(pool5,
                                          final_block,
                                          global_pool=False,
                                          include_root_block=False,
                                          reuse=reuse,
                                          scope=self._scope)
        # The mean over axes 1 and 2 serves as the average pooling.
        return tf.reduce_mean(features, axis=[1, 2])
def head_to_tail(self, inputs, reuse=None):
    """Run the last block of ``self.blocks`` and average over axes 1, 2.

    The arg scope is created with ``trainable=self.trainable``.

    Args:
        inputs: Input tensor for the block.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The block output reduced with ``tf.reduce_mean`` over ``[1, 2]``.
    """
    call_kwargs = dict(global_pool=False,
                       include_root_block=False,
                       reuse=reuse,
                       scope=self.scope)
    with slim.arg_scope(resnet_arg_scope(trainable=self.trainable)):
        block_out, _ = resnet_v1.resnet_v1(
            inputs, self.blocks[-1:], **call_kwargs)
        # The mean over axes 1 and 2 serves as the average pooling.
        pooled = tf.reduce_mean(block_out, axis=[1, 2])
    return pooled
def _head_to_tail_hm(self, pool5_hm, is_training, reuse=False):
    """Run the last block of ``self._blocks_hm`` and average over axes 1, 2.

    The block is built under ``resnet_arg_scope_bn_trainable`` in the
    scope ``'hm/' + self._scope``.

    Args:
        pool5_hm: Input tensor for the block.
        is_training: Training flag for the arg scope.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The block output reduced with ``tf.reduce_mean`` over ``[1, 2]``.
    """
    hm_scope = 'hm/' + self._scope
    with slim.arg_scope(
            resnet_arg_scope_bn_trainable(is_training=is_training)):
        tail, _ = resnet_v1.resnet_v1(pool5_hm,
                                      self._blocks_hm[-1:],
                                      global_pool=False,
                                      include_root_block=False,
                                      reuse=reuse,
                                      scope=hm_scope)
        # The mean over axes 1 and 2 serves as the average pooling.
        return tf.reduce_mean(tail, axis=[1, 2])
def image_to_head(self, is_training):
    """Build the base and the blocks up to the head.

    Prints the configured number of fixed blocks, builds the base and
    those blocks with ``is_training=False``, then builds blocks
    ``FIXED_BLOCKS:-2`` with the caller's ``is_training``. Both
    ``resnet_v1`` calls use ``reuse=tf.AUTO_REUSE``.

    Args:
        is_training: Training flag for the non-fixed blocks' arg scope.

    Returns:
        The output of the second ``resnet_v1`` call.
    """
    print('image to head, ', cfg.RESNET.FIXED_BLOCKS)

    shared = dict(global_pool=False,
                  include_root_block=False,
                  reuse=tf.AUTO_REUSE,
                  scope=self.scope)

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        frozen = self.build_base()
        frozen, _ = resnet_v1.resnet_v1(
            frozen, self.blocks[0:cfg.RESNET.FIXED_BLOCKS], **shared)

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        # The last two entries of self.blocks are left out of the head.
        stop = -2
        head, _ = resnet_v1.resnet_v1(
            frozen, self.blocks[cfg.RESNET.FIXED_BLOCKS:stop], **shared)

    return head
def _head_to_tail(self, pool5, is_training, reuse=None, average_pool=True):
    """Run the last block of ``self._blocks``, optionally averaging it.

    Args:
        pool5: Input tensor for the block.
        is_training: Training flag for the arg scope.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.
        average_pool: When true, the block output is reduced with
            ``tf.reduce_mean`` over axes ``[1, 2]``.

    Returns:
        The block output, averaged when ``average_pool`` is true.
    """
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        tail, _ = resnet_v1.resnet_v1(pool5,
                                      self._blocks[-1:],
                                      global_pool=False,
                                      include_root_block=False,
                                      reuse=reuse,
                                      scope=self._scope)
        if not average_pool:
            return tail
        # The mean over axes 1 and 2 serves as the average pooling.
        return tf.reduce_mean(tail, axis=[1, 2])
def _image_to_head(self, is_training, reuse=None):
    """Build the trunk up to the head and derive a block2 feature map.

    In addition to the head, the block2 end point is looked up, printed,
    passed through a 1x1 convolution with 1024 outputs and ReLU, and
    stored in ``self._feature_map``.

    Args:
        is_training: Training flag for the blocks after the first
            ``cfg.RESNET.FIXED_BLOCKS``.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The head tensor; also appended to ``self._act_summaries`` and
        stored in ``self._layers['head']``.

    Raises:
        KeyError: If no block2 end point was produced.
    """
    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3)
    # Now the base is always fixed during training
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net_conv = self._build_base()

    # End points from BOTH stages are merged. Previously only the second
    # stage's dict was kept, so block2 was missing when FIXED_BLOCKS >= 2
    # and `endpoints` was unbound (NameError) when FIXED_BLOCKS == 3.
    endpoints = {}
    if cfg.RESNET.FIXED_BLOCKS > 0:
        # NOTE(review): the fixed blocks are built with is_training=True,
        # unlike the base; kept as in the original -- confirm the intent.
        with slim.arg_scope(resnet_arg_scope(is_training=True)):
            net_conv, fixed_endpoints = resnet_v1.resnet_v1(
                net_conv,
                self._blocks[0:cfg.RESNET.FIXED_BLOCKS],
                global_pool=False,
                include_root_block=False,
                reuse=reuse,
                scope=self._scope)
        endpoints.update(fixed_endpoints)

    if cfg.RESNET.FIXED_BLOCKS < 3:
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv, tail_endpoints = resnet_v1.resnet_v1(
                net_conv,
                self._blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                global_pool=False,
                include_root_block=False,
                reuse=reuse,
                scope=self._scope)
        endpoints.update(tail_endpoints)

    # Direct lookup replaces the linear scan over the end points.
    self._feature_map = endpoints[self._scope + '/block2']
    print('获取特征图 {}'.format(self._feature_map))

    self._feature_map = slim.conv2d(self._feature_map,
                                    1024,
                                    [1, 1],
                                    stride=1,
                                    padding='SAME',
                                    activation_fn=tf.nn.relu)

    self._act_summaries.append(net_conv)
    self._layers['head'] = net_conv
    return net_conv
def image_to_head(self, is_training):
    """Build the base and the blocks up to the head.

    The trainable stage stops at index -3 of ``self.blocks`` when
    ``self.model_name`` contains ``'unique_weights'`` and at -2 otherwise.
    Both ``resnet_v1`` calls use ``reuse=tf.AUTO_REUSE``.

    Args:
        is_training: Training flag for the non-fixed blocks' arg scope.

    Returns:
        The output of the second ``resnet_v1`` call.
    """
    num_fixed = cfg.RESNET.FIXED_BLOCKS
    shared = dict(global_pool=False,
                  include_root_block=False,
                  reuse=tf.AUTO_REUSE,
                  scope=self.scope)

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        frozen = self.build_base()
        frozen, _ = resnet_v1.resnet_v1(
            frozen, self.blocks[0:num_fixed], **shared)

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        if 'unique_weights' in self.model_name:
            print("unique_weights3")
            stop = -3
        else:
            stop = -2
        head, _ = resnet_v1.resnet_v1(
            frozen, self.blocks[num_fixed:stop], **shared)

    return head
def select(self, num_select, reuse=None):
    """Build the trunk plus a two-layer 1x1 conv head and return an index.

    The trunk matches the usual head construction, with the non-fixed
    blocks using ``self.training``. Two 1x1 convolutions follow (128
    outputs, then ``num_select`` outputs without activation), and the
    argmax of the softmax over the last axis is returned.

    Args:
        num_select: Number of outputs of the final 1x1 convolution.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The argmax over the last axis of the softmaxed conv output.
    """
    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3)
    fixed = cfg.RESNET.FIXED_BLOCKS

    # The base is always fixed during training.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net = self._build_base()

    shared = dict(global_pool=False,
                  include_root_block=False,
                  reuse=reuse,
                  scope=self._scope)

    if fixed > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net, _ = resnet_v1.resnet_v1(net, self._blocks[0:fixed], **shared)

    if fixed < 3:
        with slim.arg_scope(resnet_arg_scope(is_training=self.training)):
            net, _ = resnet_v1.resnet_v1(net, self._blocks[fixed:-1], **shared)

    hidden = slim.conv2d(net,
                         num_outputs=128,
                         kernel_size=[1, 1],
                         padding="SAME",
                         scope="c1_s1")
    logits = slim.conv2d(hidden,
                         num_outputs=num_select,
                         kernel_size=[1, 1],
                         padding="SAME",
                         activation_fn=None,
                         scope="c1_s2")

    # Selection: index of the largest softmax entry along the last axis.
    probabilities = tf.nn.softmax(logits, -1)
    return tf.argmax(probabilities, -1)
def _image_to_head_hm(self, is_training, reuse=False):
    """Build the 'hm' trunk from its base up to the head.

    The base from ``self._build_base_hm()`` runs through every block of
    ``self._blocks_hm`` except the last, under
    ``resnet_arg_scope_bn_trainable`` in the scope ``'hm/' + self._scope``.

    Args:
        is_training: Training flag for the arg scope.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The head tensor; also appended to ``self._act_summaries`` and
        stored in ``self._layers['head_hm']``.
    """
    with slim.arg_scope(
            resnet_arg_scope_bn_trainable(is_training=is_training)):
        base_hm = self._build_base_hm()
        head_hm, _ = resnet_v1.resnet_v1(base_hm,
                                         self._blocks_hm[:-1],
                                         global_pool=False,
                                         include_root_block=False,
                                         reuse=reuse,
                                         scope='hm/' + self._scope)

    self._act_summaries.append(head_hm)
    self._layers['head_hm'] = head_hm
    return head_hm
def _head_to_tail(self, pool5, is_training, reuse=None):
    """Run the last block of ``self._blocks`` and average over axes 1, 2.

    Args:
        pool5: Input tensor for the block.
        is_training: Training flag for the arg scope.
        reuse: Forwarded to ``resnet_v1.resnet_v1`` as ``reuse``.

    Returns:
        The block output reduced with ``tf.reduce_mean`` over ``[1, 2]``.
    """
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        # After pool5, add one more layer: the network's block4.
        block_out, _ = resnet_v1.resnet_v1(pool5,
                                           self._blocks[-1:],
                                           global_pool=False,
                                           include_root_block=False,
                                           reuse=reuse,
                                           scope=self._scope)
        # Average pooling done by reduce_mean: one pooling step whose
        # kernel covers the whole feature map, producing fc7.
        pooled = tf.reduce_mean(block_out, axis=[1, 2])
    return pooled
def _resnet_small(self,
                  inputs,
                  num_classes=None,
                  global_pool=True,
                  output_stride=None,
                  include_root_block=True,
                  reuse=None,
                  scope='resnet_v1_small'):
    """A shallow and thin ResNet v1 for faster tests.

    Args:
        inputs: Input tensor passed to ``resnet_v1.resnet_v1``.
        num_classes: Forwarded as ``num_classes``.
        global_pool: Forwarded as ``global_pool``.
        output_stride: Forwarded as ``output_stride``.
        include_root_block: Forwarded as ``include_root_block``.
        reuse: Forwarded as ``reuse``.
        scope: Forwarded as ``scope``.

    Returns:
        Whatever ``resnet_v1.resnet_v1`` returns for the small block list.
    """
    block = resnet_v1.resnet_v1_block
    blocks = [
        block('block1', base_depth=1, num_units=3, stride=2),
        block('block2', base_depth=2, num_units=3, stride=2),
        block('block3', base_depth=4, num_units=3, stride=2),
        block('block4', base_depth=8, num_units=2, stride=1),
    ]
    # Forward by keyword. Positional forwarding binds each value to the
    # wrong parameter when resnet_v1 has `is_training` between
    # `num_classes` and `global_pool`.
    return resnet_v1.resnet_v1(inputs,
                               blocks,
                               num_classes=num_classes,
                               global_pool=global_pool,
                               output_stride=output_stride,
                               include_root_block=include_root_block,
                               reuse=reuse,
                               scope=scope)
def _resnet_small(self,
                  inputs,
                  num_classes=None,
                  global_pool=True,
                  output_stride=None,
                  include_root_block=True,
                  reuse=None,
                  scope='resnet_v1_small'):
    """A shallow and thin ResNet v1 for faster tests.

    Args:
        inputs: Input tensor passed to ``resnet_v1.resnet_v1``.
        num_classes: Forwarded as ``num_classes``.
        global_pool: Forwarded as ``global_pool``.
        output_stride: Forwarded as ``output_stride``.
        include_root_block: Forwarded as ``include_root_block``.
        reuse: Forwarded as ``reuse``.
        scope: Forwarded as ``scope``.

    Returns:
        Whatever ``resnet_v1.resnet_v1`` returns for the small block list.
    """
    bottleneck = resnet_v1.bottleneck
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(4, 1, 1)] * 2 + [(4, 1, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(8, 2, 1)] * 2 + [(8, 2, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(16, 4, 1)] * 2 + [(16, 4, 2)]),
        resnet_utils.Block('block4', bottleneck,
                           [(32, 8, 1)] * 2),
    ]
    # Forward by keyword so each value reaches the parameter of the same
    # name regardless of where resnet_v1 places `is_training` in its
    # signature.
    return resnet_v1.resnet_v1(inputs,
                               blocks,
                               num_classes=num_classes,
                               global_pool=global_pool,
                               output_stride=output_stride,
                               include_root_block=include_root_block,
                               reuse=reuse,
                               scope=scope)
def build_network(self, sess, is_training=True):
    """Build the full detection graph: trunk, RPN, RoI pooling and heads.

    Args:
        sess: Not used inside this method.
        is_training: Selects the training path (anchor and proposal
            targets) and is passed as ``trainable`` / ``is_training`` to
            the layers built here.

    Returns:
        Tuple ``(rois, cls_prob, bbox_pred)``. The same tensors, plus the
        RPN outputs and ``cls_score``, are stored in ``self._predictions``.

    Raises:
        NotImplementedError: If ``cfg.TEST.MODE`` is neither ``'nms'`` nor
            ``'top'`` at test time, or ``cfg.POOLING_MODE`` is not
            ``'crop'``.
    """
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

    bottleneck = resnet_v1.bottleneck
    # Four bottleneck blocks; each tuple is passed to resnet_utils.Block as
    # a unit argument (presumably (depth, depth_bottleneck, stride) --
    # confirm against resnet_utils).
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]

    assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
    if cfg.RESNET.FIXED_BLOCKS == 3:
        # Base and blocks 1..3 are all built with is_training=False.
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net_conv5, _ = resnet_v1.resnet_v1(net,
                                               blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope='resnet_v1_101')
    elif cfg.RESNET.FIXED_BLOCKS > 0:
        # Frozen prefix, then the remaining blocks (except block4) with the
        # caller's is_training.
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net, _ = resnet_v1.resnet_v1(net,
                                         blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope='resnet_v1_101')

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv5, _ = resnet_v1.resnet_v1(net,
                                               blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope='resnet_v1_101')
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
        # Nothing is fixed: base and blocks 1..3 follow is_training.
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net = self.build_base()
            net_conv5, _ = resnet_v1.resnet_v1(net,
                                               blocks[0:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope='resnet_v1_101')

    self._act_summaries.append(net_conv5)
    self._layers['conv5_3'] = net_conv5

    # NOTE(review): the extent of this variable scope is not recoverable
    # from the source; it is assumed to cover the RPN and RoI pooling
    # sections below -- confirm.
    with tf.variable_scope('resnet_v1_101', 'resnet_v1_101',
                           regularizer=tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)):
        # build the anchors for the image
        self._anchor_component()

        # rpn
        rpn = slim.conv2d(net_conv5, 512, [3, 3], trainable=is_training, weights_initializer=initializer,
                          scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID', activation_fn=None, scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a deterministic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            # Test time: the proposal layer is chosen by cfg.TEST.MODE.
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError

        # rcnn
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net_conv5, rois, "pool5")
        else:
            raise NotImplementedError

    # block4 is applied to the pooled RoI features.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, _ = resnet_v1.resnet_v1(pool5,
                                     blocks[-1:],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope='resnet_v1_101')

    with tf.variable_scope('resnet_v1_101', 'resnet_v1_101',
                           regularizer=tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)):
        # Average pooling done by reduce_mean
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        cls_score = slim.fully_connected(fc7, self._num_classes, weights_initializer=initializer,
                                         trainable=is_training, activation_fn=None, scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_pred = slim.fully_connected(fc7, self._num_classes * 4, weights_initializer=initializer_bbox,
                                         trainable=is_training,
                                         activation_fn=None, scope='bbox_pred')

    # Expose every output by name and add them to the score summaries.
    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
def build_network(self, sess, is_training=True):
    """Build the ResNet detection graph: backbone, RPN and region classifier.

    The first three ResNet blocks produce the shared feature map. The RPN and
    the proposal layers turn it into RoIs, which are crop-pooled, run through
    the last ResNet block, spatially averaged and fed to the class-score and
    box-regression layers.

    Side effects: appends to ``self._act_summaries``, sets
    ``self._layers['head']``, fills ``self._predictions`` and merges it into
    ``self._score_summaries``.

    Args:
        sess: Not used by this method.
        is_training: Selects the training proposal path (anchor and proposal
            target layers) and is forwarded as ``trainable`` /
            ``is_training`` to the layers that are not frozen.

    Returns:
        The tuple ``(rois, cls_prob, bbox_pred)``.

    Raises:
        NotImplementedError: If ``self._num_layers`` is not 50, 101 or 152,
            if ``cfg.TEST.MODE`` is neither ``'nms'`` nor ``'top'`` when not
            training, or if ``cfg.POOLING_MODE`` is not ``'crop'``.
        AssertionError: If ``cfg.RESNET.FIXED_BLOCKS`` is outside ``[0, 3]``.
    """
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

    # choose different blocks for different number of layers
    if self._num_layers == 50:
        blocks = [
            resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
            resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
            # use stride 1 for the last conv4 layer, like the deeper variants
            resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
        ]
    elif self._num_layers == 101:
        blocks = [
            resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
            resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
            resnet_v1_block('block3', base_depth=256, num_units=23, stride=1),
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
        ]
    elif self._num_layers == 152:
        blocks = [
            resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
            resnet_v1_block('block2', base_depth=128, num_units=8, stride=2),
            resnet_v1_block('block3', base_depth=256, num_units=36, stride=1),
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
        ]
    else:
        # other numbers are not supported
        raise NotImplementedError

    assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
    if cfg.RESNET.FIXED_BLOCKS == 3:
        # Base and blocks 1-3 are all frozen.
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net,
                                               blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    elif cfg.RESNET.FIXED_BLOCKS > 0:
        # Base and the first FIXED_BLOCKS blocks are frozen ...
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net, _ = resnet_v1.resnet_v1(net,
                                         blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self._resnet_scope)
        # ... and the remaining blocks up to block3 follow is_training.
        # block4 is excluded here because it is applied to the pooled RoIs
        # further down (blocks[-1:]).
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv4, _ = resnet_v1.resnet_v1(net,
                                               blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net,
                                               blocks[0:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)

    self._act_summaries.append(net_conv4)
    self._layers['head'] = net_conv4

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # build the anchors for the image
        self._anchor_component()

        # rpn
        rpn = slim.conv2d(net_conv4, 512, [3, 3], trainable=is_training,
                          weights_initializer=initializer,
                          scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID', activation_fn=None,
                                    scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2,
                                                    'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape,
                                                   "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape,
                                           self._num_anchors * 2,
                                           "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID', activation_fn=None,
                                    scope='rpn_bbox_pred')

        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a deterministic order for the computing graph,
            # for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError

        # rcnn
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
        else:
            raise NotImplementedError

    # The last ResNet block runs on the pooled RoI features.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, _ = resnet_v1.resnet_v1(pool5,
                                     blocks[-1:],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # Average pooling done by reduce_mean
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        cls_score = slim.fully_connected(fc7, self._num_classes,
                                         weights_initializer=initializer,
                                         trainable=is_training,
                                         activation_fn=None,
                                         scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
                                         weights_initializer=initializer_bbox,
                                         trainable=is_training,
                                         activation_fn=None,
                                         scope='bbox_pred')

    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
def _build_network(self, sess, is_training=True):
    """Wire up the ResNet backbone, proposal stage and region classifier.

    The base and the first ``cfg.RESNET.FIXED_BLOCKS`` blocks are built with
    ``is_training=False``; the remaining blocks up to block3 follow
    ``is_training``. The resulting feature map is stored as the ``'head'``
    layer, passed to ``self._region_proposal`` and crop-pooled; block4 then
    runs on the pooled features, whose spatial mean goes to
    ``self._region_classification``.

    Args:
        sess: Not used by this method.
        is_training: Forwarded to the non-frozen ResNet blocks and to the
            proposal / classification helpers.

    Returns:
        The tuple ``(rois, cls_prob, bbox_pred)``.

    Raises:
        NotImplementedError: If ``self._num_layers`` is not 50, 101 or 152,
            or if ``cfg.POOLING_MODE`` is not ``'crop'``.
        AssertionError: If ``cfg.RESNET.FIXED_BLOCKS`` is outside ``[0, 3]``.
    """
    # Pick the initializer family once; both initializers differ only in stddev.
    make_initializer = (tf.truncated_normal_initializer
                        if cfg.TRAIN.TRUNCATED
                        else tf.random_normal_initializer)
    initializer = make_initializer(mean=0.0, stddev=0.01)
    initializer_bbox = make_initializer(mean=0.0, stddev=0.001)

    # Only the unit counts of block2 / block3 depend on the network depth.
    units_by_depth = {50: (4, 6), 101: (4, 23), 152: (8, 36)}
    if self._num_layers not in units_by_depth:
        # other numbers are not supported
        raise NotImplementedError
    block2_units, block3_units = units_by_depth[self._num_layers]
    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=block2_units, stride=2),
        # use stride 1 for the last conv4 layer
        resnet_v1_block('block3', base_depth=256, num_units=block3_units, stride=1),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
    ]

    num_fixed = cfg.RESNET.FIXED_BLOCKS
    assert (0 <= num_fixed <= 3)

    # Keyword arguments shared by every resnet_v1 call below.
    resnet_kwargs = dict(global_pool=False,
                         include_root_block=False,
                         scope=self._resnet_scope)

    # Now the base is always fixed during training
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        features = self._build_base()

    if num_fixed > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            features, _ = resnet_v1.resnet_v1(features,
                                              blocks[0:num_fixed],
                                              **resnet_kwargs)

    if num_fixed < 3:
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            features, _ = resnet_v1.resnet_v1(features,
                                              blocks[num_fixed:-1],
                                              **resnet_kwargs)

    self._act_summaries.append(features)
    self._layers['head'] = features

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # build the anchors for the image
        self._anchor_component()

        # region proposal network
        rois = self._region_proposal(features, is_training, initializer)

        # region of interest pooling
        if cfg.POOLING_MODE != 'crop':
            raise NotImplementedError
        pool5 = self._crop_pool_layer(features, rois, "pool5")

    # block4 is applied to the pooled RoI features
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, _ = resnet_v1.resnet_v1(pool5, blocks[-1:], **resnet_kwargs)

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # average pooling done by reduce_mean
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])

        # region classification
        cls_prob, bbox_pred = self._region_classification(
            fc7, is_training, initializer, initializer_bbox)

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
def build_network(self, sess, is_training=True):
    """Build the ResNet R-FCN graph: backbone, RPN and position-sensitive heads.

    The ResNet feature map feeds the RPN, whose proposals are converted to
    boxes by ``self._normalize_bbox``. A 1x1 convolution reduces the feature
    map to 1024 channels, from which ``cfg.K * cfg.K`` groups of class and
    box score maps are predicted. Each box is divided into a ``cfg.K`` by
    ``cfg.K`` grid; every grid cell is cropped from its own group of score
    maps, and the crops are averaged to give the per-RoI class scores and
    box predictions.

    Side effects: appends to ``self._act_summaries``, sets
    ``self._layers['head']``, fills ``self._predictions`` and merges it into
    ``self._score_summaries``.

    NOTE(review): the source had lost its indentation; the R-FCN layers are
    placed inside the ``self._resnet_scope`` variable scope here -- confirm
    against existing checkpoints' variable names.

    Args:
        sess: Not used by this method.
        is_training: Selects the training proposal path (anchor and proposal
            target layers) and is forwarded as ``trainable`` /
            ``is_training`` to the RPN layers and non-frozen ResNet blocks.

    Returns:
        The tuple ``(rois, cls_prob, position_sensitive_bbox_feature)``.

    Raises:
        NotImplementedError: If ``self._num_layers`` is not 50, 101 or 152,
            or if ``cfg.TEST.MODE`` is neither ``'nms'`` nor ``'top'`` when
            not training.
        AssertionError: If ``cfg.RESNET.FIXED_BLOCKS`` is outside ``[0, 3]``.
    """
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

    # choose different blocks for different number of layers
    if self._num_layers == 50:
        blocks = [
            resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
            resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
            # use stride 1 for the last conv4 layer, like the deeper variants
            resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
        ]
    elif self._num_layers == 101:
        blocks = [
            resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
            resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
            resnet_v1_block('block3', base_depth=256, num_units=23, stride=1),
            resnet_v1_block_hole('block4', base_depth=512, num_units=3, stride=1),
        ]
    elif self._num_layers == 152:
        blocks = [
            resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
            resnet_v1_block('block2', base_depth=128, num_units=8, stride=2),
            resnet_v1_block('block3', base_depth=256, num_units=36, stride=1),
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
        ]
    else:
        # other numbers are not supported
        raise NotImplementedError

    # NOTE(review): only the 0 < FIXED_BLOCKS < 3 branch runs block4 as part
    # of the backbone; the other two branches stop after block3 and block4 is
    # never built. Left unchanged -- confirm which behaviour is intended.
    assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
    if cfg.RESNET.FIXED_BLOCKS == 3:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net,
                                               blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    elif cfg.RESNET.FIXED_BLOCKS > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net, _ = resnet_v1.resnet_v1(net,
                                         blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self._resnet_scope)
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv4, _ = resnet_v1.resnet_v1(net,
                                               blocks[cfg.RESNET.FIXED_BLOCKS:],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net,
                                               blocks[0:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)

    self._act_summaries.append(net_conv4)
    self._layers['head'] = net_conv4

    # One group of score maps per cell of the K x K grid.
    num_bins = cfg.K * cfg.K
    # NOTE(review): class count (20 foreground + background) is hard-coded;
    # presumably it should track the dataset's class count -- confirm.
    num_classes = 20 + 1

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # build the anchors for the image
        self._anchor_component()

        # rpn
        rpn = slim.conv2d(net_conv4, 512, [3, 3], trainable=is_training,
                          weights_initializer=initializer,
                          scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID', activation_fn=None,
                                    scope='rpn_cls_score')
        # Regroup the channels so that the last axis has size 2 and softmax
        # over it.
        rpn_cls_score_shape = tf.shape(rpn_cls_score)
        rpn_cls_score_reshape = tf.reshape(
            rpn_cls_score,
            shape=[rpn_cls_score_shape[0],
                   rpn_cls_score_shape[1],
                   rpn_cls_score_shape[2] * self._num_anchors,
                   2])
        rpn_cls_prob = tf.nn.softmax(rpn_cls_score_reshape)
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer_bbox,
                                    padding='VALID', activation_fn=None,
                                    scope='rpn_bbox_pred')

        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a deterministic order for the computing graph,
            # for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError

        # rfcn: a 1024-channel 1x1 conv layer
        rfcn_net = slim.conv2d(
            net_conv4, 1024, [1, 1], padding='SAME',
            weights_initializer=tf.random_normal_initializer(stddev=0.01),
            weights_regularizer=slim.l2_regularizer(scale=0.0005),
            scope='refined_reduce_depth', activation_fn=tf.nn.relu)

        # generate k*k*(C+1) score maps
        rfcn_net_classes = slim.conv2d(
            rfcn_net, num_bins * num_classes, [1, 1],
            weights_initializer=tf.random_normal_initializer(stddev=0.01),
            weights_regularizer=slim.l2_regularizer(scale=0.0005),
            scope='refined_classes', activation_fn=None)
        rfcn_net_bbox = slim.conv2d(
            rfcn_net, num_bins * 4 * num_classes, [1, 1],
            weights_regularizer=slim.l2_regularizer(scale=0.0005),
            weights_initializer=tf.random_normal_initializer(stddev=0.01),
            scope='refined_bbox', activation_fn=None)

        box_ind, bbox = self._normalize_bbox(net_conv4, rois, name='rois2bbox')

        # rfcn pooling layer: split every box into a K x K grid of sub-boxes,
        # ordered row by row so they line up with the channel groups below.
        position_sensitive_boxes = []
        ymin, xmin, ymax, xmax = tf.unstack(bbox, axis=1)
        step_y = (ymax - ymin) / cfg.K
        step_x = (xmax - xmin) / cfg.K
        for bin_y in range(cfg.K):
            for bin_x in range(cfg.K):
                box_coordinates = [ymin + bin_y * step_y,
                                   xmin + bin_x * step_x,
                                   ymin + (bin_y + 1) * step_y,
                                   xmin + (bin_x + 1) * step_x]
                position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))

        def _position_sensitive_pool(score_maps):
            # Crop each grid cell from its own channel group, average the
            # crops across cells, then average over the crop's spatial axes.
            # The group count must equal the number of grid cells, hence
            # num_bins rather than a literal.
            groups = tf.split(score_maps, num_or_size_splits=num_bins, axis=3)
            crops = [
                tf.image.crop_and_resize(group, box, tf.to_int32(box_ind), [6, 6])
                for group, box in zip(groups, position_sensitive_boxes)
            ]
            pooled = tf.add_n(crops) / len(crops)
            return tf.reduce_mean(pooled, axis=[1, 2])

        # class with background
        position_sensitive_classes = _position_sensitive_pool(rfcn_net_classes)
        cls_prob = tf.nn.softmax(position_sensitive_classes)

        # bounding box features
        position_sensitive_bbox_feature = _position_sensitive_pool(rfcn_net_bbox)

    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = position_sensitive_classes
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = position_sensitive_bbox_feature
    self._predictions["rois"] = rois

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, position_sensitive_bbox_feature