def forward(self, inputs, is_training=False, reuse=False):
    """Fault-injection YOLOv3 forward pass.

    Every layer is built through injection-aware ops (inj_conv2d and the
    injection variants of darknet53_body / yolo_block), which all take
    inj_type / quant_min_max / inj_layer / delta_4d. The running counter
    self.num_layer appears to give each conv a global layer index so a
    single layer can be targeted for injection -- TODO confirm against
    inj_conv2d's definition.

    Args:
        inputs: NHWC image batch.
        is_training: forwarded to batch norm.
        reuse: variable reuse flag for slim.conv2d / slim.batch_norm.

    Returns:
        (feature_map_1, feature_map_2, feature_map_3): raw detection
        logits at the three scales, coarse to fine.
    """
    # the input img_size, form: [height, weight]
    # it will be used later
    self.img_size = tf.shape(inputs)[1:3]
    # set batch norm params
    batch_norm_params = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }
    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        # Defaults for every conv: BN, leaky relu (slope 0.1), L2 decay.
        with slim.arg_scope(
                [slim.conv2d],
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                biases_initializer=None,
                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1),
                weights_regularizer=slim.l2_regularizer(
                    self.weight_decay)) as arg_sc:
            with tf.variable_scope('darknet53_body'):
                route_1, route_2, route_3 = darknet53_body(
                    inputs,
                    inj_type=self.inj_type,
                    quant_min_max=self.quant_min_max,
                    inj_layer=self.inj_layer,
                    delta_4d=self.delta_4d,
                    batch_norm_params=batch_norm_params)
            with tf.variable_scope('yolov3_head'):
                # ---- scale 1 (coarsest) ----
                inter1, net = yolo_block(
                    route_3,
                    512,
                    inj_type=self.inj_type,
                    quant_min_max=self.quant_min_max,
                    inj_layer=self.inj_layer,
                    delta_4d=self.delta_4d,
                    num_layer=self.num_layer,
                    batch_norm_params=batch_norm_params)
                self.num_layer += 6  # yolo_block consumed 6 layer indices
                # Prediction conv: raw logits, so no BN / activation.
                feature_map_1 = inj_conv2d(
                    net,
                    3 * (5 + self.class_num),
                    1,
                    strides=1,
                    normalizer_fn=None,
                    activation_fn=None,
                    biases_initializer=tf.zeros_initializer(),
                    fixed_padding=False,
                    inj_type=self.inj_type,
                    quant_min_max=self.quant_min_max,
                    inj_layer=self.inj_layer,
                    delta_4d=self.delta_4d,
                    num_layer=self.num_layer,
                    batch_norm_params=batch_norm_params)
                self.num_layer += 1
                feature_map_1 = tf.identity(feature_map_1,
                                            name='feature_map_1')
                # ---- scale 2: upsample and fuse with route_2 ----
                inter1 = inj_conv2d(
                    inter1,
                    256,
                    1,
                    inj_type=self.inj_type,
                    quant_min_max=self.quant_min_max,
                    inj_layer=self.inj_layer,
                    delta_4d=self.delta_4d,
                    num_layer=self.num_layer,
                    batch_norm_params=batch_norm_params)
                self.num_layer += 1
                inter1 = upsample_layer(
                    inter1,
                    route_2.get_shape().as_list()
                    if self.use_static_shape else tf.shape(route_2))
                concat1 = tf.concat([inter1, route_2], axis=3)
                inter2, net = yolo_block(
                    concat1,
                    256,
                    inj_type=self.inj_type,
                    quant_min_max=self.quant_min_max,
                    inj_layer=self.inj_layer,
                    delta_4d=self.delta_4d,
                    num_layer=self.num_layer,
                    batch_norm_params=batch_norm_params)
                self.num_layer += 6
                feature_map_2 = inj_conv2d(
                    net,
                    3 * (5 + self.class_num),
                    1,
                    strides=1,
                    normalizer_fn=None,
                    activation_fn=None,
                    biases_initializer=tf.zeros_initializer(),
                    fixed_padding=False,
                    inj_type=self.inj_type,
                    quant_min_max=self.quant_min_max,
                    inj_layer=self.inj_layer,
                    delta_4d=self.delta_4d,
                    num_layer=self.num_layer,
                    batch_norm_params=batch_norm_params)
                self.num_layer += 1
                feature_map_2 = tf.identity(feature_map_2,
                                            name='feature_map_2')
                # ---- scale 3: upsample and fuse with route_1 ----
                inter2 = inj_conv2d(
                    inter2,
                    128,
                    1,
                    inj_type=self.inj_type,
                    quant_min_max=self.quant_min_max,
                    inj_layer=self.inj_layer,
                    delta_4d=self.delta_4d,
                    num_layer=self.num_layer,
                    batch_norm_params=batch_norm_params)
                self.num_layer += 1
                inter2 = upsample_layer(
                    inter2,
                    route_1.get_shape().as_list()
                    if self.use_static_shape else tf.shape(route_1))
                concat2 = tf.concat([inter2, route_1], axis=3)
                _, feature_map_3 = yolo_block(
                    concat2,
                    128,
                    inj_type=self.inj_type,
                    quant_min_max=self.quant_min_max,
                    inj_layer=self.inj_layer,
                    delta_4d=self.delta_4d,
                    num_layer=self.num_layer,
                    batch_norm_params=batch_norm_params)
                self.num_layer += 6
                feature_map_3 = inj_conv2d(
                    feature_map_3,
                    3 * (5 + self.class_num),
                    1,
                    strides=1,
                    normalizer_fn=None,
                    activation_fn=None,
                    biases_initializer=tf.zeros_initializer(),
                    fixed_padding=False,
                    inj_type=self.inj_type,
                    quant_min_max=self.quant_min_max,
                    inj_layer=self.inj_layer,
                    delta_4d=self.delta_4d,
                    num_layer=self.num_layer,
                    batch_norm_params=batch_norm_params)
                self.num_layer += 1
                feature_map_3 = tf.identity(feature_map_3,
                                            name='feature_map_3')
    return feature_map_1, feature_map_2, feature_map_3
def forward(self, inputs, is_training=False, reuse=False):
    """Single-output variant: builds the usual two upsample/merge steps
    of the YOLOv3 head but only attaches a prediction conv to the
    finest scale.

    Args:
        inputs: NHWC image batch.
        is_training: forwarded to batch norm.
        reuse: variable reuse flag for slim.conv2d / slim.batch_norm.

    Returns:
        [feature_map_3]: one-element list holding the finest-scale map.
    """
    # the input img_size, form: [height, weight]
    # it will be used later
    self.img_size = tf.shape(inputs)[1:3]
    # set batch norm params
    batch_norm_params = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }
    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        # Variance-scaling init here instead of the L2 regularizer the
        # sibling variants use (regularizer left disabled below).
        with slim.arg_scope(
                [slim.conv2d],
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                biases_initializer=None,
                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1),
                weights_initializer=tf.contrib.layers.
                variance_scaling_initializer(dtype=tf.float32)
                # weights_regularizer=slim.l2_regularizer(self.weight_decay)
        ):
            with tf.variable_scope('darknet53_body'):
                route_1, route_2, route_3 = darknet53_body(inputs)
            with tf.variable_scope('yolov3_head'):
                inter1, net = yolo_block(route_3, 512)
                inter1 = conv2d(inter1, 256, 1)
                inter1 = upsample_layer(
                    inter1,
                    route_2.get_shape().as_list()
                    if self.use_static_shape else tf.shape(route_2))
                concat1 = tf.concat([inter1, route_2], axis=3)
                inter2, net = yolo_block(concat1, 256)
                inter2 = conv2d(inter2, 128, 1)
                inter2 = upsample_layer(
                    inter2,
                    route_1.get_shape().as_list()
                    if self.use_static_shape else tf.shape(route_1))
                concat2 = tf.concat([inter2, route_1], axis=3)
                _, feature_map_3 = yolo_block(concat2, 128)
                # NOTE(review): sibling variants use 3 * (5 + self.class_num)
                # output channels for the prediction conv; confirm that
                # (3 + self.class_num) is the intended channel count here.
                feature_map_3 = slim.conv2d(
                    feature_map_3, (3 + self.class_num),
                    1,
                    stride=1,
                    normalizer_fn=None,
                    activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_3 = tf.identity(feature_map_3,
                                            name='feature_map_3')
                return [feature_map_3]
def forward(self, inputs, is_training=False, reuse=False):
    """Two-stage YOLOv3 graph.

    The backbone lives under scope 'inference_1' with tagged feature
    outputs, and the detection head under 'inference_2' with matching
    tagged inputs, so the graph can be cut between the two scopes.

    Returns:
        (feature_map_1, feature_map_2, feature_map_3), coarse to fine.
    """
    # Spatial size of the input batch, kept for later decoding.
    self.img_size = tf.shape(inputs)[1:3]

    bn_args = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    def leaky(x):
        return tf.nn.leaky_relu(x, alpha=0.1)

    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        with slim.arg_scope([slim.conv2d],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=bn_args,
                            biases_initializer=None,
                            activation_fn=leaky):

            def det_layer(feats):
                # 1x1 detection conv emitting raw logits (no BN / act).
                return slim.conv2d(
                    feats, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())

            with tf.variable_scope('inference_1'):
                route_1, route_2, route_3 = darknet53_body(inputs)
                # Name the hand-off tensors so the graph can be split here.
                route_1 = tf.identity(route_1, 'feature_output_1')
                route_2 = tf.identity(route_2, 'feature_output_2')
                route_3 = tf.identity(route_3, 'feature_output_3')

            with tf.variable_scope('inference_2'):
                route_1 = tf.identity(route_1, 'feature_input_1')
                route_2 = tf.identity(route_2, 'feature_input_2')
                route_3 = tf.identity(route_3, 'feature_input_3')

                # Coarsest scale.
                branch, head = yolo_block(route_3, 512)
                feature_map_1 = tf.identity(det_layer(head),
                                            name='feature_output_1')

                # Middle scale: upsample and fuse with route_2.
                branch = conv2d(branch, 256, 1)
                branch = upsample_layer(branch,
                                        route_2.get_shape().as_list())
                merged = tf.concat([branch, route_2], axis=3)
                branch, head = yolo_block(merged, 256)
                feature_map_2 = tf.identity(det_layer(head),
                                            name='feature_output_2')

                # Finest scale: upsample and fuse with route_1.
                branch = conv2d(branch, 128, 1)
                branch = upsample_layer(branch,
                                        route_1.get_shape().as_list())
                merged = tf.concat([branch, route_1], axis=3)
                _, head = yolo_block(merged, 128)
                feature_map_3 = tf.identity(det_layer(head),
                                            name='feature_output_3')

    return feature_map_1, feature_map_2, feature_map_3
def forward(self, inputs, is_training=False, reuse=False):
    """Classification variant of the network.

    The three backbone scales are fused via strided 3x3 convs into one
    grid, flattened, and pushed through two dense layers.

    Returns:
        (logits_output, embedding): class logits of width
        self.class_num and the 128-d penultimate activation.
    """
    # Input spatial size, kept for later use.
    self.img_size = tf.shape(inputs)[1:3]

    bn_args = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        with slim.arg_scope([slim.conv2d],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=bn_args,
                            biases_initializer=None,
                            activation_fn=lambda x: tf.nn.relu6(x)):
            with tf.variable_scope('darknet53_body'):
                route_1, route_2, route_3 = darknet53_body(inputs)
            with tf.variable_scope('yolov3_head'):
                # Step the finest map down and merge with each coarser one.
                x = conv2d(route_1, 128, 3, strides=2)
                x = tf.concat([x, route_2], axis=3)
                x = conv2d(x, 128, 3, strides=2)
                x = tf.concat([x, route_3], axis=3)
                # Three more convs (one strided) before flattening.
                x = conv2d(x, 128, 3, strides=2)
                x = conv2d(x, 128, 3, strides=1)
                x = conv2d(x, 128, 3, strides=1)
                flat = tf.layers.flatten(x)
                embedding = tf.layers.dense(
                    flat,
                    128,
                    activation=lambda x: tf.nn.relu6(x),
                    kernel_initializer=tf.contrib.layers.
                    xavier_initializer(),
                    bias_initializer=tf.zeros_initializer(),
                    use_bias=True)
                # Final classifier: raw logits, no bias.
                logits_output = tf.layers.dense(
                    embedding,
                    self.class_num,
                    kernel_initializer=tf.contrib.layers.
                    xavier_initializer(),
                    bias_initializer=tf.zeros_initializer(),
                    use_bias=False)
    return logits_output, embedding
def forward_body(self,
                 image,
                 image_half,
                 image_quar,
                 is_training=False,
                 reuse=tf.AUTO_REUSE):
    """Run the darknet53 backbone over an image pyramid.

    The full-, half- and quarter-resolution images all pass through the
    same 'darknet53_body' variable scope (AUTO_REUSE by default, so the
    weights are shared); only the deepest feature map of each is kept.

    Returns:
        (route_1, route_2, route_3): deepest features for the full,
        half and quarter resolution inputs respectively.
    """
    # Spatial size of the full-resolution input, kept for later.
    self.img_size = tf.shape(image)[1:3]

    bn_args = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    with slim.arg_scope(
            [slim.conv2d],
            normalizer_fn=slim.batch_norm,
            normalizer_params=bn_args,
            biases_initializer=None,
            activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1),
            weights_regularizer=slim.l2_regularizer(self.weight_decay)):
        with tf.variable_scope('darknet53_body', reuse=reuse):
            deepest_feats = []
            for scaled_img in (image, image_half, image_quar):
                _, _, feat = darknet53_body(scaled_img)
                deepest_feats.append(feat)
    route_1, route_2, route_3 = deepest_feats
    return route_1, route_2, route_3
def forward_get_result(self, inputs, is_training=False, reuse=False):
    """Build the SPP-variant detection graph and post-process it.

    Runs the three-scale head (yolo_block_spp) and then
    self.post_process, returning export-friendly named tensors.

    Returns:
        (boxes, scores, labels) named 'detect_bbox', 'detect_scores',
        'detect_labels'.
    """
    # Input spatial size, used later when decoding boxes.
    self.img_size = tf.shape(inputs)[1:3]

    bn_args = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        with slim.arg_scope(
                [slim.conv2d],
                normalizer_fn=slim.batch_norm,
                normalizer_params=bn_args,
                biases_initializer=None,
                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1),
                weights_regularizer=slim.l2_regularizer(
                    self.weight_decay)):

            def predict(head):
                # 1x1 conv to raw detection logits (no BN, no activation).
                return slim.conv2d(
                    head, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())

            with tf.variable_scope('darknet53_body'):
                route_1, route_2, route_3 = darknet53_body(inputs)

            with tf.variable_scope('yolov3_head'):
                # Coarsest scale.
                branch, head = yolo_block_spp(route_3, 512)
                feature_map_1 = tf.identity(predict(head),
                                            name='feature_map_1')

                # Middle scale: upsample (dynamic shape) + fuse route_2.
                branch = conv2d(branch, 256, 1)
                branch = upsample_layer(branch, tf.shape(route_2))
                fused = tf.concat([branch, route_2], axis=3)
                branch, head = yolo_block_spp(fused, 256)
                feature_map_2 = tf.identity(predict(head),
                                            name='feature_map_2')

                # Finest scale: upsample + fuse route_1.
                branch = conv2d(branch, 128, 1)
                branch = upsample_layer(branch, tf.shape(route_1))
                fused = tf.concat([branch, route_1], axis=3)
                _, head = yolo_block_spp(fused, 128)
                feature_map_3 = tf.identity(predict(head),
                                            name='feature_map_3')

                # Decode + NMS over all three scales.
                boxes, scores, labels = self.post_process(
                    (feature_map_1, feature_map_2, feature_map_3))
                boxes = tf.identity(boxes, name='detect_bbox')
                scores = tf.identity(scores, name="detect_scores")
                labels = tf.identity(labels, name="detect_labels")
    return boxes, scores, labels
def forward(self, inputs, is_training=False, reuse=False):
    """Standard three-scale YOLOv3 forward pass.

    Upsampling targets use static shapes when self.use_static_shape is
    set, dynamic tf.shape otherwise.

    Returns:
        (feature_map_1, feature_map_2, feature_map_3): raw detection
        logits, coarse to fine.
    """
    # Height/width of the input batch, kept for box decoding later.
    self.img_size = tf.shape(inputs)[1:3]

    bn_args = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    def target_shape(route):
        # Shape the upsampled tensor must match.
        if self.use_static_shape:
            return route.get_shape().as_list()
        return tf.shape(route)

    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        # Every conv defaults to BN + leaky relu + L2 regularization.
        with slim.arg_scope(
                [slim.conv2d],
                normalizer_fn=slim.batch_norm,
                normalizer_params=bn_args,
                biases_initializer=None,
                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1),
                weights_regularizer=slim.l2_regularizer(
                    self.weight_decay)):

            def predict(head):
                # Raw-logit 1x1 projection: 3 anchors per cell, each
                # with 4 box coords + objectness + class scores.
                return slim.conv2d(
                    head, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())

            with tf.variable_scope('darknet53_body'):
                route_1, route_2, route_3 = darknet53_body(inputs)

            with tf.variable_scope('yolov3_head'):
                # Scale 1 (coarsest grid).
                branch, head = yolo_block(route_3, 512)
                feature_map_1 = tf.identity(predict(head),
                                            name='feature_map_1')

                # Scale 2: upsample the branch to route_2's size and fuse.
                branch = conv2d(branch, 256, 1)
                branch = upsample_layer(branch, target_shape(route_2))
                branch, head = yolo_block(
                    tf.concat([branch, route_2], axis=3), 256)
                feature_map_2 = tf.identity(predict(head),
                                            name='feature_map_2')

                # Scale 3: upsample to route_1's size and fuse.
                branch = conv2d(branch, 128, 1)
                branch = upsample_layer(branch, target_shape(route_1))
                _, head = yolo_block(
                    tf.concat([branch, route_1], axis=3), 128)
                feature_map_3 = tf.identity(predict(head),
                                            name='feature_map_3')

    return feature_map_1, feature_map_2, feature_map_3
def forward(self,
            inputs,
            is_training=False,
            train_with_gray=True,
            reuse=False):
    """Build the detection graph with a selectable backbone.

    Args:
        inputs: NHWC image batch.
        is_training: forwarded to batch norm (and to the mobilenet
            backbones, which take a training flag).
        train_with_gray: if True, collapse RGB to a single channel via
            self.rgb_factor and tile back to 3 channels, so the network
            sees a grayscale image with an RGB-shaped input.
        reuse: variable reuse flag for slim.conv2d / slim.batch_norm.

    Returns:
        Three feature maps (coarse to fine) when
        self.train_with_two_feature_map is False, otherwise two.
    """
    # the input img_size, form: [height, weight]
    # it will be used later
    self.img_size = tf.shape(inputs)[1:3]
    # set batch norm params
    batch_norm_params = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }
    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        with slim.arg_scope([slim.conv2d],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            biases_initializer=None,
                            activation_fn=lambda x: tf.nn.relu6(x)):
            with tf.variable_scope('darknet53_body'):
                # Convert to grayscale (keeping 3 channels so the
                # backbone input shape is unchanged).
                if train_with_gray:
                    inputs = inputs * self.rgb_factor
                    inputs = tf.reduce_sum(inputs, axis=-1)
                    inputs = tf.expand_dims(inputs, -1)
                    inputs = tf.tile(inputs, [1, 1, 1, 3])
                # Pick the backbone by name; each returns 2 or 3 routes
                # depending on train_with_two_feature_map.
                if self.backbone_name == "darknet53":
                    routes = darknet53_body(
                        inputs, self.train_with_two_feature_map)
                elif self.backbone_name == "darknet53_prun":
                    routes = darknet53_body_prun(
                        inputs, self.train_with_two_feature_map)
                elif self.backbone_name == "mobilenetv2":
                    routes = mobilenetv2(inputs,
                                         self.train_with_two_feature_map,
                                         is_training)
                elif self.backbone_name == "mobilenetv3":
                    routes = mobilenetv3(inputs,
                                         self.train_with_two_feature_map,
                                         is_training)
                elif self.backbone_name == "mobilenetv3_add_zoom_factor":
                    routes = mobilenetv3_add_zoom_factor(
                        inputs, self.train_with_two_feature_map,
                        is_training)
                else:
                    # Unknown backbone: report and abort.
                    print(
                        "backbone name is not right, it is mast in [darknet53, darknet53_prun, mobilenetv2, mobilenetv3, mobilenetv3_add_zoom_factor]"
                    )
                    sys.exit()
            with tf.variable_scope('yolov3_head'):
                if not self.train_with_two_feature_map:
                    # Standard three-scale YOLOv3 head.
                    route_1, route_2, route_3 = routes
                    inter1, net = yolo_block(route_3, 512)
                    # Prediction conv: raw logits (no BN / activation).
                    feature_map_1 = slim.conv2d(
                        net,
                        3 * (5 + self.class_num),
                        1,
                        stride=1,
                        normalizer_fn=None,
                        activation_fn=None,
                        biases_initializer=tf.zeros_initializer())
                    feature_map_1 = tf.identity(feature_map_1,
                                                name='feature_map_1')
                    inter1 = conv2d(inter1, 256, 1)
                    inter1 = upsample_layer(inter1,
                                            route_2.get_shape().as_list())
                    concat1 = tf.concat([inter1, route_2], axis=3)
                    inter2, net = yolo_block(concat1, 256)
                    feature_map_2 = slim.conv2d(
                        net,
                        3 * (5 + self.class_num),
                        1,
                        stride=1,
                        normalizer_fn=None,
                        activation_fn=None,
                        biases_initializer=tf.zeros_initializer())
                    feature_map_2 = tf.identity(feature_map_2,
                                                name='feature_map_2')
                    inter2 = conv2d(inter2, 128, 1)
                    inter2 = upsample_layer(inter2,
                                            route_1.get_shape().as_list())
                    concat2 = tf.concat([inter2, route_1], axis=3)
                    _, feature_map_3 = yolo_block(concat2, 128)
                    feature_map_3 = slim.conv2d(
                        feature_map_3,
                        3 * (5 + self.class_num),
                        1,
                        stride=1,
                        normalizer_fn=None,
                        activation_fn=None,
                        biases_initializer=tf.zeros_initializer())
                    feature_map_3 = tf.identity(feature_map_3,
                                                name='feature_map_3')
                    return feature_map_1, feature_map_2, feature_map_3
                else:
                    # Two-scale head: skip the coarsest prediction.
                    route_1, route_2 = routes
                    inter2, net = yolo_block(route_2, 256)
                    feature_map_2 = slim.conv2d(
                        net,
                        3 * (5 + self.class_num),
                        1,
                        stride=1,
                        normalizer_fn=None,
                        activation_fn=None,
                        biases_initializer=tf.zeros_initializer())
                    feature_map_2 = tf.identity(feature_map_2,
                                                name='feature_map_2')
                    inter2 = conv2d(inter2, 128, 1)
                    inter2 = upsample_layer(inter2,
                                            route_1.get_shape().as_list())
                    concat2 = tf.concat([inter2, route_1], axis=3)
                    _, feature_map_3 = yolo_block(concat2, 128)
                    feature_map_3 = slim.conv2d(
                        feature_map_3,
                        3 * (5 + self.class_num),
                        1,
                        stride=1,
                        normalizer_fn=None,
                        activation_fn=None,
                        biases_initializer=tf.zeros_initializer())
                    feature_map_3 = tf.identity(feature_map_3,
                                                name='feature_map_3')
                    return feature_map_2, feature_map_3
def forward(self, inputs, is_training=False, reuse=False):
    """Three-scale YOLOv3 forward pass (relu6 / mobilenet-style params).

    Fixes relative to the previous version:
    - The three detection (1x1) convs disagreed: feature_map_1 had
      batch norm + relu6, feature_map_2 had relu6 only, feature_map_3
      was raw. Detection heads must emit unconstrained logits (box
      offsets and class logits can be negative), so all three now use
      normalizer_fn=None, activation_fn=None with a zero-initialized
      bias — consistent with every sibling forward() variant.
    - normalizer_params hard-coded 'is_training': True, silently
      ignoring the is_training argument; it is now wired through.
    - The unused batch_norm_params dict (shadowed by normalizer_params)
      was removed.

    Args:
        inputs: NHWC image batch.
        is_training: forwarded to batch norm.
        reuse: variable reuse flag for slim.conv2d / slim.batch_norm.

    Returns:
        (feature_map_1, feature_map_2, feature_map_3), coarse to fine.
    """
    # the input img_size, form: [height, width]; used later for decoding
    self.img_size = tf.shape(inputs)[1:3]

    # hyperparams shared by every intermediate conv
    activation_fn = tf.nn.relu6
    normalizer_fn = tf.contrib.slim.batch_norm
    normalizer_params = {
        'is_training': is_training,  # was hard-coded True
        'center': True,
        'scale': True,
        'decay': 0.9997,
        'epsilon': 0.001,
        'updates_collections': tf.GraphKeys.UPDATE_OPS
    }
    weights_initializer = tf.truncated_normal_initializer(stddev=0.09)
    weights_regularizer = tf.contrib.layers.l2_regularizer(0.00004)

    def _detection_conv(net, scope=None):
        # Raw-logit 1x1 conv shared by the three output scales:
        # no batch norm, no activation, zero-initialized biases.
        return slim.conv2d(net,
                           3 * (5 + self.class_num),
                           1,
                           stride=1,
                           normalizer_fn=None,
                           activation_fn=None,
                           biases_initializer=tf.zeros_initializer(),
                           weights_initializer=weights_initializer,
                           weights_regularizer=weights_regularizer,
                           scope=scope)

    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        with slim.arg_scope([slim.conv2d],
                            activation_fn=activation_fn,
                            normalizer_fn=normalizer_fn,
                            normalizer_params=normalizer_params,
                            weights_initializer=weights_initializer,
                            weights_regularizer=weights_regularizer,
                            ):
            with tf.variable_scope('darknet53_body'):
                route_1, route_2, route_3 = darknet53_body(inputs)
            with tf.variable_scope('yolov3_head'):
                # Scale 1 (coarsest). Keep the original 'feature_map_00'
                # scope name for the first detection conv.
                inter1, net = yolo_block(route_3, 512)
                feature_map_1 = _detection_conv(net, scope='feature_map_00')
                feature_map_1 = tf.identity(feature_map_1,
                                            name='feature_map_1')

                # Scale 2: upsample and fuse with route_2.
                inter1 = conv2d(inter1, 256, 1)
                inter1 = upsample_layer(
                    inter1,
                    route_2.get_shape().as_list()
                    if self.use_static_shape else tf.shape(route_2))
                concat1 = tf.concat([inter1, route_2], axis=3)
                inter2, net = yolo_block(concat1, 256)
                feature_map_2 = _detection_conv(net)
                feature_map_2 = tf.identity(feature_map_2,
                                            name='feature_map_2')

                # Scale 3: upsample and fuse with route_1.
                inter2 = conv2d(inter2, 128, 1)
                inter2 = upsample_layer(
                    inter2,
                    route_1.get_shape().as_list()
                    if self.use_static_shape else tf.shape(route_1))
                concat2 = tf.concat([inter2, route_1], axis=3)
                _, feature_map_3 = yolo_block(concat2, 128)
                feature_map_3 = _detection_conv(feature_map_3)
                feature_map_3 = tf.identity(feature_map_3,
                                            name='feature_map_3')
    return feature_map_1, feature_map_2, feature_map_3
def forward(self, inputs, is_training=False, reuse=False): """ 得到的feature_map就是映射到原图像的网格的尺寸 :param inputs: :param is_training: :param reuse: :return: """ # the input img_size, form: [height, weight] # it will be used later self.img_size = tf.shape(inputs)[1:3] #[416,416] # set batch norm params batch_norm_params = { 'decay': self.batch_norm_decay, 'epsilon': 1e-05, 'scale': True, 'is_training': is_training, 'fused': None, # Use fused batch norm if possible. } with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse): with slim.arg_scope( [slim.conv2d], normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, biases_initializer=None, activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1)): #参数是x 函数里面的内容是 :后面 with tf.variable_scope('darknet53_body'): route_1, route_2, route_3 = darknet53_body(inputs) # (-1, 52, 52, 256) (-1, 26, 26, 512) (-1, 13, 13, 1024) with tf.variable_scope('yolov3_head'): #返回(-1, 13, 13, 512) (-1, 13, 13, 1024) #里面做了5个 和6个DBL inter1 5个DBL的输出 net 是6个 inter1, net = yolo_block(route_3, 512) #------------------------------------------------------------------------------------- #第一个尺度预测的输出,这里要把normalizer_fn,activation_fn设置为None #输入(-1, 13, 13, 1024) 输出 (-1,13,13,255) 3 * (5 + self.class_num)=255 #3 * (5 + self.class_num)的意思是 一个像素点预测三个框,一个框有5个值 需要预测几个类别 feature_map_1 = slim.conv2d( net, 3 * (5 + self.class_num), 1, stride=1, normalizer_fn=None, activation_fn=None, biases_initializer=tf.zeros_initializer()) feature_map_1 = tf.identity(feature_map_1, name='feature_map_1') #------------------------------------------------------------------------------------- #第二个尺度的预测输出代码 #inter1输入 (-1, 13, 13, 512)==>256 inter1 = conv2d(inter1, 256, 1) #将route_3那条路线输出的图片进行 上采样 使与 route_2尺寸相同 深度不变 inter1 = upsample_layer(inter1, route_2.get_shape().as_list()) #route_2 = (-1, 26, 26, 512) (-1, 26, 26, 256) ==> (-1, 26, 26, 768) concat1 = tf.concat([inter1, route_2], axis=3) # 里面做了5个 和6个DBL inter2 5个DBL的输出 net 是6个 inter2, net = yolo_block(concat1, 256) 
#------------------------------------------------------------------------------------- #第二个尺度预测的输出,这里要把normalizer_fn,activation_fn设置为None #输出(-1, 26, 26, 255) feature_map_2 = slim.conv2d( net, 3 * (5 + self.class_num), 1, stride=1, normalizer_fn=None, activation_fn=None, biases_initializer=tf.zeros_initializer()) feature_map_2 = tf.identity(feature_map_2, name='feature_map_2') #------------------------------------------------------------------------------------- #第三个尺度 inter2 = conv2d(inter2, 128, 1) inter2 = upsample_layer(inter2, route_1.get_shape().as_list()) concat2 = tf.concat([inter2, route_1], axis=3) _, feature_map_3 = yolo_block(concat2, 128) #输出尺寸(-1, 52, 52, 256) feature_map_3 = slim.conv2d( feature_map_3, 3 * (5 + self.class_num), 1, stride=1, normalizer_fn=None, activation_fn=None, biases_initializer=tf.zeros_initializer()) feature_map_3 = tf.identity(feature_map_3, name='feature_map_3') #------------------------------------------------------------------------------------- return feature_map_1, feature_map_2, feature_map_3
def forward(self, inputs, is_training=False, reuse=False):
    """Forward pass: takes an image batch and builds the three raw
    detection feature maps (confidences and boxes are decoded
    downstream).
    """
    self.img_size = tf.shape(inputs)[1:3]  # [height, width], used later

    norm_params = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,
    }
    # Shared kwargs for the three raw-output prediction convs.
    head_conv_args = dict(stride=1,
                          normalizer_fn=None,
                          activation_fn=None,
                          biases_initializer=tf.zeros_initializer())
    out_channels = 3 * (5 + self.class_num)

    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        with slim.arg_scope(
                [slim.conv2d],
                normalizer_fn=slim.batch_norm,
                normalizer_params=norm_params,
                biases_initializer=None,
                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1),
                weights_regularizer=slim.l2_regularizer(
                    self.weight_decay)):
            with tf.variable_scope('darknet53_body'):
                route_1, route_2, route_3 = darknet53_body(inputs)
            with tf.variable_scope('yolov3_head'):
                # Coarsest scale.
                up_path, head = yolo_block(route_3, 512)
                feature_map_1 = slim.conv2d(head, out_channels, 1,
                                            **head_conv_args)
                feature_map_1 = tf.identity(feature_map_1,
                                            name='feature_map_1')
                # Middle scale: upsample and merge with route_2.
                up_path = conv2d(up_path, 256, 1)
                up_path = upsample_layer(
                    up_path,
                    route_2.get_shape().as_list()
                    if self.use_static_shape else tf.shape(route_2))
                merged = tf.concat([up_path, route_2], axis=3)
                up_path, head = yolo_block(merged, 256)
                feature_map_2 = slim.conv2d(head, out_channels, 1,
                                            **head_conv_args)
                feature_map_2 = tf.identity(feature_map_2,
                                            name='feature_map_2')
                # Finest scale: upsample and merge with route_1.
                up_path = conv2d(up_path, 128, 1)
                up_path = upsample_layer(
                    up_path,
                    route_1.get_shape().as_list()
                    if self.use_static_shape else tf.shape(route_1))
                merged = tf.concat([up_path, route_1], axis=3)
                _, head = yolo_block(merged, 128)
                feature_map_3 = slim.conv2d(head, out_channels, 1,
                                            **head_conv_args)
                feature_map_3 = tf.identity(feature_map_3,
                                            name='feature_map_3')
    return feature_map_1, feature_map_2, feature_map_3
def forward(self, inputs, is_training=False, reuse=False):
    """Build the three-scale YOLOv3 graph.

    Bug fix: the outer arg_scope previously hard-coded reuse=False, so
    the caller's `reuse` argument was silently ignored and a second
    call to forward() with reuse=True would still try to create fresh
    variables. The parameter is now forwarded.

    Args:
        inputs: NHWC image batch.
        is_training: forwarded to batch norm.
        reuse: variable reuse flag for slim.conv2d / slim.batch_norm.

    Returns:
        (feature_map_1, feature_map_2, feature_map_3), coarse to fine.
    """
    # the input img_size, form: [height, weight]; used later for decoding
    self.img_size = tf.shape(inputs)[1:3]
    # set batch norm params
    batch_norm_params = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }
    # BUG FIX: was reuse=False, ignoring the `reuse` parameter.
    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        with slim.arg_scope(
                [slim.conv2d],
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                biases_initializer=None,
                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1)):
            with tf.variable_scope('darknet53_body'):
                # Backbone features at 3 scales:
                # route_1: 52x52x256, route_2: 26x26x512, route_3: 13x13x1024.
                route_1, route_2, route_3 = darknet53_body(inputs)
            with tf.variable_scope('yolov3_head'):
                # First-scale prediction; inter1 feeds the upsample path.
                inter1, net = yolo_block(route_3, 512)
                # Single 1x1 conv projects to the prediction channels,
                # 3 anchors * (5 + class_num): raw logits, so no BN / act.
                feature_map_1 = slim.conv2d(
                    net,
                    3 * (5 + self.class_num),
                    1,
                    stride=1,
                    normalizer_fn=None,
                    activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                # tf.identity gives the tensor a stable exported name.
                feature_map_1 = tf.identity(feature_map_1,
                                            name='feature_map_1')
                # Second-scale prediction.
                inter1 = conv2d(inter1, 256, 1)
                inter1 = upsample_layer(inter1,
                                        route_2.get_shape().as_list())
                concat1 = tf.concat([inter1, route_2], axis=3)
                inter2, net = yolo_block(concat1, 256)
                feature_map_2 = slim.conv2d(
                    net,
                    3 * (5 + self.class_num),
                    1,
                    stride=1,
                    normalizer_fn=None,
                    activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_2 = tf.identity(feature_map_2,
                                            name='feature_map_2')
                # Third-scale prediction.
                inter2 = conv2d(inter2, 128, 1)
                inter2 = upsample_layer(inter2,
                                        route_1.get_shape().as_list())
                concat2 = tf.concat([inter2, route_1], axis=3)
                _, feature_map_3 = yolo_block(concat2, 128)
                feature_map_3 = slim.conv2d(
                    feature_map_3,
                    3 * (5 + self.class_num),
                    1,
                    stride=1,
                    normalizer_fn=None,
                    activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_3 = tf.identity(feature_map_3,
                                            name='feature_map_3')
    return feature_map_1, feature_map_2, feature_map_3