示例#1
0
def ssd_multibox_layer(inputs,
                       num_classes,
                       sizes,
                       ratios=[1],
                       normalization=-1,
                       bn_normalization=False):
    """Build the class and localization prediction heads for one feature map.

    Args:
        inputs: Feature tensor that both prediction convolutions read from.
        num_classes: Number of class scores predicted per anchor.
        sizes: Anchor sizes; only ``len(sizes)`` is used here.
        ratios: Anchor aspect ratios; only ``len(ratios)`` is used here.
        normalization: When greater than zero, ``inputs`` first goes through
            ``custom_layers.l2_normalization`` with ``scaling=True``.
        bn_normalization: Accepted but never read by this function.

    Returns:
        Tuple ``(cls_pred, loc_pred)``. Each prediction is reshaped to the
        leading dimensions reported by ``tensor_shape`` followed by
        ``[num_anchors, num_classes]`` and ``[num_anchors, 4]`` respectively.
    """
    features = inputs
    if normalization > 0:
        features = custom_layers.l2_normalization(features, scaling=True)

    # Anchors per location: one per size plus one per ratio.
    num_anchors = len(sizes) + len(ratios)

    def _head(scope, per_anchor):
        # Linear 3x3 conv -> channel_to_last -> split last axis per anchor.
        pred = slim.conv2d(features,
                           num_anchors * per_anchor, [3, 3],
                           activation_fn=None,
                           scope=scope)
        pred = custom_layers.channel_to_last(pred)
        target_shape = tensor_shape(pred, 4)[:-1] + [num_anchors, per_anchor]
        return tf.reshape(pred, target_shape)

    # Localization head is built first, then the classification head.
    loc_pred = _head('conv_loc', 4)
    cls_pred = _head('conv_cls', num_classes)
    return cls_pred, loc_pred
def ssd_multibox_layer(inputs,
                       num_classes,
                       sizes,
                       ratios=[1],
                       normalization=-1,
                       bn_normalization=False):
    """Create the box-regression and class-score outputs for a feature layer.

    Args:
        inputs: Feature tensor consumed by both 3x3 prediction convolutions.
        num_classes: Number of scores produced for every anchor.
        sizes: Anchor sizes (only their count matters here).
        ratios: Anchor aspect ratios (only their count matters here).
        normalization: If positive, apply ``custom_layers.l2_normalization``
            (with scaling) to ``inputs`` before predicting.
        bn_normalization: Not used in this function.

    Returns:
        ``(cls_pred, loc_pred)``: class predictions reshaped to end in
        ``[num_anchors, num_classes]`` and box predictions reshaped to end in
        ``[num_anchors, 4]``.
    """
    x = inputs
    if normalization > 0:
        x = custom_layers.l2_normalization(x, scaling=True)

    # Output depths of the two heads.
    anchors_per_cell = len(sizes) + len(ratios)
    loc_depth = anchors_per_cell * 4
    cls_depth = anchors_per_cell * num_classes

    # Localization branch.
    boxes = slim.conv2d(x, loc_depth, [3, 3],
                        activation_fn=None, scope='conv_loc')
    boxes = custom_layers.channel_to_last(boxes)
    boxes_shape = tensor_shape(boxes, 4)[:-1] + [anchors_per_cell, 4]
    boxes = tf.reshape(boxes, boxes_shape)

    # Classification branch.
    scores = slim.conv2d(x, cls_depth, [3, 3],
                         activation_fn=None, scope='conv_cls')
    scores = custom_layers.channel_to_last(scores)
    scores_shape = (tensor_shape(scores, 4)[:-1]
                    + [anchors_per_cell, num_classes])
    scores = tf.reshape(scores, scores_shape)

    return scores, boxes
示例#3
0
def ssd_multibox_layer(inputs,
                       num_classes,
                       sizes,
                       ratios=list([1]),
                       normalization=-1,
                       bn_normalization=False):
    """Produce per-anchor class and localization predictions for one layer.

    Args:
        inputs: Feature tensor fed to both prediction heads.
        num_classes: Number of class scores per anchor.
        sizes: Anchor sizes; only the length is used.
        ratios: Anchor aspect ratios; only the length is used.
        normalization: Values above zero enable
            ``custom_layers.l2_normalization(..., scaling=True)`` on the input.
        bn_normalization: Unused here.

    Returns:
        ``(cls_pred, loc_pred)`` with trailing dimensions
        ``[num_anchors, num_classes]`` and ``[num_anchors, 4]``.
    """
    net = inputs
    if normalization > 0:
        net = custom_layers.l2_normalization(net, scaling=True)

    num_anchors = len(sizes) + len(ratios)

    # (scope, values predicted per anchor), in construction order.
    # Original author's example (not verifiable from this code): for the first
    # layer the location output is 38 * 38 * 4 * 4 (four coordinates for each
    # of the 4 anchors at every cell) and the class output is
    # 38 * 38 * 4 * 21 (21 class scores for each of the 4 anchors).
    head_specs = (('conv_loc', 4), ('conv_cls', num_classes))

    outputs = {}
    for scope_name, width in head_specs:
        raw = slim.conv2d(net,
                          num_anchors * width, [3, 3],
                          activation_fn=None,
                          scope=scope_name)
        raw = custom_layers.channel_to_last(raw)
        outputs[scope_name] = tf.reshape(
            raw, tensor_shape(raw, 4)[:-1] + [num_anchors, width])

    return outputs['conv_cls'], outputs['conv_loc']
示例#4
0
def ssd_multibox_layer(inputs,
                       num_classes,
                       sizes,
                       ratios=[1],
                       normalization=-1,
                       bn_normalization=False):
    """Attach class and box prediction convolutions to a feature layer.

    Args:
        inputs: Input feature layer.
        num_classes: Number of classes scored for each anchor.
        sizes: Reference scales of the prior boxes; only the count is used.
        ratios: Aspect ratios of the prior boxes; only the count is used.
        normalization: A positive value turns on L2 normalization (with a
            learned scale, per ``scaling=True``) of the input; the default
            ``-1`` leaves the input untouched.
        bn_normalization: Not referenced in this function.

    Returns:
        ``(cls_pred, loc_pred)``: the class and box tensors, each reshaped so
        the final axis is split into ``[num_anchors, num_classes]`` and
        ``[num_anchors, 4]``.
    """
    net = (custom_layers.l2_normalization(inputs, scaling=True)
           if normalization > 0 else inputs)

    # Prior boxes per feature-map cell. The original notes list
    # [4, 6, 6, 6, 4, 4] across layers -- that depends on the caller's config.
    num_anchors = len(sizes) + len(ratios)

    # Box head: four coordinates for every anchor, from a 3x3 convolution.
    loc_channels = num_anchors * 4
    loc_pred = slim.conv2d(net, loc_channels, [3, 3],
                           activation_fn=None, scope='conv_loc')
    loc_pred = custom_layers.channel_to_last(loc_pred)
    loc_dims = tensor_shape(loc_pred, 4)[:-1]
    loc_pred = tf.reshape(loc_pred, loc_dims + [num_anchors, 4])

    # Class head: one score per class for every anchor.
    cls_channels = num_anchors * num_classes
    cls_pred = slim.conv2d(net, cls_channels, [3, 3],
                           activation_fn=None, scope='conv_cls')
    cls_pred = custom_layers.channel_to_last(cls_pred)
    cls_dims = tensor_shape(cls_pred, 4)[:-1]
    cls_pred = tf.reshape(cls_pred, cls_dims + [num_anchors, num_classes])

    # Predicted classes first, box locations second.
    return cls_pred, loc_pred
示例#5
0
文件: ZRNet_vgg.py 项目: XRDai/ZRNet
def kitti_ssd_multibox_layer(inputs,
                       num_classes,
                       sizes):
    """Build class and localization heads with one anchor per entry of sizes.

    Args:
        inputs: Feature tensor read by both prediction convolutions.
        num_classes: Number of class scores per anchor.
        sizes: Anchor sizes; ``len(sizes)`` is the anchor count.

    Returns:
        ``(cls_pred, loc_pred)`` whose last axis is split into
        ``[num_anchors, num_classes]`` and ``[num_anchors, 4]``.
    """
    num_anchors = len(sizes)

    def _predict(scope, width):
        # Linear 3x3 conv, then channel_to_last, then per-anchor reshape.
        out = slim.conv2d(inputs, num_anchors * width, [3, 3],
                          activation_fn=None, scope=scope)
        out = custom_layers.channel_to_last(out)
        return tf.reshape(out,
                          tensor_shape(out, 4)[:-1] + [num_anchors, width])

    loc_pred = _predict('conv_loc', 4)
    cls_pred = _predict('conv_cls', num_classes)
    return cls_pred, loc_pred
def text_multibox_layer(layer,
                        inputs,
                        normalization=-1,
                        is_training=True,
                        use_batch=False):
    """Build the score and localization prediction heads for one layer.

    Unlike the plain SSD heads, both outputs carry an extra axis of size 2
    ahead of the per-box axis: scores end in ``[2, num_box, num_classes]``
    and locations end in ``[2, num_box, 4]``. The layer named ``'global'``
    uses a 1x1 kernel with ``'VALID'`` padding; every other layer uses a 1x5
    kernel with ``'SAME'`` padding.

    Args:
        layer: Layer name; only the value ``'global'`` is treated specially.
        inputs: Feature tensor read by both prediction convolutions.
        normalization: When greater than zero, ``inputs`` first goes through
            ``custom_layers.l2_normalization`` with ``scaling=True``.
        is_training: Forwarded as ``'is_training'`` in the batch-norm
            parameter dictionaries.
        use_batch: Forwarded to ``conv2d`` as ``use_batch``.

    Returns:
        Tuple ``(sco_pred, loc_pred)``.
    """
    net = inputs
    if normalization > 0:
        net = custom_layers.l2_normalization(net, scaling=True)

    # Number of default boxes per position and number of classes.
    num_box = len(TextboxNet.default_params.anchor_ratios)
    num_classes = 2
    is_global = layer == 'global'

    def _predict(num_outputs, scope, bn_params):
        # Kernel and padding depend only on whether this is the global layer.
        if is_global:
            return conv2d(net, num_outputs, [1, 1], activation_fn=None,
                          padding='VALID', scope=scope,
                          use_batch=use_batch, batch_norm_params=bn_params)
        return conv2d(net, num_outputs, [1, 5], activation_fn=None,
                      padding='SAME', scope=scope,
                      use_batch=use_batch, batch_norm_params=bn_params)

    # Location.
    loc_bn_params = {
        # Decay for the moving averages.
        'decay': 0.9997,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        'is_training': is_training,
        'zero_debias_moving_mean': False,
        'scale': False,
    }
    loc_pred = _predict(2 * num_box * 4, 'conv_loc', loc_bn_params)
    loc_pred = custom_layers.channel_to_last(loc_pred)
    # Use tensor_shape, exactly as the score branch below does, instead of the
    # static ``get_shape().as_list()``: a static shape with an unknown
    # dimension contains None, which tf.reshape does not accept.
    # NOTE(review): tensor_shape is defined elsewhere; presumably it fills in
    # unknown dimensions dynamically -- confirm.
    loc_pred = tf.reshape(loc_pred,
                          tensor_shape(loc_pred, 4)[:-1] + [2, num_box, 4])

    # Class prediction.
    cls_bn_params = {
        # Decay for the moving averages.
        'decay': 0.9997,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        'is_training': is_training,
    }
    sco_pred = _predict(2 * num_box * num_classes, 'conv_cls', cls_bn_params)
    sco_pred = custom_layers.channel_to_last(sco_pred)
    sco_pred = tf.reshape(
        sco_pred,
        tensor_shape(sco_pred, 4)[:-1] + [2, num_box, num_classes])
    return sco_pred, loc_pred
    def _ssd_multibox_layer(self,
                            inputs,
                            num_classes,
                            sizes,
                            ratios=[1],
                            normalization=-1,
                            bn_normalization=False):
        """Build class and localization heads on top of a residual block.

        All layers are created under ``slim.arg_scope(self.arg_scope)``. The
        (optionally L2-normalized) input goes through a stack of three 1x1
        convolutions and a parallel 1x1 skip convolution; their sum feeds the
        two 3x3 prediction convolutions.

        Args:
            inputs: Feature tensor for this layer.
            num_classes: Number of class scores per anchor.
            sizes: Anchor sizes; ``len(sizes)`` is the anchor count.
            ratios: Not used by this method.
            normalization: When greater than zero, apply
                ``custom_layers.l2_normalization`` with ``scaling=True``.
            bn_normalization: Not used by this method.

        Returns:
            ``(cls_pred, loc_pred)`` with trailing dimensions
            ``[num_anchors, num_classes]`` and ``[num_anchors, 4]``.
        """
        from nets import custom_layers, ssd_vgg_300

        num_anchors = len(sizes)

        with slim.arg_scope(self.arg_scope):
            features = inputs
            if normalization > 0:
                features = custom_layers.l2_normalization(features,
                                                          scaling=True)

            # Prediction module (labelled "DSSD" by the original author).
            with tf.variable_scope('residual'):
                residual_specs = [(256, [1, 1]), (256, [1, 1]),
                                  (1024, [1, 1])]
                residual_branch = slim.stack(features, slim.conv2d,
                                             residual_specs,
                                             scope='conv_res')
                skip_branch = slim.conv2d(features, 1024, [1, 1],
                                          scope='conv_skip')
                features = residual_branch + skip_branch

            # Localization head.
            loc_pred = slim.conv2d(features, num_anchors * 4, [3, 3],
                                   activation_fn=None, scope='conv_loc')
            loc_pred = custom_layers.channel_to_last(loc_pred)
            loc_shape = (ssd_vgg_300.tensor_shape(loc_pred, 4)[:-1]
                         + [num_anchors, 4])
            loc_pred = tf.reshape(loc_pred, loc_shape)

            # Classification head.
            cls_pred = slim.conv2d(features, num_anchors * num_classes,
                                   [3, 3],
                                   activation_fn=None, scope='conv_cls')
            cls_pred = custom_layers.channel_to_last(cls_pred)
            cls_shape = (ssd_vgg_300.tensor_shape(cls_pred, 4)[:-1]
                         + [num_anchors, num_classes])
            cls_pred = tf.reshape(cls_pred, cls_shape)

        # Original author's note: for a (w*h) feature layer with 4 anchors per
        # point, the outputs are shaped (w*h*4*4) and (w*h*4*21).
        return cls_pred, loc_pred
示例#8
0
def ssd_multibox_layers(inputs,
                        num_classes,
                        sizes,
                        ratios=[1],
                        normalization=-1,
                        bn_normalization=False):
    """Build the localization and class prediction heads of one feature layer.

    Args:
        inputs: Feature tensor read by both heads.
        num_classes: Number of class scores per anchor.
        sizes: Anchor sizes; only the length is used.
        ratios: Anchor aspect ratios; only the length is used.
        normalization: When greater than zero, apply
            ``custom_layers.l2_normalization`` with ``scaling=True`` first.
        bn_normalization: Never read in this function.

    Returns:
        ``(cls_pred, loc_pred)`` with trailing dimensions
        ``[num_anchors, num_classes]`` and ``[num_anchors, 4]``.
    """
    net = (custom_layers.l2_normalization(inputs, scaling=True)
           if normalization > 0 else inputs)

    # Original author's rationale for len(sizes) + len(ratios) rather than a
    # product: each point gets the square boxes (min_size and
    # sqrt(min_size * max_size)) plus, for every ratio, the rectangles
    # sqrt(ratio) * min_size and min_size / sqrt(ratio). The author quotes
    # 4, 6, 6, 6, 6, 4 boxes per point across layers; the actual counts
    # depend on the caller's configuration.
    num_anchors = len(sizes) + len(ratios)

    # Location: 4 coordinates for each anchor.
    loc_depth = num_anchors * 4
    loc_pred = slim.conv2d(net, loc_depth, [3, 3],
                           activation_fn=None, scope='conv_loc')
    loc_pred = custom_layers.channel_to_last(loc_pred)
    loc_target = tensor_shape(loc_pred, 4)[:-1] + [num_anchors, 4]
    loc_pred = tf.reshape(loc_pred, loc_target)

    # Class: num_classes scores for each anchor.
    cls_depth = num_anchors * num_classes
    cls_pred = slim.conv2d(net, cls_depth, [3, 3],
                           activation_fn=None, scope='conv_cls')
    cls_pred = custom_layers.channel_to_last(cls_pred)
    cls_target = tensor_shape(cls_pred, 4)[:-1] + [num_anchors, num_classes]
    cls_pred = tf.reshape(cls_pred, cls_target)

    return cls_pred, loc_pred
示例#9
0
def MultiboxLayer(addn,
                       inputs,
                       num_classes,
                       is_normalization):
    """Build single-anchor class/location heads, printing each intermediate.

    Args:
        addn: Added to the anchor count when sizing the class convolution,
            which outputs ``(1 + addn) * 2`` channels.
        inputs: Feature tensor read by both 3x3 convolutions.
        num_classes: Not used by this function.
        is_normalization: When greater than zero, apply
            ``custom_layers.l2_normalization`` with ``scaling=True`` first.

    Returns:
        ``(cls_pred, loc_pred)``; each is reshaped to its leading dimensions
        followed by ``[1, -1]``.
    """
    net = (custom_layers.l2_normalization(inputs, scaling=True)
           if is_normalization > 0 else inputs)
    print("nnnn------------begin MultiboxLayer----------nnnn")

    num_anchors = 1

    # Location head. Original note: the channel count of 4 is not used when
    # converting from caffe.
    num_loc_pred = 4
    loc_pred = slim.conv2d(net, num_loc_pred, [3, 3],
                           activation_fn=None, scope='conv_loc')
    print("====loc_pred0:", loc_pred)

    loc_pred = custom_layers.channel_to_last(loc_pred)
    print("====loc_pred1:", loc_pred)
    loc_shape = tensor_shape(loc_pred, 4)
    print("====tt0:", loc_shape)
    loc_leading = loc_shape[:-1]
    print("====tt0[:-1]:", loc_leading)
    loc_pred = tf.reshape(loc_pred, loc_leading + [num_anchors, -1])
    print("====loc_pred2:", loc_pred)

    # Class head. Original note: this channel count is not used when
    # converting from caffe.
    num_cls_pred = (num_anchors + addn) * 2
    cls_pred = slim.conv2d(net, num_cls_pred, [3, 3],
                           activation_fn=None, scope='conv_cls')
    print("====num_cls_pred:", num_cls_pred, ", cls_pred0:", cls_pred)

    cls_pred = custom_layers.channel_to_last(cls_pred)
    print("====cls_pred1:", cls_pred)

    cls_shape = tensor_shape(cls_pred, 4)
    print("====tt:", cls_shape)
    cls_pred = tf.reshape(cls_pred, cls_shape[:-1] + [num_anchors, -1])
    print("====cls_pred2:", cls_pred)
    print("uuuu------------end   MultiboxLayer----------uuuu")
    return cls_pred, loc_pred
示例#10
0
def ssd_multibox_layer(
    inputs,
    num_classes,
    sizes,
    ratios=[1],
    normalization=-1,
    bn_normalization=False,
):
    """Construct the class and localization predictions for one feature layer.

    Args:
        inputs: Feature tensor for this layer. The original author's example
            is the block4 feature with shape [32, 512, 38, 38]; that is not
            checked here.
        num_classes: Number of class scores per anchor.
        sizes: Anchor sizes; only the length is used.
        ratios: Anchor aspect ratios; only the length is used.
        normalization: When greater than zero, apply
            ``custom_layers.l2_normalization`` with ``scaling=True`` first.
        bn_normalization: Not used.

    Returns:
        ``(cls_pred, loc_pred)`` with trailing dimensions
        ``[num_anchors, num_classes]`` and ``[num_anchors, 4]``.
    """
    net = inputs
    if normalization > 0:
        net = custom_layers.l2_normalization(net, scaling=True)

    # Anchor count (4 in the original author's example).
    num_anchors = len(sizes) + len(ratios)

    def _build_head(scope, width):
        # In the author's example the location conv yields 16 channels
        # (4 anchors x 4 coordinates) and the class conv 4 * 21 channels.
        pred = slim.conv2d(net, num_anchors * width, [3, 3],
                           activation_fn=None, scope=scope)
        pred = custom_layers.channel_to_last(pred)
        # Drop the channel entry and append the per-anchor split; the example
        # turns [32, 38, 38] + [4, 4] into [32, 38, 38, 4, 4].
        leading = tensor_shape(pred, 4)[:-1]
        return tf.reshape(pred, leading + [num_anchors, width])

    loc_pred = _build_head('conv_loc', 4)
    cls_pred = _build_head('conv_cls', num_classes)
    return cls_pred, loc_pred
示例#11
0
def ssd_multibox_layer(inputs,
                       num_classes,
                       sizes,
                       ratios=[1],
                       normalization=-1,
                       bn_normalization=False):
    """Create per-anchor coordinate and class predictions for a chosen layer.

    Args:
        inputs: Feature tensor of the selected layer. The original notes name
            conv4_3, conv7, conv8, conv9, conv10 and conv11 as the candidates.
        num_classes: Number of class scores per anchor.
        sizes: Anchor sizes; only the length is used.
        ratios: Anchor aspect ratios; only the length is used.
        normalization: When greater than zero, apply
            ``custom_layers.l2_normalization`` with ``scaling=True`` first.
        bn_normalization: Not read by this function.

    Returns:
        ``(cls_pred, loc_pred)`` with trailing dimensions
        ``[num_anchors, num_classes]`` and ``[num_anchors, 4]``.
    """
    source = inputs
    if normalization > 0:
        source = custom_layers.l2_normalization(source, scaling=True)

    num_anchors = len(sizes) + len(ratios)
    num_loc_pred = num_anchors * 4
    num_cls_pred = num_anchors * num_classes

    # Coordinate prediction: the convolution's filter count sets the output
    # channel count to num_loc_pred. Original author's example: a conv4_3
    # feature map of shape (N, 38, 38, 256) becomes (N, 38, 38, 4 * 4).
    loc_pred = slim.conv2d(source, num_loc_pred, [3, 3],
                           activation_fn=None, scope='conv_loc')
    loc_pred = custom_layers.channel_to_last(loc_pred)
    loc_new_shape = tensor_shape(loc_pred, 4)[:-1] + [num_anchors, 4]
    loc_pred = tf.reshape(loc_pred, loc_new_shape)

    # Class prediction: same pattern with num_cls_pred output channels.
    # Original author's example: (N, 38, 38, 256) becomes (N, 38, 38, 4 * 21).
    cls_pred = slim.conv2d(source, num_cls_pred, [3, 3],
                           activation_fn=None, scope='conv_cls')
    cls_pred = custom_layers.channel_to_last(cls_pred)
    cls_new_shape = (tensor_shape(cls_pred, 4)[:-1]
                     + [num_anchors, num_classes])
    cls_pred = tf.reshape(cls_pred, cls_new_shape)

    return cls_pred, loc_pred
示例#12
0
def ssd_multibox_layer(inputs,
                       num_classes,
                       sizes,
                       ratios=[1],
                       normalization=-1,
                       bn_normalization=False):
    """Build class and box prediction tensors for one feature layer.

    Args:
        inputs: Feature tensor used by both heads.
        num_classes: Number of class scores per anchor.
        sizes: Anchor sizes; only the length is used.
        ratios: Anchor aspect ratios; only the length is used.
        normalization: When greater than zero, apply
            ``custom_layers.l2_normalization`` with ``scaling=True`` first.
        bn_normalization: Unused.

    Returns:
        ``(cls_pred, loc_pred)`` with trailing dimensions
        ``[num_anchors, num_classes]`` and ``[num_anchors, 4]``.
    """
    net = inputs
    if normalization > 0:
        net = custom_layers.l2_normalization(net, scaling=True)

    num_anchors = len(sizes) + len(ratios)

    # Each anchor needs 4 box coordinates and one score per class.
    widths = (('conv_loc', 4), ('conv_cls', num_classes))

    results = []
    for scope, per_anchor in widths:
        # Output channels cover every (anchor, value) pair.
        head = slim.conv2d(net, num_anchors * per_anchor, [3, 3],
                           activation_fn=None, scope=scope)
        # Presumably converts the tensor to channel-last layout (NHWC).
        head = custom_layers.channel_to_last(head)
        # Split the channel axis: (..., num_anchors * k) ->
        # (..., num_anchors, k).
        split_shape = tensor_shape(head, 4)[:-1] + [num_anchors, per_anchor]
        results.append(tf.reshape(head, split_shape))

    loc_pred, cls_pred = results
    return cls_pred, loc_pred
示例#13
0
def ssd_multibox_layer(inputs,
                       num_classes,
                       sizes,
                       ratios=[1],
                       normalization=-1,
                       bn_normalization=False):
    """Construct a multibox layer and return class and location predictions.

    Args:
        inputs: Feature tensor read by both prediction convolutions.
        num_classes: Number of class scores per anchor.
        sizes: Anchor sizes; only the length is used.
        ratios: Anchor aspect ratios; only the length is used.
        normalization: When greater than zero, apply
            ``custom_layers.l2_normalization`` with ``scaling=True`` first.
        bn_normalization: Not used here.

    Returns:
        ``(cls_pred, loc_pred)``: class predictions first, location
        predictions second, with trailing dimensions
        ``[num_anchors, num_classes]`` and ``[num_anchors, 4]``.
    """
    net = (custom_layers.l2_normalization(inputs, scaling=True)
           if normalization > 0 else inputs)

    # Number of default boxes per location. The original notes quote
    # 4, 6, 6, 6, 4, 4 across layers; that depends on the caller.
    num_anchors = len(sizes) + len(ratios)

    def _per_anchor(pred, width):
        # channel_to_last presumably guarantees a channel-last layout.
        pred = custom_layers.channel_to_last(pred)
        return tf.reshape(pred,
                          tensor_shape(pred, 4)[:-1] + [num_anchors, width])

    # Location: 4 values per anchor (the original notes name them
    # ymin, xmin, ymax, xmax).
    loc_pred = _per_anchor(
        slim.conv2d(net, num_anchors * 4, [3, 3],
                    activation_fn=None, scope='conv_loc'),
        4)

    # Class prediction.
    cls_pred = _per_anchor(
        slim.conv2d(net, num_anchors * num_classes, [3, 3],
                    activation_fn=None, scope='conv_cls'),
        num_classes)

    return cls_pred, loc_pred
示例#14
0
def pred_cls_module(net_input, var_scope, num_anchors, num_classes):
    """Predict per-anchor class scores through two two-branch conv units.

    Each unit runs a 3x3 and a 1x1 convolution (512 outputs each, no
    normalizer or activation) on the same input, concatenates them on axis 3
    and applies batch norm with ReLU. The second unit is followed by a linear
    3x3 convolution that produces ``num_anchors * num_classes`` channels.

    Args:
        net_input: Feature tensor fed to the first unit.
        var_scope: Prefix for the ``'_inception1'`` and ``'_inception2'``
            variable scopes.
        num_anchors: Number of anchors per location.
        num_classes: Number of class scores per anchor.

    Returns:
        Class prediction tensor whose last axis is split into
        ``[num_anchors, num_classes]``.
    """
    def _two_branch_unit(features):
        with tf.variable_scope('Branch_0'):
            wide = slim.conv2d(features, 512, [3, 3],
                               normalizer_fn=None,
                               activation_fn=None,
                               scope='Conv2d_3x3')
        with tf.variable_scope('Branch_1'):
            narrow = slim.conv2d(features, 512, [1, 1],
                                 normalizer_fn=None,
                                 activation_fn=None,
                                 scope='Conv2d_1x1')
        merged = array_ops.concat([wide, narrow], 3)
        # The only activation is applied after the concatenation.
        return slim.batch_norm(merged, activation_fn=tf.nn.relu)

    with tf.variable_scope(var_scope + '_inception1'):
        hidden = _two_branch_unit(net_input)

    with tf.variable_scope(var_scope + '_inception2'):
        hidden = _two_branch_unit(hidden)
        logits = slim.conv2d(hidden,
                             num_anchors * num_classes, [3, 3],
                             activation_fn=None,
                             scope='Conv2d_pred_3x3')

    logits = custom_layers.channel_to_last(logits)
    out_shape = tensor_shape(logits, 4)[:-1] + [num_anchors, num_classes]
    return tf.reshape(logits, out_shape)
示例#15
0
def reg_bbox_module(net_input, var_scope, num_anchors):
    """Regress four box values per anchor from a feature tensor.

    Args:
        net_input: Feature tensor fed to the first 3x3 convolution.
        var_scope: Variable scope that wraps both convolutions (the original
            author's example value is ``'reg_bbox_@4'``).
        num_anchors: Number of anchors per location.

    Returns:
        Localization tensor whose last axis is split into
        ``[num_anchors, 4]``.
    """
    with tf.variable_scope(var_scope):
        # Hidden 3x3 conv with batch norm, then a linear 3x3 regression conv.
        hidden = slim.conv2d(net_input, 512, [3, 3],
                             normalizer_fn=slim.batch_norm,
                             scope='Conv2d_0_3x3')
        boxes = slim.conv2d(hidden, 4 * num_anchors, [3, 3],
                            activation_fn=None,
                            scope='Conv2d_1_3x3')

        boxes = custom_layers.channel_to_last(boxes)
        boxes_shape = tensor_shape(boxes, 4)[:-1] + [num_anchors, 4]
        boxes = tf.reshape(boxes, boxes_shape)

    return boxes
示例#16
0
def ssd_multibox_layer(
        inputs,
        num_classes,
        sizes,
        ratios=[1],
        normalization=-1,
        bn_normalization=False):
    """Build three per-anchor heads: a 2-way "pest" head, class and location.

    Every head is a 256-channel 3x3 convolution followed by a linear 3x3
    convolution, then ``channel_to_last`` and a per-anchor reshape.

    Args:
        inputs: Feature tensor for this layer (``end_points[layer]`` in the
            original notes; their example shape is [4, 512, 64, 64]).
        num_classes: Number of class scores per anchor.
        sizes: Anchor sizes; only the length is used. The original notes give
            [20.48, 51.2] as an example of two box sizes.
        ratios: Anchor aspect ratios; only the length is used. Per the
            original notes, [1] means a square and [2, 0.5] means one wide
            and one tall rectangle.
        normalization: When greater than zero, apply
            ``custom_layers.l2_normalization`` with ``scaling=True`` first.
        bn_normalization: Not used.

    Returns:
        ``(pest_pred, cls_pred, loc_pred)`` with trailing dimensions
        ``[num_anchors, 2]``, ``[num_anchors, num_classes]`` and
        ``[num_anchors, 4]``.
    """
    net = inputs
    if normalization > 0:
        net = custom_layers.l2_normalization(net, scaling=True)

    # Total boxes per location: one per size plus one per ratio.
    num_anchors = len(sizes) + len(ratios)

    def _two_stage_conv(pre_scope, out_scope, depth):
        # 256-channel 3x3 conv, then a linear 3x3 conv with `depth` outputs.
        hidden = slim.conv2d(net, 256, [3, 3], scope=pre_scope)
        return slim.conv2d(hidden, depth, [3, 3],
                           activation_fn=None, scope=out_scope)

    def _split_per_anchor(pred, width):
        # Original author's example: 4x64x64x16 becomes 4x64x64x4x4, i.e.
        # 4 boxes with 4 numbers each.
        pred = custom_layers.channel_to_last(pred)
        return tf.reshape(pred,
                          tensor_shape(pred, 4)[:-1] + [num_anchors, width])

    # Location: 4 regressed numbers per box. It is fully built before the
    # other two heads are started.
    loc_pred = _split_per_anchor(
        _two_stage_conv('conv_loc_pre', 'conv_loc', num_anchors * 4), 4)

    # Class scores and the extra 2-way head ("mhw add" in the original):
    # both convolution stacks are created before either is reshaped.
    cls_raw = _two_stage_conv('conv_cls_pre', 'conv_cls',
                              num_anchors * num_classes)
    pest_raw = _two_stage_conv('pest_pred_pre', 'pest_pred_cls',
                               num_anchors * 2)

    cls_pred = _split_per_anchor(cls_raw, num_classes)
    pest_pred = _split_per_anchor(pest_raw, 2)

    return pest_pred, cls_pred, loc_pred