def ssd300_blocks(net, end_points):
    """Extra SSD-300 feature blocks (conv6 to block11), each conv followed by batch norm."""
    # Block 6: 3x3 conv with dilation rate 6.
    net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
    net = slim.batch_norm(net)
    end_points['block6'] = net
    # Block 7: 1x1 conv.
    net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
    net = slim.batch_norm(net)
    end_points['block7'] = net
    # Block 8/9/10/11: 1x1 and 3x3 convolutions with stride 2 (except the last ones).
    end_point = 'block8'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
        net = slim.batch_norm(net)
        net = custom_layers.pad2d(net, pad=(1, 1))
        net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        net = slim.batch_norm(net)
    end_points[end_point] = net
    end_point = 'block9'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        net = slim.batch_norm(net)
        net = custom_layers.pad2d(net, pad=(1, 1))
        net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        net = slim.batch_norm(net)
    end_points[end_point] = net
    end_point = 'block10'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        net = slim.batch_norm(net)
        net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        net = slim.batch_norm(net)
    end_points[end_point] = net
    end_point = 'block11'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        net = slim.batch_norm(net)
        net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        net = slim.batch_norm(net)
    end_points[end_point] = net
    return net, end_points
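# The pad2d + VALID pattern above mimics Caffe's convolution padding. A minimal
# sketch (not from the original file) of the output-size arithmetic, showing why
# blocks 8-11 shrink 19 -> 10 -> 5 -> 3 -> 1 on a 300x300 input:
def conv_out_size(n, k=3, s=2, pad=1):
    # Explicit padding on both sides, then a VALID convolution:
    # out = floor((n + 2*pad - k) / s) + 1
    return (n + 2 * pad - k) // s + 1

assert conv_out_size(19) == 10              # block8: pad2d(1, 1) + 3x3/stride-2
assert conv_out_size(10) == 5               # block9: pad2d(1, 1) + 3x3/stride-2
assert conv_out_size(5, s=1, pad=0) == 3    # block10: plain VALID 3x3
assert conv_out_size(3, s=1, pad=0) == 1    # block11: plain VALID 3x3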
def largenetwork(net, end_point, end_points):
    """Extra 'large network' feature blocks; the first block's name is passed in by the caller."""
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        net = custom_layers.pad2d(net, pad=(1, 1))
        net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
    end_points[end_point] = net
    end_point = 'block10'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
    end_points[end_point] = net
    end_point = 'block11'
    with tf.variable_scope(end_point):
        net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
        net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
    end_points[end_point] = net
    return end_points
def max_avg_pool2d(net, stride=[2, 2], scope=None):
    """Combined max + average 3x3 pooling with additional Caffe-style padding."""
    with tf.variable_scope(scope, 'max_avg_pool', [net]):
        ksize = [3, 3]
        padding = [1, 1]
        # Additional Caffe padding.
        net = custom_layers.pad2d(net, pad=padding)
        # Max + Avg pooling.
        mnet = slim.max_pool2d(net, ksize, stride, padding='VALID')
        anet = slim.avg_pool2d(net, ksize, stride, padding='VALID')
        return mnet + anet
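# Hypothetical usage sketch for max_avg_pool2d (the placeholder shape and scope
# name are illustrative, not from the original file; assumes this file's usual
# `tf`/`slim` imports):
def _max_avg_pool2d_example():
    images = tf.placeholder(tf.float32, [None, 38, 38, 256])
    # (1, 1) explicit padding + 3x3/stride-2 VALID window: each spatial axis
    # shrinks as floor((38 + 2 - 3) / 2) + 1 = 19, like a Caffe pool layer with pad=1.
    return max_avg_pool2d(images, stride=[2, 2], scope='pool_mix')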
def mobilenet_block(net, num_out_channels, stride=[1, 1], leaders=False, scope=None):
    """Basic MobileNet block combining:
     - depthwise conv + BN + relu;
     - 1x1 conv + BN + relu.
    """
    with tf.variable_scope(scope, 'block', [net]):
        # `width_multiplier` comes from the enclosing scope.
        num_out_channels = int(num_out_channels * width_multiplier)
        kernel_size = [3, 3]
        if stride[0] == 1 and stride[1] == 1:
            # Classic depthwise convolution with stride=1.
            net = custom_layers.depthwise_convolution2d(
                net, kernel_size, depth_multiplier=1, stride=stride, scope='conv_dw')
        else:
            if leaders:
                # Special depthwise "leaders" convolution when stride > 1.
                # net = custom_layers.pad2d(net, pad=(1, 1))
                net = custom_layers.depthwise_leaders_convolution2d(
                    net, kernel_size, padding='SAME', stride=stride,
                    rates=[1, 2, 3], pooling_sizes=[5, 3, 1], pooling_type='AVG',
                    activation_fn=tf.nn.relu, scope='conv_lead_dw')
            else:
                # Mimic Caffe padding when stride > 1.
                net = custom_layers.pad2d(net, pad=(1, 1))
                net = custom_layers.depthwise_convolution2d(
                    net, kernel_size, padding='VALID', depth_multiplier=1,
                    stride=stride, scope='conv_dw')
        # Pointwise convolution.
        net = slim.conv2d(net, num_out_channels, [1, 1], scope='conv_pw')
        return net
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition."""
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))
    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Inception-style variant, kept for reference but disabled.
        '''
        net1 = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net1
        net1 = slim.max_pool2d(net1, [2, 2], scope='pool1')
        # Block 2.
        # net2 = slim.repeat(net1, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        net2_1 = slim.conv2d(net1, 128, [3, 3], scope='conv2_0')
        net2_2 = slim.conv2d(net2_1, 128, [3, 3], scope='conv2_1')
        net2 = net2_1 + net2_2
        end_points['block2'] = net2
        net2 = slim.max_pool2d(net2, [2, 2], scope='pool2')
        # Block 3.
        # net3 = slim.repeat(net2, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net3_1 = slim.conv2d(net2, 256, [3, 3], scope='conv3_1')
        # net3_1 = slim.batch_norm(net3_1, scope='bn3_1')
        net3_2 = slim.conv2d(net3_1, 256, [3, 3], scope='conv3_2')
        # net3_2 = slim.batch_norm(net3_2, scope='bn3_2')
        net3_3 = slim.conv2d(net3_2, 256, [3, 3], scope='conv3_3')
        net3 = net3_1 + net3_3
        end_points['block3'] = net3
        net3 = slim.max_pool2d(net3, [2, 2], scope='pool3')
        # Block 4.
        net4 = slim.repeat(net3, 2, slim.conv2d, 512, [3, 3], scope='conv4')
        net4 = slim.batch_norm(net4, scope='conv4_bn')
        with tf.variable_scope('Mixed_4'):
            with tf.variable_scope('Branch_0'):
                tower_conv = slim.conv2d(net4, 256, 1, scope='Conv2d_1x1')  # 38,38,128
            with tf.variable_scope('Branch_1'):
                tower_conv1_0 = slim.conv2d(net4, 256, 1, scope='Conv2d_0a_1x1')
                tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, scope='Conv2d_0b_3x3')  # 38,38,128
            with tf.variable_scope('Branch_2'):
                tower_conv2_0 = slim.conv2d(net4, 128, 1, scope='Conv2d_0a_1x1')
                tower_conv2_1 = slim.conv2d(tower_conv2_0, 128, 3, scope='Conv2d_0b_3x3')
                tower_conv2_2 = slim.conv2d(tower_conv2_1, 256, 3, scope='Conv2d_0c_3x3')  # 38,38,128
            with tf.variable_scope('Branch_3'):
                tower_pool = slim.avg_pool2d(net4, 3, stride=1, padding='SAME',
                                             scope='AvgPool_0a_3x3')
                tower_pool_1 = slim.conv2d(tower_pool, 256, 1, scope='Conv2d_0b_1x1')
            net4 = tf.concat([tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1], 1)
        end_points['block4'] = net4
        net4 = slim.max_pool2d(net4, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net4, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')
        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        net = tf.layers.batch_normalization(net, training=True)
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        '''
        # ResNet-style variant with residual shortcuts and a top-down fusion path.
        net1_ = slim.conv2d(inputs, 64, [3, 3], scope='conv1')
        # Block 1.
        net1 = slim.repeat(net1_, 3, slim.conv2d, 64, [3, 3], scope='conv1')
        # net1_ = tf.concat([net1_] * 2, 1)
        net1 = net1 + net1_
        end_points['block1'] = net1
        net1 = slim.max_pool2d(net1, [2, 2], scope='pool1')     # 150,150,64
        # Block 2.
        net2 = slim.repeat(net1, 3, slim.conv2d, 128, [3, 3], scope='conv2')
        net1 = tf.concat([net1] * 2, 1)
        net2 = net2 + net1
        end_points['block2'] = net2
        net2 = slim.max_pool2d(net2, [2, 2], scope='pool2')     # 75,75,128
        # Block 3.
        net3 = slim.repeat(net2, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net2 = tf.concat([net2] * 2, 1)
        net3_p = net3 + net2
        # end_points['block3'] = net3
        net3 = slim.max_pool2d(net3_p, [2, 2], scope='pool3')   # 38,38,256
        # Block 4.
        net4 = slim.repeat(net3, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        net3 = tf.concat([net3] * 2, 1)
        net4_p = net4 + net3
        end_points['block4'] = net4_p
        net4 = slim.max_pool2d(net4_p, [2, 2], scope='pool4')   # 19,19,512
        # Block 5.
        net5 = slim.repeat(net4, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        net5 = net4 + net5
        end_points['block5'] = net5
        net5 = slim.max_pool2d(net5, [3, 3], stride=1, scope='pool5')   # 17
        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net6 = slim.conv2d(net5, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net6
        net6 = tf.layers.dropout(net6, rate=dropout_keep_prob, training=is_training)    # 17,17
        # Block 7: 1x1 conv.
        net7 = slim.conv2d(net6, 1024, [1, 1], scope='conv7')
        # end_points['block7'] = net7
        net7_ = tf.layers.dropout(net7, rate=dropout_keep_prob, training=is_training)   # 17,17
        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        net8 = tf.layers.batch_normalization(net7_, training=True)
        with tf.variable_scope(end_point):
            net8 = slim.conv2d(net8, 256, [1, 1], scope='conv1x1')      # 17
            net8 = custom_layers.pad2d(net8, pad=(1, 1))                # 21
            net8 = slim.conv2d(net8, 512, [3, 3], stride=2, scope='conv3x3',
                               padding='VALID')                         # 10,10
        # end_points[end_point] = net8
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net9 = slim.conv2d(net8, 128, [1, 1], scope='conv1x1')
            net9 = custom_layers.pad2d(net9, pad=(1, 1))
            net9 = slim.conv2d(net9, 256, [3, 3], stride=2, scope='conv3x3',
                               padding='VALID')                         # 5,5
        # end_points[end_point] = net9
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net10 = slim.conv2d(net9, 128, [1, 1], scope='conv1x1')
            net10 = slim.conv2d(net10, 256, [3, 3], scope='conv3x3', padding='VALID')   # 3,3
        # end_points[end_point] = net10
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net11 = slim.conv2d(net10, 128, [1, 1], scope='conv1x1')
            net11 = slim.conv2d(net11, 256, [3, 3], scope='conv3x3', padding='VALID')   # 1,1
        end_points[end_point] = net11
        # Top-down path: upsample the deeper map (NCHW, hence the transposes),
        # refine with a 3x3 conv and add a 1x1 lateral connection.
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net10_a = tf.transpose(net11, perm=(0, 2, 3, 1))    # NCHW -> NHWC
            net10_a = tf.image.resize_nearest_neighbor(net10_a, (3, 3))
            net10_a = tf.transpose(net10_a, perm=(0, 3, 1, 2))  # NHWC -> NCHW
            net10_a = slim.conv2d(net10_a, 256, [3, 3], scope='pre10_3x3')
            net10_b = slim.conv2d(net10, 256, [1, 1], scope='pre10_1x1')
            net10_o = net10_a + net10_b
        end_points[end_point] = net10_o     # 3
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net9_a = tf.transpose(net10_o, perm=(0, 2, 3, 1))
            net9_a = tf.image.resize_nearest_neighbor(net9_a, (5, 5))
            net9_a = tf.transpose(net9_a, perm=(0, 3, 1, 2))
            net9_a = slim.conv2d(net9_a, 256, [3, 3], scope='pre9_3x3')
            net9_b = slim.conv2d(net9, 256, [1, 1], scope='pre9_1x1')
            net9_o = net9_a + net9_b
        end_points[end_point] = net9_o      # 5
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net8_a = tf.transpose(net9_o, perm=(0, 2, 3, 1))
            net8_a = tf.image.resize_nearest_neighbor(net8_a, (10, 10))
            net8_a = tf.transpose(net8_a, perm=(0, 3, 1, 2))
            net8_a = slim.conv2d(net8_a, 512, [3, 3], padding='SAME', scope='pre8_3x3')
            net8_b = slim.conv2d(net8, 512, [1, 1], scope='pre8_1x1')   # 10
            net8_o = net8_a + net8_b
        end_points[end_point] = net8_o      # 10
        end_point = 'block7'
        with tf.variable_scope(end_point):
            net7_a = tf.transpose(net8_o, perm=(0, 2, 3, 1))
            net7_a = tf.image.resize_nearest_neighbor(net7_a, (19, 19))
            net7_a = tf.transpose(net7_a, perm=(0, 3, 1, 2))
            net7_a = slim.conv2d(net7_a, 1024, [3, 3], padding='SAME', scope='pre7_3x3')
            net7_b = slim.conv2d(net7, 1024, [1, 1], scope='pre7_1x1')
            net7_o = net7_a + net7_b
        end_points[end_point] = net7_o
        end_point = 'block4'
        with tf.variable_scope(end_point):
            net4_a = tf.transpose(net7_o, perm=(0, 2, 3, 1))
            net4_a = tf.image.resize_nearest_neighbor(net4_a, (38, 38))
            net4_a = tf.transpose(net4_a, perm=(0, 3, 1, 2))
            net4_a = slim.conv2d(net4_a, 512, [3, 3], padding='SAME', scope='pre4_3x3')
            net4_b = slim.conv2d(net4_p, 512, [1, 1], scope='pre4_1x1')
            net4_o = net4_a + net4_b        # 38
        end_points[end_point] = net4_o
        end_point = 'block3'
        with tf.variable_scope(end_point):
            net3_a = tf.transpose(net4_o, perm=(0, 2, 3, 1))
            net3_a = tf.image.resize_nearest_neighbor(net3_a, (75, 75))
            net3_a = tf.transpose(net3_a, perm=(0, 3, 1, 2))
            net3_a = slim.conv2d(net3_a, 512, [3, 3], padding='SAME', scope='pre3_3x3')
            net3_b = slim.conv2d(net3_p, 512, [1, 1], scope='pre3_1x1')
            net3_o = net3_a + net3_b        # 75
        end_points[end_point] = net3_o
        # Original VGG-16 path, kept for reference. Disabled here: enabling it
        # together with the variant above would recreate the same variable
        # scopes (conv1..conv7) and overwrite the collected end_points.
        '''
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')
        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        '''
        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer],
                                          num_classes,
                                          anchor_sizes[i],
                                          anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)
        return predictions, localisations, logits, end_points
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition."""
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))
    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')
        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        # feat_layers = ['block4', 'block7', 'block8', 'block9', 'block10', 'block11']
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer],
                                          num_classes,
                                          anchor_sizes[i],
                                          anchor_ratios[i],
                                          normalizations[i])
            # p and l are raw (pre-softmax) outputs; p predicts the class of each
            # box, so prediction_fn (softmax) is applied to it.
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)
        return predictions, localisations, logits, end_points
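# Hypothetical driver for ssd_net (the placeholder shape is an assumption for
# illustration; assumes this file's usual `tf`/`slim` imports):
def _ssd_net_example():
    images = tf.placeholder(tf.float32, [None, 300, 300, 3])
    predictions, localisations, logits, end_points = ssd_net(images, is_training=False)
    # Each list has one entry per feature layer in feat_layers:
    # predictions[i] holds post-softmax class scores, localisations[i] the
    # corresponding box offsets.
    return predictions, localisations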
def text_net(inputs,
             feat_layers=TextboxNet.default_params.feat_layers,
             anchor_sizes=TextboxNet.default_params.anchor_sizes,
             anchor_ratios=TextboxNet.default_params.anchor_ratios,
             normalizations=TextboxNet.default_params.normalizations,
             is_training=True,
             dropout_keep_prob=0.5,
             reuse=None,
             scope='text_box_384'):
    end_points = {}
    with tf.variable_scope(scope, 'text_box_300', [inputs], reuse=reuse):
        # Input sizes: 300*300 or 384*384.
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')    # 300 384
        end_points['conv1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')   # 150
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')      # 150 192
        end_points['conv2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')   # 75
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')      # 75 81
        end_points['conv3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')   # 38
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')      # 38 40
        end_point = 'conv4'
        end_points[end_point] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')   # 19
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')      # 19
        end_points['conv5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')             # 19
        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')             # 19
        end_points['conv6'] = net
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')                     # 19
        end_point = 'conv7'
        end_points[end_point] = net
        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2.
        end_point = 'conv8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net     # 10
        end_point = 'conv9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net     # 5
        end_point = 'conv10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1', padding='VALID')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net     # 3
        end_point = 'conv11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net     # 1
        # Multi-branch modules on each feature layer, mixing a square kernel
        # with 1xN and Nx1 kernels before the textbox heads.
        end_point = feat_layers[0]
        with tf.variable_scope(end_point):
            net_dilation1 = slim.conv2d(end_points[end_point], 128, [3, 3],
                                        stride=1, scope='dilation1')
            # net_dilation2 = custom_layers.pad2d(net, pad=(0, 4))
            net_dilation2 = slim.conv2d(end_points[end_point], 128, [1, 9],
                                        padding='SAME', stride=1, scope='dilation2')
            net_dilation3 = slim.conv2d(end_points[end_point], 128, [9, 1],
                                        stride=1, padding='SAME', scope='dilation3')
            # net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(4, 0))
            net_inception = tf.concat(values=[net_dilation1, net_dilation2, net_dilation3],
                                      axis=3)
        end_points[end_point] = net_inception
        end_point = feat_layers[1]
        with tf.variable_scope(end_point):
            net_dilation1 = slim.conv2d(end_points[end_point], 1024, [1, 1],
                                        stride=1, scope='dilation1')
            net_dilation2 = slim.conv2d(end_points[end_point], 1024, [1, 7],
                                        stride=1, scope='dilation2')
            # net_dilation2 = custom_layers.pad2d(net_dilation2, pad=(0, 3))
            net_dilation3 = slim.conv2d(end_points[end_point], 1024, [7, 1],
                                        stride=1, scope='dilation3')
            # net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(3, 0))
            net_inception = tf.concat([net_dilation1, net_dilation2, net_dilation3], axis=3)
        end_points[end_point] = net_inception
        end_point = 'conv8'
        with tf.variable_scope(end_point):
            net_dilation1 = slim.conv2d(end_points[end_point], 128, [1, 1],
                                        stride=1, scope='dilation1')
            net_dilation2 = slim.conv2d(end_points[end_point], 128, [1, 7],
                                        stride=1, scope='dilation2')
            # net_dilation2 = custom_layers.pad2d(net_dilation2, pad=(0, 3))
            net_dilation3 = slim.conv2d(end_points[end_point], 128, [7, 1],
                                        stride=1, scope='dilation3')
            # net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(3, 0))
            net_inception = tf.concat([net_dilation1, net_dilation2, net_dilation3], axis=3)
        end_points[end_point] = net_inception
        end_point = feat_layers[3]
        with tf.variable_scope(end_point):
            net_dilation1 = slim.conv2d(end_points[end_point], 128, [1, 1],
                                        stride=1, scope='dilation1')
            net_dilation2 = slim.conv2d(end_points[end_point], 128, [1, 7],
                                        stride=1, scope='dilation2')
            # net_dilation2 = custom_layers.pad2d(net_dilation2, pad=(0, 3))
            net_dilation3 = slim.conv2d(end_points[end_point], 128, [7, 1],
                                        stride=1, scope='dilation3')
            # net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(3, 0))
            net_inception = tf.concat([net_dilation1, net_dilation2, net_dilation3], axis=3)
        end_points[end_point] = net_inception   # 5
        end_point = 'conv10'
        with tf.variable_scope(end_point):
            net_dilation1 = slim.conv2d(end_points[end_point], 128, [1, 1],
                                        stride=1, scope='dilation1')
            net_dilation2 = slim.conv2d(end_points[end_point], 128, [1, 7],
                                        stride=1, scope='dilation2')
            # net_dilation2 = custom_layers.pad2d(net_dilation2, pad=(0, 3))
            net_dilation3 = slim.conv2d(end_points[end_point], 128, [7, 1],
                                        stride=1, scope='dilation3')
            # net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(3, 0))
            net_inception = tf.concat([net_dilation1, net_dilation2, net_dilation3], axis=3)
        end_points[end_point] = net_inception   # 3
        end_point = 'conv11'
        with tf.variable_scope(end_point):
            net_dilation1 = slim.conv2d(end_points[end_point], 128, [1, 1],
                                        stride=1, scope='dilation1')
            net_dilation2 = slim.conv2d(end_points[end_point], 128, [1, 5],
                                        stride=1, scope='dilation2')
            # net_dilation2 = custom_layers.pad2d(net_dilation2, pad=(0, 2))
            net_dilation3 = slim.conv2d(end_points[end_point], 128, [5, 1],
                                        stride=1, scope='dilation3')
            # net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(2, 0))
            net_inception = tf.concat([net_dilation1, net_dilation2, net_dilation3], axis=3)
        end_points[end_point] = net_inception   # 1
        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, loc = text_multibox_layer(layer,
                                             end_points[layer],
                                             anchor_sizes[i],
                                             anchor_ratios[i],
                                             normalizations[i])
            prediction_fn = slim.softmax
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(loc)
        return predictions, localisations, logits, end_points
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_512_vgg'):
    """SSD net definition."""
    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_512_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], 1, scope='pool5')
        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block12'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [4, 4], scope='conv4x4', padding='VALID')
            # Fix padding to match the Caffe version (pad=1).
            # pad_shape = [(i - j) for i, j in zip(layer_shape(net), [0, 1, 1, 0])]
            # net = tf.slice(net, [0, 0, 0, 0], pad_shape, name='caffe_pad')
        end_points[end_point] = net
        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_vgg_300.ssd_multibox_layer(end_points[layer],
                                                      num_classes,
                                                      anchor_sizes[i],
                                                      anchor_ratios[i],
                                                      normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)
        return predictions, localisations, logits, end_points
def text_net(inputs,
             feat_layers=TextboxNet.default_params.feat_layers,
             normalizations=TextboxNet.default_params.normalizations,
             is_training=True,
             dropout_keep_prob=0.5,
             reuse=None,
             scope='text_box_300'):
    end_points = {}
    with tf.variable_scope(scope, 'text_box_300', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['conv1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2: 150,150 128.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['conv2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3: 75,75 256.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['conv3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3', padding='SAME')
        # Block 4: 38,38 512.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['conv4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5: 19,19 512.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['conv5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5', padding='SAME')
        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['conv6'] = net
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['conv7'] = net
        # Block 8/9/10/global: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'conv8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'conv9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'conv10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'global'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        # Prediction and localisations layers.
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = text_multibox_layer(layer,
                                           end_points[layer],
                                           normalizations[i])
            # predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)
        return localisations, logits, end_points
def text_net(inputs,
             feat_layers=TextboxNet.default_params.feat_layers,
             anchor_sizes=TextboxNet.default_params.anchor_sizes,
             anchor_ratios=TextboxNet.default_params.anchor_ratios,
             normalizations=TextboxNet.default_params.normalizations,
             is_training=True,
             dropout_keep_prob=0.5,
             reuse=None,
             scope='text_box_384',
             update_feat_shapes=False):
    end_points = {}
    with tf.compat.v1.variable_scope(scope, 'text_box_300', [inputs], reuse=reuse):
        # Input sizes: 300*300 or 384*384.
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')    # 300 384
        end_points['conv1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')   # 150
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')      # 150 192
        end_points['conv2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')   # 75
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')      # 75 81
        end_points['conv3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')   # 38
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')      # 38 40
        end_point = 'conv4'
        end_points[end_point] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')   # 19
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')      # 19
        end_points['conv5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')             # 19
        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')             # 19
        end_points['conv6'] = net
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')                     # 19
        end_point = 'conv7'
        end_points[end_point] = net
        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2.
        end_point = 'conv8'
        with tf.compat.v1.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net     # 10
        end_point = 'conv9'
        with tf.compat.v1.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net     # 5
        end_point = 'conv10'
        with tf.compat.v1.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net     # 3
        end_point = 'conv11'
        with tf.compat.v1.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net     # 1
        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        shape_list = []
        for i, layer in enumerate(feat_layers):
            with tf.compat.v1.variable_scope(layer + '_box'):
                p, loc, shape = text_multibox_layer(layer,
                                                    end_points[layer],
                                                    anchor_sizes[i],
                                                    anchor_ratios[i],
                                                    normalizations[i])
            prediction_fn = slim.softmax
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(loc)
            shape_list.append(shape)
        if update_feat_shapes:
            return predictions, localisations, logits, end_points, shape_list
        return predictions, localisations, logits, end_points
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition.

    ssd_net(inputs, number of classes, list of feature-layer names, actual anchor
    sizes, anchor aspect ratios, normalizations, training flag, dropout keep
    probability, prediction fn, reuse flag, scope name).
    """
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))
    # End_points collect relevant activations for external use.
    end_points = {}
    # Scope management: with reuse=None/False, variables may only be created
    # (not reused) inside this scope.
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')    # conv1 convolutions.
        end_points['block1'] = net      # 300x300
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net      # 150x150
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net      # 75x75
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net      # 38x38
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net      # 19x19
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')
        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')     # 13x13 receptive field.
        end_points['block6'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net     # 10x10
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net     # 5x5
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net     # 3x3
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net     # 1x1
        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer],
                                          num_classes,
                                          anchor_sizes[i],
                                          anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)            # Per-feature-map class prediction head.
            localisations.append(l)     # Per-feature-map localisation head.
        return predictions, localisations, logits, end_points
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_mobilenetv1'):
    """SSD net definition."""
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))
    # End_points collect relevant activations for external use.
    end_points = {}
    min_depth = 32
    depth_multiplier = 1.0
    with tf.variable_scope(scope, 'ssd_300_mobilenetv1', [inputs], reuse=reuse):
        input_shape = inputs.get_shape().as_list()
        if len(input_shape) != 4:
            raise ValueError('Invalid input tensor rank, expected 4, was: %d'
                             % len(input_shape))
        with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_training=is_training)):
            with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training):
                net, end_points = mobilenet_v1.mobilenet_v1_base(
                    inputs, scope='MobilenetV1', min_depth=min_depth,
                    depth_multiplier=depth_multiplier, conv_defs=None)
        # Additional SSD blocks (disabled variant kept for reference).
        '''
        # Block 6: let's dilate the hell out of it!
        end_point = 'block13'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='atrous_conv')
        end_points['block13_atrous'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv1x1')
        end_points['block13'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        '''
        # Block 14/15/16/17: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block14'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], biases_initializer=None,
                              trainable=is_training, scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], biases_initializer=None,
                              trainable=is_training, stride=2, scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block15'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], biases_initializer=None,
                              trainable=is_training, scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], biases_initializer=None,
                              trainable=is_training, stride=2, scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block16'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], biases_initializer=None,
                              trainable=is_training, scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], biases_initializer=None,
                              trainable=is_training, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        '''
        end_point = 'block17'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 64, [1, 1], biases_initializer=None,
                              trainable=is_training, scope='conv1x1')
            net = slim.conv2d(net, 128, [3, 3], biases_initializer=None,
                              trainable=is_training, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        '''
        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer],
                                          num_classes,
                                          anchor_sizes[i],
                                          anchor_ratios[i],
                                          normalizations[i],
                                          is_training=is_training)
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)
        # end_points['logits'] = logits
        # end_points['predictions'] = predictions
        # end_points['localisations'] = localisations
        return predictions, localisations, logits, end_points
def mobilenets(inputs,
               num_classes=1000,
               width_multiplier=1.0,
               is_training=True,
               dropout_keep_prob=0.5,
               pad_logits=True,
               scope='MobileNets'):
    """MobileNets implementation.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      width_multiplier: channel-width multiplier applied to every block.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the dropout
        layers during training.
      pad_logits: pad the logits up to `num_classes` entries.
      scope: optional scope for the variables.

    Returns:
      the last op containing the log predictions and end_points dict.
    """
    # MobileNets kernel size and padding (for layers with stride > 1).
    kernel_size = [3, 3]
    padding = [(kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2]

    def mobilenet_block(net, num_out_channels, stride=[1, 1], scope=None):
        """Basic MobileNet block combining:
         - depthwise conv + BN + relu;
         - 1x1 conv + BN + relu.
        """
        with tf.variable_scope(scope, 'block', [net]):
            num_out_channels = int(num_out_channels * width_multiplier)
            if stride[0] == 1 and stride[1] == 1:
                # Depthwise convolution with stride=1.
                net = custom_layers.depthwise_convolution2d(
                    net, kernel_size, depth_multiplier=1, stride=stride, scope='conv_dw')
            else:
                # Mimic Caffe padding if stride > 1 => usually better accuracy.
                net = custom_layers.pad2d(net, pad=padding)
                net = custom_layers.depthwise_convolution2d(
                    net, kernel_size, padding='VALID', depth_multiplier=1,
                    stride=stride, scope='conv_dw')
            # Pointwise convolution.
            net = slim.conv2d(net, num_out_channels, [1, 1], scope='conv_pw')
            return net

    with tf.variable_scope(scope, 'MobileNets', [inputs]):
        end_points = {}
        # First full convolution...
        net = custom_layers.pad2d(inputs, pad=padding)
        net = slim.conv2d(net, 32, kernel_size, stride=[2, 2], padding='VALID', scope='conv1')
        # net = slim.conv2d(inputs, 32, kernel_size, stride=[2, 2],
        #                   padding='SAME', scope='conv1')
        # Then, MobileNet blocks!
        net = mobilenet_block(net, 64, scope='block2')
        net = mobilenet_block(net, 128, stride=[2, 2], scope='block3')
        net = mobilenet_block(net, 128, scope='block4')
        net = mobilenet_block(net, 256, stride=[2, 2], scope='block5')
        net = mobilenet_block(net, 256, scope='block6')
        net = mobilenet_block(net, 512, stride=[2, 2], scope='block7')
        # Intermediate blocks...
        for i in range(5):
            net = mobilenet_block(net, 512, scope='block%i' % (i + 8))
        # Final blocks.
        net = mobilenet_block(net, 1024, stride=[2, 2], scope='block13')
        net = mobilenet_block(net, 1024, scope='block14')
        # Spatial pooling + fully connected layer.
        net = custom_layers.spatial_mean(net, keep_dims=True, scope='spatial_mean14')
        net = slim.conv2d(net, 1000, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          normalizer_params=None,
                          biases_initializer=tf.zeros_initializer(),
                          scope='conv_fc15')
        net = custom_layers.spatial_squeeze(net)
        # Logits padding: get everyone to the same number of classes.
        if pad_logits:
            net = custom_layers.pad_logits(net, pad=(num_classes - 1000, 0))
        return net, end_points
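# Hypothetical usage of mobilenets() (the placeholder shape and num_classes are
# assumptions for illustration; assumes this file's usual `tf` import):
def _mobilenets_example():
    images = tf.placeholder(tf.float32, [None, 224, 224, 3])
    logits, end_points = mobilenets(images, num_classes=1001, is_training=False)
    # `logits` has shape [batch_size, num_classes] after the spatial squeeze
    # and the optional logits padding.
    return logits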
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition."""
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))
    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Variant with a top-down fusion path, kept for reference but disabled.
        '''
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net4 = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')     # 38x38
        # end_points['block4'] = net
        net = slim.max_pool2d(net4, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')
        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        # end_points['block7'] = net
        net7 = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net7, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net8 = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        # end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net8, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net9 = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        # end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net9, 128, [1, 1], scope='conv1x1')
            net10 = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        # end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net11 = slim.conv2d(net10, 128, [1, 1], scope='conv1x1')
            net11 = slim.conv2d(net11, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net11
        # print("================net11 shape", net11.shape)   # NCHW
        # Top-down fusion blocks.
        end_point = 'block10be'
        with tf.variable_scope(end_point):
            net10_a = tf.transpose(net11, perm=(0, 2, 3, 1))    # NCHW -> NHWC
            net10_a = tf.image.resize_nearest_neighbor(net10_a, (3, 3))
            net10_a = tf.transpose(net10_a, perm=(0, 3, 1, 2))  # NHWC -> NCHW
            net10_a = slim.conv2d(net10_a, 256, [3, 3], scope='pre10_3x3')
            net10_b = slim.conv2d(net10, 256, [1, 1], scope='pre10_1x1')
            net10_o = net10_a + net10_b
        end_points[end_point] = net10_o     # 3
        end_point = 'block9be'
        with tf.variable_scope(end_point):
            net9_a = tf.transpose(net10_o, perm=(0, 2, 3, 1))
            net9_a = tf.image.resize_nearest_neighbor(net9_a, (5, 5))
            net9_a = tf.transpose(net9_a, perm=(0, 3, 1, 2))
            net9_a = slim.conv2d(net9_a, 256, [3, 3], scope='pre9_3x3')
            net9_b = slim.conv2d(net9, 256, [1, 1], scope='pre9_1x1')
            net9_o = net9_a + net9_b
        end_points[end_point] = net9_o      # 5
        end_point = 'block8be'
        with tf.variable_scope(end_point):
            net8_a = tf.transpose(net9_o, perm=(0, 2, 3, 1))
            net8_a = tf.image.resize_nearest_neighbor(net8_a, (10, 10))
            net8_a = tf.transpose(net8_a, perm=(0, 3, 1, 2))
            net8_a = slim.conv2d(net8_a, 512, [3, 3], padding='SAME', scope='pre8_3x3')
            net8_b = slim.conv2d(net8, 512, [1, 1], scope='pre8_1x1')   # 10
            net8_o = net8_a + net8_b
        end_points[end_point] = net8_o      # 10
        end_point = 'block7be'
        with tf.variable_scope(end_point):
            net7_a = tf.transpose(net8_o, perm=(0, 2, 3, 1))
            net7_a = tf.image.resize_nearest_neighbor(net7_a, (19, 19))
            net7_a = tf.transpose(net7_a, perm=(0, 3, 1, 2))
            net7_a = slim.conv2d(net7_a, 1024, [3, 3], padding='SAME', scope='pre7_3x3')
            net7_b = slim.conv2d(net7, 1024, [1, 1], scope='pre7_1x1')
            net7_o = net7_a + net7_b
        end_points[end_point] = net7_o
        end_point = 'block4be'
        with tf.variable_scope(end_point):
            net4_a = tf.transpose(net7_o, perm=(0, 2, 3, 1))
            net4_a = tf.image.resize_nearest_neighbor(net4_a, (38, 38))
            net4_a = tf.transpose(net4_a, perm=(0, 3, 1, 2))
            net4_a = slim.conv2d(net4_a, 512, [3, 3], padding='SAME', scope='pre4_3x3')
            net4_b = slim.conv2d(net4, 512, [1, 1], scope='pre4_1x1')
            net4_o = net4_a + net4_b        # 38
        end_points[end_point] = net4_o
        '''
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')
        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 7: 1x1 conv. Because the f**k.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training)
        # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer],
                                          num_classes,
                                          anchor_sizes[i],
                                          anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)
        return predictions, localisations, logits, end_points
def text_net(inputs, feat_layers=TextboxNet.default_params.feat_layers, anchor_sizes=TextboxNet.default_params.anchor_sizes, anchor_ratios=TextboxNet.default_params.anchor_ratios, normalizations=TextboxNet.default_params.normalizations, is_training=True, dropout_keep_prob=0.5, reuse=None, scope='text_box_384', update_feat_shapes=False): """ Define the backbone (13 original vgg layers + 10 extra conv layers to extract multi-scale feature maps form as SSD) of the textboxes and the neck net -- 6 textbox layers. :param inputs: input image size :param feat_layers: feature map which connect to the textbox layer :param anchor_sizes: multi-scale anchor sizes :param anchor_ratios: multi-scale anchor aspect ratios [2.0, 1. / 2, 3.0, 1. / 3, 4.0, 1. / 4, 5., 1. / 5] :param normalizations: :param is_training: train or not :param dropout_keep_prob: :param reuse: :param scope: :param update_feat_shapes: :return: [predictions, localisations, logits, end_points, shape_list(if update_feat_shapes=True)] """ # End_points collect relevant activations for external use. end_points = {} with tf.variable_scope(scope, 'text_box_300', [inputs], reuse=reuse): # 300*300 384*384 ###################################### # 前五个 Blocks,首先照搬 VGG16 架构 # # 注意这里使用 end_points 标注中间结果 # ###################################### # ——————————————————Original VGG-16 blocks (total 13 conv layers)——————————————————————— # Block 1. net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') # 300 384 end_points['conv1'] = net net = slim.max_pool2d(net, [2, 2], scope='pool1') # 150 # Block 2. net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') # 150 192 end_points['conv2'] = net net = slim.max_pool2d(net, [2, 2], scope='pool2') # 75 # Block 3. net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') # 75 81 end_points['conv3'] = net net = slim.max_pool2d(net, [2, 2], scope='pool3') # 38 # Block 4. end_point = 'conv4' net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') # 38 40 end_points[end_point] = net net = slim.max_pool2d(net, [2, 2], scope='pool4') # 19 # Block 5. net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5') # 19 end_points['conv5'] = net net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5') # 19 Pooling size 2 -> 3 #################################### # 后六个 Blocks,使用额外卷积层 # #################################### # ————————————Additional SSD blocks.—————————————————————— # Block 6: let's dilate the hell out of it! dilation -> 6 net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6') # 19 end_points['conv6'] = net # Block 7: 1x1 conv. Because the f**k. end_point = 'conv7' net = slim.conv2d(net, 1024, [1, 1], scope='conv7') # 19 end_points[end_point] = net # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts). 
        end_point = 'conv8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net  # 10
        end_point = 'conv9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net  # 5
        end_point = 'conv10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net  # 3
        end_point = 'conv11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net

        ###########################################################
        # Each feature layer above is recorded in end_points.     #
        # Collect the per-layer predictions into lists and return #
        # them to the loss / optimisation code.                   #
        ###########################################################
        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        shape_list = []
        # feat_layers = ['conv4', 'conv7', 'conv8', 'conv9', 'conv10', 'conv11']
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                cls, loc, shape = text_multibox_layer(layer, end_points[layer],
                                                      anchor_sizes[i],
                                                      anchor_ratios[i],
                                                      normalizations[i])
            prediction_fn = slim.softmax
            # Confidence and bbox-location predictions of each textbox layer.
            predictions.append(prediction_fn(cls))
            logits.append(cls)
            localisations.append(loc)
            shape_list.append(shape)
        if update_feat_shapes:
            return predictions, localisations, logits, end_points, shape_list
        else:
            return predictions, localisations, logits, end_points
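
# For orientation, a hypothetical call of text_net on a 384x384 batch; the
# placeholder name and shape below are illustrative, not part of the original code.
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 384, 384, 3], name='images')
predictions, localisations, logits, end_points = text_net(images, is_training=False)
# predictions[i] and localisations[i] correspond to feat_layers[i],
# i.e. 'conv4', 'conv7', 'conv8', 'conv9', 'conv10', 'conv11'.
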
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition."""
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        # Block 1.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net                         # shape=(batch_size, 300, 300, 64)
        net = slim.max_pool2d(net, [2, 2], scope='pool1')  # shape=(batch_size, 150, 150, 64)
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net                         # shape=(2, 150, 150, 128)
        net = slim.max_pool2d(net, [2, 2], scope='pool2')  # shape=(2, 75, 75, 128)
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net                         # shape=(2, 75, 75, 256)
        net = slim.max_pool2d(net, [2, 2], scope='pool3')  # shape=(2, 38, 38, 256), default padding='VALID'
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net                         # shape=(2, 38, 38, 512)
        net = slim.max_pool2d(net, [2, 2], scope='pool4')  # shape=(2, 19, 19, 512)
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net                                   # shape=(2, 19, 19, 512)
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')  # shape=(2, 19, 19, 512)

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net                         # shape=(2, 19, 19, 1024)
        # Note: `rate` is the drop probability, hence 1 - dropout_keep_prob;
        # dropout does not change the shape: (2, 19, 19, 1024).
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob, training=is_training)
        # Block 7: 1x1 conv.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net                         # shape=(2, 19, 19, 1024)
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob,
                                training=is_training)      # shape=(2, 19, 19, 1024)

        # Blocks 8/9/10/11: 1x1 and 3x3 convolutions, stride 2 (except the last ones).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')  # shape=(2, 19, 19, 256)
            net = custom_layers.pad2d(net, pad=(1, 1))            # shape=(2, 21, 21, 256)
            net = slim.conv2d(net, 512, [3, 3], stride=2,
                              scope='conv3x3', padding='VALID')   # shape=(2, 10, 10, 512)
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')  # shape=(2, 10, 10, 128)
            net = custom_layers.pad2d(net, pad=(1, 1))            # shape=(2, 12, 12, 128)
            net = slim.conv2d(net, 256, [3, 3], stride=2,
                              scope='conv3x3', padding='VALID')   # shape=(2, 5, 5, 256)
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')  # shape=(2, 5, 5, 128)
            net = slim.conv2d(net, 256, [3, 3],
                              scope='conv3x3', padding='VALID')   # shape=(2, 3, 3, 256)
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')  # shape=(2, 3, 3, 128)
            net = slim.conv2d(net, 256, [3, 3],
                              scope='conv3x3', padding='VALID')   # shape=(2, 1, 1, 256)
        end_points[end_point] = net

        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer], num_classes,
                                          anchor_sizes[i], anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)
        return predictions, localisations, logits, end_points
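
# ssd_multibox_layer itself is defined elsewhere in the repository. The sketch
# below is only a hedged reconstruction of the standard SSD head it is expected
# to implement (two parallel 3x3 convolutions per feature layer); the
# l2_normalization call is an assumption based on how custom_layers is used
# elsewhere, and the name ssd_multibox_layer_sketch is illustrative.
def ssd_multibox_layer_sketch(net, num_classes, sizes, ratios, normalization=-1):
    """Sketch of a standard SSD multibox head; not the original implementation."""
    if normalization > 0:
        # Assumed helper: L2-normalize conv4_3-style features with a learned scale.
        net = custom_layers.l2_normalization(net, scaling=True)
    num_anchors = len(sizes) + len(ratios)
    # Location head: 4 box offsets per anchor (no activation on regression outputs).
    loc = slim.conv2d(net, num_anchors * 4, [3, 3],
                      activation_fn=None, scope='conv_loc')
    # Classification head: num_classes raw scores per anchor.
    cls = slim.conv2d(net, num_anchors * num_classes, [3, 3],
                      activation_fn=None, scope='conv_cls')
    # The real layer additionally reshapes both tensors so that the anchor and
    # class/coordinate dimensions are separated before the softmax is applied.
    return cls, loc
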
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition."""
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))

    # End_points collect relevant activations for external use.
    end_points = {}
    # Define the SSD network; the first half is plain VGG-16.
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        # Block 1: two conv layers and one max-pool.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2: two conv layers and one max-pool.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3: three conv layers and one max-pool.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4: three conv layers and one max-pool.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5: three conv layers and one max-pool (kernel 3, stride 1).
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks (added on top of the VGG trunk).
        # Block 6: dilated (atrous) 3x3 convolution followed by dropout.
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob,
                                training=is_training)  # rate = drop probability
        # Block 7: a 1x1 conv (a per-pixel linear combination across all input
        # channels), followed by dropout.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob, training=is_training)

        # Blocks 8/9/10/11: 1x1 and 3x3 convolutions, stride 2 (except the last
        # ones); all four are feature layers used for prediction.
        # With padding='VALID' the spatial size may shrink after the kernel;
        # with padding='SAME' the output keeps the input size (for stride 1).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net

        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer], num_classes,
                                          anchor_sizes[i], anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)
        return predictions, localisations, logits, end_points

def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_512_vgg'):
    """SSD net definition."""
    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_512_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], 1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        # Block 7: 1x1 conv.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net

        # In the FPN variant, blocks 9-11 keep stride 1 so that their feature
        # maps can be concatenated further down.
        if IS_FPN:
            stride_b8_to_b12 = 1
        else:
            stride_b8_to_b12 = 2
        # Blocks 8/9/10/11: 1x1 and 3x3 convolutions, stride 2 (except the last ones).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=stride_b8_to_b12,
                              scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=stride_b8_to_b12,
                              scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=stride_b8_to_b12,
                              scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'concat'
        with tf.variable_scope(end_point):
            # Concatenate blocks 9-11 along axis 1 (the channel axis in the
            # NCHW layout used by the transposed convolutions below).
            high_feature_list = []
            for i in range(9, 12):
                high_feature_list.append(end_points['block' + str(i)])
            high_feature = tf.concat(high_feature_list, 1)
            end_points[end_point] = high_feature
        end_point = 'block4_concat'
        with tf.variable_scope(end_point):
            net_block4 = slim.conv2d(end_points['concat'], 128, [1, 1], scope='conv1x1')
            net_block4 = tf.layers.conv2d_transpose(net_block4, 512, kernel_size=3,
                                                    strides=(4, 4),
                                                    data_format='channels_first')
            end_points['block4'] += net_block4
        end_point = 'block7_concat'
        with tf.variable_scope(end_point):
            net_block7 = slim.conv2d(end_points['concat'], 128, [1, 1], scope='conv1x1')
            net_block7 = tf.layers.conv2d_transpose(net_block7, 1024, kernel_size=1,
                                                    strides=(2, 2),
                                                    data_format='channels_first')
            end_points['block7'] += net_block7
        end_point = 'block8_concat'
        with tf.variable_scope(end_point):
            net_block8 = slim.conv2d(end_points['concat'], 512, [1, 1], scope='conv1x1')
            end_points['block8'] += net_block8

        # Prediction and localisation layers.
        predictions_pest = []
        logits_pest = []
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                # Get the predictions for every anchor here.
                pest, p, l = ssd_vgg_300.ssd_multibox_layer(end_points[layer],
                                                            num_classes,
                                                            anchor_sizes[i],
                                                            anchor_ratios[i],
                                                            normalizations[i])
            predictions_pest.append(prediction_fn(pest))
            logits_pest.append(pest)
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)
        return predictions_pest, logits_pest, predictions, localisations, logits, end_points
def ssd_net(inputs, num_classes=SSDNet.default_params.num_classes, feat_layers=SSDNet.default_params.feat_layers, anchor_sizes=SSDNet.default_params.anchor_sizes, anchor_ratios=SSDNet.default_params.anchor_ratios, normalizations=SSDNet.default_params.normalizations, is_training=True, dropout_keep_prob=0.5, prediction_fn=slim.softmax, reuse=None, scope='ssd_512_vgg', DSSD_FLAG=False): """SSD net definition. """ # End_points collect relevant activations for external use. end_points = {} if inputs.shape[2] == inputs.shape[3]: mode = 'bnwh' else: mode = 'bwhn' with tf.variable_scope(scope, 'ssd_512_vgg', [inputs], reuse=reuse): # Original VGG-16 blocks. net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') end_points['block1'] = net net = slim.max_pool2d(net, [2, 2], scope='pool1') # Block 2. net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') end_points['block2'] = net net = slim.max_pool2d(net, [2, 2], scope='pool2') # Block 3. net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') end_points['block3'] = net net = slim.max_pool2d(net, [2, 2], scope='pool3') # Block 4. net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') end_points['block4'] = net net = slim.max_pool2d(net, [2, 2], scope='pool4') # Block 5. net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5') end_points['block5'] = net net = slim.max_pool2d(net, [3, 3], 1, scope='pool5') # Additional SSD blocks. # Block 6: let's dilate the hell out of it! net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6') end_points['block6'] = net # Block 7: 1x1 conv. Because the f**k. net = slim.conv2d(net, 1024, [1, 1], scope='conv7') end_points['block7'] = net # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts). end_point = 'block8' with tf.variable_scope(end_point): net = slim.conv2d(net, 256, [1, 1], scope='conv1x1') net = custom_layers.pad2d(net, pad=(1, 1)) net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID') end_points[end_point] = net end_point = 'block9' with tf.variable_scope(end_point): net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') net = custom_layers.pad2d(net, pad=(1, 1)) net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID') end_points[end_point] = net end_point = 'block10' with tf.variable_scope(end_point): net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') net = custom_layers.pad2d(net, pad=(1, 1)) net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID') end_points[end_point] = net end_point = 'block11' with tf.variable_scope(end_point): net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') net = custom_layers.pad2d(net, pad=(1, 1)) net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID') end_points[end_point] = net end_point = 'block12' with tf.variable_scope(end_point): net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') net = custom_layers.pad2d(net, pad=(1, 1)) net = slim.conv2d(net, 256, [4, 4], scope='conv4x4', padding='VALID') # Fix padding to match Caffe version (pad=1). # pad_shape = [(i-j) for i, j in zip(layer_shape(net), [0, 1, 1, 0])] # net = tf.slice(net, [0, 0, 0, 0], pad_shape, name='caffe_pad') end_points[end_point] = net # Prediction and localisations layers. 
# rever_feat_layers = list(reversed(feat_layers)) # for i, l in enumerate(rever_feat_layers): # if i == 0: continue # l_ = rever_feat_layers[i - 1] # # end_points[l] = tf.concat([upbilinear([end_points[l_], end_points[l]], name=l_), end_points[l]],axis=1) # with tf.variable_scope("fpn11"): # # # end_points['block11'] = tf.add(upbilinear([end_points['block12'], end_points['block11']],name ="up_11",mode=mode), # end_points['block11'],name= "fpn_block11") # # with tf.variable_scope("fpn10"): # # end_points['block10'] = tf.add(upbilinear([end_points['block11'],end_points['block10']],name ="up_10",mode=mode), # end_points['block10'], # name="fpn_block10") # with tf.variable_scope("fpn9"): # b9 = slim.conv2d(end_points['block10'], 256, [1, 1], scope='9conv1x1') # b9_ = slim.conv2d(end_points['block9'], 256, [1, 1], scope='9_conv1x1') # end_points['block9'] = tf.add(upbilinear([end_points['block10'],end_points['block9']],name ="up_9",mode=mode), # end_points['block9'], # name="fpn_block9") # with tf.variable_scope("fpn8"): # b8 = slim.conv2d(end_points['block9'], 512, [1, 1], scope='8conv1x1') # # end_points['block8'] = tf.add(upbilinear([b8, end_points['block8']],name ="up_8",mode=mode), end_points['block8'], # name="fpn_block8") # with tf.variable_scope("fpn7"): # b7 = slim.conv2d(end_points['block8'], 1024, [1, 1], scope='7conv1x1') # # end_points['block7'] = tf.add(upbilinear([b7, end_points['block7']],name ="up_7",mode=mode), end_points['block7'], # name="fpn_block7") # with tf.variable_scope("fpn4"): # b4 = slim.conv2d(end_points['block7'], 512, [1, 1], scope='4conv1x1') # # end_points['block4'] = tf.add(upbilinear([b4, end_points['block4']],name ="up_4",mode=mode), end_points['block4'], # name="fpn_block4") if DSSD_FLAG: with tf.variable_scope("dssd11"): de_12 = slim.conv2d_transpose(end_points['block12'], 512, [3, 3], stride=2, scope="de_12") con_12 = slim.conv2d(de_12, 512, [3, 3], scope='conv_12') bn_12 = slim.batch_norm(con_12, is_training=is_training) con_11 = slim.conv2d(end_points["block11"], 512, [3, 3], scope="conv11") bn_11 = slim.batch_norm(con_11, is_training=is_training) relu_11 = tf.nn.relu(bn_11) con_11 = slim.conv2d(relu_11, 512, [3, 3], scope="conv11_2") bn_11 = slim.batch_norm(con_11, is_training=is_training) end_points["block11"] = tf.nn.relu(tf.multiply(bn_12, bn_11)) with tf.variable_scope("dssd10"): de_11 = slim.conv2d_transpose(end_points['block11'], 512, [3, 3], stride=2, scope="de_11") con_11 = slim.conv2d(de_11, 512, [3, 3], scope='conv_11') bn_11 = slim.batch_norm(con_11, is_training=is_training) con_10 = slim.conv2d(end_points["block10"], 512, [3, 3], scope="conv10") bn_10 = slim.batch_norm(con_10, is_training=is_training) relu_10 = tf.nn.relu(bn_10) con_10 = slim.conv2d(relu_10, 512, [3, 3], scope="conv10_2") bn_10 = slim.batch_norm(con_10, is_training=is_training) end_points["block10"] = tf.nn.relu(tf.multiply(bn_11, bn_10)) with tf.variable_scope("dssd9"): de_10 = slim.conv2d_transpose(end_points['block10'], 512, [3, 3], stride=2, scope="de_10") con_10 = slim.conv2d(de_10, 512, [3, 3], scope='conv_10') bn_10 = slim.batch_norm(con_10, is_training=is_training) con_9 = slim.conv2d(end_points["block9"], 512, [3, 3], scope="conv9") bn_9 = slim.batch_norm(con_9, is_training=is_training) relu_9 = tf.nn.relu(bn_9) con_9 = slim.conv2d(relu_9, 512, [3, 3], scope="conv9_2") bn_9 = slim.batch_norm(con_9, is_training=is_training) end_points["block9"] = tf.nn.relu(tf.multiply(bn_10, bn_9)) with tf.variable_scope("dssd8"): de_9 = slim.conv2d_transpose(end_points['block9'], 
512, [3, 3], stride=2, scope="de_9") con_9 = slim.conv2d(de_9, 512, [3, 3], scope='conv_9') bn_9 = slim.batch_norm(con_9, is_training=is_training) con_8 = slim.conv2d(end_points["block8"], 512, [3, 3], scope="conv8") bn_8 = slim.batch_norm(con_8, is_training=is_training) relu_8 = tf.nn.relu(bn_8) con_8 = slim.conv2d(relu_8, 512, [3, 3], scope="conv8_2") bn_8 = slim.batch_norm(con_8, is_training=is_training) end_points["block8"] = tf.nn.relu(tf.multiply(bn_9, bn_8)) with tf.variable_scope("dssd7"): de_8 = slim.conv2d_transpose(end_points['block8'], 512, [3, 3], stride=2, scope="de_8") con_8 = slim.conv2d(de_8, 512, [3, 3], scope='conv_8') bn_8 = slim.batch_norm(con_8, is_training=is_training) con_7 = slim.conv2d(end_points["block7"], 512, [3, 3], scope="conv7") bn_7 = slim.batch_norm(con_7, is_training=is_training) relu_7 = tf.nn.relu(bn_7) con_7 = slim.conv2d(relu_7, 512, [3, 3], scope="conv7_2") bn_7 = slim.batch_norm(con_7, is_training=is_training) end_points["block7"] = tf.nn.relu(tf.multiply(bn_8, bn_7)) with tf.variable_scope("dssd4"): de_7 = slim.conv2d_transpose(end_points['block7'], 512, [3, 3], stride=2, scope="de_7") con_7 = slim.conv2d(de_7, 512, [3, 3], scope='conv_7') bn_7 = slim.batch_norm(con_7, is_training=is_training) con_4 = slim.conv2d(end_points["block4"], 512, [3, 3], scope="conv4") bn_4 = slim.batch_norm(con_4, is_training=is_training) relu_4 = tf.nn.relu(bn_4) con_4 = slim.conv2d(relu_4, 512, [3, 3], scope="conv4_2") bn_4 = slim.batch_norm(con_4, is_training=is_training) end_points["block4"] = tf.nn.relu(tf.multiply(bn_7, bn_4)) # predictions = [] logits = [] localisations = [] for i, layer in enumerate(feat_layers): with tf.variable_scope(layer + '_box'): p, l = ssd_vgg_300.ssd_multibox_layer(end_points[layer], num_classes, anchor_sizes[i], anchor_ratios[i], normalizations[i]) predictions.append(prediction_fn(p)) logits.append(p) localisations.append(l) return predictions, localisations, logits, end_points

def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition."""
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))

    # End_points collect relevant activations for external use.
    # Reference: the same VGG trunk written with layers_lib:
    """
    net = layers_lib.repeat(inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1')
    net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
    net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2')
    net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
    net = layers_lib.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3')
    net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
    net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4')
    net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
    net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5')
    net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
    """
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        #######################################################
        # The first five blocks copy the VGG-16 architecture; #
        # end_points records the intermediate results.        #
        #######################################################
        # -------- Original VGG-16 blocks. --------
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        # Pool kernel changed from 2 to 3, stride 1.
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        ##############################################
        # The last six blocks use extra conv layers. #
        ##############################################
        # -------- Additional SSD blocks. --------
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob,
                                training=is_training)  # rate = drop probability
        # Block 7: 1x1 conv.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob, training=is_training)

        # Blocks 8/9/10/11: 1x1 and 3x3 convolutions, stride 2 (except the last ones).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net

        ###########################################################
        # Each feature layer above is recorded in end_points.     #
        # Collect the per-layer predictions into lists and return #
        # them to the loss / optimisation code.                   #
        ###########################################################
        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        # feat_layers = ['block4', 'block7', 'block8', 'block9', 'block10', 'block11']
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer], num_classes,
                                          anchor_sizes[i], anchor_ratios[i],
                                          normalizations[i])
                """
                The number of boxes per cell equals
                len(anchor_sizes[i]) + len(anchor_ratios[i]):
                anchor_sizes=[(21., 45.), (45., 99.), (99., 153.),
                              (153., 207.), (207., 261.), (261., 315.)]
                anchor_ratios=[[2, .5], [2, .5, 3, 1./3], [2, .5, 3, 1./3],
                               [2, .5, 3, 1./3], [2, .5], [2, .5]]
                normalizations=[20, -1, -1, -1, -1, -1]
                """
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)
        return predictions, localisations, logits, end_points
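
# The rule quoted in the docstring above (boxes per cell =
# len(anchor_sizes[i]) + len(anchor_ratios[i])) can be checked directly
# with the default parameters it lists:
anchor_sizes = [(21., 45.), (45., 99.), (99., 153.),
                (153., 207.), (207., 261.), (261., 315.)]
anchor_ratios = [[2, .5], [2, .5, 3, 1. / 3], [2, .5, 3, 1. / 3],
                 [2, .5, 3, 1. / 3], [2, .5], [2, .5]]
# Default boxes per feature-map cell for each of the six layers:
print([len(s) + len(r) for s, r in zip(anchor_sizes, anchor_ratios)])
# -> [4, 6, 6, 6, 4, 4]
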
def ssd_net(inputs, num_classes=SSDNet.default_params.num_classes, feat_layers=SSDNet.default_params.feat_layers, anchor_sizes=SSDNet.default_params.anchor_sizes, anchor_ratios=SSDNet.default_params.anchor_ratios, normalizations=SSDNet.default_params.normalizations, is_training=True, dropout_keep_prob=0.5, prediction_fn=slim.softmax, reuse=None, scope='ssd_300_vgg', large=True, medium=True): """SSD net definition. """ # if data_format == 'NCHW': # inputs = tf.transpose(inputs, perm=(0, 3, 1, 2)) # End_points collect relevant activations for external use. end_points = {} with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse): # Original VGG-16 blocks. net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') end_points['block1'] = net net = slim.max_pool2d(net, [2, 2], scope='pool1') # Block 2. net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') end_points['block2'] = net net = slim.max_pool2d(net, [2, 2], scope='pool2') # Block 3. net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') end_points['block3'] = net net = slim.max_pool2d(net, [2, 2], scope='pool3') # Block 4. net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') print(net) end_points['block4'] = net net = slim.max_pool2d(net, [2, 2], scope='pool4') # Block 5. net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5') end_points['block5'] = net net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5') # Additional SSD blocks. # Block 6: let's dilate the hell out of it! net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6') end_points['block6'] = net net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training) # Block 7: 1x1 conv. Because the f**k. net = slim.conv2d(net, 1024, [1, 1], scope='conv7') print(net) end_points['block7'] = net net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training) # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts). end_point = 'block8' with tf.variable_scope(end_point): net = slim.conv2d(net, 256, [1, 1], scope='conv1x1') net = custom_layers.pad2d(net, pad=(1, 1)) net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID') end_points[end_point] = net print(net) end_point = 'block9' end_points = tf.cond( large, lambda: largenetwork(net, end_point, end_points), lambda: tf.cond(medium, lambda: mediumnetwork( net, end_point, end_points), lambda: foveanetwork( net, end_point, end_points))) # Prediction and localisations layers. #print(feat_layers) #find_predictions_large(end_points,num_classes,anchor_sizes,anchor_ratios,normalizations,prediction_fn) #find_predictions_fovea(end_points,num_classes,anchor_sizes,anchor_ratios,normalizations,prediction_fn) predictions, localisations, logits = tf.cond( large, lambda: find_predictions_large( end_points, num_classes, anchor_sizes, anchor_ratios, normalizations, prediction_fn), lambda: tf.cond( medium, lambda: find_predictions_medium( end_points, num_classes, anchor_sizes, anchor_ratios, normalizations, prediction_fn), lambda: find_predictions_fovea( end_points, num_classes, anchor_sizes, anchor_ratios, normalizations, prediction_fn))) return predictions, localisations, logits, end_points
def ssd_net(inputs, num_classes=SSDNet.default_params.num_classes, feat_layers=SSDNet.default_params.feat_layers, anchor_sizes=SSDNet.default_params.anchor_sizes, anchor_ratios=SSDNet.default_params.anchor_ratios, normalizations=SSDNet.default_params.normalizations, is_training=True, dropout_keep_prob=0.5, prediction_fn=slim.softmax, reuse=None, scope='ssd_300_vgg'): """SSD net definition. """ #if data_format == 'NCHW': #inputs = tf.transpose(inputs, perm=(0, 3, 1, 2)) # End_points collect relevant activations for external use. end_points = {} with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse): # Original VGG-16 blocks. net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') end_points['block1'] = net net = slim.max_pool2d(net, [2, 2], scope='pool1') # Block 2. net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') end_points['block2'] = net net = slim.max_pool2d(net, [2, 2], scope='pool2') # Block 3. net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') end_points['block3'] = net net = slim.max_pool2d(net, [2, 2], scope='pool3') # Block 4. net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') end_points['block4'] = net net = slim.max_pool2d(net, [2, 2], scope='pool4') # transform net4 to net7 net4_net7 = net net4_net7 = slim.conv2d(net4_net7, 1024, [1, 1], scope='net4_net7') # Block 5. net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5') end_points['block5'] = net net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5') # Additional SSD blocks. # Block 6: let's dilate the hell out of it! net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6') end_points['block6'] = net net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training) # Block 7: 1x1 conv. net = slim.conv2d(net, 1024, [1, 1], scope='conv7') end_points['block7'] = net #19*19*1024 net7_net4 = tensor_resize.tensor4_resize(net, [38, 38]) net7_net4 = slim.conv2d(net7_net4, 512, [1, 1], scope='net7_net4') # # merge block4 to block7 end_points['block7'] = end_points['block7'] + net4_net7 # # Transform net7 to net8 net7_net8 = end_points['block7'] net7_net8 = slim.max_pool2d(net7_net8, [3, 3], stride=2, scope='net7_net8_mp') net7_net8 = slim.conv2d(net7_net8, 512, [1, 1], scope='net7_net8') net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training) # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts). 
end_point = 'block8' with tf.variable_scope(end_point): net = slim.conv2d(net, 256, [1, 1], scope='conv1x1') net = custom_layers.pad2d(net, pad=(1, 1)) net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID') end_points[end_point] = net net8_net7 = tensor_resize.tensor4_resize(net, [19, 19]) net8_net7 = slim.conv2d(net8_net7, 1024, [1, 1], scope='net8_net7') # merge block7 to block8 end_points[end_point] = end_points[end_point] + net7_net8 # # Transform net8 to net9 net8_net9 = end_points[end_point] net8_net9 = slim.max_pool2d(net8_net9, [3, 3], stride=2, scope='net8_net9_mp') net8_net9 = slim.conv2d(net8_net9, 256, [1, 1], scope='net8_net9') end_point = 'block9' with tf.variable_scope(end_point): net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') net = custom_layers.pad2d(net, pad=(1, 1)) net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID') end_points[end_point] = net net9_net8 = tensor_resize.tensor4_resize(net, [10, 10]) net9_net8 = slim.conv2d(net9_net8, 512, [1, 1], scope='net9_net8') # merge block8 to block9 end_points[end_point] = end_points[end_point] + net8_net9 # Transform net9 to net10 net9_net10 = end_points[end_point] net9_net10 = slim.max_pool2d(net9_net10, [3, 3], stride=2, scope='net9_net10_mp') net9_net10 = slim.conv2d(net9_net10, 256, [1, 1], scope='net9_net10') end_point = 'block10' with tf.variable_scope(end_point): net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID') end_points[end_point] = net #3*3*256 net10_net9 = tensor_resize.tensor4_resize(net, [5, 5]) net10_net9 = slim.conv2d(net10_net9, 256, [1, 1], scope='net10_net9') # # merge block9 to block10 end_points[end_point] = end_points[end_point] + net9_net10 # # Transform net10 to net11 net10_net11 = end_points[end_point] net10_net11 = slim.max_pool2d(net10_net11, [3, 3], stride=2, scope='net10_net11_mp', padding='VALID') net10_net11 = slim.conv2d(net10_net11, 256, [1, 1], scope='net10_net11') end_point = 'block11' with tf.variable_scope(end_point): net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID') end_points[end_point] = net #1*1*256 #upsample net11 and Transform net11 to net10 ! net11_net10 = tensor_resize.tensor4_resize(net, [3, 3]) net11_net10 = slim.conv2d(net11_net10, 256, [1, 1], scope='net11_net10') # # merge block10 to block11 end_points[end_point] = end_points[end_point] + net10_net11 end_points['block4'] = end_points['block4'] + net7_net4 end_points['block7'] = end_points['block7'] + net8_net7 end_points['block8'] = end_points['block8'] + net9_net8 end_points['block9'] = end_points['block9'] + net10_net9 end_points['block10'] = end_points['block10'] + net11_net10 # Prediction and localisations layers. predictions = [] logits = [] localisations = [] for i, layer in enumerate(feat_layers): with tf.variable_scope(layer + '_box'): p, l = ssd_multibox_layer(end_points[layer], num_classes, anchor_sizes[i], anchor_ratios[i], normalizations[i]) predictions.append(prediction_fn(p)) logits.append(p) localisations.append(l) return predictions, localisations, logits, end_points

def ssd_net(inputs,
            num_classes,
            feature_layers,
            anchor_sizes,
            anchor_ratios,
            normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_vgg'):
    # Structure of the SSD net.
    outputs = {}
    with tf.variable_scope(scope, 'ssd_vgg', [inputs], reuse=reuse):
        # Structure of VGG-16.
        # Block 1.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        outputs['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        outputs['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        outputs['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        outputs['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        outputs['block5'] = net
        net = slim.max_pool2d(net, [3, 3], 1, scope='pool5')

        # Additional SSD blocks.
        # Block 6.
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        outputs['block6'] = net
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob,
                                training=is_training)  # rate = drop probability
        # Block 7.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        outputs['block7'] = net
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob, training=is_training)
        # Block 8.
        with tf.variable_scope('block8'):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        outputs['block8'] = net
        # Block 9.
        with tf.variable_scope('block9'):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        outputs['block9'] = net
        # Block 10.
        with tf.variable_scope('block10'):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        outputs['block10'] = net
        # Block 11.
        with tf.variable_scope('block11'):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        outputs['block11'] = net
        # Block 12.
        with tf.variable_scope('block12'):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [4, 4], stride=2, scope='conv4x4', padding='VALID')
        outputs['block12'] = net

        # Prediction and localization.
        predictions = []  # class predictions (after softmax)
        logits = []       # raw class scores
        locations = []    # location predictions
        for i, layer in enumerate(feature_layers):  # blocks 4, 7, 8, 9, 10, 11, 12
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layers(outputs[layer], num_classes,
                                           anchor_sizes[i], anchor_ratios[i],
                                           normalizations[i])
            predictions.append(prediction_fn(p))  # softmax over the class scores
            logits.append(p)
            locations.append(l)
        return predictions, locations, logits, outputs
def text_net(inputs, feat_layers=default_params.feat_layers, normalizations=default_params.normalizations, is_training=True, dropout_keep_prob=0.5, scope='vgg_16'): # checked feature_layers = {} with tf.variable_scope(scope, 'vgg_16', [inputs], reuse=None): # Original VGG-16 blocks. net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') net = slim.max_pool2d(net, [2, 2], scope='pool1') # Block 2. net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') net = slim.max_pool2d(net, [2, 2], scope='pool2') # Block 3. net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') net = slim.max_pool2d(net, [2, 2], scope='pool3') # Block 4. net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') feature_layers['conv4'] = net net = slim.max_pool2d(net, [2, 2], scope='pool4') # Block 5. net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5') net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5') # Additional SSD blocks. # Block 6: let's dilate the hell out of it! net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6') # Block 7: 1x1 conv. Because the f**k. net = slim.conv2d(net, 1024, [1, 1], scope='conv7') feature_layers['conv7'] = net # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts). end_point = 'conv8' with tf.variable_scope(end_point): net = slim.conv2d(net, 256, [1, 1], scope='conv1x1') net = custom_layers.pad2d(net, pad=(1, 1)) net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID') feature_layers['conv8'] = net end_point = 'conv9' with tf.variable_scope(end_point): net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') net = custom_layers.pad2d(net, pad=(1, 1)) net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID') feature_layers['conv9'] = net end_point = 'conv10' with tf.variable_scope(end_point): net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID') feature_layers['conv10'] = net end_point = 'conv11' with tf.variable_scope(end_point): net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID') feature_layers['conv11'] = net localisations, logits = text_detect_net(feature_layers, feat_layers) return localisations, logits, feature_layers

def ssd_net(inputs,                                               # define the SSD network structure
            num_classes=SSDNet.default_params.num_classes,        # number of classes
            feat_layers=SSDNet.default_params.feat_layers,        # feature layers
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,  # normalization flags
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition."""
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))

    # End_points collect relevant activations for external use.
    end_points = {}  # collects the output of every layer
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        # First VGG-16 conv block: 3x3 conv repeated twice, 64 features.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net  # store the conv1_2 output under name='block1'
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2: 3x3 conv repeated twice, 128 features.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net  # conv2_2 output, name='block2'
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3: 3x3 conv repeated three times, 256 features.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net  # conv3_3 output, name='block3'
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4: 3x3 conv repeated three times, 512 features.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net  # conv4_3 output, name='block4'
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5: 3x3 conv repeated three times, 512 features.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net  # conv5_3 output, name='block5'
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks (VGG's fully-connected layers are dropped).
        # Block 6: dilated (atrous) convolution on top of the last VGG pooling result.
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net  # conv6 output, name='block6'
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob,
                                training=is_training)  # dropout layer; rate = drop probability
        # Block 7: 1x1 conv on the dropped-out features, 1024 outputs, name='block7'.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob,
                                training=is_training)  # another dropout after the convolution

        # Blocks 8/9/10/11: 1x1 and 3x3 convolutions, stride 2 (except the last ones).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            # 1x1 conv then 3x3 stride-2 conv; 512 output maps, name='block8'.
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            # 1x1 conv then 3x3 stride-2 conv; 256 output maps, name='block9'.
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            # 1x1 conv then 3x3 conv; 256 output maps, name='block10'.
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            # 1x1 conv then 3x3 conv; 256 output maps, name='block11'.
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net

        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):      # iterate over the feature layers
            with tf.variable_scope(layer + '_box'):  # open a naming scope
                # Multi-scale box prediction on this feature layer: returns the
                # class scores p and the locations l of every prior box in every cell.
                p, l = ssd_multibox_layer(
                    end_points[layer],
                    num_classes,        # number of classes
                    anchor_sizes[i],    # prior-box scales (shared within one feature map)
                    anchor_ratios[i],   # prior-box aspect ratios
                    normalizations[i])  # per-layer normalization flag; currently only
                                        # the first feature map is normalized
            # Collect the predictions of every layer.
            predictions.append(prediction_fn(p))  # prediction_fn is softmax: class probabilities
            logits.append(p)                      # raw class scores per prior box
            localisations.append(l)               # predicted locations
        # Return the class predictions, locations, raw class scores and feature layers.
        return predictions, localisations, logits, end_points
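
# A minimal, hypothetical driver for the 300x300 variant above. The arg_scope
# contents are illustrative; in the repository the SSDNet class normally
# supplies the full arg_scope (regularizers, initializers, data format).
import tensorflow as tf
import tensorflow.contrib.slim as slim

images = tf.placeholder(tf.float32, [None, 300, 300, 3], name='images')
with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu,
                    weights_regularizer=slim.l2_regularizer(0.0005)):
    predictions, localisations, logits, end_points = ssd_net(images, is_training=False)
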
def ssd_net(inputs, num_classes, feat_layers, normalizations, is_training, dropout_keep_prob, prediction_fn, reuse, scope): # End_points collect relevant activations for external use. end_points = {} with tf.variable_scope(scope, 'ssd_640_vgg', [inputs], reuse=reuse): # Original VGG-16 blocks. print("nnnn-block1 begin") net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='r2_crcr1') end_points['block1'] = net print("uuuu-block1 end") print("nnnn-block2 begin") net = slim.max_pool2d(net, [2, 2], scope='bbpool1') net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='r2_crcr2') end_points['block2'] = net print("uuuu-block2 end") print("nnnn-block3 begin") net = slim.max_pool2d(net, [2, 2], scope='ddpool2') net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='r3_crcr3') end_points['block3'] = net print("uuuu-block3 end") print("nnnn-block4 begin") net = slim.max_pool2d(net, [2, 2], scope='ffpool3') net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='r3_crcr4') end_points['block4'] = net print("uuuu-block4 end") print("nnnn-block5 begin") net = slim.max_pool2d(net, [2, 2], scope='hhpool4') # rate as `[dilation]`/`pad` in prototxt?, if is `[dilation]` then set rate=1 net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], rate=1, scope='r3_crcr5') end_points['block5'] = net print("uuuu-block5 end") print("nnnn-block6 begin") # pool5: kernel_size: 3->2, stride: 1->2, +pad:1,, where to put `pad:1`? net = slim.max_pool2d(net, [2, 2], stride=2, scope='jjpool5') net = slim.conv2d(net, 1024, [3, 3], rate=1, scope='kkfc6') end_points['block6'] = net print("uuuu-block6 end") print("nnnn-block7 begin") net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training) net = slim.conv2d(net, 1024, [1, 1], scope='llfc7') end_points['block7'] = net print("uuuu-block7 end") print("nnnn-block8 begin") # conv61->conv62 net = tf.layers.dropout(net, rate=dropout_keep_prob, training=is_training) end_point = 'block8' with tf.variable_scope(end_point): # paper: 1x1x128 net = slim.conv2d(net, 256, [1, 1], scope='mmconv1x1') net = custom_layers.pad2d(net, pad=(1, 1)) # paper: 3x3x512-s2 net = slim.conv2d(net, 512, [3, 3], stride=2, scope='nnconv3x3', padding='VALID') end_points[end_point] = net print("uuuu-block8 end") print("nnnn-block9 begin") end_point = 'block9' # conv71->conv72 with tf.variable_scope(end_point): # paper: 1x1x128 net = slim.conv2d(net, 128, [1, 1], scope='ooconv1x1') net = custom_layers.pad2d(net, pad=(1, 1)) # paper: 3x3x256-s2 net = slim.conv2d(net, 256, [3, 3], stride=2, scope='ppconv3x3', padding='VALID') end_points[end_point] = net print("uuuu-block9 end") # Prediction and localisations layers. predictions = [] logits = [] localisations = [] addn = 1 for i, layer in enumerate(feat_layers): with tf.variable_scope(layer + '_box'): print("nnnn-begin process----" + layer + '_box') p, l = MultiboxLayer(addn, end_points[layer], num_classes, normalizations[i]) addn = 0 print("uuuu-end process----" + layer + '_box') predictions.append(prediction_fn(p)) logits.append(p) localisations.append(l) print("[final end]") return predictions, localisations, logits, end_points
def __additional_ssd_block(self, end_points, net): # Additional SSD blocks. # Block 6: let's dilate the hell out of it! net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6') net = slim.batch_norm(net) net = self.__dropout(net) end_points['block6'] = net # Block 7: 1x1 conv. Because the f**k. net = slim.conv2d(net, 1024, [1, 1], scope='conv7') net = slim.batch_norm(net) net = self.__dropout(net) end_points['block7'] = net # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts). end_point = 'block8' with tf.variable_scope(end_point): net = slim.conv2d(net, 256, [1, 1], scope='conv1x1') net = slim.batch_norm(net) net = custom_layers.pad2d(net, pad=(1, 1)) net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID') net = slim.batch_norm(net) end_points[end_point] = net end_point = 'block9' with tf.variable_scope(end_point): net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') net = slim.batch_norm(net) net = custom_layers.pad2d(net, pad=(1, 1)) net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID') net = slim.batch_norm(net) end_points[end_point] = net end_point = 'block10' with tf.variable_scope(end_point): net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') net = slim.batch_norm(net) net = custom_layers.pad2d(net, pad=(1, 1)) net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID') net = slim.batch_norm(net) end_points[end_point] = net end_point = 'block11' with tf.variable_scope(end_point): net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') net = slim.batch_norm(net) net = custom_layers.pad2d(net, pad=(1, 1)) net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID') net = slim.batch_norm(net) end_points[end_point] = net end_point = 'block12' with tf.variable_scope(end_point): net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') net = slim.batch_norm(net) net = custom_layers.pad2d(net, pad=(1, 1)) net = slim.conv2d(net, 256, [4, 4], scope='conv4x4', padding='VALID') net = slim.batch_norm(net) end_points[end_point] = net # Prediction and localisations layers. predictions = [] logits = [] localisations = [] for i, layer in enumerate(self.feat_layers): with tf.variable_scope(layer + '_box'): p, l = self.ssd_multibox_layer(end_points[layer], self.num_classes, self.anchor_sizes[i], self.anchor_ratios[i], self.normalizations[i]) predictions.append(slim.softmax(p)) logits.append(p) localisations.append(l) return predictions, localisations, logits, end_points

def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition."""
    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Base VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')  # 150*150*64
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')  # 75*75*128
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')  # 38*38*256
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')  # 19*19*512
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')  # 19*19*512

        # Added SSD blocks.
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob,
                                training=is_training)  # rate = drop probability
        # Block 7: 1x1 conv.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob, training=is_training)

        # Blocks 8/9/10/11: 1x1 and 3x3 convolutions, stride 2 (except the last ones).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net

        # Prediction and localisation layers: one prediction per feature map.
        # The box counts, sizes and aspect ratios of every feature map are
        # fixed in advance by the default parameters.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                pred, loc = ssd_multibox_layer(end_points[layer], num_classes,
                                               anchor_sizes[i], anchor_ratios[i],
                                               normalizations[i])
            predictions.append(prediction_fn(pred))
            logits.append(pred)
            localisations.append(loc)
        return predictions, localisations, logits, end_points
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_512_vgg'):
    """SSD net definition."""
    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_512_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: 3x3 conv with dilation rate 6 (enlarged receptive field).
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        # Block 7: 1x1 conv.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        # Blocks 8/9/10/11/12: 1x1 and 3x3 convolutions, stride 2 (except the last one).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block12'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [4, 4], scope='conv4x4', padding='VALID')
            # Fix padding to match the Caffe version (pad=1).
            # pad_shape = [(i - j) for i, j in zip(layer_shape(net), [0, 1, 1, 0])]
            # net = tf.slice(net, [0, 0, 0, 0], pad_shape, name='caffe_pad')
        end_points[end_point] = net

        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_vgg_300.ssd_multibox_layer(end_points[layer],
                                                      num_classes,
                                                      anchor_sizes[i],
                                                      anchor_ratios[i],
                                                      normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)
        return predictions, localisations, logits, end_points
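The pad2d + 'VALID' pattern used throughout blocks 8 to 12 deserves a note: TensorFlow's 'SAME' padding is asymmetric (the extra row/column goes to the bottom-right), while Caffe's pad=1 is symmetric, so explicit padding is what lets Caffe-trained weights line up. A minimal sketch, assuming custom_layers.pad2d is symmetric zero padding on the height/width axes of an NHWC tensor (pad2d_sketch below is a hypothetical stand-in, not the project's helper):

import tensorflow as tf

def pad2d_sketch(x, pad=(1, 1)):
    # Assumed behaviour of custom_layers.pad2d: symmetric zero padding
    # on the height/width axes of an NHWC tensor.
    return tf.pad(x, [[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])

x = tf.placeholder(tf.float32, (1, 64, 64, 128))
y = tf.layers.conv2d(pad2d_sketch(x), 256, 3, strides=2, padding='valid')
print(y.get_shape().as_list())   # [1, 32, 32, 256]
# 'SAME' with stride 2 yields the same 32x32 output size, but for even
# inputs it pads only on the bottom/right, shifting features by one pixel
# relative to Caffe's symmetric pad=1. Explicit padding reproduces the
# Caffe layout, which matters when porting pretrained weights.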
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_512_vgg'):
    """SSD net definition."""
    end_points = {}
    with tf.variable_scope(scope, 'ssd_512_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        # 3x3 pool with stride 1, as in the definitions above: keeps the
        # resolution unchanged for the dilated conv6 that follows.
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: rate=6 is the dilation (atrous) rate; see the sketch
        # after this function.
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        # Block 7.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        # Blocks 8/9/10/11/12: 1x1 and 3x3 convolutions with stride 2.
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block12'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net

        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer],
                                          num_classes,
                                          anchor_sizes[i],
                                          anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)
        return predictions, localisations, logits, end_points
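On the rate=6 argument to conv6: in slim.conv2d, rate is the dilation (atrous) rate. The nine taps of the 3x3 kernel are sampled 6 pixels apart, so the effective receptive field grows to k + (k - 1)(rate - 1) = 3 + 2 * 5 = 13 per axis, while the parameter count and the output resolution stay those of a plain 3x3 convolution. A small sketch using the underlying TensorFlow op (shapes are illustrative):

import numpy as np
import tensorflow as tf

x = tf.constant(np.zeros((1, 32, 32, 1), np.float32))
w = tf.constant(np.ones((3, 3, 1, 1), np.float32))
# Dilated convolution: 3x3 taps spaced 6 pixels apart give an effective
# 13x13 receptive field with only 9 weights; 'SAME' keeps the resolution.
y = tf.nn.atrous_conv2d(x, w, rate=6, padding='SAME')
print(y.get_shape().as_list())   # [1, 32, 32, 1]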