def LR_ASPP(self, x1, x2, num, it=True):
    '''Lite R-ASPP segmentation head.

    Args:
        x1: features at 1/16 resolution, for the upper stream.
        x2: features at 1/8 resolution, for the bottom stream.
        num: number of labels.
        it: is_training.
    '''
    with tf.variable_scope("seg_head"):
        # upper stream
        _, H, W, _ = x1.get_shape()
        x11 = conv_bn_relu6('conv1', x1, 128, (1, 1), (1, 1), is_training=it, nl='RE')
        pool1 = L.pool('pool', x1, (49, 49), (16, 20), method='avg')
        x12 = L.conv('conv2', pool1, 128, (1, 1), (1, 1))
        sig = L.sigmoid('sigmoid', x12)
        up1 = L.interp('up1', sig, (H, W))
        fused = x11 * up1
        up2 = L.interp('up2', fused, (2 * H, 2 * W))
        x13 = L.conv('conv3', up2, num, (1, 1), (1, 1))
        # bottom stream
        x21 = L.conv('conv4', x2, num, (1, 1), (1, 1))
        seg = x13 + x21
        return seg
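A minimal usage sketch, assuming `model` is an instance of the class defining `LR_ASPP` and that the backbone exposes 1/16- and 1/8-resolution feature maps (the tensor shapes and the 19-class setting are illustrative assumptions, not part of this repo):

    # Hedged sketch: feed 1/16 and 1/8 backbone features into the head.
    x_1_16 = tf.placeholder(tf.float32, [None, 64, 128, 160])  # 1/16 features (assumed shape)
    x_1_8 = tf.placeholder(tf.float32, [None, 128, 256, 40])   # 1/8 features (assumed shape)
    seg_logits = model.LR_ASPP(x_1_16, x_1_8, num=19, it=True) # e.g. 19 classes
    # seg_logits comes out at 1/8 resolution; a final interp back to the input
    # size would precede the pixel-wise loss.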
def large(self, inputs, is_training=True, reuse=False):
    with tf.variable_scope(self.name + '_large', reuse=reuse,
                           initializer=self.weight_init, regularizer=self.reg):
        # ksize,  exp_size, out_size, SE,  NL,  strides
        # stem: 224x224x3 -> 112x112x16
        BNETS = [
            [(3, 3), 16,  16,  'F', 'RE', 1],  # 112x112x16 -> 112x112x16
            [(3, 3), 64,  24,  'F', 'RE', 2],  # 112x112x16 -> 56x56x24
            [(3, 3), 72,  24,  'F', 'RE', 1],  # 56x56x24   -> 56x56x24
            [(5, 5), 72,  40,  'T', 'RE', 2],  # 56x56x24   -> 28x28x40
            [(5, 5), 120, 40,  'T', 'RE', 1],  # 28x28x40   -> 28x28x40
            [(5, 5), 120, 40,  'T', 'RE', 1],  # 28x28x40   -> 28x28x40
            [(3, 3), 240, 80,  'F', 'HS', 2],  # 28x28x40   -> 14x14x80
            [(3, 3), 200, 80,  'F', 'HS', 1],  # 14x14x80   -> 14x14x80
            [(3, 3), 184, 80,  'F', 'HS', 1],  # 14x14x80   -> 14x14x80
            [(3, 3), 184, 80,  'F', 'HS', 1],  # 14x14x80   -> 14x14x80
            [(3, 3), 480, 112, 'T', 'HS', 1],  # 14x14x80   -> 14x14x112
            [(3, 3), 672, 112, 'T', 'HS', 1],  # 14x14x112  -> 14x14x112
            [(5, 5), 672, 160, 'T', 'HS', 2],  # 14x14x112  -> 7x7x160
            [(5, 5), 960, 160, 'T', 'HS', 1],  # 7x7x160    -> 7x7x160
            [(5, 5), 960, 160, 'T', 'HS', 1],  # 7x7x160    -> 7x7x160
        ]
        x = conv_bn_relu6('conv1', inputs, 16, (3, 3), (2, 2), is_training=is_training, nl='HS')
        for idx, (ksize, exp_size, out_size, se, nl, strides) in enumerate(BNETS):
            name = "bneck{}".format(idx + 1)
            x = bneck(name, x, ksize, exp_size, out_size, se, nl, strides, is_training)
        x = conv_bn_relu6('conv2', x, 960, (1, 1), (1, 1), is_training=is_training, nl='HS')
        x = L.global_avg_pool('gap', x)
        x = L.conv('conv3', x, 1280, (1, 1), (1, 1))
        x = L.hswish('conv3/hswish', x)
        x = L.dropout('dropout', x, 0.2, is_training=is_training)
        x = L.conv('conv4', x, self.num_labels, (1, 1), (1, 1))
        x = tf.squeeze(x, [1, 2])
        return x
def small(self, inputs, is_training=True, reuse=False):
    with tf.variable_scope(self.name + '_small', reuse=reuse,
                           initializer=self.weight_init, regularizer=self.reg):
        # ksize,  exp_size, out_size, SE,  NL,  strides
        # stem: 224x224x3 -> 112x112x16
        BNETS = [
            [(3, 3), 16,  16, 'T', 'RE', 2],  # 112x112x16 -> 56x56x16
            [(3, 3), 72,  24, 'F', 'RE', 2],  # 56x56x16   -> 28x28x24
            [(3, 3), 88,  24, 'F', 'RE', 1],  # 28x28x24   -> 28x28x24
            [(5, 5), 96,  40, 'T', 'HS', 2],  # 28x28x24   -> 14x14x40
            [(5, 5), 240, 40, 'T', 'HS', 1],  # 14x14x40   -> 14x14x40
            [(5, 5), 240, 40, 'T', 'HS', 1],  # 14x14x40   -> 14x14x40
            [(5, 5), 120, 48, 'T', 'HS', 1],  # 14x14x40   -> 14x14x48
            [(5, 5), 144, 48, 'T', 'HS', 1],  # 14x14x48   -> 14x14x48
            [(5, 5), 288, 96, 'T', 'HS', 2],  # 14x14x48   -> 7x7x96
            [(5, 5), 576, 96, 'T', 'HS', 1],  # 7x7x96     -> 7x7x96
            [(5, 5), 576, 96, 'T', 'HS', 1],  # 7x7x96     -> 7x7x96
        ]
        x = conv_bn_relu6('conv1', inputs, 16, (3, 3), (2, 2), is_training=is_training, nl='HS')
        for idx, (ksize, exp_size, out_size, se, nl, strides) in enumerate(BNETS):
            name = "bneck{}".format(idx + 1)
            x = bneck(name, x, ksize, exp_size, out_size, se, nl, strides, is_training)
        x = conv_bn_relu6('conv2', x, 576, (1, 1), (1, 1), is_training=is_training, nl='HS')
        x = L.global_avg_pool('gap', x)
        x = L.conv('conv3', x, 1024, (1, 1), (1, 1))
        # h-swish before dropout, matching large() and the final stage of MobileNetV3
        x = L.hswish('conv3/hswish', x)
        x = L.dropout('dropout', x, 0.2, is_training=is_training)
        x = L.conv('conv4', x, self.num_labels, (1, 1), (1, 1))
        x = tf.squeeze(x, [1, 2])
        return x
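A minimal usage sketch for the two classifier variants. The enclosing class name `MobileNetV3` and its constructor arguments are assumptions, since the constructor is not shown in this section:

    # Hedged sketch: build the graph and a classification loss.
    images = tf.placeholder(tf.float32, [None, 224, 224, 3])
    labels = tf.placeholder(tf.int64, [None])
    net = MobileNetV3('mnv3', num_labels=1000)  # hypothetical constructor
    logits = net.large(images, is_training=True)  # or net.small(images, ...)
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))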
def darknet53_body(inputs, trainable):
    def res_block(inputs, filters, name):
        # filters = half of the input's channel count
        cut_across = inputs
        net = conv(inputs, [1, 1, filters], [1, 1, 1, 1], name + "/conv_1", trainable=trainable)
        net = conv(net, [3, 3, filters * 2], [1, 1, 1, 1], name + "/conv_2", trainable=trainable)
        return net + cut_across

    def res_operator(inputs, filters, num_of_res, name):
        # stride-2 downsampling conv followed by num_of_res residual blocks
        net = conv(inputs, [3, 3, 2 * filters], [1, 2, 2, 1], name + "/stride2_conv", trainable=trainable)
        for i in range(num_of_res):
            net = res_block(net, filters, name + "/" + str(i + 1))
        return net

    net = conv(inputs, [3, 3, 32], [1, 1, 1, 1], "1_conv", trainable=trainable)
    net = res_operator(net, 32, 1, "res_block1")
    net = res_operator(net, 64, 2, "res_block2")
    net1 = res_operator(net, 128, 8, "res_block3")
    net2 = res_operator(net1, 256, 8, "res_block4")
    net3 = res_operator(net2, 512, 4, "res_block5")
    return net1, net2, net3
def conv_bn_drop_relu6(name_scope, x, out_size, ksize, strides, padding="SAME",
                       bias=False, is_training=True, trainable=True,
                       weight_init=None, bias_init=tf.zeros_initializer(),
                       scale=False, momentum=0.99, regularizer=None, lc=None,
                       rate=0.0, nl='RE'):
    '''Convolution + BatchNorm + Dropout + nonlinearity (ReLU / h-swish / ReLU6).'''
    conv_layer = L.conv(name_scope, x, out_size, ksize, strides, padding, bias,
                        trainable, weight_init, bias_init, regularizer, lc)
    bn_out = L.bn(name_scope, conv_layer, is_training, momentum=momentum, epsilon=1e-5)
    drop_out = L.dropout(name_scope, bn_out, rate, is_training=is_training)
    if nl == 'RE':
        return L.relu(name_scope, drop_out)
    elif nl == 'HS':
        return L.hswish(name_scope, drop_out)
    elif nl == 'R6':
        return L.relu6(name_scope, drop_out)
    else:
        raise NotImplementedError("nonlinear layer {} is not implemented yet.".format(nl))
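For instance, a hypothetical call matching the signature above:

    # e.g. a 3x3, stride-1 block with h-swish and 10% dropout during training
    y = conv_bn_drop_relu6('block1', x, 64, (3, 3), (1, 1),
                           is_training=True, rate=0.1, nl='HS')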
def yolo_fpn_head(nets, trainable):
    # nets: a list of backbone feature maps, ordered from large to small spatial size.
    # Returns the per-scale FPN feature maps.
    def yolo_block(inputs, filters, name):
        net = conv(inputs, [1, 1, filters], [1, 1, 1, 1], name + "/1_conv", trainable=trainable)
        net = conv(net, [3, 3, filters * 2], [1, 1, 1, 1], name + "/2_conv", trainable=trainable)
        net = conv(net, [1, 1, filters], [1, 1, 1, 1], name + "/3_conv", trainable=trainable)
        net = conv(net, [3, 3, filters * 2], [1, 1, 1, 1], name + "/4_conv", trainable=trainable)
        net = conv(net, [1, 1, filters], [1, 1, 1, 1], name + "/5_conv", trainable=trainable)
        return net

    num_of_nets = len(nets)
    last_net = None
    filters_iter = 512
    fpn_maps = []
    # Walk from the smallest (deepest) map to the largest, upsampling the
    # previous scale and concatenating it at each step.
    for i in range(num_of_nets - 1, -1, -1):
        current_net = nets[i]
        if last_net is not None:
            shape = current_net.get_shape().as_list()
            last_net = conv(last_net, [1, 1, filters_iter], [1, 1, 1, 1],
                            "before_yb_conv_" + str(num_of_nets - i - 1), trainable=trainable)
            last_net = upsample(last_net, (shape[1], shape[2]))
            current_net = tf.concat([last_net, current_net], axis=3)
        current_net = yolo_block(current_net, filters_iter, "yolo_block" + str(num_of_nets - i))
        last_net = current_net
        net = conv(current_net, [3, 3, filters_iter * 2], [1, 1, 1, 1],
                   str(i) + "_fp_final_3conv", trainable=trainable)
        fpn_maps.append(net)
        filters_iter = int(filters_iter / 2)
    return fpn_maps
def squeeze_excite(name_scope, x, reduction=4):
    '''Squeeze-and-Excite: channel-wise attention from globally pooled features.'''
    with tf.variable_scope(name_scope):
        in_size = x.get_shape()[-1]
        gap = L.global_avg_pool('gap', x)
        conv1_relu1 = L.conv_relu('squeeze', gap, in_size // reduction, (1, 1), (1, 1))
        conv2 = L.conv('excite', conv1_relu1, in_size, (1, 1), (1, 1))
        hsigmoid1 = L.hsigmoid('hsigmoid', conv2)
        return x * hsigmoid1
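`bneck` is called by `large`/`small` above but not shown in this section. Below is a minimal sketch of a MobileNetV3 bottleneck built from the primitives defined here; the `L.depthwise_conv` call is an assumed API, and the repo's real `bneck` may differ in detail:

    def bneck_sketch(name_scope, x, ksize, exp_size, out_size, se, nl, strides, is_training):
        # Sketch: 1x1 expand -> depthwise conv -> optional squeeze-excite ->
        # 1x1 project, with a residual connection when shapes match.
        with tf.variable_scope(name_scope):
            in_size = x.get_shape().as_list()[-1]
            net = conv_bn_relu6('expand', x, exp_size, (1, 1), (1, 1),
                                is_training=is_training, nl=nl)
            net = L.depthwise_conv('depthwise', net, ksize, (strides, strides))  # assumed API
            net = L.bn('depthwise', net, is_training)
            net = L.hswish('dw_nl', net) if nl == 'HS' else L.relu('dw_nl', net)
            if se == 'T':
                net = squeeze_excite('se', net)
            net = L.conv('project', net, out_size, (1, 1), (1, 1))
            net = L.bn('project', net, is_training)
            if strides == 1 and in_size == out_size:
                net = net + x  # residual connection
            return net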
def yolo_regression(fpn_maps, class_num, trainable):
    # Each scale predicts 3 anchors x (objectness + 4 box coords + class scores).
    reg_dim = 3 * (1 + 4 + class_num)

    def reshape_reg(input_reg):
        shape = input_reg.get_shape()
        final_dim = int(reg_dim / 3)
        return tf.reshape(input_reg, (-1, shape[1], shape[2], 3, final_dim))

    feature_maps = []
    for i, net in enumerate(fpn_maps):
        net = conv(net, [1, 1, reg_dim], [1, 1, 1, 1], str(2 - i) + "_fp_final_1conv",
                   bn=False, leakyrelu=False, bias=True, trainable=trainable)
        net = reshape_reg(net)
        feature_maps.append(net)
    return feature_maps
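Putting the Darknet-53 pieces together; a minimal end-to-end sketch, assuming `conv` and `upsample` are the repo helpers used above and that the input size of 416x416 and COCO's 80 classes are illustrative choices:

    # Hedged sketch: backbone -> FPN head -> per-scale regression.
    images = tf.placeholder(tf.float32, [None, 416, 416, 3])
    net1, net2, net3 = darknet53_body(images, trainable=True)      # strides 8/16/32
    fpn_maps = yolo_fpn_head([net1, net2, net3], trainable=True)   # big -> small order
    preds = yolo_regression(fpn_maps, class_num=80, trainable=True)
    # Each element of preds has shape [batch, H, W, 3, 5 + class_num]:
    # 3 anchors per cell, each with box offsets, objectness, and class scores.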