def tiny_yolo3_efficientnet_body(inputs, num_anchors, num_classes, level=0):
    '''
    Build the Tiny YOLO_v3 network on top of an EfficientNet backbone.

    # Arguments
        inputs: input tensor handed to the backbone builder and to Model.
        num_anchors: anchors per output; every prediction conv emits
            num_anchors * (num_classes + 5) channels.
        num_classes: number of object classes.
        level: EfficientNet level number, forwarded to
            get_efficientnet_backbone_info (default 0, i.e. EfficientNetB0).

    # Returns
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    '''
    backbone, channel_info = get_efficientnet_backbone_info(inputs, level=level)
    pred_filters = num_anchors * (num_classes + 5)

    # Backbone taps: the final activation feeds the first head, the
    # intermediate activation is concatenated into the second head.
    skip_feature = backbone.get_layer('block6a_expand_activation').output
    top_feature = backbone.get_layer('top_activation').output

    top_channels = channel_info['f1_channel_num']
    skip_channels = channel_info['f2_channel_num']

    # 1x1 bottleneck shared by both heads.
    neck = DarknetConv2D_BN_Leaky(top_channels // 2, (1, 1))(top_feature)

    # First prediction head.
    first_head = compose(
        DarknetConv2D_BN_Leaky(top_channels, (3, 3)),
        DarknetConv2D(pred_filters, (1, 1)))
    y1 = first_head(neck)

    # Reduce and upsample the neck before merging it with the skip feature.
    upsample_path = compose(
        DarknetConv2D_BN_Leaky(skip_channels // 2, (1, 1)),
        UpSampling2D(2))
    upsampled = upsample_path(neck)

    # Second prediction head on the concatenated features.
    second_head = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(skip_channels, (3, 3)),
        DarknetConv2D(pred_filters, (1, 1)))
    y2 = second_head([upsampled, skip_feature])

    return Model(inputs, [y1, y2])
def tiny_yolo3lite_shufflenetv2_body(inputs, num_anchors, num_classes):
    '''Build the Tiny YOLO_v3 Lite network on top of a ShuffleNetV2 backbone.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every prediction conv emits
            num_anchors * (num_classes + 5) channels.
        num_classes: number of object classes.

    # Returns
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    '''
    backbone = ShuffleNetV2(input_tensor=inputs, weights=None, include_top=False)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   1x1conv5_out:                 13 x 13 x 1024
    #   stage4/block1/relu_1x1conv_1: 26 x 26 x 464
    #   stage3/block1/relu_1x1conv_1: 52 x 52 x 232
    skip_feature = backbone.get_layer('stage4/block1/relu_1x1conv_1').output
    top_feature = backbone.get_layer('1x1conv5_out').output

    # 1x1 bottleneck shared by both heads.
    neck = DarknetConv2D_BN_Leaky(464, (1, 1))(top_feature)

    # First prediction head (depthwise separable 3x3 + 1x1 prediction conv).
    first_head = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=1024,
                                            kernel_size=(3, 3),
                                            block_id_str='17'),
        DarknetConv2D(pred_filters, (1, 1)))
    y1 = first_head(neck)

    # Reduce and upsample the neck before merging it with the skip feature.
    upsample_path = compose(DarknetConv2D_BN_Leaky(232, (1, 1)),
                            UpSampling2D(2))
    upsampled = upsample_path(neck)

    # Second prediction head on the concatenated features.
    second_head = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=464,
                                            kernel_size=(3, 3),
                                            block_id_str='18'),
        DarknetConv2D(pred_filters, (1, 1)))
    y2 = second_head([upsampled, skip_feature])

    return Model(inputs, [y1, y2])
def tiny_yolo3lite_body(inputs, num_anchors, num_classes):
    '''Build the Tiny YOLO_v3 Lite network with its own small conv backbone.

    # Arguments
        inputs: input tensor of the network.
        num_anchors: anchors per output; every prediction conv emits
            num_anchors * (num_classes + 5) channels.
        num_classes: number of object classes.

    # Returns
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    '''
    pred_filters = num_anchors * (num_classes + 5)

    # Stem: four (separable conv -> 2x2/stride-2 max pool) stages followed by
    # a 256-filter separable conv. Layers are created in the same order as
    # they are applied.
    stem_layers = []
    for stage_filters in (16, 32, 64, 128):
        stem_layers.append(
            Depthwise_Separable_Conv2D_BN_Leaky(stage_filters, (3, 3)))
        stem_layers.append(
            MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))
    stem_layers.append(Depthwise_Separable_Conv2D_BN_Leaky(256, (3, 3)))
    skip_feature = compose(*stem_layers)(inputs)

    # Continuation of the backbone, ending in a 1x1 bottleneck. The second
    # max pool uses stride 1.
    tail = compose(
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        Depthwise_Separable_Conv2D_BN_Leaky(512, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same'),
        Depthwise_Separable_Conv2D_BN_Leaky(1024, (3, 3)),
        DarknetConv2D_BN_Leaky(256, (1, 1)))
    neck = tail(skip_feature)

    # First prediction head.
    first_head = compose(Depthwise_Separable_Conv2D_BN_Leaky(512, (3, 3)),
                         DarknetConv2D(pred_filters, (1, 1)))
    y1 = first_head(neck)

    # Reduce and upsample the neck before merging it with the stem output.
    upsample_path = compose(DarknetConv2D_BN_Leaky(128, (1, 1)),
                            UpSampling2D(2))
    upsampled = upsample_path(neck)

    # Second prediction head on the concatenated features.
    second_head = compose(Concatenate(),
                          Depthwise_Separable_Conv2D_BN_Leaky(256, (3, 3)),
                          DarknetConv2D(pred_filters, (1, 1)))
    y2 = second_head([upsampled, skip_feature])

    return Model(inputs, [y1, y2])
def yolo3lite_spp_shufflenetv2_body(inputs, num_anchors, num_classes):
    '''Build the YOLO_v3 Lite SPP network on top of a ShuffleNetV2 backbone.

    NOTE(review): this file contains a second definition with the same name
    further down (its first head uses 512 filters instead of 464). If both
    live in one module the later definition shadows this one - confirm which
    is intended.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every head is asked for
            num_anchors * (num_classes + 5) output filters.
        num_classes: number of object classes.

    # Returns
        A Model mapping `inputs` to the three prediction tensors [y1, y2, y3].
    '''
    backbone = ShuffleNetV2(input_tensor=inputs, weights=None, include_top=False)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   1x1conv5_out:                 13 x 13 x 1024
    #   stage4/block1/relu_1x1conv_1: 26 x 26 x 464
    #   stage3/block1/relu_1x1conv_1: 52 x 52 x 232
    deep_feature = backbone.get_layer('1x1conv5_out').output
    mid_feature = backbone.get_layer('stage4/block1/relu_1x1conv_1').output
    shallow_feature = backbone.get_layer('stage3/block1/relu_1x1conv_1').output

    # Scale 1: SPP depthwise-separable head on the deepest feature map. The
    # helper returns the tensor passed on to the next scale and the prediction.
    branch, y1 = make_spp_depthwise_separable_last_layers(
        deep_feature, 464, pred_filters, block_id_str='17')

    # Scale 2: reduce + upsample, merge with the middle feature map.
    branch = compose(DarknetConv2D_BN_Leaky(232, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, mid_feature])
    branch, y2 = make_depthwise_separable_last_layers(
        merged, 232, pred_filters, block_id_str='18')

    # Scale 3: reduce + upsample, merge with the shallowest feature map.
    branch = compose(DarknetConv2D_BN_Leaky(116, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, shallow_feature])
    _, y3 = make_depthwise_separable_last_layers(
        merged, 116, pred_filters, block_id_str='19')

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def tiny_yolo3_mobilenetv2_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''Build the Tiny YOLO_v3 network on top of a MobileNetV2 backbone.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every prediction conv emits
            num_anchors * (num_classes + 5) channels.
        num_classes: number of object classes.
        alpha: passed to MobileNetV2 as `alpha` and used to scale the
            filter counts of the head convolutions.

    # Returns
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    '''
    backbone = MobileNetV2(input_tensor=inputs,
                           weights='imagenet',
                           include_top=False,
                           alpha=alpha)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   out_relu:             13 x 13 x 1280
    #   block_13_expand_relu: 26 x 26 x (576*alpha)
    #   block_6_expand_relu:  52 x 52 x (192*alpha)
    skip_feature = backbone.get_layer('block_13_expand_relu').output
    top_feature = backbone.get_layer('out_relu').output

    # 1x1 bottleneck shared by both heads.
    neck = DarknetConv2D_BN_Leaky(int(576 * alpha), (1, 1))(top_feature)

    # First prediction head.
    first_head = compose(
        DarknetConv2D_BN_Leaky(int(1280 * alpha), (3, 3)),
        DarknetConv2D(pred_filters, (1, 1)))
    y1 = first_head(neck)

    # Reduce and upsample the neck before merging it with the skip feature.
    upsample_path = compose(DarknetConv2D_BN_Leaky(int(288 * alpha), (1, 1)),
                            UpSampling2D(2))
    upsampled = upsample_path(neck)

    # Second prediction head on the concatenated features.
    second_head = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(int(576 * alpha), (3, 3)),
        DarknetConv2D(pred_filters, (1, 1)))
    y2 = second_head([upsampled, skip_feature])

    return Model(inputs, [y1, y2])
def tiny_yolo3lite_mobilenet_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''Build the Tiny YOLO_v3 Lite network on top of a MobileNet backbone.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every prediction conv emits
            num_anchors * (num_classes + 5) channels.
        num_classes: number of object classes.
        alpha: passed to MobileNet as `alpha` and used to scale the
            filter counts of the head convolutions.

    # Returns
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    '''
    backbone = MobileNet(input_tensor=inputs,
                         weights='imagenet',
                         include_top=False,
                         alpha=alpha)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   conv_pw_13_relu: 13 x 13 x (1024*alpha)
    #   conv_pw_11_relu: 26 x 26 x (512*alpha)
    #   conv_pw_5_relu:  52 x 52 x (256*alpha)
    skip_feature = backbone.get_layer('conv_pw_11_relu').output
    top_feature = backbone.get_layer('conv_pw_13_relu').output

    # 1x1 bottleneck shared by both heads.
    neck = DarknetConv2D_BN_Leaky(int(512 * alpha), (1, 1))(top_feature)

    # First prediction head (depthwise separable 3x3 + 1x1 prediction conv).
    first_head = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=int(1024 * alpha),
                                            kernel_size=(3, 3),
                                            block_id_str='14'),
        DarknetConv2D(pred_filters, (1, 1)))
    y1 = first_head(neck)

    # Reduce and upsample the neck before merging it with the skip feature.
    upsample_path = compose(DarknetConv2D_BN_Leaky(int(256 * alpha), (1, 1)),
                            UpSampling2D(2))
    upsampled = upsample_path(neck)

    # Second prediction head on the concatenated features.
    second_head = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=int(512 * alpha),
                                            kernel_size=(3, 3),
                                            block_id_str='15'),
        DarknetConv2D(pred_filters, (1, 1)))
    y2 = second_head([upsampled, skip_feature])

    return Model(inputs, [y1, y2])
def tiny_yolo3_xception_body(inputs, num_anchors, num_classes):
    '''Build the Tiny YOLO_v3 network on top of an Xception backbone.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every prediction conv emits
            num_anchors * (num_classes + 5) channels.
        num_classes: number of object classes.

    # Returns
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    '''
    backbone = Xception(input_tensor=inputs,
                        weights='imagenet',
                        include_top=False)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   block14_sepconv2_act:                   13 x 13 x 2048
    #   block13_sepconv2_bn (middle of block13): 26 x 26 x 1024
    #   add_46 (end of block12):                26 x 26 x 728
    #   block4_sepconv2_bn (middle of block4):  52 x 52 x 728
    #   add_37 (end of block3):                 52 x 52 x 256
    skip_feature = backbone.get_layer('block13_sepconv2_bn').output
    top_feature = backbone.get_layer('block14_sepconv2_act').output

    # 1x1 bottleneck shared by both heads.
    neck = DarknetConv2D_BN_Leaky(1024, (1, 1))(top_feature)

    # First prediction head.
    first_head = compose(DarknetConv2D_BN_Leaky(2048, (3, 3)),
                         DarknetConv2D(pred_filters, (1, 1)))
    y1 = first_head(neck)

    # Reduce and upsample the neck before merging it with the skip feature.
    upsample_path = compose(DarknetConv2D_BN_Leaky(512, (1, 1)),
                            UpSampling2D(2))
    upsampled = upsample_path(neck)

    # Second prediction head on the concatenated features.
    second_head = compose(Concatenate(),
                          DarknetConv2D_BN_Leaky(1024, (3, 3)),
                          DarknetConv2D(pred_filters, (1, 1)))
    y2 = second_head([upsampled, skip_feature])

    return Model(inputs, [y1, y2])
def yolo3_shufflenetv2_body(inputs, num_anchors, num_classes):
    """Build the YOLO_V3 network on top of a ShuffleNetV2 backbone.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every head is asked for
            num_anchors * (num_classes + 5) output filters.
        num_classes: number of object classes.

    # Returns
        A Model mapping `inputs` to the three prediction tensors [y1, y2, y3].
    """
    backbone = ShuffleNetV2(input_tensor=inputs, weights=None, include_top=False)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   1x1conv5_out:                 13 x 13 x 1024
    #   stage4/block1/relu_1x1conv_1: 26 x 26 x 464
    #   stage3/block1/relu_1x1conv_1: 52 x 52 x 232
    deep_feature = backbone.get_layer('1x1conv5_out').output
    mid_feature = backbone.get_layer('stage4/block1/relu_1x1conv_1').output
    shallow_feature = backbone.get_layer('stage3/block1/relu_1x1conv_1').output

    # Scale 1: head on the deepest feature map. The helper returns the tensor
    # passed on to the next scale and the prediction tensor.
    branch, y1 = make_last_layers(deep_feature, 464, pred_filters)

    # Scale 2: reduce + upsample, merge with the middle feature map.
    branch = compose(DarknetConv2D_BN_Leaky(232, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, mid_feature])
    branch, y2 = make_last_layers(merged, 232, pred_filters)

    # Scale 3: reduce + upsample, merge with the shallowest feature map.
    branch = compose(DarknetConv2D_BN_Leaky(116, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, shallow_feature])
    _, y3 = make_last_layers(merged, 116, pred_filters)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3_spp_xception_body(inputs, num_anchors, num_classes):
    """Build the YOLO_V3 SPP network on top of an Xception backbone.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every head is asked for
            num_anchors * (num_classes + 5) output filters.
        num_classes: number of object classes.

    # Returns
        A Model mapping `inputs` to the three prediction tensors [y1, y2, y3].
    """
    backbone = Xception(input_tensor=inputs,
                        weights='imagenet',
                        include_top=False)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   block14_sepconv2_act:                   13 x 13 x 2048
    #   block13_sepconv2_bn (middle of block13): 26 x 26 x 1024
    #   add_46 (end of block12):                26 x 26 x 728
    #   block4_sepconv2_bn (middle of block4):  52 x 52 x 728
    #   add_37 (end of block3):                 52 x 52 x 256
    deep_feature = backbone.get_layer('block14_sepconv2_act').output
    mid_feature = backbone.get_layer('block13_sepconv2_bn').output
    shallow_feature = backbone.get_layer('block4_sepconv2_bn').output

    # Scale 1: SPP head on the deepest feature map. The helper returns the
    # tensor passed on to the next scale and the prediction tensor.
    branch, y1 = make_spp_last_layers(deep_feature, 1024, pred_filters)

    # Scale 2: reduce + upsample, merge with the middle feature map.
    branch = compose(DarknetConv2D_BN_Leaky(512, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, mid_feature])
    branch, y2 = make_last_layers(merged, 512, pred_filters)

    # Scale 3: reduce + upsample, merge with the shallowest feature map.
    branch = compose(DarknetConv2D_BN_Leaky(256, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, shallow_feature])
    _, y3 = make_last_layers(merged, 256, pred_filters)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the YOLO_V3 network on top of a MobileNetV3Large backbone.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every head is asked for
            num_anchors * (num_classes + 5) output filters.
        num_classes: number of object classes.
        alpha: passed to MobileNetV3Large as `alpha` and used to scale the
            head channel counts.

    # Returns
        A Model mapping `inputs` to the three prediction tensors [y1, y2, y3].
    """
    backbone = MobileNetV3Large(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   activation_38 (layer 194, final feature map):      13 x 13 x (960*alpha)
    #   expanded_conv_14/Add (layer 191, end of block14):  13 x 13 x (160*alpha)
    #   activation_29 (layer 146, middle of block12):      26 x 26 x (672*alpha)
    #   expanded_conv_11/Add (layer 143, end of block11):  26 x 26 x (112*alpha)
    #   activation_15 (layer 79, middle of block6):        52 x 52 x (240*alpha)
    #   expanded_conv_5/Add (layer 76, end of block5):     52 x 52 x (40*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so the
    # layers are fetched by index instead of by name.
    deep_feature = backbone.layers[194].output
    mid_feature = backbone.layers[146].output
    shallow_feature = backbone.layers[79].output

    deep_channels = int(960 * alpha)
    mid_channels = int(672 * alpha)
    shallow_channels = int(240 * alpha)

    # Scale 1 head & output (13x13 for 416 input). The helper returns the
    # tensor passed on to the next scale and the prediction tensor.
    branch, y1 = make_last_layers(deep_feature, deep_channels // 2,
                                  pred_filters)

    # Upsample and merge for scales 1 & 2.
    branch = compose(DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, mid_feature])

    # Scale 2 head & output (26x26 for 416 input).
    branch, y2 = make_last_layers(merged, mid_channels // 2, pred_filters)

    # Upsample and merge for scales 2 & 3.
    branch = compose(DarknetConv2D_BN_Leaky(shallow_channels // 2, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, shallow_feature])

    # Scale 3 head & output (52x52 for 416 input).
    _, y3 = make_last_layers(merged, shallow_channels // 2, pred_filters)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3lite_xception_body(inputs, num_anchors, num_classes):
    '''Build the YOLO_v3 Lite network on top of an Xception backbone.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every head is asked for
            num_anchors * (num_classes + 5) output filters.
        num_classes: number of object classes.

    # Returns
        A Model mapping `inputs` to the three prediction tensors [y1, y2, y3].
    '''
    backbone = Xception(input_tensor=inputs,
                        weights='imagenet',
                        include_top=False)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   block14_sepconv2_act:                   13 x 13 x 2048
    #   block13_sepconv2_bn (middle of block13): 26 x 26 x 1024
    #   add_46 (end of block12):                26 x 26 x 728
    #   block4_sepconv2_bn (middle of block4):  52 x 52 x 728
    #   add_37 (end of block3):                 52 x 52 x 256
    deep_feature = backbone.get_layer('block14_sepconv2_act').output
    mid_feature = backbone.get_layer('block13_sepconv2_bn').output
    shallow_feature = backbone.get_layer('block4_sepconv2_bn').output

    # Head widths are fixed at 1024/512/256 here rather than taken from the
    # backbone channel counts listed above (2048/1024/728).
    deep_channels = 1024
    mid_channels = 512
    shallow_channels = 256

    # Scale 1 head & output (13x13 for 416 input). The helper returns the
    # tensor passed on to the next scale and the prediction tensor.
    branch, y1 = make_depthwise_separable_last_layers(
        deep_feature, deep_channels // 2, pred_filters, block_id_str='14')

    # Upsample and merge for scales 1 & 2.
    branch = compose(DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, mid_feature])

    # Scale 2 head & output (26x26 for 416 input).
    branch, y2 = make_depthwise_separable_last_layers(
        merged, mid_channels // 2, pred_filters, block_id_str='15')

    # Upsample and merge for scales 2 & 3.
    branch = compose(DarknetConv2D_BN_Leaky(shallow_channels // 2, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, shallow_feature])

    # Scale 3 head & output (52x52 for 416 input).
    _, y3 = make_depthwise_separable_last_layers(
        merged, shallow_channels // 2, pred_filters, block_id_str='16')

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3_mobilenetv3small_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the YOLO_V3 network on top of a MobileNetV3Small backbone.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every head is asked for
            num_anchors * (num_classes + 5) output filters.
        num_classes: number of object classes.
        alpha: passed to MobileNetV3Small as `alpha` and used to scale the
            head channel counts.

    # Returns
        A Model mapping `inputs` to the three prediction tensors [y1, y2, y3].
    """
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   activation_31 (layer 165, final feature map):      13 x 13 x (576*alpha)
    #   expanded_conv_10/Add (layer 162, end of block10):  13 x 13 x (96*alpha)
    #   activation_22 (layer 117, middle of block8):       26 x 26 x (288*alpha)
    #   expanded_conv_7/Add (layer 114, end of block7):    26 x 26 x (48*alpha)
    #   activation_7 (layer 38, middle of block3):         52 x 52 x (96*alpha)
    #   expanded_conv_2/Add (layer 35, end of block2):     52 x 52 x (24*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so the
    # layers are fetched by index instead of by name.
    deep_feature = backbone.layers[165].output
    mid_feature = backbone.layers[117].output
    shallow_feature = backbone.layers[38].output

    deep_channels = int(576 * alpha)
    mid_channels = int(288 * alpha)
    shallow_channels = int(96 * alpha)

    # Scale 1 head & output (13x13 for 416 input). The helper returns the
    # tensor passed on to the next scale and the prediction tensor.
    branch, y1 = make_last_layers(deep_feature, deep_channels // 2,
                                  pred_filters)

    # Upsample and merge for scales 1 & 2.
    branch = compose(DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, mid_feature])

    # Scale 2 head & output (26x26 for 416 input).
    branch, y2 = make_last_layers(merged, mid_channels // 2, pred_filters)

    # Upsample and merge for scales 2 & 3.
    branch = compose(DarknetConv2D_BN_Leaky(shallow_channels // 2, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, shallow_feature])

    # Scale 3 head & output (52x52 for 416 input).
    _, y3 = make_last_layers(merged, shallow_channels // 2, pred_filters)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3lite_spp_shufflenetv2_body(inputs, num_anchors, num_classes):
    '''Build the YOLO_v3 Lite SPP network on top of a ShuffleNetV2 backbone.

    NOTE(review): this file also contains an earlier definition with the
    same name (its first head uses 464 filters instead of 512). If both live
    in one module this later definition is the one that takes effect -
    confirm that is intended.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every head is asked for
            num_anchors * (num_classes + 5) output filters.
        num_classes: number of object classes.

    # Returns
        A Model mapping `inputs` to the three prediction tensors [y1, y2, y3].
    '''
    backbone = ShuffleNetV2(input_tensor=inputs, weights=None, include_top=False)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   1x1conv5_out:                 13 x 13 x 1024
    #   stage4/block1/relu_1x1conv_1: 26 x 26 x 464
    #   stage3/block1/relu_1x1conv_1: 52 x 52 x 232
    deep_feature = backbone.get_layer('1x1conv5_out').output
    mid_feature = backbone.get_layer('stage4/block1/relu_1x1conv_1').output
    shallow_feature = backbone.get_layer('stage3/block1/relu_1x1conv_1').output

    deep_channels = 1024
    mid_channels = 464
    shallow_channels = 232

    # Scale 1 head & output (13x13 for 416 input), using the SPP variant of
    # the head. The helper returns the tensor passed on to the next scale
    # and the prediction tensor.
    branch, y1 = make_spp_depthwise_separable_last_layers(
        deep_feature, deep_channels // 2, pred_filters, block_id_str='17')

    # Upsample and merge for scales 1 & 2.
    branch = compose(DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, mid_feature])

    # Scale 2 head & output (26x26 for 416 input).
    branch, y2 = make_depthwise_separable_last_layers(
        merged, mid_channels // 2, pred_filters, block_id_str='18')

    # Upsample and merge for scales 2 & 3.
    branch = compose(DarknetConv2D_BN_Leaky(shallow_channels // 2, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, shallow_feature])

    # Scale 3 head & output (52x52 for 416 input).
    _, y3 = make_depthwise_separable_last_layers(
        merged, shallow_channels // 2, pred_filters, block_id_str='19')

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3lite_spp_efficientnet_body(inputs, num_anchors, num_classes, level=3):
    '''
    Build the YOLO_v3 Lite SPP network on top of an EfficientNet backbone.

    # Arguments
        inputs: input tensor handed to the backbone builder and to Model.
        num_anchors: anchors per output; every head is asked for
            num_anchors * (num_classes + 5) output filters.
        num_classes: number of object classes.
        level: EfficientNet level number, forwarded to
            get_efficientnet_backbone_info (default 3, i.e. EfficientNetB3).

    # Returns
        A Model mapping `inputs` to the three prediction tensors [y1, y2, y3].
    '''
    backbone, channel_info = get_efficientnet_backbone_info(inputs, level=level)
    pred_filters = num_anchors * (num_classes + 5)

    # Backbone taps and the channel counts reported for them.
    deep_feature = backbone.get_layer('top_activation').output
    deep_channels = channel_info['f1_channel_num']

    mid_feature = backbone.get_layer('block6a_expand_activation').output
    mid_channels = channel_info['f2_channel_num']

    shallow_feature = backbone.get_layer('block4a_expand_activation').output
    shallow_channels = channel_info['f3_channel_num']

    # Scale 1 head & output (19x19 for 608 input), using the SPP variant of
    # the head. The helper returns the tensor passed on to the next scale
    # and the prediction tensor.
    branch, y1 = make_spp_depthwise_separable_last_layers(
        deep_feature, deep_channels // 2, pred_filters, block_id_str='8')

    # Upsample and merge for scales 1 & 2.
    branch = compose(DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, mid_feature])

    # Scale 2 head & output (38x38 for 608 input).
    branch, y2 = make_depthwise_separable_last_layers(
        merged, mid_channels // 2, pred_filters, block_id_str='9')

    # Upsample and merge for scales 2 & 3.
    branch = compose(DarknetConv2D_BN_Leaky(shallow_channels // 2, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, shallow_feature])

    # Scale 3 head & output (76x76 for 608 input).
    _, y3 = make_depthwise_separable_last_layers(
        merged, shallow_channels // 2, pred_filters, block_id_str='10')

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def tiny_yolo3lite_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''Build the Tiny YOLO_v3 Lite network on a MobileNetV3Large backbone.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every prediction conv emits
            num_anchors * (num_classes + 5) channels.
        num_classes: number of object classes.
        alpha: passed to MobileNetV3Large as `alpha` and used to scale the
            head channel counts.

    # Returns
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    '''
    backbone = MobileNetV3Large(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   activation_38 (layer 194, final feature map):      13 x 13 x (960*alpha)
    #   expanded_conv_14/Add (layer 191, end of block14):  13 x 13 x (160*alpha)
    #   activation_29 (layer 146, middle of block12):      26 x 26 x (672*alpha)
    #   expanded_conv_11/Add (layer 143, end of block11):  26 x 26 x (112*alpha)
    #   activation_15 (layer 79, middle of block6):        52 x 52 x (240*alpha)
    #   expanded_conv_5/Add (layer 76, end of block5):     52 x 52 x (40*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so the
    # layers are fetched by index instead of by name.
    top_feature = backbone.layers[194].output
    skip_feature = backbone.layers[146].output

    top_channels = int(960 * alpha)
    skip_channels = int(672 * alpha)

    # Feature map 1 transform: 1x1 bottleneck shared by both heads.
    neck = DarknetConv2D_BN_Leaky(top_channels // 2, (1, 1))(top_feature)

    # Feature map 1 output (13x13 for 416 input).
    first_head = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=top_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='15'),
        DarknetConv2D(pred_filters, (1, 1)))
    y1 = first_head(neck)

    # Reduce and upsample the neck before merging it with feature map 2.
    upsample_path = compose(
        DarknetConv2D_BN_Leaky(skip_channels // 2, (1, 1)),
        UpSampling2D(2))
    upsampled = upsample_path(neck)

    # Feature map 2 output (26x26 for 416 input).
    second_head = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=skip_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='16'),
        DarknetConv2D(pred_filters, (1, 1)))
    y2 = second_head([upsampled, skip_feature])

    return Model(inputs, [y1, y2])
def tiny_yolo3lite_mobilenetv3small_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''Build the Tiny YOLO_v3 Lite network on a MobileNetV3Small backbone.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every prediction conv emits
            num_anchors * (num_classes + 5) channels.
        num_classes: number of object classes.
        alpha: passed to MobileNetV3Small as `alpha` and used to scale the
            head channel counts.

    # Returns
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    '''
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   activation_31 (layer 165, final feature map):      13 x 13 x (576*alpha)
    #   expanded_conv_10/Add (layer 162, end of block10):  13 x 13 x (96*alpha)
    #   activation_22 (layer 117, middle of block8):       26 x 26 x (288*alpha)
    #   expanded_conv_7/Add (layer 114, end of block7):    26 x 26 x (48*alpha)
    #   activation_7 (layer 38, middle of block3):         52 x 52 x (96*alpha)
    #   expanded_conv_2/Add (layer 35, end of block2):     52 x 52 x (24*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so the
    # layers are fetched by index instead of by name.
    top_feature = backbone.layers[165].output
    skip_feature = backbone.layers[117].output

    top_channels = int(576 * alpha)
    skip_channels = int(288 * alpha)

    # Feature map 1 transform: 1x1 bottleneck shared by both heads.
    neck = DarknetConv2D_BN_Leaky(top_channels // 2, (1, 1))(top_feature)

    # Feature map 1 output (13x13 for 416 input).
    first_head = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=top_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='15'),
        DarknetConv2D(pred_filters, (1, 1)))
    y1 = first_head(neck)

    # Reduce and upsample the neck before merging it with feature map 2.
    upsample_path = compose(
        DarknetConv2D_BN_Leaky(skip_channels // 2, (1, 1)),
        UpSampling2D(2))
    upsampled = upsample_path(neck)

    # Feature map 2 output (26x26 for 416 input).
    second_head = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=skip_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='16'),
        DarknetConv2D(pred_filters, (1, 1)))
    y2 = second_head([upsampled, skip_feature])

    return Model(inputs, [y1, y2])
def resblock_body(x, num_filters, num_blocks):
    '''Apply a stride-2 convolution followed by a chain of residual blocks.

    # Arguments
        x: input tensor.
        num_filters: filters of the stride-2 conv and of each block's 3x3
            conv; each block's 1x1 conv uses num_filters // 2.
        num_blocks: number of residual blocks applied after the stride-2 conv.

    # Returns
        The tensor produced by the last residual addition (or by the
        stride-2 conv when num_blocks is 0).
    '''
    # Darknet uses left and top padding instead of 'same' mode.
    padded = ZeroPadding2D(((1, 0), (1, 0)))(x)
    out = DarknetConv2D_BN_Leaky(num_filters, (3, 3), strides=(2, 2))(padded)

    for _ in range(num_blocks):
        # 1x1 squeeze then 3x3 conv, added back onto the block input.
        block = compose(
            DarknetConv2D_BN_Leaky(num_filters // 2, (1, 1)),
            DarknetConv2D_BN_Leaky(num_filters, (3, 3)))
        residual = block(out)
        out = Add()([out, residual])

    return out
def yolo3lite_efficientnet_body(inputs, num_anchors, num_classes, level=0):
    '''
    Build the YOLO_v3 Lite network on top of an EfficientNet backbone.

    # Arguments
        inputs: input tensor handed to the backbone builder and to Model.
        num_anchors: anchors per output; every head is asked for
            num_anchors * (num_classes + 5) output filters.
        num_classes: number of object classes.
        level: EfficientNet level number, forwarded to
            get_efficientnet_backbone_info (default 0, i.e. EfficientNetB0).

    # Returns
        A Model mapping `inputs` to the three prediction tensors [y1, y2, y3].
    '''
    backbone, channel_info = get_efficientnet_backbone_info(inputs, level=level)
    pred_filters = num_anchors * (num_classes + 5)

    deep_feature = backbone.get_layer('top_activation').output
    mid_feature = backbone.get_layer('block6a_expand_activation').output
    shallow_feature = backbone.get_layer('block4a_expand_activation').output

    mid_channels = channel_info['f2_channel_num']
    shallow_channels = channel_info['f3_channel_num']

    # Scale 1: the head width is the *second* feature map's channel count
    # (not half of the first one). The helper returns the tensor passed on
    # to the next scale and the prediction tensor.
    branch, y1 = make_depthwise_separable_last_layers(
        deep_feature, mid_channels, pred_filters, block_id_str='8')

    # Reduce + upsample, merge with the middle feature map.
    branch = compose(DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, mid_feature])

    # Scale 2: the head width is the third feature map's channel count.
    branch, y2 = make_depthwise_separable_last_layers(
        merged, shallow_channels, pred_filters, block_id_str='9')

    # Reduce + upsample, merge with the shallowest feature map.
    branch = compose(DarknetConv2D_BN_Leaky(shallow_channels // 2, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, shallow_feature])

    # Scale 3: the head width is half the third feature map's channel count.
    _, y3 = make_depthwise_separable_last_layers(
        merged, shallow_channels // 2, pred_filters, block_id_str='10')

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3lite_spp_mobilenetv2_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''Build the YOLO_v3 Lite SPP network on top of a MobileNetV2 backbone.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every head is asked for
            num_anchors * (num_classes + 5) output filters.
        num_classes: number of object classes.
        alpha: passed to MobileNetV2 as `alpha` and used to scale the
            head filter counts.

    # Returns
        A Model mapping `inputs` to the three prediction tensors [y1, y2, y3].
    '''
    backbone = MobileNetV2(input_tensor=inputs,
                           weights='imagenet',
                           include_top=False,
                           alpha=alpha)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   out_relu:             13 x 13 x 1280
    #   block_13_expand_relu: 26 x 26 x (576*alpha)
    #   block_6_expand_relu:  52 x 52 x (192*alpha)
    deep_feature = backbone.get_layer('out_relu').output
    mid_feature = backbone.get_layer('block_13_expand_relu').output
    shallow_feature = backbone.get_layer('block_6_expand_relu').output

    # Scale 1: SPP head on the deepest feature map. The helper returns the
    # tensor passed on to the next scale and the prediction tensor.
    branch, y1 = make_spp_depthwise_separable_last_layers(
        deep_feature,
        int(576 * alpha),
        pred_filters,
        block_id_str='17',
        predict_filters=int(1024 * alpha))

    # Scale 2: reduce + upsample, merge with the middle feature map.
    branch = compose(DarknetConv2D_BN_Leaky(int(288 * alpha), (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, mid_feature])
    branch, y2 = make_depthwise_separable_last_layers(
        merged,
        int(192 * alpha),
        pred_filters,
        block_id_str='18',
        predict_filters=int(512 * alpha))

    # Scale 3: reduce + upsample, merge with the shallowest feature map.
    branch = compose(DarknetConv2D_BN_Leaky(int(96 * alpha), (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, shallow_feature])
    _, y3 = make_depthwise_separable_last_layers(
        merged,
        int(96 * alpha),
        pred_filters,
        block_id_str='19',
        predict_filters=int(256 * alpha))

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3_mobilenetv2_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the YOLO_V3 network on top of a MobileNetV2 backbone.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every head is asked for
            num_anchors * (num_classes + 5) output filters.
        num_classes: number of object classes.
        alpha: passed to MobileNetV2 as `alpha` and used to scale the
            head channel counts.

    # Returns
        A Model mapping `inputs` to the three prediction tensors [y1, y2, y3].
    """
    backbone = MobileNetV2(input_tensor=inputs,
                           weights='imagenet',
                           include_top=False,
                           alpha=alpha)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   out_relu:             13 x 13 x 1280
    #   block_13_expand_relu: 26 x 26 x (576*alpha)
    #   block_6_expand_relu:  52 x 52 x (192*alpha)
    deep_feature = backbone.get_layer('out_relu').output
    mid_feature = backbone.get_layer('block_13_expand_relu').output
    shallow_feature = backbone.get_layer('block_6_expand_relu').output

    deep_channels = int(1280 * alpha)
    mid_channels = int(576 * alpha)
    shallow_channels = int(192 * alpha)

    # Scale 1 head & output (13x13 for 416 input). The helper returns the
    # tensor passed on to the next scale and the prediction tensor.
    branch, y1 = make_last_layers(deep_feature, deep_channels // 2,
                                  pred_filters)

    # Upsample and merge for scales 1 & 2.
    branch = compose(DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, mid_feature])

    # Scale 2 head & output (26x26 for 416 input).
    branch, y2 = make_last_layers(merged, mid_channels // 2, pred_filters)

    # Upsample and merge for scales 2 & 3.
    branch = compose(DarknetConv2D_BN_Leaky(shallow_channels // 2, (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, shallow_feature])

    # Scale 3 head & output (52x52 for 416 input).
    _, y3 = make_last_layers(merged, shallow_channels // 2, pred_filters)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3lite_mobilenetv3small_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''Build the YOLO_v3 Lite network on top of a MobileNetV3Small backbone.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every head is asked for
            num_anchors * (num_classes + 5) output filters.
        num_classes: number of object classes.
        alpha: passed to MobileNetV3Small as `alpha` and used to scale the
            head filter counts.

    # Returns
        A Model mapping `inputs` to the three prediction tensors [y1, y2, y3].
    '''
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   activation_31 (layer 165, final feature map):      13 x 13 x (576*alpha)
    #   expanded_conv_10/Add (layer 162, end of block10):  13 x 13 x (96*alpha)
    #   activation_22 (layer 117, middle of block8):       26 x 26 x (288*alpha)
    #   expanded_conv_7/Add (layer 114, end of block7):    26 x 26 x (48*alpha)
    #   activation_7 (layer 38, middle of block3):         52 x 52 x (96*alpha)
    #   expanded_conv_2/Add (layer 35, end of block2):     52 x 52 x (24*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so the
    # layers are fetched by index instead of by name.
    deep_feature = backbone.layers[165].output
    mid_feature = backbone.layers[117].output
    shallow_feature = backbone.layers[38].output

    # Scale 1: head on the deepest feature map. The helper returns the tensor
    # passed on to the next scale and the prediction tensor.
    branch, y1 = make_depthwise_separable_last_layers(
        deep_feature, int(288 * alpha), pred_filters)

    # Scale 2: reduce + upsample, merge with the middle feature map.
    branch = compose(DarknetConv2D_BN_Leaky(int(144 * alpha), (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, mid_feature])
    branch, y2 = make_depthwise_separable_last_layers(
        merged, int(96 * alpha), pred_filters)

    # Scale 3: reduce + upsample, merge with the shallowest feature map.
    branch = compose(DarknetConv2D_BN_Leaky(int(48 * alpha), (1, 1)),
                     UpSampling2D(2))(branch)
    merged = Concatenate()([branch, shallow_feature])
    _, y3 = make_depthwise_separable_last_layers(
        merged, int(48 * alpha), pred_filters)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def tiny_yolo3lite_mobilenetv2_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''Build the Tiny YOLO_v3 Lite network on top of a MobileNetV2 backbone.

    # Arguments
        inputs: input tensor used as the backbone's input_tensor.
        num_anchors: anchors per output; every prediction conv emits
            num_anchors * (num_classes + 5) channels.
        num_classes: number of object classes.
        alpha: passed to MobileNetV2 as `alpha` and used to scale the
            head channel counts.

    # Returns
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    '''
    backbone = MobileNetV2(input_tensor=inputs,
                           weights='imagenet',
                           include_top=False,
                           alpha=alpha)
    pred_filters = num_anchors * (num_classes + 5)

    # Feature map sizes noted for a 416 x 416 x 3 input:
    #   out_relu:             13 x 13 x 1280
    #   block_13_expand_relu: 26 x 26 x (576*alpha)
    #   block_6_expand_relu:  52 x 52 x (192*alpha)
    top_feature = backbone.get_layer('out_relu').output
    skip_feature = backbone.get_layer('block_13_expand_relu').output

    top_channels = int(1280 * alpha)
    skip_channels = int(576 * alpha)

    # Feature map 1 transform: 1x1 bottleneck shared by both heads.
    neck = DarknetConv2D_BN_Leaky(top_channels // 2, (1, 1))(top_feature)

    # Feature map 1 output (13x13 for 416 input).
    first_head = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=top_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='17'),
        DarknetConv2D(pred_filters, (1, 1)))
    y1 = first_head(neck)

    # Reduce and upsample the neck before merging it with feature map 2.
    upsample_path = compose(
        DarknetConv2D_BN_Leaky(skip_channels // 2, (1, 1)),
        UpSampling2D(2))
    upsampled = upsample_path(neck)

    # Feature map 2 output (26x26 for 416 input).
    second_head = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=skip_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='18'),
        DarknetConv2D(pred_filters, (1, 1)))
    y2 = second_head([upsampled, skip_feature])

    return Model(inputs, [y1, y2])
def tiny_yolo3lite_xception_body(inputs, num_anchors, num_classes):
    """Build the Tiny YOLO_v3 Lite model body on an Xception backbone.

    # Arguments
        inputs: input tensor handed to the backbone as `input_tensor`.
        num_anchors: anchor count; every prediction layer emits
            num_anchors * (num_classes + 5) channels.
        num_classes: class count, see `num_anchors`.

    # Returns
        Keras Model mapping `inputs` to the prediction tensors [y1, y2].
    """
    backbone = Xception(input_tensor=inputs,
                        weights='imagenet',
                        include_top=False)

    # Backbone feature maps for a 416 x 416 x 3 input:
    #   block14_sepconv2_act                    : 13 x 13 x 2048
    #   block13_sepconv2_bn (middle of block13) : 26 x 26 x 1024
    #   add_46 (end of block12)                 : 26 x 26 x 728
    #   block4_sepconv2_bn (middle of block4)   : 52 x 52 x 728
    #   add_37 (end of block3)                  : 52 x 52 x 256
    f1 = backbone.get_layer('block14_sepconv2_act').output  # 13 x 13 x 2048
    f2 = backbone.get_layer('block13_sepconv2_bn').output   # 26 x 26 x 1024

    top_channels = 2048
    mid_channels = 1024
    predict_channels = num_anchors * (num_classes + 5)

    # Feature map 1 transform (1x1 channel reduction).
    x1 = DarknetConv2D_BN_Leaky(top_channels // 2, (1, 1))(f1)

    # Feature map 1 output (13x13 for 416 input).
    head1 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=top_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='14'),
        DarknetConv2D(predict_channels, (1, 1)))
    y1 = head1(x1)

    # Upsample branch used for the FPN-style merge of feature maps 1 & 2.
    upsample = compose(
        DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
        UpSampling2D(2))
    x2 = upsample(x1)

    # Feature map 2 output (26x26 for 416 input).
    head2 = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=mid_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='15'),
        DarknetConv2D(predict_channels, (1, 1)))
    y2 = head2([x2, f2])

    return Model(inputs, [y1, y2])
def yolo3lite_spp_mobilenet_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the YOLO_v3 Lite SPP model body on a MobileNet backbone.

    The first head is created with `make_spp_depthwise_separable_last_layers`;
    the two following heads use `make_depthwise_separable_last_layers`.

    # Arguments
        inputs: input tensor handed to the backbone as `input_tensor`.
        num_anchors: anchor count; every prediction layer emits
            num_anchors * (num_classes + 5) channels.
        num_classes: class count, see `num_anchors`.
        alpha: width multiplier forwarded to MobileNet and also used to
            scale the filter counts of the detection heads.

    # Returns
        Keras Model mapping `inputs` to the prediction tensors [y1, y2, y3].
    """
    backbone = MobileNet(input_tensor=inputs,
                         weights='imagenet',
                         include_top=False,
                         alpha=alpha)
    predict_channels = num_anchors * (num_classes + 5)

    # Backbone feature maps for a 416 x 416 x 3 input:
    #   conv_pw_13_relu : 13 x 13 x (1024*alpha)
    #   conv_pw_11_relu : 26 x 26 x (512*alpha)
    #   conv_pw_5_relu  : 52 x 52 x (256*alpha)
    f1 = backbone.get_layer('conv_pw_13_relu').output
    x, y1 = make_spp_depthwise_separable_last_layers(f1,
                                                     int(512 * alpha),
                                                     predict_channels,
                                                     block_id_str='14')
    predictions = [y1]

    # Each lower scale: 1x1 reduce + 2x upsample, merge with the backbone
    # feature map, then run a depthwise-separable head of the same width.
    lower_scales = (
        (256, 'conv_pw_11_relu', '15'),
        (128, 'conv_pw_5_relu', '16'),
    )
    for base_filters, feature_name, block_id in lower_scales:
        filters = int(base_filters * alpha)
        x = compose(DarknetConv2D_BN_Leaky(filters, (1, 1)),
                    UpSampling2D(2))(x)
        feature = backbone.get_layer(feature_name).output
        x = Concatenate()([x, feature])
        x, y = make_depthwise_separable_last_layers(x,
                                                    filters,
                                                    predict_channels,
                                                    block_id_str=block_id)
        predictions.append(y)

    return Model(inputs=inputs, outputs=predictions)
def tiny_yolo3lite_shufflenetv2_body(inputs, num_anchors, num_classes):
    """Build the Tiny YOLO_v3 Lite model body on a ShuffleNetV2 backbone.

    The backbone is created with `weights=None`.

    # Arguments
        inputs: input tensor handed to the backbone as `input_tensor`.
        num_anchors: anchor count; every prediction layer emits
            num_anchors * (num_classes + 5) channels.
        num_classes: class count, see `num_anchors`.

    # Returns
        Keras Model mapping `inputs` to the prediction tensors [y1, y2].
    """
    backbone = ShuffleNetV2(input_tensor=inputs,
                            weights=None,
                            include_top=False)

    # Backbone feature maps for a 416 x 416 x 3 input:
    #   1x1conv5_out                 : 13 x 13 x 1024
    #   stage4/block1/relu_1x1conv_1 : 26 x 26 x 464
    #   stage3/block1/relu_1x1conv_1 : 52 x 52 x 232
    f1 = backbone.get_layer('1x1conv5_out').output                  # 13 x 13 x 1024
    f2 = backbone.get_layer('stage4/block1/relu_1x1conv_1').output  # 26 x 26 x 464

    top_channels = 1024
    mid_channels = 464
    predict_channels = num_anchors * (num_classes + 5)

    # Feature map 1 transform (1x1 channel reduction).
    x1 = DarknetConv2D_BN_Leaky(top_channels // 2, (1, 1))(f1)

    # Feature map 1 head & output (13x13 for 416 input).
    head1 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=top_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='17'),
        DarknetConv2D(predict_channels, (1, 1)))
    y1 = head1(x1)

    # Upsample branch used for the FPN-style merge of feature maps 1 & 2.
    upsample = compose(
        DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
        UpSampling2D(2))
    x2 = upsample(x1)

    # Feature map 2 head & output (26x26 for 416 input).
    head2 = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=mid_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='18'),
        DarknetConv2D(predict_channels, (1, 1)))
    y2 = head2([x2, f2])

    return Model(inputs, [y1, y2])
def yolo3lite_body(inputs, num_anchors, num_classes):
    """Build the YOLO_V3 Lite model body on the darknet53lite backbone.

    # Arguments
        inputs: input tensor of the network.
        num_anchors: anchor count; every prediction layer emits
            num_anchors * (num_classes + 5) channels.
        num_classes: class count, see `num_anchors`.

    # Returns
        Keras Model mapping `inputs` to the prediction tensors [y1, y2, y3].
    """
    backbone = Model(inputs, darknet53lite_body(inputs))
    predict_channels = num_anchors * (num_classes + 5)

    # Head on the final backbone output.
    x, y1 = make_depthwise_separable_last_layers(
        backbone.output, 512, predict_channels)
    predictions = [y1]

    # Each lower scale: 1x1 reduce + 2x upsample, merge with the backbone
    # layer at the given index, then run a head of the same width.
    for filters, skip_index in ((256, 152), (128, 92)):
        x = compose(DarknetConv2D_BN_Leaky(filters, (1, 1)),
                    UpSampling2D(2))(x)
        x = Concatenate()([x, backbone.layers[skip_index].output])
        x, y = make_depthwise_separable_last_layers(
            x, filters, predict_channels)
        predictions.append(y)

    return Model(inputs, predictions)
def yolo3_body(inputs, num_anchors, num_classes, weights_path=None):
    """Build the YOLO_V3 model body on the darknet53 backbone.

    # Arguments
        inputs: input tensor of the network.
        num_anchors: anchor count; every prediction layer emits
            num_anchors * (num_classes + 5) channels.
        num_classes: class count, see `num_anchors`.
        weights_path: optional path of a weights file; when given it is
            loaded into the backbone by layer name before the heads are
            attached.

    # Returns
        Keras Model mapping `inputs` to the prediction tensors [y1, y2, y3].
    """
    backbone = Model(inputs, darknet53_body(inputs))
    if weights_path is not None:
        backbone.load_weights(weights_path, by_name=True)
        print('Load weights {}.'.format(weights_path))

    predict_channels = num_anchors * (num_classes + 5)

    # Head on the final backbone output.
    x, y1 = make_last_layers(backbone.output, 512, predict_channels)
    predictions = [y1]

    # Each lower scale: 1x1 reduce + 2x upsample, merge with the backbone
    # layer at the given index, then run a head of the same width.
    for filters, skip_index in ((256, 152), (128, 92)):
        x = compose(DarknetConv2D_BN_Leaky(filters, (1, 1)),
                    UpSampling2D(2))(x)
        x = Concatenate()([x, backbone.layers[skip_index].output])
        x, y = make_last_layers(x, filters, predict_channels)
        predictions.append(y)

    return Model(inputs, predictions)
def darknet53_body(x):
    """Apply the Darknet53 body (described as having 52 Convolution2D layers).

    # Arguments
        x: input tensor.

    # Returns
        Output tensor of the last residual stage.
    """
    # (filters, block count) for each residual stage, in application order.
    stages = ((64, 1), (128, 2), (256, 8), (512, 8), (1024, 4))

    x = DarknetConv2D_BN_Leaky(32, (3, 3))(x)
    for num_filters, num_blocks in stages:
        x = resblock_body(x, num_filters, num_blocks)
    return x
def yolo3lite_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the YOLO_v3 Lite model body on a MobileNetV3Large backbone.

    # Arguments
        inputs: input tensor handed to the backbone as `input_tensor`.
        num_anchors: anchor count; every prediction layer emits
            num_anchors * (num_classes + 5) channels.
        num_classes: class count, see `num_anchors`.
        alpha: width multiplier forwarded to MobileNetV3Large and also used
            to scale the filter counts of the detection heads.

    # Returns
        Keras Model mapping `inputs` to the prediction tensors [y1, y2, y3].
    """
    backbone = MobileNetV3Large(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)
    predict_channels = num_anchors * (num_classes + 5)

    # Backbone feature maps for a 416 x 416 x 3 input:
    #   activation_38 (layer 194, final feature map)    : 13 x 13 x (960*alpha)
    #   expanded_conv_14/Add (layer 191, end of block14): 13 x 13 x (160*alpha)
    #   activation_29 (layer 146, middle of block12)    : 26 x 26 x (672*alpha)
    #   expanded_conv_11/Add (layer 143, end of block11): 26 x 26 x (112*alpha)
    #   activation_15 (layer 79, middle of block6)      : 52 x 52 x (240*alpha)
    #   expanded_conv_5/Add (layer 76, end of block5)   : 52 x 52 x (40*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so
    # the feature layers are fetched by index instead of by name.
    f1 = backbone.layers[194].output  # 13 x 13 x (960*alpha)
    f2 = backbone.layers[146].output  # 26 x 26 x (672*alpha)
    f3 = backbone.layers[79].output   # 52 x 52 x (240*alpha)

    # Head for feature map 1.
    x, y1 = make_depthwise_separable_last_layers(
        f1, int(672 * alpha), predict_channels, block_id_str='15')

    # 1x1 reduce + 2x upsample, then merge with feature map 2.
    to_scale2 = compose(
        DarknetConv2D_BN_Leaky(int(336 * alpha), (1, 1)),
        UpSampling2D(2))
    x = to_scale2(x)
    x = Concatenate()([x, f2])
    x, y2 = make_depthwise_separable_last_layers(
        x, int(240 * alpha), predict_channels, block_id_str='16')

    # 1x1 reduce + 2x upsample, then merge with feature map 3.
    to_scale3 = compose(
        DarknetConv2D_BN_Leaky(int(120 * alpha), (1, 1)),
        UpSampling2D(2))
    x = to_scale3(x)
    x = Concatenate()([x, f3])
    _, y3 = make_depthwise_separable_last_layers(
        x, int(120 * alpha), predict_channels, block_id_str='17')

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def tiny_yolo3lite_efficientnet_body(inputs, num_anchors, num_classes, level=0):
    """Build the Tiny YOLO_v3 Lite model body on an EfficientNet backbone.

    # Arguments
        inputs: input tensor passed to `get_efficientnet_backbone_info`.
        num_anchors: anchor count; every prediction layer emits
            num_anchors * (num_classes + 5) channels.
        num_classes: class count, see `num_anchors`.
        level: EfficientNet level number. By default the basic
            EfficientNetB0 is used as backbone.

    # Returns
        Keras Model mapping `inputs` to the prediction tensors [y1, y2].
    """
    backbone, feature_map_info = get_efficientnet_backbone_info(inputs,
                                                                level=level)

    f1 = backbone.get_layer('top_activation').output
    f2 = backbone.get_layer('block6a_expand_activation').output

    # Channel counts of the two feature maps, as reported by the helper.
    top_channels = feature_map_info['f1_channel_num']
    mid_channels = feature_map_info['f2_channel_num']
    predict_channels = num_anchors * (num_classes + 5)

    # Feature map 1 transform (1x1 channel reduction).
    x1 = DarknetConv2D_BN_Leaky(top_channels // 2, (1, 1))(f1)

    # Feature map 1 output (13x13 for 416 input).
    head1 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=top_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='8'),
        DarknetConv2D(predict_channels, (1, 1)))
    y1 = head1(x1)

    # Upsample branch used for the FPN-style merge of feature maps 1 & 2.
    upsample = compose(
        DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
        UpSampling2D(2))
    x2 = upsample(x1)

    # Feature map 2 output (26x26 for 416 input).
    head2 = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=mid_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='9'),
        DarknetConv2D(predict_channels, (1, 1)))
    y2 = head2([x2, f2])

    return Model(inputs, [y1, y2])