def tiny_yolo3lite_body(inputs, num_anchors, num_classes):
    """Build the Tiny YOLO_v3 Lite CNN body as a Keras model.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.

    Returns:
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    """
    def downsample():
        # 2x2 max pooling with stride 2 ('same' padding).
        return MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same')

    # Stem: four (separable conv -> pooling) stages followed by a
    # 256-filter separable conv. Layers are created in the same order
    # in which they are applied.
    stem_layers = []
    for width in (16, 32, 64, 128):
        stem_layers.append(Depthwise_Separable_Conv2D_BN_Leaky(width, (3, 3)))
        stem_layers.append(downsample())
    stem_layers.append(Depthwise_Separable_Conv2D_BN_Leaky(256, (3, 3)))
    shallow_feat = compose(*stem_layers)(inputs)

    # Deeper branch, ending with a 1x1 conv that reduces to 256 filters.
    deep_stack = compose(
        downsample(),
        Depthwise_Separable_Conv2D_BN_Leaky(512, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same'),
        Depthwise_Separable_Conv2D_BN_Leaky(1024, (3, 3)),
        DarknetConv2D_BN_Leaky(256, (1, 1)))
    deep_feat = deep_stack(shallow_feat)

    # First prediction head, fed by the deep branch.
    coarse_head = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(512, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y1 = coarse_head(deep_feat)

    # Reduce + upsample the deep branch so it can be merged with the stem.
    lateral = compose(
        DarknetConv2D_BN_Leaky(128, (1, 1)),
        UpSampling2D(2))
    upsampled = lateral(deep_feat)

    # Second prediction head on the concatenated features.
    fine_head = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(256, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y2 = fine_head([upsampled, shallow_feat])

    return Model(inputs, [y1, y2])
def tiny_yolo3lite_shufflenetv2_body(inputs, num_anchors, num_classes):
    """Build the Tiny YOLO_v3 Lite body on a ShuffleNetV2 backbone.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.

    Returns:
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    """
    # Backbone is created without pre-trained weights (weights=None).
    backbone = ShuffleNetV2(input_tensor=inputs, weights=None,
                            include_top=False)

    # Feature maps for a 416 x 416 x 3 input:
    #   1x1conv5_out:                 13 x 13 x 1024
    #   stage4/block1/relu_1x1conv_1: 26 x 26 x 464
    #   stage3/block1/relu_1x1conv_1: 52 x 52 x 232 (not used here)
    mid_feat = backbone.get_layer('stage4/block1/relu_1x1conv_1').output
    top_feat = backbone.get_layer('1x1conv5_out').output

    # 1x1 reduction of the last backbone feature map.
    reduce_top = DarknetConv2D_BN_Leaky(464, (1, 1))
    neck = reduce_top(top_feat)

    # First prediction head.
    coarse_head = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=1024,
                                            kernel_size=(3, 3),
                                            block_id_str='17'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y1 = coarse_head(neck)

    # Reduce + upsample, then merge with the intermediate feature map.
    lateral = compose(
        DarknetConv2D_BN_Leaky(232, (1, 1)),
        UpSampling2D(2))
    upsampled = lateral(neck)

    # Second prediction head.
    fine_head = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=464,
                                            kernel_size=(3, 3),
                                            block_id_str='18'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y2 = fine_head([upsampled, mid_feat])

    return Model(inputs, [y1, y2])
def tiny_yolo3_efficientnet_body(inputs, num_anchors, num_classes, level=0):
    """Build the Tiny YOLO_v3 body on an EfficientNet backbone.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.
        level: EfficientNet level number; the default 0 selects the basic
            EfficientNetB0 backbone.

    Returns:
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    """
    backbone, fmap_info = get_efficientnet_backbone_info(inputs, level=level)

    # Head widths are taken from the backbone description.
    top_channels = fmap_info['f1_channel_num']
    mid_channels = fmap_info['f2_channel_num']

    mid_feat = backbone.get_layer('block6a_expand_activation').output
    top_feat = backbone.get_layer('top_activation').output

    # 1x1 reduction of the final feature map to half of its head width.
    reduce_top = DarknetConv2D_BN_Leaky(top_channels // 2, (1, 1))
    neck = reduce_top(top_feat)

    # First prediction head (regular 3x3 conv, not the separable variant).
    coarse_head = compose(
        DarknetConv2D_BN_Leaky(top_channels, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y1 = coarse_head(neck)

    # Reduce + upsample, then merge with the intermediate feature map.
    lateral = compose(
        DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
        UpSampling2D(2))
    upsampled = lateral(neck)

    # Second prediction head.
    fine_head = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(mid_channels, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y2 = fine_head([upsampled, mid_feat])

    return Model(inputs, [y1, y2])
def tiny_yolo3lite_mobilenet_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the Tiny YOLO_v3 Lite body on a MobileNet backbone.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.
        alpha: MobileNet width multiplier; also scales the head widths.

    Returns:
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    """
    backbone = MobileNet(input_tensor=inputs, weights='imagenet',
                         include_top=False, alpha=alpha)

    # Feature maps for a 416 x 416 x 3 input:
    #   conv_pw_13_relu: 13 x 13 x (1024*alpha)
    #   conv_pw_11_relu: 26 x 26 x (512*alpha)
    #   conv_pw_5_relu:  52 x 52 x (256*alpha) (not used here)
    mid_feat = backbone.get_layer('conv_pw_11_relu').output
    top_feat = backbone.get_layer('conv_pw_13_relu').output

    # 1x1 reduction of the final feature map.
    reduce_top = DarknetConv2D_BN_Leaky(int(512 * alpha), (1, 1))
    neck = reduce_top(top_feat)

    # First prediction head.
    coarse_head = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=int(1024 * alpha),
                                            kernel_size=(3, 3),
                                            block_id_str='14'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y1 = coarse_head(neck)

    # Reduce + upsample, then merge with the intermediate feature map.
    lateral = compose(
        DarknetConv2D_BN_Leaky(int(256 * alpha), (1, 1)),
        UpSampling2D(2))
    upsampled = lateral(neck)

    # Second prediction head.
    fine_head = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=int(512 * alpha),
                                            kernel_size=(3, 3),
                                            block_id_str='15'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y2 = fine_head([upsampled, mid_feat])

    return Model(inputs, [y1, y2])
def tiny_yolo3_mobilenetv2_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the Tiny YOLO_v3 body on a MobileNetV2 backbone.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.
        alpha: MobileNetV2 width multiplier; also scales the head widths.

    Returns:
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    """
    backbone = MobileNetV2(input_tensor=inputs, weights='imagenet',
                           include_top=False, alpha=alpha)

    # Feature maps for a 416 x 416 x 3 input:
    #   out_relu:             13 x 13 x 1280
    #   block_13_expand_relu: 26 x 26 x (576*alpha)
    #   block_6_expand_relu:  52 x 52 x (192*alpha) (not used here)
    mid_feat = backbone.get_layer('block_13_expand_relu').output
    top_feat = backbone.get_layer('out_relu').output

    # 1x1 reduction of the final feature map.
    reduce_top = DarknetConv2D_BN_Leaky(int(576 * alpha), (1, 1))
    neck = reduce_top(top_feat)

    # First prediction head (regular 3x3 conv, not the separable variant).
    coarse_head = compose(
        DarknetConv2D_BN_Leaky(int(1280 * alpha), (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y1 = coarse_head(neck)

    # Reduce + upsample, then merge with the intermediate feature map.
    lateral = compose(
        DarknetConv2D_BN_Leaky(int(288 * alpha), (1, 1)),
        UpSampling2D(2))
    upsampled = lateral(neck)

    # Second prediction head.
    fine_head = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(int(576 * alpha), (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y2 = fine_head([upsampled, mid_feat])

    return Model(inputs, [y1, y2])
def tiny_yolo3_xception_body(inputs, num_anchors, num_classes):
    """Build the Tiny YOLO_v3 body on an Xception backbone.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.

    Returns:
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    """
    backbone = Xception(input_tensor=inputs, weights='imagenet',
                        include_top=False)

    # Feature maps for a 416 x 416 x 3 input:
    #   block14_sepconv2_act:                    13 x 13 x 2048
    #   block13_sepconv2_bn (middle of block13): 26 x 26 x 1024
    #   add_46 (end of block12):                 26 x 26 x 728
    #   block4_sepconv2_bn (middle of block4):   52 x 52 x 728
    #   add_37 (end of block3):                  52 x 52 x 256
    mid_feat = backbone.get_layer('block13_sepconv2_bn').output   # 26 x 26 x 1024
    top_feat = backbone.get_layer('block14_sepconv2_act').output  # 13 x 13 x 2048

    # 1x1 reduction of the final feature map.
    reduce_top = DarknetConv2D_BN_Leaky(1024, (1, 1))
    neck = reduce_top(top_feat)

    # First prediction head (regular 3x3 conv, not the separable variant).
    coarse_head = compose(
        DarknetConv2D_BN_Leaky(2048, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y1 = coarse_head(neck)

    # Reduce + upsample, then merge with the intermediate feature map.
    lateral = compose(
        DarknetConv2D_BN_Leaky(512, (1, 1)),
        UpSampling2D(2))
    upsampled = lateral(neck)

    # Second prediction head.
    fine_head = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(1024, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y2 = fine_head([upsampled, mid_feat])

    return Model(inputs, [y1, y2])
def tiny_yolo3lite_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the Tiny YOLO_v3 Lite body on a MobileNetV3Large backbone.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.
        alpha: MobileNetV3 width multiplier; also scales the head widths.

    Returns:
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    """
    backbone = MobileNetV3Large(input_tensor=inputs, weights='imagenet',
                                include_top=False, alpha=alpha)

    # Feature maps for a 416 x 416 x 3 input:
    #   activation_38 (layer 194, final feature map):   13 x 13 x (960*alpha)
    #   expanded_conv_14/Add (layer 191, end block14):  13 x 13 x (160*alpha)
    #   activation_29 (layer 146, middle of block12):   26 x 26 x (672*alpha)
    #   expanded_conv_11/Add (layer 143, end block11):  26 x 26 x (112*alpha)
    #   activation_15 (layer 79, middle of block6):     52 x 52 x (240*alpha)
    #   expanded_conv_5/Add (layer 76, end of block5):  52 x 52 x (40*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so
    # the layers are fetched by index instead of by name.
    top_feat = backbone.layers[194].output  # 13 x 13 x (960*alpha)
    mid_feat = backbone.layers[146].output  # 26 x 26 x (672*alpha)

    top_channels = int(960 * alpha)
    mid_channels = int(672 * alpha)

    # Feature map 1 transform: 1x1 reduction to half of the head width.
    reduce_top = DarknetConv2D_BN_Leaky(top_channels // 2, (1, 1))
    neck = reduce_top(top_feat)

    # Feature map 1 output (13x13 for 416 input).
    coarse_head = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=top_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='15'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y1 = coarse_head(neck)

    # Upsample + FPN-style merge of feature maps 1 & 2.
    lateral = compose(
        DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
        UpSampling2D(2))
    upsampled = lateral(neck)

    # Feature map 2 output (26x26 for 416 input).
    fine_head = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=mid_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='16'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y2 = fine_head([upsampled, mid_feat])

    return Model(inputs, [y1, y2])
def tiny_yolo3lite_mobilenetv3small_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the Tiny YOLO_v3 Lite body on a MobileNetV3Small backbone.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.
        alpha: MobileNetV3 width multiplier; also scales the head widths.

    Returns:
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    """
    backbone = MobileNetV3Small(input_tensor=inputs, weights='imagenet',
                                include_top=False, alpha=alpha)

    # Feature maps for a 416 x 416 x 3 input:
    #   activation_31 (layer 165, final feature map):   13 x 13 x (576*alpha)
    #   expanded_conv_10/Add (layer 162, end block10):  13 x 13 x (96*alpha)
    #   activation_22 (layer 117, middle of block8):    26 x 26 x (288*alpha)
    #   expanded_conv_7/Add (layer 114, end of block7): 26 x 26 x (48*alpha)
    #   activation_7 (layer 38, middle of block3):      52 x 52 x (96*alpha)
    #   expanded_conv_2/Add (layer 35, end of block2):  52 x 52 x (24*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so
    # the layers are fetched by index instead of by name.
    top_feat = backbone.layers[165].output  # 13 x 13 x (576*alpha)
    mid_feat = backbone.layers[117].output  # 26 x 26 x (288*alpha)

    top_channels = int(576 * alpha)
    mid_channels = int(288 * alpha)

    # Feature map 1 transform: 1x1 reduction to half of the head width.
    reduce_top = DarknetConv2D_BN_Leaky(top_channels // 2, (1, 1))
    neck = reduce_top(top_feat)

    # Feature map 1 output (13x13 for 416 input).
    coarse_head = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=top_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='15'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y1 = coarse_head(neck)

    # Upsample + FPN-style merge of feature maps 1 & 2.
    lateral = compose(
        DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
        UpSampling2D(2))
    upsampled = lateral(neck)

    # Feature map 2 output (26x26 for 416 input).
    fine_head = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=mid_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='16'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y2 = fine_head([upsampled, mid_feat])

    return Model(inputs, [y1, y2])
def yolo3_nano_body(inputs, num_anchors, num_classes, weights_path=None):
    """Build the YOLO_V3 Nano CNN body as a Keras model.

    Reference paper:
        "YOLO Nano: a Highly Compact You Only Look Once Convolutional
        Neural Network for Object Detection"
        https://arxiv.org/abs/1910.01271

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.
        weights_path: optional weights file loaded into the backbone
            by layer name after it has been built.

    Returns:
        A Model mapping `inputs` to the three prediction tensors
        [y1, y2, y3].
    """
    backbone = NanoNet(input_tensor=inputs, weights='imagenet',
                       include_top=False)
    if weights_path is not None:
        backbone.load_weights(weights_path, by_name=True)
        print('Load weights {}.'.format(weights_path))

    # Feature maps for a 416 x 416 x 3 input:
    #   Conv_pw_3_relu:   13 x 13 x 189
    #   pep_block_15_add: 26 x 26 x 325
    #   pep_block_7_add:  52 x 52 x 150
    f1 = backbone.get_layer('Conv_pw_3').output         # 13 x 13 x 189
    f2 = backbone.get_layer('pep_block_15_add').output  # 26 x 26 x 325
    f3 = backbone.get_layer('pep_block_7_add').output   # 52 x 52 x 150

    # Feature map 1 head & output (13x13 for 416 input).
    y1 = _ep_block(f1, filters=462, stride=1, expansion=EP_EXPANSION,
                   block_id=6)
    y1 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(y1)

    # Upsample + FPN-style merge of feature maps 1 & 2.
    up_to_26 = compose(
        NanoConv2D_BN_Relu6(105, (1, 1)),
        UpSampling2D(2))
    x = up_to_26(f1)
    x = Concatenate()([x, f2])

    # Feature map 2 head & output (26x26 for 416 input).
    for proj, width, block in ((113, 325, 18), (99, 207, 19)):
        x = _pep_block(x, proj_filters=proj, filters=width, stride=1,
                       expansion=PEP_EXPANSION, block_id=block)
    x = DarknetConv2D(98, (1, 1))(x)
    y2 = _ep_block(x, filters=183, stride=1, expansion=EP_EXPANSION,
                   block_id=7)
    y2 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(y2)

    # Upsample + FPN-style merge of feature maps 2 & 3.
    up_to_52 = compose(
        NanoConv2D_BN_Relu6(47, (1, 1)),
        UpSampling2D(2))
    x = up_to_52(x)
    x = Concatenate()([x, f3])

    # Feature map 3 head & output (52x52 for 416 input).
    for proj, width, block in ((58, 122, 20), (52, 87, 21), (47, 93, 22)):
        x = _pep_block(x, proj_filters=proj, filters=width, stride=1,
                       expansion=PEP_EXPANSION, block_id=block)
    y3 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def tiny_yolo3lite_mobilenetv2_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the Tiny YOLO_v3 Lite body on a MobileNetV2 backbone.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.
        alpha: MobileNetV2 width multiplier; also scales the head widths.

    Returns:
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    """
    backbone = MobileNetV2(input_tensor=inputs, weights='imagenet',
                           include_top=False, alpha=alpha)

    # Feature maps for a 416 x 416 x 3 input:
    #   out_relu:             13 x 13 x 1280
    #   block_13_expand_relu: 26 x 26 x (576*alpha)
    #   block_6_expand_relu:  52 x 52 x (192*alpha) (not used here)
    top_feat = backbone.get_layer('out_relu').output              # 13 x 13 x 1280
    mid_feat = backbone.get_layer('block_13_expand_relu').output  # 26 x 26 x (576*alpha)

    top_channels = int(1280 * alpha)
    mid_channels = int(576 * alpha)

    # Feature map 1 transform: 1x1 reduction to half of the head width.
    reduce_top = DarknetConv2D_BN_Leaky(top_channels // 2, (1, 1))
    neck = reduce_top(top_feat)

    # Feature map 1 output (13x13 for 416 input).
    coarse_head = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=top_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='17'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y1 = coarse_head(neck)

    # Upsample + FPN-style merge of feature maps 1 & 2.
    lateral = compose(
        DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
        UpSampling2D(2))
    upsampled = lateral(neck)

    # Feature map 2 output (26x26 for 416 input).
    fine_head = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=mid_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='18'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y2 = fine_head([upsampled, mid_feat])

    return Model(inputs, [y1, y2])
def tiny_yolo3lite_xception_body(inputs, num_anchors, num_classes):
    """Build the Tiny YOLO_v3 Lite body on an Xception backbone.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.

    Returns:
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    """
    backbone = Xception(input_tensor=inputs, weights='imagenet',
                        include_top=False)

    # Feature maps for a 416 x 416 x 3 input:
    #   block14_sepconv2_act:                    13 x 13 x 2048
    #   block13_sepconv2_bn (middle of block13): 26 x 26 x 1024
    #   add_46 (end of block12):                 26 x 26 x 728
    #   block4_sepconv2_bn (middle of block4):   52 x 52 x 728
    #   add_37 (end of block3):                  52 x 52 x 256
    top_feat = backbone.get_layer('block14_sepconv2_act').output  # 13 x 13 x 2048
    mid_feat = backbone.get_layer('block13_sepconv2_bn').output   # 26 x 26 x 1024

    top_channels = 2048
    mid_channels = 1024

    # Feature map 1 transform: 1x1 reduction to half of the head width.
    reduce_top = DarknetConv2D_BN_Leaky(top_channels // 2, (1, 1))
    neck = reduce_top(top_feat)

    # Feature map 1 output (13x13 for 416 input).
    coarse_head = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=top_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='14'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y1 = coarse_head(neck)

    # Upsample + FPN-style merge of feature maps 1 & 2.
    lateral = compose(
        DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
        UpSampling2D(2))
    upsampled = lateral(neck)

    # Feature map 2 output (26x26 for 416 input).
    fine_head = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=mid_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='15'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y2 = fine_head([upsampled, mid_feat])

    return Model(inputs, [y1, y2])
def custom_tiny_yolo3_body(inputs, num_anchors, num_classes, weights_path):
    """Build a custom Tiny YOLO_v3 model from pre-trained darknet weights.

    A Tiny YOLO_v3 body is built for 80 classes, the weights at
    `weights_path` are loaded into it by layer name, and new prediction
    convolutions sized for `num_classes` are attached to two of its
    intermediate activations.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of target classes for the new heads.
        weights_path: weights file loaded into the base model.

    Returns:
        A Model mapping `inputs` to the two new prediction tensors.
    """
    # TODO: get darknet class number from class file
    num_classes_coco = 80

    base_model = tiny_yolo3_body(inputs, num_anchors, num_classes_coco)
    base_model.load_weights(weights_path, by_name=True)
    print('Load weights {}.'.format(weights_path))

    # (activation in the base network, name of the new prediction conv)
    head_specs = (
        ('leaky_re_lu_8', 'prediction_13'),
        ('leaky_re_lu_10', 'prediction_26'),
    )

    # Conv outputs of the original network that feed the new heads.
    taps = [base_model.get_layer(src).output for src, _ in head_specs]

    predictions = []
    for tap, (_, head_name) in zip(taps, head_specs):
        head = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1),
                             name=head_name)
        predictions.append(head(tap))

    return Model(inputs, predictions)
def tiny_yolo3lite_shufflenetv2_body(inputs, num_anchors, num_classes):
    """Build the Tiny YOLO_v3 Lite body on a ShuffleNetV2 backbone.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.

    Returns:
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    """
    # Backbone is created without pre-trained weights (weights=None).
    backbone = ShuffleNetV2(input_tensor=inputs, weights=None,
                            include_top=False)

    # Feature maps for a 416 x 416 x 3 input:
    #   1x1conv5_out:                 13 x 13 x 1024
    #   stage4/block1/relu_1x1conv_1: 26 x 26 x 464
    #   stage3/block1/relu_1x1conv_1: 52 x 52 x 232 (not used here)
    top_feat = backbone.get_layer('1x1conv5_out').output                  # 13 x 13 x 1024
    mid_feat = backbone.get_layer('stage4/block1/relu_1x1conv_1').output  # 26 x 26 x 464

    top_channels = 1024
    mid_channels = 464

    # Feature map 1 transform: 1x1 reduction to half of the head width.
    reduce_top = DarknetConv2D_BN_Leaky(top_channels // 2, (1, 1))
    neck = reduce_top(top_feat)

    # Feature map 1 head & output (13x13 for 416 input).
    coarse_head = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=top_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='17'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y1 = coarse_head(neck)

    # Upsample + FPN-style merge of feature maps 1 & 2.
    lateral = compose(
        DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
        UpSampling2D(2))
    upsampled = lateral(neck)

    # Feature map 2 head & output (26x26 for 416 input).
    fine_head = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=mid_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='18'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y2 = fine_head([upsampled, mid_feat])

    return Model(inputs, [y1, y2])
def tiny_yolo3lite_efficientnet_body(inputs, num_anchors, num_classes, level=0):
    """Build the Tiny YOLO_v3 Lite body on an EfficientNet backbone.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.
        level: EfficientNet level number; the default 0 selects the basic
            EfficientNetB0 backbone.

    Returns:
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    """
    backbone, fmap_info = get_efficientnet_backbone_info(inputs, level=level)

    # Head widths are taken from the backbone description.
    top_channels = fmap_info['f1_channel_num']
    mid_channels = fmap_info['f2_channel_num']

    top_feat = backbone.get_layer('top_activation').output
    mid_feat = backbone.get_layer('block6a_expand_activation').output

    # Feature map 1 transform: 1x1 reduction to half of the head width.
    reduce_top = DarknetConv2D_BN_Leaky(top_channels // 2, (1, 1))
    neck = reduce_top(top_feat)

    # Feature map 1 output (13x13 for 416 input).
    coarse_head = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=top_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='8'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y1 = coarse_head(neck)

    # Upsample + FPN-style merge of feature maps 1 & 2.
    lateral = compose(
        DarknetConv2D_BN_Leaky(mid_channels // 2, (1, 1)),
        UpSampling2D(2))
    upsampled = lateral(neck)

    # Feature map 2 output (26x26 for 416 input).
    fine_head = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=mid_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='9'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y2 = fine_head([upsampled, mid_feat])

    return Model(inputs, [y1, y2])
def custom_yolo3_spp_body(inputs, num_anchors, num_classes):
    """Build a custom YOLO_v3 SPP model with heads for the target classes.

    A YOLO_v3 SPP body is built for 80 classes and new prediction
    convolutions sized for `num_classes` are attached to three of its
    intermediate activations. This function does not load any weights
    itself.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of target classes for the new heads.

    Returns:
        A Model mapping `inputs` to the three new prediction tensors.
    """
    # TODO: get darknet class number from class file
    num_classes_coco = 80
    base_model = yolo3_spp_body(inputs, num_anchors, num_classes_coco)

    # (activation in the base network, name of the new prediction conv)
    head_specs = (
        ('leaky_re_lu_58', 'prediction_13'),
        ('leaky_re_lu_65', 'prediction_26'),
        ('leaky_re_lu_72', 'prediction_52'),
    )

    # Conv outputs of the original network that feed the new heads.
    taps = [base_model.get_layer(src).output for src, _ in head_specs]

    predictions = []
    for tap, (_, head_name) in zip(taps, head_specs):
        head = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1),
                             name=head_name)
        predictions.append(head(tap))

    return Model(inputs, predictions)
def tiny_yolo3_mobilenetv3small_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the Tiny YOLO_v3 body on a MobileNetV3Small backbone.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.
        alpha: MobileNetV3 width multiplier; also scales the head widths.

    Returns:
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    """
    backbone = MobileNetV3Small(input_tensor=inputs, weights='imagenet',
                                include_top=False, alpha=alpha)

    # Feature maps for a 416 x 416 x 3 input:
    #   activation_31 (layer 165, final feature map):   13 x 13 x (576*alpha)
    #   expanded_conv_10/Add (layer 162, end block10):  13 x 13 x (96*alpha)
    #   activation_22 (layer 117, middle of block8):    26 x 26 x (288*alpha)
    #   expanded_conv_7/Add (layer 114, end of block7): 26 x 26 x (48*alpha)
    #   activation_7 (layer 38, middle of block3):      52 x 52 x (96*alpha)
    #   expanded_conv_2/Add (layer 35, end of block2):  52 x 52 x (24*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so
    # the layers are fetched by index instead of by name.
    mid_feat = backbone.layers[117].output
    top_feat = backbone.layers[165].output

    # 1x1 reduction of the final feature map.
    reduce_top = DarknetConv2D_BN_Leaky(int(288 * alpha), (1, 1))
    neck = reduce_top(top_feat)

    # First prediction head (regular 3x3 conv, not the separable variant).
    coarse_head = compose(
        DarknetConv2D_BN_Leaky(int(576 * alpha), (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y1 = coarse_head(neck)

    # Reduce + upsample, then merge with the intermediate feature map.
    lateral = compose(
        DarknetConv2D_BN_Leaky(int(144 * alpha), (1, 1)),
        UpSampling2D(2))
    upsampled = lateral(neck)

    # Second prediction head.
    fine_head = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(int(288 * alpha), (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y2 = fine_head([upsampled, mid_feat])

    return Model(inputs, [y1, y2])
def tiny_yolo3lite_body(inputs, num_anchors, num_classes):
    '''Create Tiny YOLO_v3 Lite model CNN body in keras.

    # Arguments
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.

    # Returns
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    '''
    #feature map 2 (26x26x256 for 416 input)
    f2 = compose(
            Depthwise_Separable_Conv2D_BN_Leaky(16, (3,3)),
            MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
            Depthwise_Separable_Conv2D_BN_Leaky(32, (3,3)),
            MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
            Depthwise_Separable_Conv2D_BN_Leaky(64, (3,3)),
            MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
            Depthwise_Separable_Conv2D_BN_Leaky(128, (3,3)),
            MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
            Depthwise_Separable_Conv2D_BN_Leaky(256, (3,3)))(inputs)

    #feature map 1 (13x13x1024 for 416 input)
    f1 = compose(
            MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
            Depthwise_Separable_Conv2D_BN_Leaky(512, (3,3)),
            MaxPooling2D(pool_size=(2,2), strides=(1,1), padding='same'),
            Depthwise_Separable_Conv2D_BN_Leaky(1024, (3,3)))(f2)

    #feature map 1 transform
    x1 = DarknetConv2D_BN_Leaky(256, (1,1))(f1)

    #feature map 1 output (13x13 for 416 input)
    # The head consumes the transformed feature map x1. It previously
    # referenced x2, which is only assigned further down, so calling the
    # function raised UnboundLocalError.
    y1 = compose(
            Depthwise_Separable_Conv2D_BN_Leaky(512, (3,3)),
            DarknetConv2D(num_anchors*(num_classes+5), (1,1)))(x1)

    #upsample fpn merge for feature map 1 & 2
    x2 = compose(
            DarknetConv2D_BN_Leaky(128, (1,1)),
            UpSampling2D(2))(x1)

    #feature map 2 output (26x26 for 416 input)
    y2 = compose(
            Concatenate(),
            Depthwise_Separable_Conv2D_BN_Leaky(256, (3,3)),
            DarknetConv2D(num_anchors*(num_classes+5), (1,1)))([x2, f2])

    return Model(inputs, [y1,y2])
def tiny_yolo3_vgg16_body(inputs, num_anchors, num_classes):
    """Build the Tiny YOLO_v3 body on a VGG16 backbone.

    Three extra 3x3 ReLU convolutions ("block6") are stacked on top of the
    VGG16 `block5_pool` output before the YOLO heads are attached.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.

    Returns:
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    """
    backbone = VGG16(input_tensor=inputs, weights='imagenet',
                     include_top=False)

    # Extra "block6" convolutions appended after the last VGG16 pooling.
    top_feat = backbone.get_layer('block5_pool').output
    for conv_name in ('block6_conv1', 'block6_conv2', 'block6_conv3'):
        top_feat = YoloConv2D(512, (3, 3), activation='relu', padding='same',
                              name=conv_name)(top_feat)

    # Feature maps for a 416 x 416 x 3 input:
    #   block6_conv3: 13 x 13 x 512
    #   block5_conv3: 26 x 26 x 512
    #   block4_conv3: 52 x 52 x 512 (not used here)
    mid_feat = backbone.get_layer('block5_conv3').output

    # 1x1 conv on the final feature map.
    reduce_top = DarknetConv2D_BN_Leaky(512, (1, 1))
    neck = reduce_top(top_feat)

    # First prediction head (regular 3x3 conv, not the separable variant).
    coarse_head = compose(
        DarknetConv2D_BN_Leaky(1024, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y1 = coarse_head(neck)

    # Reduce + upsample, then merge with the intermediate feature map.
    lateral = compose(
        DarknetConv2D_BN_Leaky(256, (1, 1)),
        UpSampling2D(2))
    upsampled = lateral(neck)

    # Second prediction head.
    fine_head = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(512, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y2 = fine_head([upsampled, mid_feat])

    return Model(inputs, [y1, y2])
def custom_yolo3_spp_body(inputs, num_anchors, num_classes, weights_path):
    """Build a custom YOLO_v3 SPP model from pre-trained darknet weights.

    A YOLO_v3 SPP body is built for 80 classes, the weights at
    `weights_path` are loaded into it (not by layer name), and the
    prediction convolutions are rebuilt for `num_classes`.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of target classes for the new heads.
        weights_path: weights file loaded into the base model.

    Returns:
        A Model mapping `inputs` to the three new prediction tensors.
    """
    # TODO: get darknet class number from class file
    num_classes_coco = 80

    base_model = yolo3_spp_body(inputs, num_anchors, num_classes_coco)
    base_model.load_weights(weights_path, by_name=False)
    print('Load weights {}.'.format(weights_path))

    # Re-form the prediction conv layers for the custom dataset classes.
    # The inputs of the new heads are taken by position from the end of
    # the base model's layer list (presumably the layers otherwise named
    # leaky_re_lu_58 / _65 / _72 -- verify against yolo3_spp_body).
    head_specs = (
        (-6, 'prediction_13'),
        (-5, 'prediction_26'),
        (-4, 'prediction_52'),
    )
    taps = [base_model.layers[index].output for index, _ in head_specs]

    predictions = []
    for tap, (_, head_name) in zip(taps, head_specs):
        head = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1),
                             name=head_name)
        predictions.append(head(tap))

    return Model(inputs, predictions)
def tiny_yolo3_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the Tiny YOLO_v3 body on a MobileNetV3Large backbone.

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.
        alpha: MobileNetV3 width multiplier; also scales the head widths.

    Returns:
        A Model mapping `inputs` to the two prediction tensors [y1, y2].
    """
    backbone = MobileNetV3Large(input_tensor=inputs, weights='imagenet',
                                include_top=False, alpha=alpha)

    # Feature maps for a 416 x 416 x 3 input:
    #   activation_38 (layer 194, final feature map):   13 x 13 x (960*alpha)
    #   expanded_conv_14/Add (layer 191, end block14):  13 x 13 x (160*alpha)
    #   activation_29 (layer 146, middle of block12):   26 x 26 x (672*alpha)
    #   expanded_conv_11/Add (layer 143, end block11):  26 x 26 x (112*alpha)
    #   activation_15 (layer 79, middle of block6):     52 x 52 x (240*alpha)
    #   expanded_conv_5/Add (layer 76, end of block5):  52 x 52 x (40*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so
    # the layers are fetched by index instead of by name.
    mid_feat = backbone.layers[146].output
    top_feat = backbone.layers[194].output

    # 1x1 reduction of the final feature map.
    reduce_top = DarknetConv2D_BN_Leaky(int(672*alpha), (1, 1))
    neck = reduce_top(top_feat)

    # First prediction head (regular 3x3 conv, not the separable variant).
    coarse_head = compose(
        DarknetConv2D_BN_Leaky(int(960*alpha), (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y1 = coarse_head(neck)

    # Reduce + upsample, then merge with the intermediate feature map.
    lateral = compose(
        DarknetConv2D_BN_Leaky(int(336*alpha), (1, 1)),
        UpSampling2D(2))
    upsampled = lateral(neck)

    # Second prediction head.
    fine_head = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(int(672*alpha), (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))
    y2 = fine_head([upsampled, mid_feat])

    return Model(inputs, [y1, y2])
def NanoConv2D_BN_Relu6(*args, **kwargs):
    """Darknet Convolution2D followed by BatchNormalization and ReLU6.

    Positional and keyword arguments are forwarded to `DarknetConv2D`,
    with `use_bias=False` as the default. When a truthy `name` keyword is
    given, the BatchNormalization and ReLU layers are named
    `<name>_BN` and `<name>_relu`; otherwise they get no explicit name.

    Caller keyword arguments are applied last, so a caller-supplied
    `name` or `use_bias` overrides the defaults prepared here (in
    particular, the convolution receives the caller's `name` unchanged).

    Returns:
        The composed conv -> batch-norm -> ReLU6 callable.
    """
    base_name = kwargs.get('name')

    # Defaults first; anything passed by the caller wins on update().
    conv_kwargs = {'use_bias': False}
    if base_name:
        conv_kwargs['name'] = base_name + '_conv2d'
    conv_kwargs.update(kwargs)

    bn_name = base_name + '_BN' if base_name else None
    relu_name = base_name + '_relu' if base_name else None

    return compose(
        DarknetConv2D(*args, **conv_kwargs),
        BatchNormalization(name=bn_name),
        ReLU(6., name=relu_name))
def yolo_nano_body(inputs, num_anchors, num_classes, weights_path=None):
    """Build the YOLO_V3 Nano CNN body as a Keras model.

    Reference paper:
        "YOLO Nano: a Highly Compact You Only Look Once Convolutional
        Neural Network for Object Detection"
        https://arxiv.org/abs/1910.01271

    Args:
        inputs: input tensor of the network.
        num_anchors: number of anchors predicted per output scale.
        num_classes: number of object classes.
        weights_path: optional weights file loaded into the backbone
            by layer name after it has been built.

    Returns:
        A Model mapping `inputs` to the three prediction tensors
        [y1, y2, y3].
    """
    backbone = Model(inputs, nano_net_body(inputs))
    if weights_path is not None:
        backbone.load_weights(weights_path, by_name=True)
        print('Load weights {}.'.format(weights_path))

    # Feature maps for a 416 x 416 x 3 input:
    #   Conv_pw_3_relu:   13 x 13 x 189
    #   pep_block_15_add: 26 x 26 x 325
    #   pep_block_7_add:  52 x 52 x 150
    f1 = backbone.get_layer('Conv_pw_3').output         # 13 x 13 x 189
    f2 = backbone.get_layer('pep_block_15_add').output  # 26 x 26 x 325
    f3 = backbone.get_layer('pep_block_7_add').output   # 52 x 52 x 150

    # Head & output on feature map 1.
    y1 = _ep_block(f1, filters=462, stride=1, expansion=2, block_id=6)
    y1 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(y1)

    # Upsample feature map 1 and merge it with feature map 2.
    up_to_f2 = compose(
        NanoConv2D_BN_Relu6(105, (1, 1)),
        UpSampling2D(2))
    x = up_to_f2(f1)
    x = Concatenate()([x, f2])

    # Head & output on feature map 2.
    for proj, width, block in ((113, 325, 18), (99, 207, 19)):
        x = _pep_block(x, proj_filters=proj, filters=width, stride=1,
                       expansion=2, block_id=block)
    x = DarknetConv2D(98, (1, 1))(x)
    y2 = _ep_block(x, filters=183, stride=1, expansion=2, block_id=7)
    y2 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(y2)

    # Upsample again and merge with feature map 3.
    up_to_f3 = compose(
        NanoConv2D_BN_Relu6(47, (1, 1)),
        UpSampling2D(2))
    x = up_to_f3(x)
    x = Concatenate()([x, f3])

    # Head & output on feature map 3.
    for proj, width, block in ((58, 122, 20), (52, 87, 21), (47, 93, 22)):
        x = _pep_block(x, proj_filters=proj, filters=width, stride=1,
                       expansion=2, block_id=block)
    y3 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def NanoNet(input_shape=None, input_tensor=None, include_top=True, weights='imagenet', pooling=None, classes=1000, **kwargs):
    """Generate the nano net model for ImageNet classification.

    Args:
        input_shape: optional input shape; validated and completed by
            `_obtain_input_shape` (default size 224, minimum size 28).
        input_tensor: optional tensor to use as the model input; when
            None a new `Input` is created from `input_shape`.
        include_top: whether to append the classification top
            (1x1 conv to `classes`, global average pooling, softmax).
        weights: `None` (no weights are loaded), `'imagenet'`, or the
            path of a weights file to load.
        pooling: only used when `include_top` is False: `'avg'` or
            `'max'` appends the matching global pooling layer; any other
            value leaves the feature map unpooled.
        classes: number of output classes of the top.
        **kwargs: accepted but not used by this function.

    Returns:
        The constructed Keras Model.

    Raises:
        ValueError: if `weights` is not `None`, `'imagenet'` or an
            existing path, or if `'imagenet'` weights are requested with
            the top included and `classes` is not 1000.
    """
    if weights not in {'imagenet', None} and not os.path.exists(weights):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    use_imagenet = weights == 'imagenet'
    if use_imagenet and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top`'
            ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=28,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    img_input = Input(shape=input_shape) if input_tensor is None else input_tensor

    x = nano_net_body(img_input)

    if not include_top:
        model_name = 'nano_net_headless'
        if pooling == 'avg':
            x = GlobalAveragePooling2D(name='avg_pool')(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D(name='max_pool')(x)
    else:
        model_name = 'nano_net'
        x = DarknetConv2D(classes, (1, 1))(x)
        x = GlobalAveragePooling2D(name='avg_pool')(x)
        x = Softmax()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    inputs = img_input if input_tensor is None else get_source_inputs(input_tensor)

    # Create model.
    model = Model(inputs, x, name=model_name)

    # Load weights.
    if use_imagenet:
        if include_top:
            file_name = 'nanonet_weights_tf_dim_ordering_tf_kernels_224.h5'
        else:
            file_name = 'nanonet_weights_tf_dim_ordering_tf_kernels_224_no_top.h5'
        weights_path = get_file(file_name, BASE_WEIGHT_PATH + file_name,
                                cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model