Пример #1
0
def tiny_yolo3_mobilenetv3small_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Assemble the Tiny YOLOv3 network on a MobileNetV3Small backbone.

    Args:
        inputs: Keras tensor used as the backbone's ``input_tensor``.
        num_anchors: anchor count, forwarded to ``tiny_yolo3_predictions``.
        num_classes: class count, forwarded to ``tiny_yolo3_predictions``.
        alpha: MobileNetV3Small width multiplier; it also scales the channel
            counts reported to the prediction helper.

    Returns:
        A Keras ``Model`` from ``inputs`` to the two prediction tensors
        ``[y1, y2]``.
    """
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)
    print('backbone layers number: {}'.format(len(backbone.layers)))

    # Backbone taps, as annotated for a 416 x 416 x 3 input:
    #   layer 165 (final feature map) -> 13 x 13 x (576*alpha)
    #   layer 117 (middle of block 8) -> 26 x 26 x (288*alpha)
    # Layers are selected by index because activation layer names may differ
    # between TF1.x and TF2.x.
    feature_maps = (backbone.layers[165].output, backbone.layers[117].output)

    # Channel counts matching the two taps above (the original also noted
    # fixed 1024 / 512 as a commented-out alternative).
    feature_channels = (int(576*alpha), int(288*alpha))

    y1, y2 = tiny_yolo3_predictions(feature_maps, feature_channels,
                                    num_anchors, num_classes)

    return Model(inputs, [y1,y2])
Пример #2
0
def yolo2_mobilenetv3small_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Assemble the YOLOv2 network on a MobileNetV3Small backbone.

    Args:
        inputs: Keras tensor used as the backbone's ``input_tensor``.
        num_anchors: number of anchors; sets the prediction conv width.
        num_classes: number of classes; sets the prediction conv width.
        alpha: MobileNetV3Small width multiplier, also used to scale the
            head convolutions.

    Returns:
        A Keras ``Model`` from ``inputs`` to the output of the final
        ``predict_conv`` layer, which has
        ``num_anchors * (num_classes + 5)`` filters.
    """
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)

    # Annotated shapes for a 416 x 416 x 3 input:
    #   backbone.output (layer 165)   -> 13 x 13 x (576*alpha)
    #   layer 117 (middle of block 8) -> 26 x 26 x (288*alpha)
    head_channels = int(576 * alpha)

    # Deep branch: two 3x3 conv blocks on the final backbone feature map.
    deep_branch = compose(
        DarknetConv2D_BN_Leaky(head_channels, (3, 3)),
        DarknetConv2D_BN_Leaky(head_channels, (3, 3)))(backbone.output)

    # Passthrough branch: 1x1 conv on the layer-117 feature map, then the
    # space_to_depth_x2 Lambda before it is concatenated with the deep branch.
    passthrough = backbone.layers[117].output
    passthrough = DarknetConv2D_BN_Leaky(int(64 * alpha), (1, 1))(passthrough)
    # TODO: Allow Keras Lambda to use func arguments for output_shape?
    passthrough = Lambda(space_to_depth_x2,
                         output_shape=space_to_depth_x2_output_shape,
                         name='space_to_depth')(passthrough)

    merged = Concatenate()([passthrough, deep_branch])
    merged = DarknetConv2D_BN_Leaky(head_channels, (3, 3))(merged)
    predictions = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1),
                                name='predict_conv')(merged)
    return Model(inputs, predictions)
def yolo3_mobilenetv3small_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Assemble the three-scale YOLOv3 network on a MobileNetV3Small backbone.

    Args:
        inputs: Keras tensor used as the backbone's ``input_tensor``.
        num_anchors: number of anchors; sets the prediction width.
        num_classes: number of classes; sets the prediction width.
        alpha: MobileNetV3Small width multiplier, also used to scale the
            head channel counts.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]``, the outputs of
        the three ``make_last_layers`` heads (coarsest feature map first).
    """
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)

    # Backbone taps, as annotated for a 416 x 416 x 3 input:
    #   layer 165 (final feature map) -> 13 x 13 x (576*alpha)
    #   layer 117 (middle of block 8) -> 26 x 26 x (288*alpha)
    #   layer 38  (middle of block 3) -> 52 x 52 x (96*alpha)
    # Layers are selected by index because activation layer names may differ
    # between TF1.x and TF2.x.
    coarse_feature = backbone.layers[165].output
    finer_features = (backbone.layers[117].output, backbone.layers[38].output)

    # Channel counts for the taps above (the original also noted fixed
    # 1024 / 512 / 256 as a commented-out alternative).
    coarse_channels = int(576 * alpha)
    finer_channels = (int(288 * alpha), int(96 * alpha))

    prediction_filters = num_anchors * (num_classes + 5)

    # Head and output for the coarsest feature map.
    x, y = make_last_layers(coarse_feature, coarse_channels // 2,
                            prediction_filters)
    outputs = [y]

    # For each finer scale: 1x1 conv + 2x upsample of the running feature,
    # concatenate with the backbone tap, then run the head for that scale.
    for feature, channels in zip(finer_features, finer_channels):
        x = compose(DarknetConv2D_BN_Leaky(channels // 2, (1, 1)),
                    UpSampling2D(2))(x)
        x = Concatenate()([x, feature])
        x, y = make_last_layers(x, channels // 2, prediction_filters)
        outputs.append(y)

    return Model(inputs=inputs, outputs=outputs)
Пример #4
0
def tiny_yolo4_mobilenetv3small_body(inputs,
                                     num_anchors,
                                     num_classes,
                                     alpha=1.0,
                                     use_spp=True):
    """Assemble the Tiny YOLOv4 network on a MobileNetV3Small backbone.

    Args:
        inputs: Keras tensor used as the backbone's ``input_tensor``.
        num_anchors: number of anchors; sets the prediction conv width.
        num_classes: number of classes; sets the prediction conv width.
        alpha: MobileNetV3Small width multiplier, also used to scale the
            head convolutions.
        use_spp: when true, ``Spp_Conv2D_BN_Leaky`` is applied to the
            transformed coarse feature map.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2]``.
    """
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)

    # Backbone taps, as annotated for a 416 x 416 x 3 input:
    #   layer 165 (final feature map) -> 13 x 13 x (576*alpha)
    #   layer 117 (middle of block 8) -> 26 x 26 x (288*alpha)
    # Layers are selected by index because activation layer names may differ
    # between TF1.x and TF2.x.
    coarse_feature = backbone.layers[165].output
    fine_feature = backbone.layers[117].output

    wide_channels = int(576 * alpha)
    mid_channels = int(288 * alpha)
    narrow_channels = int(144 * alpha)
    prediction_filters = num_anchors * (num_classes + 5)

    # Coarse head: 1x1 conv, optionally followed by the SPP block.
    coarse = DarknetConv2D_BN_Leaky(mid_channels, (1, 1))(coarse_feature)
    if use_spp:
        coarse = Spp_Conv2D_BN_Leaky(coarse, mid_channels)

    # Top-down merge: upsample the coarse head and fuse it with the fine tap.
    coarse_upsampled = compose(
        DarknetConv2D_BN_Leaky(narrow_channels, (1, 1)),
        UpSampling2D(2))(coarse)
    fine = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(mid_channels, (3, 3)))(
            [coarse_upsampled, fine_feature])

    # Prediction for the finer scale.
    y2 = DarknetConv2D(prediction_filters, (1, 1))(fine)

    # Bottom-up merge: pad + strided conv on the fine branch, then fuse it
    # back into the coarse head.
    fine_downsampled = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        DarknetConv2D_BN_Leaky(mid_channels, (3, 3), strides=(2, 2)))(fine)
    coarse = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(wide_channels, (3, 3)))(
            [fine_downsampled, coarse])

    # Prediction for the coarser scale.
    y1 = DarknetConv2D(prediction_filters, (1, 1))(coarse)

    return Model(inputs, [y1, y2])
def tiny_yolo3lite_mobilenetv3small_body(inputs,
                                         num_anchors,
                                         num_classes,
                                         alpha=1.0):
    """Assemble the Tiny YOLOv3 Lite network on a MobileNetV3Small backbone.

    The prediction branches use ``Depthwise_Separable_Conv2D_BN_Leaky`` for
    their 3x3 stage.

    Args:
        inputs: Keras tensor used as the backbone's ``input_tensor``.
        num_anchors: number of anchors; sets the prediction conv width.
        num_classes: number of classes; sets the prediction conv width.
        alpha: MobileNetV3Small width multiplier, also used to scale the
            head channel counts.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2]``.
    """
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)

    # Backbone taps, as annotated for a 416 x 416 x 3 input:
    #   layer 165 (final feature map) -> 13 x 13 x (576*alpha)
    #   layer 117 (middle of block 8) -> 26 x 26 x (288*alpha)
    # Layers are selected by index because activation layer names may differ
    # between TF1.x and TF2.x.
    coarse_feature = backbone.layers[165].output
    fine_feature = backbone.layers[117].output

    # Channel counts for the taps above (the original also noted fixed
    # 1024 / 512 as a commented-out alternative).
    coarse_channels = int(576 * alpha)
    fine_channels = int(288 * alpha)
    prediction_filters = num_anchors * (num_classes + 5)

    # 1x1 transform of the coarse feature map, shared by both branches.
    coarse = DarknetConv2D_BN_Leaky(coarse_channels // 2, (1, 1))(
        coarse_feature)

    # Coarse-scale output.
    y1 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(filters=coarse_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='15'),
        DarknetConv2D(prediction_filters, (1, 1)))(coarse)

    # Upsample the coarse branch for merging with the fine tap.
    coarse_upsampled = compose(
        DarknetConv2D_BN_Leaky(fine_channels // 2, (1, 1)),
        UpSampling2D(2))(coarse)

    # Fine-scale output: concatenate, depthwise-separable 3x3, prediction.
    y2 = compose(
        Concatenate(),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=fine_channels,
                                            kernel_size=(3, 3),
                                            block_id_str='16'),
        DarknetConv2D(prediction_filters, (1, 1)))(
            [coarse_upsampled, fine_feature])

    return Model(inputs, [y1, y2])
def yolo3lite_mobilenetv3small_body(inputs,
                                    num_anchors,
                                    num_classes,
                                    alpha=1.0):
    """Assemble the three-scale YOLOv3 Lite network on MobileNetV3Small.

    Each scale's head is built by ``make_depthwise_separable_last_layers``.

    Args:
        inputs: Keras tensor used as the backbone's ``input_tensor``.
        num_anchors: number of anchors; sets the prediction width.
        num_classes: number of classes; sets the prediction width.
        alpha: MobileNetV3Small width multiplier, also used to scale the
            head channel counts.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]`` (coarsest
        feature map first).
    """
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)

    prediction_filters = num_anchors * (num_classes + 5)

    # Backbone taps, as annotated for a 416 x 416 x 3 input:
    #   layer 165 (final feature map) -> 13 x 13 x (576*alpha)
    #   layer 117 (middle of block 8) -> 26 x 26 x (288*alpha)
    #   layer 38  (middle of block 3) -> 52 x 52 x (96*alpha)
    # Layers are selected by index because activation layer names may differ
    # between TF1.x and TF2.x.
    x, y = make_depthwise_separable_last_layers(
        backbone.layers[165].output, int(288 * alpha), prediction_filters)
    outputs = [y]

    # (backbone layer index, upsample-conv channels, head channels) for the
    # two finer scales, in the order they are merged.
    finer_stages = (
        (117, int(144 * alpha), int(96 * alpha)),
        (38, int(48 * alpha), int(48 * alpha)),
    )
    for layer_index, reduce_channels, head_channels in finer_stages:
        # 1x1 conv + 2x upsample, then concatenate with the backbone tap.
        x = compose(DarknetConv2D_BN_Leaky(reduce_channels, (1, 1)),
                    UpSampling2D(2))(x)
        x = Concatenate()([x, backbone.layers[layer_index].output])
        x, y = make_depthwise_separable_last_layers(
            x, head_channels, prediction_filters)
        outputs.append(y)

    return Model(inputs=inputs, outputs=outputs)
Пример #7
0
def tiny_yolo2lite_mobilenetv3small_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Assemble the Tiny YOLOv2 Lite network on a MobileNetV3Small backbone.

    Args:
        inputs: Keras tensor used as the backbone's ``input_tensor``.
        num_anchors: number of anchors; sets the prediction conv width.
        num_classes: number of classes; sets the prediction conv width.
        alpha: MobileNetV3Small width multiplier, also used to scale the
            head convolution.

    Returns:
        A Keras ``Model`` from ``inputs`` to the output of the final
        ``predict_conv`` layer.
    """
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)
    print('backbone layers number: {}'.format(len(backbone.layers)))

    # backbone.output (layer 165, final feature map) is annotated as
    # 13 x 13 x (576*alpha) for a 416 x 416 x 3 input.
    head_channels = int(576*alpha)

    # Depthwise-separable 3x3 block followed by the 1x1 prediction conv.
    head = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(head_channels, (3,3), block_id_str='pred_1'),
        DarknetConv2D(num_anchors*(num_classes+5), (1,1), name='predict_conv'))
    predictions = head(backbone.output)

    return Model(inputs, predictions)
Пример #8
0
def tiny_yolo2_mobilenetv3small_body(inputs,
                                     num_anchors,
                                     num_classes,
                                     alpha=1.0):
    """Assemble the Tiny YOLOv2 network on a MobileNetV3Small backbone.

    Args:
        inputs: Keras tensor used as the backbone's ``input_tensor``.
        num_anchors: number of anchors; sets the prediction conv width.
        num_classes: number of classes; sets the prediction conv width.
        alpha: MobileNetV3Small width multiplier, also used to scale the
            head convolution.

    Returns:
        A Keras ``Model`` from ``inputs`` to the output of the final
        ``predict_conv`` layer.
    """
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)

    # backbone.output (layer 165, final feature map) is annotated as
    # 13 x 13 x (576*alpha) for a 416 x 416 x 3 input.
    head_channels = int(576 * alpha)
    prediction_filters = num_anchors * (num_classes + 5)

    # One 3x3 conv block followed by the 1x1 prediction conv.
    head = compose(
        DarknetConv2D_BN_Leaky(head_channels, (3, 3)),
        DarknetConv2D(prediction_filters, (1, 1), name='predict_conv'))
    predictions = head(backbone.output)

    return Model(inputs, predictions)
def tiny_yolo3_mobilenetv3small_body(inputs,
                                     num_anchors,
                                     num_classes,
                                     alpha=1.0):
    """Assemble the Tiny YOLOv3 network on a MobileNetV3Small backbone.

    This variant builds both prediction branches inline.

    Args:
        inputs: Keras tensor used as the backbone's ``input_tensor``.
        num_anchors: number of anchors; sets the prediction conv width.
        num_classes: number of classes; sets the prediction conv width.
        alpha: MobileNetV3Small width multiplier, also used to scale the
            head convolutions.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2]``.
    """
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)

    # Backbone taps, as annotated for a 416 x 416 x 3 input:
    #   layer 165 (final feature map) -> 13 x 13 x (576*alpha)
    #   layer 117 (middle of block 8) -> 26 x 26 x (288*alpha)
    # Layers are selected by index because activation layer names may differ
    # between TF1.x and TF2.x.
    fine_feature = backbone.layers[117].output
    coarse_feature = backbone.layers[165].output

    wide_channels = int(576 * alpha)
    mid_channels = int(288 * alpha)
    narrow_channels = int(144 * alpha)
    prediction_filters = num_anchors * (num_classes + 5)

    # 1x1 transform of the coarse feature map, shared by both branches.
    coarse = DarknetConv2D_BN_Leaky(mid_channels, (1, 1))(coarse_feature)

    # Coarse-scale output: 3x3 conv block, then the 1x1 prediction conv.
    y1 = compose(
        DarknetConv2D_BN_Leaky(wide_channels, (3, 3)),
        DarknetConv2D(prediction_filters, (1, 1)))(coarse)

    # Upsample the coarse branch for merging with the fine tap.
    coarse_upsampled = compose(
        DarknetConv2D_BN_Leaky(narrow_channels, (1, 1)),
        UpSampling2D(2))(coarse)

    # Fine-scale output: concatenate, 3x3 conv block, 1x1 prediction conv.
    y2 = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(mid_channels, (3, 3)),
        DarknetConv2D(prediction_filters, (1, 1)))(
            [coarse_upsampled, fine_feature])

    return Model(inputs, [y1, y2])
Пример #10
0
def yolo4lite_mobilenetv3small_body(inputs,
                                    num_anchors,
                                    num_classes,
                                    alpha=1.0):
    """Assemble the YOLOv4 Lite network on a MobileNetV3Small backbone.

    The detection head is delegated to ``yolo4lite_predictions``.

    Args:
        inputs: Keras tensor used as the backbone's ``input_tensor``.
        num_anchors: anchor count, forwarded to ``yolo4lite_predictions``.
        num_classes: class count, forwarded to ``yolo4lite_predictions``.
        alpha: MobileNetV3Small width multiplier; it also scales the channel
            counts reported to the prediction helper.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]``.
    """
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)

    # Backbone taps, as annotated for a 416 x 416 x 3 input:
    #   layer 165 (final feature map) -> 13 x 13 x (576*alpha)
    #   layer 117 (middle of block 8) -> 26 x 26 x (288*alpha)
    #   layer 38  (middle of block 3) -> 52 x 52 x (96*alpha)
    # Layers are selected by index because activation layer names may differ
    # between TF1.x and TF2.x.
    feature_maps = tuple(backbone.layers[index].output
                         for index in (165, 117, 38))

    # Channel counts for the taps above (the original also noted fixed
    # 1024 / 512 / 256 as a commented-out alternative).
    feature_channels = (int(576 * alpha), int(288 * alpha), int(96 * alpha))

    y1, y2, y3 = yolo4lite_predictions(feature_maps, feature_channels,
                                       num_anchors, num_classes)

    return Model(inputs, [y1, y2, y3])
Пример #11
0
def yolo2lite_mobilenetv3small_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Assemble the YOLOv2 Lite network on a MobileNetV3Small backbone.

    The detection head is delegated to ``yolo2lite_predictions``.

    Args:
        inputs: Keras tensor used as the backbone's ``input_tensor``.
        num_anchors: anchor count, forwarded to ``yolo2lite_predictions``.
        num_classes: class count, forwarded to ``yolo2lite_predictions``.
        alpha: MobileNetV3Small width multiplier; it also scales the channel
            counts reported to the prediction helper.

    Returns:
        A Keras ``Model`` from ``inputs`` to whatever
        ``yolo2lite_predictions`` returns.
    """
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)
    print('backbone layers number: {}'.format(len(backbone.layers)))

    # Backbone taps, as annotated for a 416 x 416 x 3 input:
    #   backbone.output (layer 165)   -> 13 x 13 x (576*alpha)
    #   layer 117 (middle of block 8) -> 26 x 26 x (288*alpha)
    # The intermediate layer is selected by index because activation layer
    # names may differ between TF1.x and TF2.x.
    feature_maps = (backbone.output, backbone.layers[117].output)

    # Channel counts matching the two taps above.
    feature_channels = (int(576*alpha), int(288*alpha))

    predictions = yolo2lite_predictions(feature_maps, feature_channels,
                                        num_anchors, num_classes)
    return Model(inputs, predictions)
Пример #12
0
def yolo4lite_mobilenetv3small_body(inputs,
                                    num_anchors,
                                    num_classes,
                                    alpha=1.0):
    """Assemble the YOLOv4 Lite network on a MobileNetV3Small backbone.

    This variant builds the whole neck inline: a top-down pass (upsample and
    merge) from the coarsest tap to the finest, followed by a bottom-up pass
    (strided downsample and merge) back to the coarsest, with one prediction
    branch per scale.

    Args:
        inputs: Keras tensor used as the backbone's ``input_tensor``.
        num_anchors: number of anchors; sets the prediction conv width.
        num_classes: number of classes; sets the prediction conv width.
        alpha: MobileNetV3Small width multiplier, also used to scale every
            head channel count.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]`` (coarsest
        feature map first).
    """
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)

    prediction_filters = num_anchors * (num_classes + 5)
    ch_576 = int(576 * alpha)
    ch_288 = int(288 * alpha)
    ch_144 = int(144 * alpha)
    ch_96 = int(96 * alpha)
    ch_48 = int(48 * alpha)

    # Backbone taps, as annotated for a 416 x 416 x 3 input:
    #   layer 165 (final feature map) -> 13 x 13 x (576*alpha)
    #   layer 117 (middle of block 8) -> 26 x 26 x (288*alpha)
    #   layer 38  (middle of block 3) -> 52 x 52 x (96*alpha)
    # Layers are selected by index because activation layer names may differ
    # between TF1.x and TF2.x.
    coarse_tap = backbone.layers[165].output
    middle_tap = backbone.layers[117].output
    fine_tap = backbone.layers[38].output

    # ---- top-down pass -------------------------------------------------
    # Coarse head with SPP.
    coarse = make_yolo_spp_depthwise_separable_head(coarse_tap,
                                                    ch_288,
                                                    block_id_str='11')

    # Upsample the coarse head and merge it into the middle scale.
    coarse_upsampled = compose(DarknetConv2D_BN_Leaky(ch_144, (1, 1)),
                               UpSampling2D(2))(coarse)
    middle = DarknetConv2D_BN_Leaky(ch_144, (1, 1))(middle_tap)
    middle = Concatenate()([middle, coarse_upsampled])
    middle = make_yolo_depthwise_separable_head(middle,
                                                ch_144,
                                                block_id_str='12')

    # Upsample the middle head and merge it into the fine scale.
    middle_upsampled = compose(DarknetConv2D_BN_Leaky(ch_48, (1, 1)),
                               UpSampling2D(2))(middle)
    fine = DarknetConv2D_BN_Leaky(ch_48, (1, 1))(fine_tap)
    fine = Concatenate()([fine, middle_upsampled])

    # Fine-scale head and prediction.
    fine = make_yolo_depthwise_separable_head(fine,
                                              ch_48,
                                              block_id_str='13')
    y3 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(ch_96, (3, 3),
                                            block_id_str='13_3'),
        DarknetConv2D(prediction_filters, (1, 1)))(fine)

    # ---- bottom-up pass ------------------------------------------------
    # Downsample the fine head and merge it back into the middle scale.
    fine_downsampled = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        Darknet_Depthwise_Separable_Conv2D_BN_Leaky(ch_144, (3, 3),
                                                    strides=(2, 2),
                                                    block_id_str='13_4'))(fine)
    middle = Concatenate()([fine_downsampled, middle])

    # Middle-scale head and prediction.
    middle = make_yolo_depthwise_separable_head(middle,
                                                ch_144,
                                                block_id_str='14')
    y2 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(ch_288, (3, 3),
                                            block_id_str='14_3'),
        DarknetConv2D(prediction_filters, (1, 1)))(middle)

    # Downsample the middle head and merge it back into the coarse scale.
    middle_downsampled = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        Darknet_Depthwise_Separable_Conv2D_BN_Leaky(ch_288, (3, 3),
                                                    strides=(2, 2),
                                                    block_id_str='14_4'))(middle)
    coarse = Concatenate()([middle_downsampled, coarse])

    # Coarse-scale head and prediction.
    coarse = make_yolo_depthwise_separable_head(coarse,
                                                ch_288,
                                                block_id_str='15')
    y1 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(ch_576, (3, 3),
                                            block_id_str='15_3'),
        DarknetConv2D(prediction_filters, (1, 1)))(coarse)

    return Model(inputs, [y1, y2, y3])
Пример #13
0
def _select_backbone_constructor(model_type):
    """Map a model type name to its constructor and per-model keyword overrides.

    Only the constructor name of the matching branch is evaluated, so a
    backbone that is not imported only fails when it is actually requested.

    Returns:
        A ``(constructor, overrides)`` pair; ``overrides`` holds keyword
        arguments that replace or extend the shared ones.

    Raises:
        ValueError: if ``model_type`` is not a supported name.
    """
    if model_type == 'mobilenet':
        return MobileNet, {'alpha': 0.5}
    if model_type == 'mobilenetv2':
        return MobileNetV2, {'alpha': 0.5}
    if model_type == 'mobilenetv3large':
        return MobileNetV3Large, {'alpha': 0.75}
    if model_type == 'mobilenetv3small':
        return MobileNetV3Small, {'alpha': 0.75}
    if model_type == 'peleenet':
        return PeleeNet, {}
    if model_type == 'ghostnet':
        return GhostNet, {}
    if model_type == 'squeezenet':
        return SqueezeNet, {}
    if model_type == 'mobilevit_s':
        return MobileViT_S, {}
    if model_type == 'mobilevit_xs':
        return MobileViT_XS, {}
    if model_type == 'mobilevit_xxs':
        return MobileViT_XXS, {}
    if model_type == 'resnet50':
        return ResNet50, {}
    # The simple CNNs are always built with weights=None, regardless of the
    # ``weights`` argument given to get_base_model.
    if model_type == 'simple_cnn':
        return SimpleCNN, {'weights': None}
    if model_type == 'simple_cnn_lite':
        return SimpleCNNLite, {'weights': None}
    raise ValueError('Unsupported model type: {!r}'.format(model_type))


def get_base_model(model_type, model_input_shape, weights='imagenet'):
    """Create a headless backbone network for the requested model type.

    Every backbone is built with ``include_top=False`` and ``pooling=None``
    on a fresh ``Input`` named ``'image_input'``.

    Args:
        model_type: one of 'mobilenet', 'mobilenetv2', 'mobilenetv3large',
            'mobilenetv3small', 'peleenet', 'ghostnet', 'squeezenet',
            'mobilevit_s', 'mobilevit_xs', 'mobilevit_xxs', 'resnet50',
            'simple_cnn' or 'simple_cnn_lite'.
        model_input_shape: spatial input shape as a 2-item sequence (tuple or
            list); a trailing channel dimension of 3 is appended.
        weights: forwarded to the backbone constructor, except for
            'simple_cnn' and 'simple_cnn_lite', which always get ``None``.

    Returns:
        The model object returned by the selected backbone constructor.

    Raises:
        ValueError: if ``model_type`` is not supported. This is raised before
            any input tensor is created.
    """
    # Validate first so an unknown type fails fast without touching the graph.
    constructor, overrides = _select_backbone_constructor(model_type)

    # tuple() lets callers pass a list as well as a tuple.
    input_shape = tuple(model_input_shape) + (3, )
    input_tensor = Input(shape=input_shape, name='image_input')

    # Arguments shared by every backbone; per-model overrides win.
    build_kwargs = {
        'input_tensor': input_tensor,
        'input_shape': input_shape,
        'weights': weights,
        'pooling': None,
        'include_top': False,
    }
    build_kwargs.update(overrides)
    return constructor(**build_kwargs)
Пример #14
0
def yolo4_mobilenetv3small_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Assemble the three-scale YOLOv4 network on a MobileNetV3Small backbone.

    The neck runs a top-down pass (upsample and merge) from the coarsest tap
    to the finest, then a bottom-up pass (strided downsample and merge) back
    to the coarsest, with one prediction branch per scale.

    Args:
        inputs: Keras tensor used as the backbone's ``input_tensor``.
        num_anchors: number of anchors; sets the prediction conv width.
        num_classes: number of classes; sets the prediction conv width.
        alpha: MobileNetV3Small width multiplier, also used to scale the
            head channel counts.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]`` (coarsest
        feature map first).
    """
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)

    # Backbone taps, as annotated for a 416 x 416 x 3 input:
    #   layer 165 (final feature map) -> 13 x 13 x (576*alpha)
    #   layer 117 (middle of block 8) -> 26 x 26 x (288*alpha)
    #   layer 38  (middle of block 3) -> 52 x 52 x (96*alpha)
    # Layers are selected by index because activation layer names may differ
    # between TF1.x and TF2.x.
    coarse_tap = backbone.layers[165].output
    middle_tap = backbone.layers[117].output
    fine_tap = backbone.layers[38].output

    # Channel counts for the taps above (the original also noted fixed
    # 1024 / 512 / 256 as a commented-out alternative).
    coarse_channels = int(576 * alpha)
    middle_channels = int(288 * alpha)
    fine_channels = int(96 * alpha)

    coarse_half = coarse_channels // 2
    middle_half = middle_channels // 2
    fine_half = fine_channels // 2
    prediction_filters = num_anchors * (num_classes + 5)

    # ---- top-down pass -------------------------------------------------
    # Coarse head with SPP.
    coarse = make_yolo_spp_head(coarse_tap, coarse_half)

    # Upsample the coarse head and merge it into the middle scale.
    coarse_upsampled = compose(
        DarknetConv2D_BN_Leaky(middle_half, (1, 1)),
        UpSampling2D(2))(coarse)
    middle = DarknetConv2D_BN_Leaky(middle_half, (1, 1))(middle_tap)
    middle = Concatenate()([middle, coarse_upsampled])
    middle = make_yolo_head(middle, middle_half)

    # Upsample the middle head and merge it into the fine scale.
    middle_upsampled = compose(
        DarknetConv2D_BN_Leaky(fine_half, (1, 1)),
        UpSampling2D(2))(middle)
    fine = DarknetConv2D_BN_Leaky(fine_half, (1, 1))(fine_tap)
    fine = Concatenate()([fine, middle_upsampled])

    # Fine-scale head and prediction.
    fine = make_yolo_head(fine, fine_half)
    y3 = compose(DarknetConv2D_BN_Leaky(fine_channels, (3, 3)),
                 DarknetConv2D(prediction_filters, (1, 1)))(fine)

    # ---- bottom-up pass ------------------------------------------------
    # Downsample the fine head and merge it back into the middle scale.
    fine_downsampled = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        DarknetConv2D_BN_Leaky(middle_half, (3, 3),
                               strides=(2, 2)))(fine)
    middle = Concatenate()([fine_downsampled, middle])

    # Middle-scale head and prediction.
    middle = make_yolo_head(middle, middle_half)
    y2 = compose(DarknetConv2D_BN_Leaky(middle_channels, (3, 3)),
                 DarknetConv2D(prediction_filters, (1, 1)))(middle)

    # Downsample the middle head and merge it back into the coarse scale.
    middle_downsampled = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        DarknetConv2D_BN_Leaky(coarse_half, (3, 3),
                               strides=(2, 2)))(middle)
    coarse = Concatenate()([middle_downsampled, coarse])

    # Coarse-scale head and prediction.
    coarse = make_yolo_head(coarse, coarse_half)
    y1 = compose(DarknetConv2D_BN_Leaky(coarse_channels, (3, 3)),
                 DarknetConv2D(prediction_filters, (1, 1)))(coarse)

    return Model(inputs, [y1, y2, y3])