def make_five_convs(x, num_filters):
    """Chain five ``DarknetConv2D_BN_Leaky`` calls with alternating kernels.

    The sequence is 1x1, 3x3, 1x1, 3x3, 1x1. The 1x1 calls use
    ``num_filters`` filters and the 3x3 calls use ``num_filters * 2``.

    Args:
        x: Tensor fed to the first call.
        num_filters: Filter count of the 1x1 calls.

    Returns:
        The output of the fifth call.
    """
    narrow, wide = num_filters, num_filters * 2
    layer_specs = (
        (narrow, (1, 1)),
        (wide, (3, 3)),
        (narrow, (1, 1)),
        (wide, (3, 3)),
        (narrow, (1, 1)),
    )
    for filters, kernel_size in layer_specs:
        x = DarknetConv2D_BN_Leaky(filters, kernel_size)(x)
    return x
def r_yolo_body(image_input_td, num_anchors, num_classes, td_len, mode):
    """Create the YOLO_V3 model body on top of the recurrent Darknet backbone.

    The backbone comes from ``darknet_body_r``, which also returns the
    indices of the two backbone layers used as skip connections. Three
    detection outputs are produced, each with
    ``num_anchors * (num_classes + 5)`` filters.

    Args:
        image_input_td: Keras input tensor for the time-distributed backbone.
            The original commented-out code suggests
            ``Input(shape=(td_len, None, None, 3))`` -- TODO confirm.
        num_anchors: Multiplier used for the detection-head filter count.
        num_classes: Number of classes; each anchor gets ``num_classes + 5``
            filters.
        td_len: Forwarded to ``darknet_body_r``.
        mode: Forwarded to ``darknet_body_r``.

    Returns:
        ``Model(image_input_td, [y1, y2, y3])``.
    """
    backbone_out, skip_conn = darknet_body_r(image_input_td, td_len, mode)
    darknet = Model(image_input_td, backbone_out)

    out_filters = num_anchors * (num_classes + 5)
    x, y1 = make_last_layers(darknet.output, 512, out_filters)

    # Backbone layers whose outputs are concatenated into the upsampling path.
    first_skip = darknet.layers[skip_conn[0]]
    second_skip = darknet.layers[skip_conn[1]]
    print('Concatenating:', first_skip, first_skip.output)
    print('Concatenating:', second_skip, second_skip.output)

    x = DarknetConv2D_BN_Leaky(256, (1, 1))(x)
    x = UpSampling2D(2)(x)
    print(x.shape)
    x = Concatenate()([x, second_skip.output])
    x, y2 = make_last_layers(x, 256, out_filters)
    print('Frist layer concatenated')

    x = DarknetConv2D_BN_Leaky(128, (1, 1))(x)
    x = UpSampling2D(2)(x)
    x = Concatenate()([x, first_skip.output])
    x, y3 = make_last_layers(x, 128, out_filters)
    print('Second layer concatenated')

    return Model(image_input_td, [y1, y2, y3])
def densenet_body(img_input, num_anchors, num_classes, SPP=False, CSP=False):
    """Create a three-output YOLO body on a DenseNet backbone.

    The original carried two copy-pasted branches (``CSP == True`` and
    ``CSP == False``) that differed only in the backbone builder. They are
    merged here so the detection head exists once. ``CSP`` is now evaluated
    by truthiness: previously a value equal to neither ``True`` nor ``False``
    (for example ``None``) skipped both branches and failed with an unbound
    local at the ``return``.

    Args:
        img_input: Keras input tensor of the model.
        num_anchors: Multiplier used for the detection-head filter count.
        num_classes: Number of classes; each anchor gets ``num_classes + 5``
            filters.
        SPP: Forwarded as the ``spp`` flag of the first ``make_last_layers``
            call only.
        CSP: When truthy the backbone is built with ``__create_CSPdense_net``,
            otherwise with ``__create_dense_net``.

    Returns:
        ``Model(img_input, [y1, y2, y3])``.
    """
    build_backbone = __create_CSPdense_net if CSP else __create_dense_net
    densenet = Model(img_input, build_backbone(img_input))

    out_filters = num_anchors * (num_classes + 5)

    x, y1 = make_last_layers(densenet.output, 512, out_filters, SPP)

    # NOTE(review): the skip-connection indices 308 and 136 are used for both
    # backbones, exactly as in the original, whereas yoloV4densenet_body uses
    # 315 and 138 for the CSP backbone -- confirm they fit the CSP network.
    x = compose(DarknetConv2D_BN_Leaky(512, (1, 1)), UpSampling2D(2))(x)
    x = Concatenate()([x, densenet.layers[308].output])
    x, y2 = make_last_layers(x, 512, out_filters)

    x = compose(DarknetConv2D_BN_Leaky(256, (1, 1)), UpSampling2D(2))(x)
    x = Concatenate()([x, densenet.layers[136].output])
    x, y3 = make_last_layers(x, 256, out_filters)

    return Model(img_input, [y1, y2, y3])
def yoloV4densenet_body(img_input, num_anchors, num_classes, SPP=False, CSP=False):
    """Create a three-output YOLOv4-style body on the CSP DenseNet backbone.

    The head first runs top-down (P5 -> P4 -> P3, with upsampling) and then
    bottom-up (P3 -> P4 -> P5, with strided convolutions), emitting one
    detection output per level.

    Only the CSP backbone is implemented. The original guarded the code with
    ``if CSP == True:`` and had no other branch, so a call with the default
    ``CSP=False`` could not produce a model. That case now raises a clear
    ``ValueError`` up front instead of failing later.

    Args:
        img_input: Keras input tensor of the model.
        num_anchors: Multiplier used for the detection-head filter count.
        num_classes: Number of classes; each anchor gets ``num_classes + 5``
            filters.
        SPP: Forwarded as the ``spp`` flag of ``make_last_layers``.
        CSP: Must be truthy; selects ``__create_CSPdense_net``.

    Returns:
        ``Model(img_input, [P5_output, P4_output, P3_output])``.

    Raises:
        ValueError: If ``CSP`` is falsy.
    """
    if not CSP:
        raise ValueError(
            'yoloV4densenet_body only implements the CSP backbone; '
            'call it with CSP=True')

    out_filters = num_anchors * (num_classes + 5)
    densenet = Model(img_input, __create_CSPdense_net(img_input))

    # The second return value is unused, but the call is kept unchanged so
    # the same layers are created in the same order as before.
    P5, _ = make_last_layers(densenet.output, 512, out_filters, SPP)

    # Top-down path: upsample the deeper level and merge it with a backbone
    # feature map taken by hard-coded layer index.
    P5_upsample = compose(DarknetConv2D_BN_Leaky(512, (1, 1)), UpSampling2D(2))(P5)
    P4 = DarknetConv2D_BN_Leaky(512, (1, 1))(densenet.layers[315].output)
    P4 = Concatenate()([P4, P5_upsample])
    P4 = make_five_convs(P4, 512)

    P4_upsample = compose(DarknetConv2D_BN_Leaky(256, (1, 1)), UpSampling2D(2))(P4)
    P3 = DarknetConv2D_BN_Leaky(256, (1, 1))(densenet.layers[138].output)
    P3 = Concatenate()([P3, P4_upsample])
    P3 = make_five_convs(P3, 256)

    P3_output = DarknetConv2D_BN_Leaky(256 * 2, (3, 3))(P3)
    P3_output = DarknetConv2D(out_filters, (1, 1))(P3_output)

    # Bottom-up path. Original comment here: "26,26 output".
    P3_downsample = ZeroPadding2D(((1, 0), (1, 0)))(P3)
    P3_downsample = DarknetConv2D_BN_Leaky(512, (3, 3), strides=(2, 2))(P3_downsample)
    P4 = Concatenate()([P3_downsample, P4])
    P4 = make_five_convs(P4, 512)

    P4_output = DarknetConv2D_BN_Leaky(512 * 2, (3, 3))(P4)
    P4_output = DarknetConv2D(out_filters, (1, 1))(P4_output)

    # Original comment here: "13,13 output".
    P4_downsample = ZeroPadding2D(((1, 0), (1, 0)))(P4)
    P4_downsample = DarknetConv2D_BN_Leaky(512, (3, 3), strides=(2, 2))(P4_downsample)
    P5 = Concatenate()([P4_downsample, P5])
    P5 = make_five_convs(P5, 512)

    P5_output = DarknetConv2D_BN_Leaky(512 * 2, (3, 3))(P5)
    P5_output = DarknetConv2D(out_filters, (1, 1))(P5_output)

    return Model(img_input, [P5_output, P4_output, P3_output])
def yolo_body(inputs, num_anchors, num_classes, spp=False):
    """Create the YOLO_V3 model CNN body in Keras.

    A Darknet backbone is built with ``darknet_body`` and three detection
    outputs are attached, each with ``num_anchors * (num_classes + 5)``
    filters.

    Args:
        inputs: Keras input tensor of the model.
        num_anchors: Multiplier used for the detection-head filter count.
        num_classes: Number of classes; each anchor gets ``num_classes + 5``
            filters.
        spp: Forwarded to the first ``make_last_layers`` call only.

    Returns:
        ``Model(inputs, [y1, y2, y3])``.
    """
    out_filters = num_anchors * (num_classes + 5)
    darknet = Model(inputs, darknet_body(inputs))

    x, y1 = make_last_layers(darknet.output, 512, out_filters, spp)

    # Skip connections are taken from the backbone by hard-coded layer index.
    upsampled = compose(DarknetConv2D_BN_Leaky(256, (1, 1)), UpSampling2D(2))(x)
    merged = Concatenate()([upsampled, darknet.layers[152].output])
    x, y2 = make_last_layers(merged, 256, out_filters)

    upsampled = compose(DarknetConv2D_BN_Leaky(128, (1, 1)), UpSampling2D(2))(x)
    merged = Concatenate()([upsampled, darknet.layers[92].output])
    x, y3 = make_last_layers(merged, 128, out_filters)

    return Model(inputs, [y1, y2, y3])
def yolo_body_shufflenet(inputs, num_anchors, num_classes):
    """Create a two-output YOLO_V3 body on a ShuffleNet backbone.

    NOTE(review): the original docstring and comments described a MobileNet
    backbone (``conv_pw_*`` layers, 416 x 416 x 3 input); the code builds
    ``ShuffleNet(input_tensor=inputs)``. The shapes quoted below are carried
    over from the original comments and are unverified.

    Args:
        inputs: Keras input tensor handed to ``ShuffleNet``.
        num_anchors: Multiplier used for the detection-head filter count.
        num_classes: Number of classes; each anchor gets ``num_classes + 5``
            filters.

    Returns:
        ``Model(inputs=inputs, outputs=[y1, y2])``.
    """
    backbone = ShuffleNet(input_tensor=inputs)
    out_filters = num_anchors * (num_classes + 5)

    # Deep feature map (original comment: "f1 :13 x 13 x 576").
    # An SPP stage (3x3 / 5x5 max-pools concatenated with f1, followed by a
    # 512-filter 1x1 conv) was commented out at this point in the original.
    f1 = backbone.get_layer('stage4/block4/relu_out').output
    f1 = DarknetSeparableConv2D_BN_Leaky(256, (3, 3))(f1)
    y1 = DarknetConv2D(out_filters, (1, 1))(f1)

    # Upsample the deep branch and merge it with the shallower feature map
    # (original comment: "f2: 26 x 26 x 512").
    to_finer_scale = compose(DarknetConv2D_BN_Leaky(128, (1, 1)), UpSampling2D(2))
    f1 = to_finer_scale(f1)
    f2 = backbone.get_layer('stage3/block8/relu_out').output
    merge_and_refine = compose(Concatenate(),
                               DarknetSeparableConv2D_BN_Leaky(256, (3, 3)))
    f2 = merge_and_refine([f1, f2])
    y2 = DarknetConv2D(out_filters, (1, 1))(f2)

    return Model(inputs=inputs, outputs=[y1, y2])
def spp_block(x):
    """Create an SPP (spatial pyramid pooling) block.

    Three ``DarknetConv2D_BN_Leaky`` calls (512 1x1, 1024 3x3, 512 1x1) are
    followed by stride-1, 'same'-padded max-pools of size 5, 9 and 13 over
    the same tensor. The result is the concatenation
    ``[x, mp13, mp9, mp5]``.

    A leading ``ZeroPadding2D`` and four trailing convolutions were commented
    out in the original and stay disabled.

    Args:
        x: Input tensor.

    Returns:
        The concatenated tensor.
    """
    for filters, kernel_size in ((512, (1, 1)), (1024, (3, 3)), (512, (1, 1))):
        x = DarknetConv2D_BN_Leaky(filters, kernel_size, strides=(1, 1))(x)

    # Pools are created in the order 5, 9, 13 but concatenated largest-first.
    pooled = [
        MaxPooling2D(pool_size=(size, size), strides=(1, 1), padding='same')(x)
        for size in (5, 9, 13)
    ]
    return Concatenate()([x] + pooled[::-1])
def darknet_body_r(image_input_td, td_len, mode):
    """Build the recurrent Darknet backbone.

    The first stages (a 32-filter conv plus the 64/128/256 residual stages)
    are wrapped in a ``Model`` and applied through ``TimeDistributed`` to
    ``image_input_td``. The time axis is then collapsed according to
    ``mode``, and the 512 and 1024 residual stages follow. Layer counts are
    printed along the way for debugging.

    Args:
        image_input_td: Keras input tensor given to the ``TimeDistributed``
            wrapper.
        td_len: Temporal kernel length of the ``Conv3D`` used in ``'3d'``
            mode; unused by the other modes.
        mode: ``'lstm'`` (ConvLSTM2D), ``'bilstm'`` (Bidirectional
            ConvLSTM2D) or ``'3d'`` (Conv3D, first time step kept, then
            zero-padded).

    Returns:
        Tuple ``(x, skip_conn)``: the backbone output tensor and a list of
        two layer indices, recorded right after the temporal stage and right
        after the 512 residual stage, each as the layer count minus one.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """
    def count_layers(graph_input, graph_output):
        # A throwaway Model is built only to read how many layers it spans.
        return len(Model(graph_input, graph_output).layers)

    image_input = Input(shape=(None, None, 3))  # original note: (320, 320, 3)
    skip_conn = []

    x = DarknetConv2D_BN_Leaky(32, (3, 3))(image_input)
    print(count_layers(image_input, x))
    for filters, blocks in ((64, 1), (128, 2), (256, 8)):
        x = resblock_body(x, filters, blocks)
        print(count_layers(image_input, x))

    per_frame_model = Model(image_input, x)
    print('-' * 20)
    x = TimeDistributed(per_frame_model)(image_input_td)

    if mode == 'lstm':
        x = ConvLSTM2D(256, kernel_size=(3, 3), padding='same',
                       activation='relu')(x)
    elif mode == 'bilstm':
        recurrent = ConvLSTM2D(256, kernel_size=(3, 3), padding='same',
                               activation='relu')
        x = Bidirectional(recurrent)(x)
    elif mode == '3d':
        x = Conv3D(256, kernel_size=(td_len, 3, 3), padding='valid',
                   activation='relu')(x)
        # Keep only index 0 of the remaining time axis.
        x = Lambda(lambda frames: frames[:, 0, :, :])(x)
        x = ZeroPadding2D(((2, 0), (2, 0)))(x)
    else:
        raise ValueError('Recurrent mode not recognized')

    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)
    print(count_layers(image_input_td, x))
    skip_conn.append(count_layers(image_input_td, x) - 1)

    x = resblock_body(x, 512, 8)
    print(count_layers(image_input_td, x))
    skip_conn.append(count_layers(image_input_td, x) - 1)

    x = resblock_body(x, 1024, 4)
    print(count_layers(image_input_td, x))

    return x, skip_conn
def darknet_body(x, verbose=True):
    """Build the Darknet body (52 Convolution2D layers per the original docstring).

    This function used to be defined twice in a row. The second definition,
    which prints per-stage debug information, silently replaced the first
    and left it as dead code. Both are merged here. The default behaviour is
    that of the definition that was actually in effect (printing enabled),
    and ``verbose=False`` gives the quiet variant.

    Args:
        x: Input tensor of the backbone.
        verbose: When true (default), print after every stage the layer
            count, output shape, tensor name and the stage's label and
            number. Each report builds two throwaway ``Model`` objects.

    Returns:
        The output tensor of the last residual stage.
    """
    inpt = x

    def report(tensor, label, number):
        # Debug trace only. The label strings and numbers are the original
        # hard-coded annotations (presumably resolution x filters and the
        # cumulative stride for a 416 input -- TODO confirm).
        if verbose:
            print(len(Model(inpt, tensor).layers),
                  Model(inpt, tensor).output.shape,
                  tensor.name, label, number)

    x = DarknetConv2D_BN_Leaky(32, (3, 3))(x)
    report(x, '416x32', 1)

    # (filters, number of blocks, debug label, debug number) per stage.
    stages = (
        (64, 1, '208x64', 2),
        (128, 2, '104x128', 4),
        (256, 8, '52x256', 8),
        (512, 8, '26x512', 16),
        (1024, 4, '13x1024', 32),
    )
    for filters, num_blocks, label, number in stages:
        x = resblock_body(x, filters, num_blocks)
        report(x, label, number)

    return x
def make_last_layers(x, num_filters, out_filters, spp=False):
    """Build a detection head: a conv stack followed by the output convs.

    Without ``spp`` the stack is five ``DarknetConv2D_BN_Leaky`` calls that
    alternate 1x1 (``num_filters``) and 3x3 (``num_filters * 2``). With
    ``spp`` it is three such calls, then stride-1 max-pools of size 5, 9 and
    13 concatenated as ``[x, mp13, mp9, mp5]``, then three more calls.

    The output branch is one more 3x3 ``DarknetConv2D_BN_Leaky`` followed by
    a 1x1 ``DarknetConv2D`` with ``out_filters`` filters.

    Args:
        x: Input tensor.
        num_filters: Filter count of the 1x1 convolutions.
        out_filters: Filter count of the final 1x1 convolution.
        spp: Whether to insert the pooling stage.

    Returns:
        Tuple ``(x, y)``: the conv-stack output and the output-branch tensor.
    """
    wide = num_filters * 2

    if not spp:
        stack_specs = (
            (num_filters, (1, 1)),
            (wide, (3, 3)),
            (num_filters, (1, 1)),
            (wide, (3, 3)),
            (num_filters, (1, 1)),
        )
        stack = compose(*[DarknetConv2D_BN_Leaky(filters, kernel_size)
                          for filters, kernel_size in stack_specs])
        x = stack(x)
    else:
        x = DarknetConv2D_BN_Leaky(num_filters, (1, 1), strides=(1, 1))(x)
        x = DarknetConv2D_BN_Leaky(wide, (3, 3), strides=(1, 1))(x)
        x = DarknetConv2D_BN_Leaky(num_filters, (1, 1), strides=(1, 1))(x)

        # Pools are created in the order 5, 9, 13 but concatenated
        # largest-first after the un-pooled tensor.
        pooled = [
            MaxPooling2D(pool_size=(size, size), strides=(1, 1),
                         padding='same')(x)
            for size in (5, 9, 13)
        ]
        x = Concatenate()([x] + pooled[::-1])

        x = DarknetConv2D_BN_Leaky(num_filters, (1, 1))(x)
        x = DarknetConv2D_BN_Leaky(wide, (3, 3))(x)
        x = DarknetConv2D_BN_Leaky(num_filters, (1, 1))(x)

    output_branch = compose(DarknetConv2D_BN_Leaky(wide, (3, 3)),
                            DarknetConv2D(out_filters, (1, 1)))
    y = output_branch(x)
    return x, y
# Debug cell: dump one line per layer of `orig_spp` (defined outside this
# section -- presumably a reference SPP model; verify) showing its index,
# name and input shape(s).
for i, l in enumerate(orig_spp.layers):
    if 'conv' in l.name:
        # Layers with 'conv' in the name: also show strides, kernel size and filters.
        print(i, l.name, '\t\t', l.strides, l.kernel_size, l.filters, '\t\t', l.input.shape.as_list())
    elif 'add_' in l.name or 'concat' in l.name:
        # `l.input` is iterated here, so one shape is printed per input.
        # The comprehension variable reuses the name `l`; in Python 3 it is
        # scoped to the comprehension and does not rebind the loop variable.
        print(i, l.name, '\t\t\t\t\t\t', [l.shape.as_list() for l in l.input])
    else:
        print(i, l.name, '\t\t\t\t\t\t', l.input.shape.as_list())

# %%

# Experiment cell: build a stand-alone SPP stack on a small fixed-size input.
img_size = 19
image_input = Input(shape=(img_size, img_size, 3))  # (None, None, 3)
# Pad one row on top and one column on the left before the conv stack.
x = ZeroPadding2D(((1, 0), (1, 0)))(image_input)
x = DarknetConv2D_BN_Leaky(512, (1, 1), strides=(1, 1))(x)
x = DarknetConv2D_BN_Leaky(1024, (3, 3), strides=(1, 1))(x)
x = DarknetConv2D_BN_Leaky(512, (1, 1), strides=(1, 1))(x)
# Stride-1, 'same'-padded max-pools over the same tensor at three window sizes.
mp5 = MaxPooling2D(pool_size=(5, 5), strides=(1, 1), padding='same')(x)
mp9 = MaxPooling2D(pool_size=(9, 9), strides=(1, 1), padding='same')(x)
mp13 = MaxPooling2D(pool_size=(13, 13), strides=(1, 1), padding='same')(x)
# NOTE(review): the order here is [x, mp5, mp9, mp13], whereas spp_block and
# make_last_layers in this file concatenate [x, mp13, mp9, mp5] -- confirm
# which channel order is intended.
x = Concatenate()([x, mp5, mp9, mp13])
x = DarknetConv2D_BN_Leaky(512, (1, 1), strides=(1, 1))(x)
x = DarknetConv2D_BN_Leaky(1024, (3, 3), strides=(1, 1))(x)
x = DarknetConv2D_BN_Leaky(512, (1, 1), strides=(1, 1))(x)
x = DarknetConv2D_BN_Leaky(1024, (3, 3), strides=(1, 1))(x)
# `x` is rebound from the output tensor to the Model wrapping the whole stack.
x = Model(image_input, x)

#%%