def import_feature_extractor(backend, input_size):
    if backend == 'Inception3':
        feature_extractor = Inception3Feature(input_size)
    elif backend == 'SqueezeNet':
        feature_extractor = SqueezeNetFeature(input_size)
    elif backend == 'MobileNet':
        feature_extractor = MobileNetFeature(input_size)
    elif backend == 'Full Yolo':
        feature_extractor = FullYoloFeature(input_size)
    elif backend == 'Tiny Yolo':
        feature_extractor = TinyYoloFeature(input_size)
    elif backend == 'VGG16':
        feature_extractor = VGG16Feature(input_size)
    elif backend == 'ResNet50':
        feature_extractor = ResNet50Feature(input_size)
    elif os.path.dirname(backend) != "":
        # treat the backend string as a path to a custom backend module
        base_path = os.path.dirname(backend)
        sys.path.append(base_path)
        custom_backend_name = os.path.basename(backend)
        custom_backend = import_dynamically(custom_backend_name)
        # validate the class before instantiating it
        if not issubclass(custom_backend, BaseFeatureExtractor):
            raise RuntimeError('You are trying to import a custom backend, but your backend must '
                               'inherit from "backend.BaseFeatureExtractor".')
        feature_extractor = custom_backend(input_size)
        print('Using a custom backend called {}.'.format(custom_backend_name))
    else:
        raise RuntimeError('Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, '
                           'SqueezeNet, VGG16, ResNet50, or Inception3 at the moment!')

    return feature_extractor
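# --- illustrative sketch (not from the original source) ---
# import_dynamically is not defined in this snippet; a minimal version using
# only the standard library might look like this -- the assumption that the
# module and the class it exports share the same name is illustrative, not
# confirmed by the source
import importlib

def import_dynamically(name):
    module = importlib.import_module(name)  # e.g. MyBackend.py found on sys.path
    return getattr(module, name)            # class named like the module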
def __init__(self, architecture, input_size, labels, max_box_per_image, anchors):
    self.architecture = architecture
    self.input_size = input_size
    self.labels = list(labels)
    self.nb_class = len(self.labels)
    self.nb_box = 5
    self.class_wt = np.ones(self.nb_class, dtype='float32')
    self.anchors = anchors
    self.max_box_per_image = max_box_per_image

    # make the feature extractor layers
    input_image = Input(shape=(self.input_size, self.input_size, 3))
    self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

    if architecture == 'Inception3':
        self.feature_extractor = Inception3Feature(self.input_size)
    elif architecture == 'SqueezeNet':
        self.feature_extractor = SqueezeNetFeature(self.input_size)
    elif architecture == 'MobileNet':
        self.feature_extractor = MobileNetFeature(self.input_size)
    elif architecture == 'Full Yolo':
        self.feature_extractor = FullYoloFeature(self.input_size)
    elif architecture == 'Tiny Yolo':
        self.feature_extractor = TinyYoloFeature(self.input_size)
    elif architecture == 'VGG16':
        self.feature_extractor = VGG16Feature(self.input_size)
    elif architecture == 'ResNet50':
        self.feature_extractor = ResNet50Feature(self.input_size)
    else:
        raise Exception('Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!')

    print(self.feature_extractor.get_output_shape())
    self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
    features = self.feature_extractor.extract(input_image)

    # make the object detection layer
    output = Conv2D(self.nb_box * (4 + 1 + self.nb_class),
                    (1, 1), strides=(1, 1),
                    padding='same',
                    name='conv_23',
                    kernel_initializer='lecun_normal')(features)
    output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)
    output = Lambda(lambda args: args[0])([output, self.true_boxes])

    self.model = Model([input_image, self.true_boxes], output)

    # initialize the weights of the detection layer
    layer = self.model.layers[-4]
    weights = layer.get_weights()

    new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h * self.grid_w)
    new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h * self.grid_w)

    layer.set_weights([new_kernel, new_bias])

    # print a summary of the whole model
    self.model.summary()
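# --- worked example (not from the original source) ---
# size of the detection head's output: with nb_box = 5 anchors and, say,
# nb_class = 20 labels, the 1x1 convolution emits 5 * (4 + 1 + 20) = 125
# channels, which the Reshape then splits into (grid_h, grid_w, 5, 25):
# 4 box coordinates, 1 objectness score, and 20 class scores per anchor
nb_box, nb_class = 5, 20
assert nb_box * (4 + 1 + nb_class) == 125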
def __init__(self, backend, input_size, labels,
             max_box_per_image=50,
             anchors=[0.57273, 0.677385, 1.87446, 2.06253, 3.33843,
                      5.47434, 7.88282, 3.52778, 9.77052, 9.16828]):
    self.input_size = input_size
    self.labels = list(labels)
    self.nb_class = len(self.labels)
    self.nb_box = len(anchors) // 2
    self.class_wt = np.ones(self.nb_class, dtype='float32')
    self.anchors = anchors
    self.max_box_per_image = max_box_per_image

    ##########################
    # Make the model
    ##########################

    # make the feature extractor layers
    input_image = Input(shape=(self.input_size, self.input_size, 3))
    self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

    if backend == 'SqueezeNet':
        self.feature_extractor = SqueezeNetFeature(self.input_size)
    elif backend == 'MobileNet':
        self.feature_extractor = MobileNetFeature(self.input_size)
    elif backend == 'Tiny Yolo':
        self.feature_extractor = TinyYoloFeature(self.input_size)
    else:
        raise Exception('Architecture not supported! Only support Tiny Yolo, MobileNet, and SqueezeNet at the moment!')

    print(self.feature_extractor.get_output_shape())
    self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
    features = self.feature_extractor.extract(input_image)

    # make the object detection layer
    output = Conv2D(self.nb_box * (4 + 1 + self.nb_class),
                    (1, 1), strides=(1, 1),
                    padding='same',
                    name='DetectionLayer',
                    kernel_initializer='lecun_normal')(features)
    output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)
    output = Lambda(lambda args: args[0])([output, self.true_boxes])

    self.model = Model([input_image, self.true_boxes], output)

    # initialize the weights of the detection layer
    layer = self.model.layers[-4]
    weights = layer.get_weights()

    new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h * self.grid_w)
    new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h * self.grid_w)

    layer.set_weights([new_kernel, new_bias])

    # print a summary of the whole model
    self.model.summary()
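# --- worked example (not from the original source) ---
# the default anchors flatten five (width, height) pairs into one list, which
# is why nb_box = len(anchors) // 2; pairing them back up:
anchors = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843,
           5.47434, 7.88282, 3.52778, 9.77052, 9.16828]
anchor_pairs = list(zip(anchors[0::2], anchors[1::2]))
assert len(anchor_pairs) == len(anchors) // 2  # 5 anchor boxes per grid cell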
def __init__(self, backend, input_size, labels, max_box_per_image, anchors,
             load_from_json=None, trained_weights=None):
    self.input_size = input_size
    self.labels = list(labels)
    self.nb_class = len(self.labels)
    self.nb_box = len(anchors) // 2
    self.class_wt = np.ones(self.nb_class, dtype='float32')
    self.anchors = anchors
    self.max_box_per_image = max_box_per_image

    if load_from_json is None:
        ##########################
        # Make the model
        ##########################

        # make the feature extractor layers
        input_image = Input(shape=(self.input_size, self.input_size, 3))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

        if backend == 'Inception3':
            self.feature_extractor = Inception3Feature(self.input_size)
        elif backend == 'SqueezeNet':
            self.feature_extractor = SqueezeNetFeature(self.input_size)
        elif backend == 'MobileNet':
            self.feature_extractor = MobileNetFeature(self.input_size)
        elif backend == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(self.input_size)
        elif backend == 'Tiny Yolo':
            self.feature_extractor = TinyYoloFeature(self.input_size)
        elif backend == 'VGG16':
            self.feature_extractor = VGG16Feature(self.input_size)
        elif backend == 'ResNet50':
            self.feature_extractor = ResNet50Feature(self.input_size)
        elif backend == 'Tiniest':
            self.feature_extractor = TiniestYoloFeature(self.input_size)
        else:
            raise Exception('Architecture not supported! Only support Full Yolo, Tiny Yolo, Tiniest, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!')

        print(self.feature_extractor.get_output_shape())
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
        features = self.feature_extractor.extract(input_image)

        # make the object detection layer
        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class),
                        (1, 1), strides=(1, 1),
                        padding='same',
                        name='DetectionLayer',
                        kernel_initializer='lecun_normal')(features)
        output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)
        output = Lambda(lambda args: args[0])([output, self.true_boxes])

        self.model = Model([input_image, self.true_boxes], output)

        # initialize the weights of the detection layer
        layer = self.model.layers[-4]
        weights = layer.get_weights()

        new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h * self.grid_w)
        new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h * self.grid_w)

        layer.set_weights([new_kernel, new_bias])
    else:
        # rebuild the model from a pickled JSON config and pickled weights
        self.feature_extractor = None
        with open(load_from_json, 'rb') as f:
            cfg = pickle.load(f)
            self.model = model_from_json(cfg)
        with open(trained_weights, 'rb') as f:
            weights = pickle.load(f)
            self.model.set_weights(weights)
        self.grid_h, self.grid_w = self.model.get_output_shape_at(-1)[1:3]

    # print a summary of the whole model
    self.model.summary()
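# --- illustrative sketch (not from the original source) ---
# the load_from_json branch above expects a pickled JSON config string and a
# pickled weights list; a matching save side might look like this (the function
# name and file names are hypothetical)
import pickle

def save_for_reload(model, config_path='model_cfg.pkl', weights_path='model_weights.pkl'):
    with open(config_path, 'wb') as f:
        pickle.dump(model.to_json(), f)      # JSON architecture string
    with open(weights_path, 'wb') as f:
        pickle.dump(model.get_weights(), f)  # list of numpy weight arrays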
def __init__(self, backend, input_size, labels, max_box_per_image, anchors, verbose=1):
    ##########################
    # Save the network parameters
    ##########################

    self.input_size = input_size
    self.labels = list(labels)
    self.nb_class = len(self.labels)
    self.nb_box = len(anchors) // 2
    self.class_wt = np.ones(self.nb_class, dtype='float32')
    self.anchors = anchors
    self.max_box_per_image = max_box_per_image

    ##########################
    # Make the model
    ##########################

    # make the feature extractor layers
    input_image = Input(shape=(self.input_size, self.input_size, 3))
    self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

    if backend == 'Inception3':
        self.feature_extractor = Inception3Feature(input_size=self.input_size)
    elif backend == 'SqueezeNet':
        self.feature_extractor = SqueezeNetFeature(input_size=self.input_size)
    elif backend == 'MobileNet':
        self.feature_extractor = MobileNetFeature(input_size=self.input_size)
    elif backend == 'Full Yolo':
        self.feature_extractor = FullYoloFeature(input_size=self.input_size)
    elif backend == 'Tiny Yolo':
        self.feature_extractor = TinyYoloFeature(input_size=self.input_size)
    elif backend == 'VGG16':
        self.feature_extractor = VGG16Feature(input_size=self.input_size)
    elif backend == 'ResNet50':
        self.feature_extractor = ResNet50Feature(input_size=self.input_size)
    else:
        raise Exception('Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!')

    print('Feature extractor shape: {}'.format(self.feature_extractor.get_output_shape()))
    self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
    features = self.feature_extractor.extract(input_image=input_image)

    # make the object detection layer
    output = Conv2D(filters=self.nb_box * (4 + 1 + self.nb_class),
                    kernel_size=(1, 1),
                    strides=(1, 1),
                    padding='same',
                    kernel_initializer='lecun_normal',
                    name='DetectionLayer')(features)
    output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)
    # small hack to allow true_boxes to be registered when Keras builds the model
    # for more information: https://github.com/fchollet/keras/issues/2790
    output = Lambda(lambda args: args[0])([output, self.true_boxes])

    self.model = Model([input_image, self.true_boxes], output)

    # re-initialize the weights of the detection layer with small random values
    # scaled by the grid size (this overrides the 'lecun_normal' initializer set above)
    layer = self.model.layers[-4]
    weights = layer.get_weights()

    new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h * self.grid_w)
    new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h * self.grid_w)

    layer.set_weights([new_kernel, new_bias])

    # print a summary of the whole model
    if verbose:
        self.model.summary()
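# --- illustrative sketch (not from the original source) ---
# a minimal standalone demonstration of the true_boxes trick referenced in the
# comment above: Keras only registers an Input as part of the graph if some
# layer consumes it, so an identity Lambda "touches" the auxiliary input
# without altering the prediction tensor, and a custom loss can then read it;
# the shapes here are arbitrary
from keras.layers import Input, Lambda, Dense
from keras.models import Model

x = Input(shape=(8,))
aux = Input(shape=(4,))                     # consumed only by the Lambda below
y = Dense(2)(x)
y = Lambda(lambda args: args[0])([y, aux])  # passes y through, registers aux
model = Model([x, aux], y)                  # both inputs are now in the graph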
def __init__(self, backend, input_width, input_height, input_channel,
             labels, max_box_per_image, anchors, saved_config_name):
    self.input_width = input_width
    self.input_height = input_height
    self.input_channel = input_channel
    self.labels = list(labels)
    self.nb_class = len(self.labels)
    self.nb_box = len(anchors) // 2  # each anchor is a (w, h) pair, i.e. 2 numbers
    self.class_wt = np.ones(self.nb_class, dtype='float32')
    self.anchors = anchors
    self.max_box_per_image = max_box_per_image

    ##########################
    # Make the model
    ##########################

    # models.model_1(self.input_height, self.input_width, self.input_channel,
    #                self.max_box_per_image, self.nb_box, self.nb_class)

    # make the feature extractor layers
    input_image = Input(shape=(self.input_height, self.input_width, self.input_channel))
    self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

    if backend == 'Inception3':
        self.feature_extractor = Inception3Feature(self.input_height, self.input_width, self.input_channel)
    elif backend == 'SqueezeNet':
        self.feature_extractor = SqueezeNetFeature(self.input_height, self.input_width, self.input_channel)
    elif backend == 'MobileNet':
        self.feature_extractor = MobileNetFeature(self.input_height, self.input_width, self.input_channel)
    elif backend == 'Full Yolo':
        self.feature_extractor = FullYoloFeature(self.input_height, self.input_width, self.input_channel)
    elif backend == 'Tiny Yolo':
        self.feature_extractor = TinyYoloFeature(self.input_height, self.input_width, self.input_channel)
    elif backend == 'Tiny Yolo_1':
        self.feature_extractor = TinyYoloFeature_1(self.input_height, self.input_width, self.input_channel)
    elif backend == 'Tiny Yolo_2':
        self.feature_extractor = TinyYoloFeature_2(self.input_height, self.input_width, self.input_channel)
    elif backend == 'Tiny Yolo_3':
        self.feature_extractor = TinyYoloFeature_3(self.input_height, self.input_width, self.input_channel)
    elif backend == 'Tiny Yolo_4':
        self.feature_extractor = TinyYoloFeature_4(self.input_height, self.input_width, self.input_channel)
    elif backend == 'Tiny Yolo_5':
        self.feature_extractor = TinyYoloFeature_5(self.input_height, self.input_width, self.input_channel)
    elif backend == 'VGG16':
        self.feature_extractor = VGG16Feature(self.input_height, self.input_width)
    elif backend == 'ResNet50':
        self.feature_extractor = ResNet50Feature(self.input_height, self.input_width)
    elif backend == 'My Yolo':
        self.feature_extractor = MyYoloFeature(self.input_height, self.input_width)
    else:
        raise Exception('Architecture not supported! Only support Full Yolo, Tiny Yolo (and its _1 to _5 variants), My Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!')

    # print(self.feature_extractor.get_output_shape())
    self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
    # features = self.feature_extractor.extract(input_image)
    features = self.feature_extractor.feature_extractor.output

    # make the object detection layer
    output = Conv2D(self.nb_box * (4 + 1 + self.nb_class),
                    (1, 1), strides=(1, 1),
                    padding='same',
                    name='DetectionLayer',
                    kernel_initializer='lecun_normal')(features)
    output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)
    output = Lambda(lambda args: args[0])([output, self.true_boxes])

    # self.model = Model([input_image, self.true_boxes], output)
    self.model = Model([self.feature_extractor.feature_extractor.input, self.true_boxes], output)

    # initialize the weights of the detection layer
    layer = self.model.layers[-4]
    weights = layer.get_weights()

    new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h * self.grid_w)
    new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h * self.grid_w)

    layer.set_weights([new_kernel, new_bias])

    # save model config
    model_json = self.model.to_json()
    with open(str(saved_config_name), "w") as json_file:
        json_file.write(model_json)

    # print a summary of the whole model
    self.feature_extractor.feature_extractor.summary()
    self.model.summary()
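# --- illustrative sketch (not from the original source) ---
# the JSON config written above can be restored with model_from_json; the file
# name here is hypothetical, and the weights must be restored separately
# (e.g. with model.load_weights on a saved checkpoint)
from keras.models import model_from_json

with open('saved_config.json') as json_file:
    model = model_from_json(json_file.read())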
def __init__(self, backend, input_size, labels, actions, ob_weights,
             max_box_per_image=50,
             anchors=[0.57273, 0.677385, 1.87446, 2.06253, 3.33843,
                      5.47434, 7.88282, 3.52778, 9.77052, 9.16828]):
    self.input_size = input_size
    self.labels = list(labels)
    self.actions = list(actions)
    self.nb_moves = len(self.actions)
    self.nb_class = len(self.labels)
    self.nb_box = len(anchors) // 2
    self.class_wt = np.ones(self.nb_class, dtype='float32')
    self.anchors = anchors
    self.max_box_per_image = max_box_per_image

    # one loss per named output: YOLO loss for detection, cross-entropy for direction
    self.losses = {
        "obj_output": self.yolo_loss,
        "dir_output": "categorical_crossentropy",
    }
    self.lossWeights = {"obj_output": 0.5, "dir_output": 1.0}

    ##########################
    # Make the model
    ##########################

    # make the feature extractor layers
    input_image = Input(shape=(self.input_size, self.input_size, 3))
    self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

    if backend == 'SqueezeNet':
        self.feature_extractor = SqueezeNetFeature(self.input_size)
    elif backend == 'MobileNet':
        self.feature_extractor = MobileNetFeature(self.input_size)
    elif backend == 'Tiny Yolo':
        self.feature_extractor = TinyYoloFeature(self.input_size)
    else:
        raise Exception('Architecture not supported! Only support Tiny Yolo, MobileNet, SqueezeNet at the moment!')

    print(self.feature_extractor.get_output_shape())
    self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
    features = self.feature_extractor.extract(input_image)
    print(self.feature_extractor.get_output_shape())

    # make the object detection layer
    output = Conv2D(self.nb_box * (4 + 1 + self.nb_class),
                    (1, 1), strides=(1, 1),
                    padding='same',
                    name='DetectionLayer',
                    kernel_initializer='lecun_normal')(features)
    output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)
    output = Lambda(lambda args: args[0], name='obj_output')([output, self.true_boxes])

    # small direction-classification branch built on the raw input image
    convdir = Conv2D(2, (3, 3), activation='relu', padding='same', use_bias=False, name='dir1')(input_image)
    convdir2 = Conv2D(2, (3, 3), activation='relu', padding='same', use_bias=False, name='dir2')(convdir)
    pooldir = MaxPooling2D(pool_size=2, name='dir3')(convdir2)
    convdir1 = Conv2D(4, (3, 3), activation='relu', padding='same', use_bias=False, name='dir4')(pooldir)
    convdir12 = Conv2D(4, (3, 3), activation='relu', padding='same', use_bias=False, name='dir5')(convdir1)
    pooldir1 = AveragePooling2D(pool_size=2, name='dir6')(convdir12)

    flat1 = Flatten()(pooldir1)
    flat2 = Flatten()(output)
    added = Concatenate()([flat1, flat2])
    # fc1 = Dense(32, activation='relu', name='fchingona')(added)
    # note: the hard-coded 6 output units presumably need to equal self.nb_moves
    fc2 = Dense(6, activation='softmax', name='dir_output')(added)

    self.model = Model([input_image, self.true_boxes], [output, fc2])
    self.model.load_weights(ob_weights, by_name=True)

    # print a summary of the whole model
    self.model.summary()
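# --- illustrative sketch (not from the original source) ---
# how the losses/lossWeights dicts above would typically be wired up at compile
# time: the dict keys match the output layer names 'obj_output' and
# 'dir_output', so each head gets its own loss, combined with the given
# weights; `yolo` is a hypothetical instance of the class above, and the
# optimizer choice is an assumption (the original compile call is not shown)
yolo.model.compile(optimizer='adam',
                   loss=yolo.losses,
                   loss_weights=yolo.lossWeights)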
def __init__(self, backend, input_size, labels, max_box_per_image, anchors, training=True):
    self.input_size = input_size
    self.labels = list(labels)
    self.nb_class = len(self.labels)
    self.nb_box = len(anchors) // 2
    self.class_wt = np.ones(self.nb_class, dtype='float32')
    self.anchors = anchors
    self.training = training
    self.max_box_per_image = max_box_per_image

    ##########################
    # Make the model
    ##########################

    # make the feature extractor layers
    input_image = Input(shape=(self.input_size, self.input_size, 3))
    self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

    if backend == 'Inception3':
        self.feature_extractor = Inception3Feature(self.input_size)
    elif backend == 'SqueezeNet':
        # only the SqueezeNet backend receives the training flag here
        self.feature_extractor = SqueezeNetFeature(self.input_size, training=self.training)
    elif backend == 'MobileNet':
        self.feature_extractor = MobileNetFeature(self.input_size)
    elif backend == 'Full Yolo':
        self.feature_extractor = FullYoloFeature(self.input_size)
    elif backend == 'Tiny Yolo':
        self.feature_extractor = TinyYoloFeature(self.input_size)
    elif backend == 'VGG16':
        self.feature_extractor = VGG16Feature(self.input_size)
    elif backend == 'ResNet50':
        self.feature_extractor = ResNet50Feature(self.input_size)
    else:
        raise Exception('Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!')

    print(self.feature_extractor.get_output_shape())
    self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
    features = self.feature_extractor.extract(input_image)

    # make the object detection layer
    output_01 = Conv2D(self.nb_box * (4 + 1 + self.nb_class),
                       (1, 1), strides=(1, 1),
                       padding='same',
                       name='DetectionLayer',
                       kernel_initializer='lecun_normal')(features)
    output_02 = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output_01)
    output_03 = Lambda(lambda args: args[0])([output_02, self.true_boxes])

    self.model = Model([input_image, self.true_boxes], output_03)

    # self.model = Model(input_image, output_01)
    # self.batch_size = 2
    # optimizer = Adam(lr=.0005, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    # self.model.compile(loss='MSE', optimizer=optimizer)

    # initialize the weights of the detection layer
    # layer = self.model.layers[-4]
    # weights = layer.get_weights()
    #
    # new_kernel = np.random.normal(size=weights[0].shape)/(self.grid_h*self.grid_w)
    # new_bias = np.random.normal(size=weights[1].shape)/(self.grid_h*self.grid_w)
    #
    # layer.set_weights([new_kernel, new_bias])

    # print a summary of the whole model
    self.model.summary()
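# --- illustrative sketch (not from the original source) ---
# quick shape smoke test for the two-input model built above, assuming
# input_size=416, max_box_per_image=10 and a 13x13 output grid; `yolo` is a
# hypothetical instance of the class this constructor belongs to
import numpy as np

dummy_image = np.zeros((1, 416, 416, 3), dtype='float32')
dummy_boxes = np.zeros((1, 1, 1, 1, 10, 4), dtype='float32')
pred = yolo.model.predict([dummy_image, dummy_boxes])
# expected: pred.shape == (1, 13, 13, nb_box, 4 + 1 + nb_class)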
def __init__(self, backend, input_size, labels, max_box_per_image, anchors):
    self.input_size = input_size
    self.labels = list(labels)
    self.nb_class = len(self.labels)
    self.nb_box = len(anchors) // 2
    self.class_wt = np.ones(self.nb_class, dtype='float32')
    self.anchors = anchors
    self.max_box_per_image = max_box_per_image

    ##########################
    # Make the model
    ##########################

    # make the feature extractor layers
    input_image = Input(shape=(self.input_size, self.input_size, 3))
    self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

    if backend == 'Inception3':
        self.feature_extractor = Inception3Feature(self.input_size)
    elif backend == 'SqueezeNet':
        self.feature_extractor = SqueezeNetFeature(self.input_size)
    elif backend == 'MobileNet':
        self.feature_extractor = MobileNetFeature(self.input_size)
    elif backend == 'Full Yolo':
        self.feature_extractor = FullYoloFeature(self.input_size)
    elif backend == 'Tiny Yolo':
        self.feature_extractor = TinyYoloFeature(self.input_size)
    elif backend == 'VGG16':
        self.feature_extractor = VGG16Feature(self.input_size)
    elif backend == 'ResNet50':
        self.feature_extractor = ResNet50Feature(self.input_size)
    else:
        raise Exception('Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!')

    print(self.feature_extractor.get_output_shape())
    self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
    # join the feature extractor and the detection layer via the backend's output tensor
    features = self.feature_extractor.feature_extractor.output
    # features = self.feature_extractor.extract(input_image)  # original

    # make the object detection layer
    output = Conv2D(self.nb_box * (4 + 1 + self.nb_class),
                    (1, 1), strides=(1, 1),
                    padding='same',
                    name='DetectionLayer',
                    kernel_initializer='lecun_normal')(features)
    output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)
    # the Lambda layer is only needed during training; for inference it can be removed
    output = Lambda(lambda args: args[0])([output, self.true_boxes])

    # join the feature extractor and the detection layer
    self.model = Model([self.feature_extractor.feature_extractor.input, self.true_boxes], output)
    # self.model = Model([input_image, self.true_boxes], output)  # original

    # for layer in self.model.layers[:-5]:  # freeze all layers except the detection head
    #     layer.trainable = False

    #################### uncomment this part only when training from scratch
    # initialize the weights of the detection layer
    # layer = self.model.layers[-4]
    # weights = layer.get_weights()

    # new_kernel = np.random.normal(size=weights[0].shape)/(self.grid_h*self.grid_w)
    # new_bias = np.random.normal(size=weights[1].shape)/(self.grid_h*self.grid_w)

    # layer.set_weights([new_kernel, new_bias])
    ####################

    # print a summary of the whole model
    self.model.summary()
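# --- illustrative sketch (not from the original source) ---
# the commented-out freezing loop above, extracted into a small helper: freeze
# everything except the last few layers so that only the detection head is
# trained when fine-tuning; the cut-off of 5 mirrors the [:-5] slice above
def freeze_backbone(model, trainable_tail=5):
    for layer in model.layers[:-trainable_tail]:
        layer.trainable = False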
def __init__(self, backend, input_size, labels, max_box_per_image, anchors, threshold, max_sur):
    self.input_size = input_size
    self.labels = list(labels)
    self.nb_class = len(self.labels)
    self.nb_box = len(anchors) // 2  # presumably the expected number of boxes? haven't read the paper closely yet
    self.class_wt = np.ones(self.nb_class, dtype='float32')  # not extended here; per-class weights could be customized
    self.anchors = anchors
    self.threshold = threshold
    self.max_sur = max_sur
    self.max_box_per_image = max_box_per_image

    ##########################
    # Make the model
    ##########################

    # make the feature extractor layers
    # build an input layer for the image
    input_image = Input(shape=(self.input_size, self.input_size, 3))
    # build an input layer for bounding-box regression
    self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

    # fetch the convolutional part of the network from backend; this returns one of the classes
    # defined there, all of them subclasses of BaseFeatureExtractor
    # in backend, each subclass's self.feature_extractor attribute is the actual Keras Model
    # object; the backend class is only a thin wrapper around it
    # the YOLO class here also has an attribute named feature_extractor, but it holds the
    # backend wrapper object, not the Keras model -- don't confuse the two
    if backend == 'Inception3':
        self.feature_extractor = Inception3Feature(self.input_size)
    elif backend == 'SqueezeNet':
        self.feature_extractor = SqueezeNetFeature(self.input_size)
    elif backend == 'MobileNet':
        self.feature_extractor = MobileNetFeature(self.input_size)
    elif backend == 'Full Yolo':
        self.feature_extractor = FullYoloFeature(self.input_size)
    elif backend == 'Tiny Yolo':
        self.feature_extractor = TinyYoloFeature(self.input_size)
    elif backend == 'VGG16':
        self.feature_extractor = VGG16Feature(self.input_size)
    elif backend == 'ResNet50':
        self.feature_extractor = ResNet50Feature(self.input_size)
    else:
        raise Exception('Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!')

    # get the size of the output feature map via get_output_shape(), defined in the base class
    print(issubclass(Model, Layer))
    print(self.feature_extractor.get_output_shape())
    self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()

    # extract(), like get_output_shape(), is a method defined on BaseFeatureExtractor in backend
    # it connects the image input layer defined above to the feature-extraction part of the model
    # looking at the backend code this step is a bit redundant, since the backend already has its
    # own input layer; maybe it is done for convenience? I think it could be removed
    # in any case, features is the half-built model (its feature-extraction part); calling
    # predict on it directly would yield a feature map
    features = self.feature_extractor.extract(input_image)

    # make the object detection layer
    # its output shape is (self.grid_h, self.grid_w, self.nb_box * (4 + 1 + self.nb_class))
    output = Conv2D(self.nb_box * (4 + 1 + self.nb_class),
                    (1, 1), strides=(1, 1),
                    padding='same',
                    name='DetectionLayer',
                    kernel_initializer='lecun_normal')(features)
    # grid_h * grid_w groups of predictions, one per bounding box (len(anchors)/2 of them per cell)
    output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)
    # this Lambda layer looks odd: it takes true_boxes in and immediately discards it, as if it
    # had never been passed; its only purpose is to register true_boxes as a model input
    # note that it implicitly adds an input layer for true_boxes
    output = Lambda(lambda args: args[0])([output, self.true_boxes])

    self.model = Model([input_image, self.true_boxes], output)
    """
    Note:
    the model is now: input -> BACKEND -> Conv2D -> Reshape -> (input) -> Lambda,
    six layers in total.
    Although the backend is internally complex, it counts as a single layer here
    (a Model object is in fact a subclass of Layer).
    """
    # output shape: (self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class)
    print(self.model.layers)
    print(f"number of layers:{len(self.model.layers)}")
    print(self.model.output_shape)

    # initialize the weights of the detection layer
    layer = self.model.layers[-4]
    weights = layer.get_weights()
    print(f"weight_2D:{weights}")
    # the first array has shape (w, h, d, n): d is the number of feature maps passed down from
    # the previous layer, n the number of kernels
    # the second array has shape (n,), i.e. one bias per kernel
    print(f"weight_2D_shape:{(weights[0].shape, weights[1].shape)}")
    # the detection layer is initialized with Gaussian noise
    new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h * self.grid_w)
    new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h * self.grid_w)
    layer.set_weights([new_kernel, new_bias])

    # print a summary of the whole model
    self.model.summary()
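# --- illustrative sketch (not from the original source) ---
# the Gaussian re-initialization repeated across these constructors, extracted
# into a helper for clarity: small random weights scaled down by the grid area
import numpy as np

def reinit_detection_layer(layer, grid_h, grid_w):
    kernel, bias = layer.get_weights()
    new_kernel = np.random.normal(size=kernel.shape) / (grid_h * grid_w)
    new_bias = np.random.normal(size=bias.shape) / (grid_h * grid_w)
    layer.set_weights([new_kernel, new_bias])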