# Excerpt: callback construction for an image-classification trainer class.
# WarmUpCosineDecayScheduler, CLRScheduler and EarlyStopByTimeAndAcc are
# project-specific callbacks defined elsewhere in this repository.
import keras
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau


def set_callbacks(self, file_model=None, epochs=None):
    if epochs is None:
        epochs = self.epochs
    callbacks = []

    # Create the learning-rate scheduler.
    total_steps = int(epochs * self.y_train.shape[0] / self.batch_size)
    warm_up_steps = int(self.warmup_epochs * self.y_train.shape[0] / self.batch_size)
    # 0 when cosine decay is enabled, total_steps otherwise.
    base_steps = total_steps * (not self.cosine_decay)
    if self.cosine_decay:
        schedule = WarmUpCosineDecayScheduler(
            learning_rate_base=self.learning_rate_base,
            total_steps=total_steps,
            warmup_learning_rate=0.0,
            warmup_steps=warm_up_steps,
            hold_base_rate_steps=base_steps)
        # schedule = LearningRateScheduler(lr_schedule)
    else:
        schedule = CLRScheduler(max_lr=self.learning_rate_base,
                                min_lr=0.00002,
                                total_steps=total_steps)
    callbacks.append(schedule)

    # Stop early if validation accuracy stays near chance level for too long.
    min_val_acc = (1. / self.num_clases) + 0.1
    early_stop = EarlyStopByTimeAndAcc(limit_time=360,
                                       baseline=min_val_acc,
                                       patience=8)
    callbacks.append(early_stop)
    # callbacks.append(EarlyStopping(monitor='val_acc', patience=epochs // 5,
    #                                baseline=min_val_acc))

    # Keras 2.3.x renamed the metric from 'val_acc' to 'val_accuracy'.
    val_acc = 'val_accuracy' if keras.__version__ == '2.3.1' else 'val_acc'

    if file_model is not None:
        # checkpoint_last = ModelCheckpoint(file_model)
        # checkpoint_loss = ModelCheckpoint(file_model, monitor='val_loss',
        #                                   save_best_only=True)
        checkpoint_acc = ModelCheckpoint(file_model, monitor=val_acc,
                                         save_best_only=True)
        callbacks.append(checkpoint_acc)

    if self.early_stop > 0 and keras.__version__ == '2.2.4':
        callbacks.append(EarlyStopping(monitor=val_acc,
                                       patience=self.early_stop,
                                       restore_best_weights=True))
    elif self.early_stop > 0:
        callbacks.append(EarlyStopping(monitor=val_acc,
                                       patience=self.early_stop))

    if self.reduce_plateu:
        callbacks.append(ReduceLROnPlateau(monitor=val_acc, factor=0.2,
                                           patience=5, verbose=self.verb))
    return callbacks
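# --- Usage sketch (not from the original source) -----------------------------
# A minimal example of how the callback list returned by set_callbacks() might
# be wired into training. The `trainer` instance, its `model` attribute and the
# x/y arrays are assumptions for illustration only.
callbacks = trainer.set_callbacks(file_model='best_model.h5')
trainer.model.fit(x_train, y_train,
                  batch_size=trainer.batch_size,
                  epochs=trainer.epochs,
                  validation_data=(x_val, y_val),
                  callbacks=callbacks)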
# Excerpt: scheduler / optimizer selection for one training stage.
batch_size = 2
learning_rate_base = 1e-3
if Cosine_scheduler:
    # Warmup period (epochs).
    warmup_epoch = int((Freeze_epoch - Init_epoch) * 0.2)
    # Total number of training steps.
    total_steps = int((Freeze_epoch - Init_epoch) * num_train / batch_size)
    # Number of warmup steps.
    warmup_steps = int(warmup_epoch * num_train / batch_size)
    # Learning-rate scheduler.
    reduce_lr = WarmUpCosineDecayScheduler(
        learning_rate_base=learning_rate_base,
        total_steps=total_steps,
        warmup_learning_rate=1e-4,
        warmup_steps=warmup_steps,
        hold_base_rate_steps=num_train,
        min_learn_rate=1e-6)
    model.compile(optimizer=Adam(),
                  loss={'yolo_loss': lambda y_true, y_pred: y_pred})
else:
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                  patience=3, verbose=1)
    model.compile(optimizer=Adam(learning_rate_base),
                  loss={'yolo_loss': lambda y_true, y_pred: y_pred})
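# --- Sketch: what a warmup + cosine-decay scheduler typically computes -------
# WarmUpCosineDecayScheduler is a project-specific callback whose definition is
# not shown in these excerpts. The sketch below is an assumption about its
# behaviour, matching the argument names used above: linear warmup up to
# learning_rate_base, an optional hold phase, then cosine decay, applied once
# per batch via keras.backend.set_value.
import numpy as np
import keras.backend as K
from keras.callbacks import Callback


def cosine_decay_with_warmup(global_step, learning_rate_base, total_steps,
                             warmup_learning_rate=0.0, warmup_steps=0,
                             hold_base_rate_steps=0, min_learn_rate=0.0):
    # Cosine decay over the steps that remain after warmup and hold.
    lr = 0.5 * learning_rate_base * (1 + np.cos(
        np.pi * (global_step - warmup_steps - hold_base_rate_steps)
        / float(total_steps - warmup_steps - hold_base_rate_steps)))
    if hold_base_rate_steps > 0:
        # Keep the base rate for `hold_base_rate_steps` steps after warmup.
        lr = np.where(global_step > warmup_steps + hold_base_rate_steps,
                      lr, learning_rate_base)
    if warmup_steps > 0:
        # Linear warmup from warmup_learning_rate up to learning_rate_base.
        slope = (learning_rate_base - warmup_learning_rate) / warmup_steps
        warmup_lr = slope * global_step + warmup_learning_rate
        lr = np.where(global_step < warmup_steps, warmup_lr, lr)
    return max(float(lr), min_learn_rate)


class WarmUpCosineDecaySketch(Callback):
    """Hypothetical per-batch scheduler mirroring the arguments used above."""

    def __init__(self, learning_rate_base, total_steps, warmup_learning_rate=0.0,
                 warmup_steps=0, hold_base_rate_steps=0, min_learn_rate=0.0):
        super(WarmUpCosineDecaySketch, self).__init__()
        self.learning_rate_base = learning_rate_base
        self.total_steps = total_steps
        self.warmup_learning_rate = warmup_learning_rate
        self.warmup_steps = warmup_steps
        self.hold_base_rate_steps = hold_base_rate_steps
        self.min_learn_rate = min_learn_rate
        self.global_step = 0

    def on_batch_begin(self, batch, logs=None):
        # Update the optimizer learning rate before each batch.
        lr = cosine_decay_with_warmup(self.global_step, self.learning_rate_base,
                                      self.total_steps, self.warmup_learning_rate,
                                      self.warmup_steps, self.hold_base_rate_steps,
                                      self.min_learn_rate)
        K.set_value(self.model.optimizer.lr, lr)
        self.global_step += 1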
def main():
    # Path to the annotation (label) file.
    annotation_path = '2020_train_all.txt'
    # Paths to the classes and anchors files.
    classes_path = 'model_data/our_classes.txt'
    anchors_path = 'model_data/yolo4_anchors.txt'
    # ------------------------------------------------------#
    #   See the README for the weights file (Baidu Netdisk download).
    #   Dimension-mismatch warnings are normal when training on your own
    #   dataset: the prediction heads differ, so the shapes do not match.
    # ------------------------------------------------------#
    weights_path = 'model_data/last9.h5'
    # Load classes and anchors.
    class_names = get_classes(classes_path)
    anchors = get_anchors(anchors_path)
    # Number of classes and anchors.
    num_classes = len(class_names)
    num_anchors = len(anchors)
    # Directory where trained models are saved.
    log_dir = 'logs/'
    # Input shape: use 416x416 with little GPU memory, 608x608 with more.
    input_shape = (608, 608)
    mosaic = True
    Cosine_scheduler = False
    label_smoothing = 0

    # Clear the Keras session.
    K.clear_session()

    # Model image input.
    image_input = Input(shape=(None, None, 3))
    h, w = input_shape

    # Create the YOLOv4 model.
    print('Create YOLOv4 model with {} anchors and {} classes.'.format(
        num_anchors, num_classes))
    model_body = yolo_body(image_input, num_anchors // 3, num_classes)

    # Load pretrained weights.
    print('Load weights {}.'.format(weights_path))
    model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)

    # y_true shapes: 13x13x3x(num_classes+5), 26x26x3x(num_classes+5),
    # 52x52x3x(num_classes+5).
    y_true = [Input(shape=(h // {0: 32, 1: 16, 2: 8}[l],
                           w // {0: 32, 1: 16, 2: 8}[l],
                           num_anchors // 3, num_classes + 5)) for l in range(3)]

    # Inputs are model_body.input plus y_true; the output is model_loss.
    loss_input = [*model_body.output, *y_true]
    model_loss = Lambda(yolo_loss,
                        output_shape=(1, ),
                        name='yolo_loss',
                        arguments={
                            'anchors': anchors,
                            'num_classes': num_classes,
                            'ignore_thresh': 0.5,
                            'label_smoothing': label_smoothing
                        })(loss_input)
    model = Model([model_body.input, *y_true], model_loss)

    # Training callbacks.
    logging = TensorBoard(log_dir=log_dir)
    checkpoint = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        save_weights_only=True,
        save_best_only=False,
        period=1)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=6,
                                   verbose=1)

    # 10% of the data for validation, 90% for training.
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val

    # ------------------------------------------------------#
    #   The backbone extracts generic features, so freezing it speeds up
    #   training and keeps its weights from being damaged early on.
    #   Init_epoch is the starting epoch, Freeze_epoch the last epoch of
    #   frozen training, Epoch the total number of epochs.
    #   Reduce batch_size if you hit OOM / run out of GPU memory.
    # ------------------------------------------------------#
    freeze_layers = 249
    for i in range(freeze_layers):
        model_body.layers[i].trainable = False
    print('Freeze the first {} layers of total {} layers.'.format(
        freeze_layers, len(model_body.layers)))

    # Stage 1: train only the unfrozen (non-backbone) layers.
    if True:
        Init_epoch = 0
        Freeze_epoch = 50
        # Batch size: samples fed per step.
        batch_size = 8
        # Peak learning rate.
        learning_rate_base = 1e-3
        if Cosine_scheduler:
            # Warmup period (epochs).
            warmup_epoch = int((Freeze_epoch - Init_epoch) * 0.2)
            # Total number of training steps.
            total_steps = int(
                (Freeze_epoch - Init_epoch) * num_train / batch_size)
            # Number of warmup steps.
            warmup_steps = int(warmup_epoch * num_train / batch_size)
            # Learning-rate scheduler.
            reduce_lr = WarmUpCosineDecayScheduler(
                learning_rate_base=learning_rate_base,
                total_steps=total_steps,
                warmup_learning_rate=1e-4,
                warmup_steps=warmup_steps,
                hold_base_rate_steps=num_train,
                min_learn_rate=1e-6)
            model.compile(optimizer=Adam(),
                          loss={'yolo_loss': lambda y_true, y_pred: y_pred})
        else:
            reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                          factor=0.5,
                                          patience=2,
                                          verbose=1)
            model.compile(optimizer=Adam(learning_rate_base),
                          loss={'yolo_loss': lambda y_true, y_pred: y_pred})

        print('Train on {} samples, val on {} samples, with batch size {}.'.
              format(num_train, num_val, batch_size))
        model.fit_generator(
            data_generator(lines[:num_train], batch_size, input_shape,
                           anchors, num_classes, mosaic=mosaic),
            steps_per_epoch=max(1, num_train // batch_size),
            validation_data=data_generator(lines[num_train:], batch_size,
                                           input_shape, anchors, num_classes,
                                           mosaic=False),
            validation_steps=max(1, num_val // batch_size),
            epochs=Freeze_epoch,
            initial_epoch=Init_epoch,
            callbacks=[logging, checkpoint, reduce_lr, early_stopping])
        model.save_weights(log_dir + 'trained_weights_stage_1.h5')

    # Unfreeze the backbone for stage 2.
    for i in range(freeze_layers):
        model_body.layers[i].trainable = True

    # Stage 2: train the whole network after unfreezing.
    if True:
        Freeze_epoch = 50
        Epoch = 100
        # Batch size: samples fed per step.
        batch_size = 2
        # Peak learning rate.
        learning_rate_base = 1e-4
        if Cosine_scheduler:
            # Warmup period (epochs).
            warmup_epoch = int((Epoch - Freeze_epoch) * 0.2)
            # Total number of training steps.
            total_steps = int((Epoch - Freeze_epoch) * num_train / batch_size)
            # Number of warmup steps.
            warmup_steps = int(warmup_epoch * num_train / batch_size)
            # Learning-rate scheduler.
            reduce_lr = WarmUpCosineDecayScheduler(
                learning_rate_base=learning_rate_base,
                total_steps=total_steps,
                warmup_learning_rate=1e-5,
                warmup_steps=warmup_steps,
                hold_base_rate_steps=num_train // 2,
                min_learn_rate=1e-6)
            model.compile(optimizer=Adam(),
                          loss={'yolo_loss': lambda y_true, y_pred: y_pred})
        else:
            reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                          factor=0.5,
                                          patience=2,
                                          verbose=1)
            model.compile(optimizer=Adam(learning_rate_base),
                          loss={'yolo_loss': lambda y_true, y_pred: y_pred})

        print('Train on {} samples, val on {} samples, with batch size {}.'.
              format(num_train, num_val, batch_size))
        model.fit_generator(
            data_generator(lines[:num_train], batch_size, input_shape,
                           anchors, num_classes, mosaic=mosaic),
            steps_per_epoch=max(1, num_train // batch_size),
            validation_data=data_generator(lines[num_train:], batch_size,
                                           input_shape, anchors, num_classes,
                                           mosaic=False),
            validation_steps=max(1, num_val // batch_size),
            epochs=Epoch,
            initial_epoch=Freeze_epoch,
            callbacks=[logging, checkpoint, reduce_lr, early_stopping])
        model.save_weights(log_dir + 'last1.h5')
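# --- Note on the pass-through loss -------------------------------------------
# The real YOLO loss is computed inside the graph by the `yolo_loss` Lambda
# layer, so the compiled loss only needs to forward that layer's output. The
# inline `lambda y_true, y_pred: y_pred` above is equivalent to the named
# helper used in the trainModel excerpt below:
def dummy_loss(y_true, y_pred):
    # y_pred is already the scalar loss emitted by the `yolo_loss` layer;
    # y_true is just a placeholder (e.g. zeros) supplied by the generator.
    return y_pred

# e.g. model.compile(optimizer=Adam(learning_rate_base),
#                    loss={'yolo_loss': dummy_loss})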
def trainModel(self, mosaic=True, cosine_scheduler=True, label_smoothing=0.1):
    """Two-stage YOLOv4 training: backbone frozen first, then full fine-tuning."""
    anchors = get_anchors(self.anchors_path)
    num_classes = len(self.classes)
    num_anchors = len(anchors)

    K.clear_session()
    image_input = Input(shape=(None, None, 3))
    h, w = self.input_shape

    print('Create YOLOv4 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
    # model_body = Model(image_input, [P5_output, P4_output, P3_output])
    model_body = yolo_body(image_input, num_anchors // 3, num_classes)

    print('Load weights {}.'.format(self.pretrain_model))
    model_body.load_weights(self.pretrain_model, by_name=True, skip_mismatch=True)

    # y_true = [Input(shape=(h//32,w//32,3,cls+5)), Input(shape=(h//16,w//16,3,cls+5)), Input(shape=(h//8,w//8,3,cls+5))]
    y_true = [Input(shape=(h // {0: 32, 1: 16, 2: 8}[i],
                           w // {0: 32, 1: 16, 2: 8}[i],
                           num_anchors // 3, num_classes + 5)) for i in range(3)]

    # model_body.output = [P5_output, P4_output, P3_output]
    loss_input = [*model_body.output, *y_true]
    model_loss = Lambda(yolo_loss,
                        output_shape=(1,),
                        name='yolo_loss',
                        arguments={'anchors': anchors,
                                   'num_classes': num_classes,
                                   'ignore_thresh': 0.5,
                                   'label_smoothing': label_smoothing})(loss_input)
    model = Model([model_body.input, *y_true], model_loss)
    # plot_model(model, to_file="yolov4_loss_model.png", show_shapes=True, show_layer_names=True)

    logging = TensorBoard(log_dir=self.log_dir)
    checkpoint = ModelCheckpoint(self.log_dir + 'epoch{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
                                 monitor='val_loss', save_weights_only=True,
                                 save_best_only=False, period=1)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)

    val_split = 0.1
    with open(self.annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val

    # ------------------------------------------------------#
    #   The backbone extracts generic features, so freezing it speeds up
    #   training and prevents its weights from being damaged in early epochs.
    # ------------------------------------------------------#
    freeze_layers = 249
    for i in range(freeze_layers):
        model_body.layers[i].trainable = False
    print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers)))

    # Stage 1: train with the backbone frozen.
    init_epoch = 0
    freeze_epoch = self.epochs // 2
    batch_size = self.batch_size * 2
    learning_rate_base = 1e-3
    if cosine_scheduler:
        warm_up_epoch = int((freeze_epoch - init_epoch) * 0.2)
        total_steps = int((freeze_epoch - init_epoch) * num_train / batch_size)
        warm_up_steps = int(warm_up_epoch * num_train / batch_size)
        reduce_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base,
                                               total_steps=total_steps,
                                               warmup_learning_rate=1e-4,
                                               warmup_steps=warm_up_steps,
                                               hold_base_rate_steps=num_train,
                                               min_learn_rate=1e-6)
        model.compile(optimizer=Adam(), loss=dummy_loss)
    else:
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1, min_lr=1e-6)
        model.compile(optimizer=Adam(learning_rate_base), loss=dummy_loss)

    print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
    model.fit_generator(
        data_generator(lines[:num_train], batch_size, self.input_shape, anchors, num_classes, mosaic=mosaic),
        steps_per_epoch=max(1, num_train // batch_size),
        validation_data=data_generator(lines[num_train:], batch_size, self.input_shape, anchors, num_classes,
                                       mosaic=False),
        validation_steps=max(1, num_val // batch_size),
        epochs=freeze_epoch,
        initial_epoch=init_epoch,
        callbacks=[logging, checkpoint, reduce_lr, early_stopping])
    model.save_weights(self.log_dir + 'trained_weights_stage_1.h5')

    # Stage 2: unfreeze the backbone and fine-tune all layers.
    for i in range(freeze_layers):
        model_body.layers[i].trainable = True
    print("\n\nStarting training of all layers...\n\n")

    batch_size = self.batch_size
    learning_rate_base = 1e-4
    if cosine_scheduler:
        warm_up_epoch = int((self.epochs - freeze_epoch) * 0.2)
        total_steps = int((self.epochs - freeze_epoch) * num_train / batch_size)
        warm_up_steps = int(warm_up_epoch * num_train / batch_size)
        reduce_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base,
                                               total_steps=total_steps,
                                               warmup_learning_rate=1e-5,
                                               warmup_steps=warm_up_steps,
                                               hold_base_rate_steps=num_train // 2,
                                               min_learn_rate=1e-6)
        model.compile(optimizer=Adam(), loss=dummy_loss)
    else:
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1, min_lr=1e-6)
        model.compile(optimizer=Adam(learning_rate_base), loss=dummy_loss)

    print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
    model.fit_generator(
        data_generator(lines[:num_train], batch_size, self.input_shape, anchors, num_classes, mosaic=mosaic),
        steps_per_epoch=max(1, num_train // batch_size),
        validation_data=data_generator(lines[num_train:], batch_size, self.input_shape, anchors, num_classes,
                                       mosaic=False),
        validation_steps=max(1, num_val // batch_size),
        epochs=self.epochs,
        initial_epoch=freeze_epoch,
        callbacks=[logging, checkpoint, reduce_lr, early_stopping])
    model.save_weights(self.log_dir + 'last1.h5')
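# --- Sketch: the helper loaders used by both training scripts ----------------
# get_classes() and get_anchors() are not shown in these excerpts. A minimal
# sketch, assuming the usual keras-yolo3-style file formats (one class name per
# line; anchors as a single comma-separated line of width,height pairs):
import numpy as np


def get_classes(classes_path):
    # One class name per line.
    with open(classes_path) as f:
        class_names = [c.strip() for c in f.readlines()]
    return class_names


def get_anchors(anchors_path):
    # A single line of comma-separated floats, reshaped to (N, 2).
    with open(anchors_path) as f:
        anchors = [float(x) for x in f.readline().split(',')]
    return np.array(anchors).reshape(-1, 2)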