def main(args):
    """Train a YOLO model in two stages and save the result under ``logs/000``.

    Stage one calls ``fit_generator`` from ``args.init_epoch`` up to
    ``args.init_epoch + args.transfer_epoch`` with the model as returned by the
    train-model builder. Every layer is then marked trainable, the model is
    recompiled, and stage two continues up to ``args.total_epoch``.

    Args:
        args: option namespace. Attributes read here: ``annotation_file``,
            ``val_annotation_file``, ``val_split``, ``classes_path``,
            ``anchors_path``, ``weights_path``, ``freeze_level``,
            ``multiscale``, ``rescale_interval``, ``model_image_size``,
            ``model_type``, ``model_pruning``, ``eval_online``,
            ``eval_epoch_interval``, ``save_eval_checkpoint``,
            ``data_shuffle``, ``batch_size``, ``total_epoch``, ``init_epoch``,
            ``transfer_epoch``, ``optimizer``, ``learning_rate``,
            ``decay_type``, ``label_smoothing``, ``enhance_augment`` and
            ``gpu_num``.

    Raises:
        AssertionError: if a side of ``args.model_image_size`` is not a
            multiple of 32.
        ValueError: if the anchor count is not 9, 6 or 5.
    """
    log_dir = os.path.join('logs', '000')

    # class and anchor definitions
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    print('classes_path =', classes_path)
    print('class_names = ', class_names)
    print('num_classes = ', num_classes)

    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    # An explicit CLI freeze level wins; otherwise 0 when weights are given
    # and 1 when they are not.
    if args.freeze_level is not None:
        freeze_level = args.freeze_level
    else:
        freeze_level = 0 if args.weights_path else 1

    # callbacks for the training process
    tensorboard = TensorBoard(log_dir=log_dir,
                              histogram_freq=0,
                              write_graph=False,
                              write_grads=False,
                              write_images=False,
                              update_freq='batch')
    checkpoint_path = os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5')
    checkpoint = ModelCheckpoint(checkpoint_path,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  patience=10,
                                  verbose=1,
                                  cooldown=0,
                                  min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=50,
                                   verbose=1)
    terminate_on_nan = TerminateOnNaN()
    callbacks = [
        tensorboard, checkpoint, reduce_lr, early_stopping, terminate_on_nan
    ]

    # Train and validation samples live in one list: the first num_train
    # entries are used for training, the rest for validation.
    dataset = get_dataset(args.annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        print('num_train = ', num_train)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        num_val = int(len(dataset) * args.val_split)
        num_train = len(dataset) - num_val

    # multiscale interval; -1 is passed through when multiscale is off
    rescale_interval = args.rescale_interval if args.multiscale else -1

    # model input shape check
    input_shape = args.model_image_size
    height_ok = input_shape[0] % 32 == 0
    width_ok = input_shape[1] % 32 == 0
    assert height_ok and width_ok, 'Multiples of 32 required'

    # Pick the model builder and data generator from the anchor count.
    if num_anchors in (9, 6):
        # 9 anchors: YOLOv3, 6 anchors: Tiny YOLOv3
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper
        tiny_version = num_anchors == 6  # not consumed later in this function
    elif num_anchors == 5:
        # YOLOv2 uses 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper
        tiny_version = False
    else:
        raise ValueError('Unsupported anchors number')

    # optional online evaluation on the validation slice
    if args.eval_online:
        callbacks.append(
            EvalCallBack(args.model_type,
                         dataset[num_train:],
                         anchors,
                         class_names,
                         args.model_image_size,
                         args.model_pruning,
                         log_dir,
                         eval_epoch_interval=args.eval_epoch_interval,
                         save_eval_checkpoint=args.save_eval_checkpoint))

    # optional train/val data shuffle callback
    if args.data_shuffle:
        callbacks.append(DatasetShuffleCallBack(dataset))

    # model pruning config
    steps_for_pruning = np.ceil(1.0 * num_train / args.batch_size).astype(
        np.int32)
    pruning_end_step = steps_for_pruning * args.total_epoch
    if args.model_pruning:
        callbacks = callbacks + [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0),
        ]

    # optimizer without decay for the first stage
    optimizer = get_optimizer(args.optimizer,
                              args.learning_rate,
                              decay_type=None)

    # build the train model
    model = get_train_model(args.model_type,
                            anchors,
                            num_classes,
                            weights_path=args.weights_path,
                            freeze_level=freeze_level,
                            optimizer=optimizer,
                            label_smoothing=args.label_smoothing,
                            model_pruning=args.model_pruning,
                            pruning_end_step=pruning_end_step)

    # Multi-GPU training: keep the single-device model as the template that
    # is saved at the end, and train on the wrapped model.
    template_model = None
    if args.gpu_num >= 2:
        template_model = model
        model = multi_gpu_model(model, gpus=args.gpu_num)
        # the wrapped model has to be compiled again
        model.compile(optimizer=optimizer,
                      loss={'yolo_loss': lambda y_true, y_pred: y_pred})

    model.summary()

    train_steps = max(1, num_train // args.batch_size)
    val_steps = max(1, num_val // args.batch_size)
    stage_banner = ('Train on {} samples, val on {} samples, '
                    'with batch size {}, input_shape {}.')

    # Stage 1: train for some epochs before unfreezing, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print(stage_banner.format(num_train, num_val, args.batch_size,
                              input_shape))

    train_batches = data_generator(dataset[:num_train], args.batch_size,
                                   input_shape, anchors, num_classes,
                                   args.enhance_augment)
    val_batches = data_generator(dataset[num_train:], args.batch_size,
                                 input_shape, anchors, num_classes)
    model.fit_generator(train_batches,
                        steps_per_epoch=train_steps,
                        validation_data=val_batches,
                        validation_steps=val_steps,
                        epochs=epochs,
                        initial_epoch=initial_epoch,
                        workers=1,
                        use_multiprocessing=False,
                        max_queue_size=10,
                        callbacks=callbacks)

    # wait 2 seconds before the next stage
    time.sleep(2)

    if args.decay_type:
        # Learning-rate decay is applied only for the second stage, so the
        # plateau callback is dropped and the optimizer rebuilt.
        callbacks.remove(reduce_lr)
        remaining_epochs = (args.total_epoch - args.init_epoch -
                            args.transfer_epoch)
        decay_steps = train_steps * remaining_epochs
        optimizer = get_optimizer(args.optimizer,
                                  args.learning_rate,
                                  decay_type=args.decay_type,
                                  decay_steps=decay_steps)

    # Stage 2: unfreeze the whole network for further tuning.
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    for layer in model.layers:
        layer.trainable = True
    # recompile so the trainable change takes effect
    model.compile(optimizer=optimizer,
                  loss={'yolo_loss': lambda y_true, y_pred: y_pred})

    print(stage_banner.format(num_train, num_val, args.batch_size,
                              input_shape))
    train_batches = data_generator(dataset[:num_train], args.batch_size,
                                   input_shape, anchors, num_classes,
                                   args.enhance_augment, rescale_interval)
    val_batches = data_generator(dataset[num_train:], args.batch_size,
                                 input_shape, anchors, num_classes)
    model.fit_generator(train_batches,
                        steps_per_epoch=train_steps,
                        validation_data=val_batches,
                        validation_steps=val_steps,
                        epochs=args.total_epoch,
                        initial_epoch=epochs,
                        workers=1,
                        use_multiprocessing=False,
                        max_queue_size=10,
                        callbacks=callbacks)

    # Store the final model; with multi-GPU the template model is the one
    # that gets stripped and saved.
    final_model = model if template_model is None else template_model
    if args.model_pruning:
        final_model = sparsity.strip_pruning(final_model)
    final_model.save(os.path.join(log_dir, 'trained_final.h5'))
def main(args):
    """Train a YOLO model in two stages and save the result under ``logs/000``.

    The model is built by the builder selected from the anchor count, inside a
    ``tf.distribute.MirroredStrategy`` scope when ``args.gpu_num >= 2``. Stage
    one fits from ``args.init_epoch`` to ``args.init_epoch +
    args.transfer_epoch``. Every layer is then marked trainable, the model is
    recompiled, and stage two fits up to ``args.total_epoch``.

    Args:
        args: option namespace. Attributes read here: ``annotation_file``,
            ``val_annotation_file``, ``val_split``, ``classes_path``,
            ``anchors_path``, ``weights_path``, ``freeze_level``,
            ``multiscale``, ``rescale_interval``, ``model_image_size``,
            ``model_type``, ``model_pruning``, ``eval_online``,
            ``eval_epoch_interval``, ``save_eval_checkpoint``,
            ``elim_grid_sense``, ``data_shuffle``, ``batch_size``,
            ``total_epoch``, ``init_epoch``, ``transfer_epoch``,
            ``optimizer``, ``learning_rate``, ``decay_type``,
            ``label_smoothing``, ``enhance_augment``,
            ``multi_anchor_assign`` and ``gpu_num``.

    Raises:
        AssertionError: if a side of ``args.model_image_size`` is not a
            multiple of 32.
        ValueError: if the anchor count is not 9, 6 or 5.
    """
    # directory where result logs and weights are stored
    log_dir = os.path.join('logs', '000')

    # class names come from the class file
    class_names = get_classes(args.classes_path)
    num_classes = len(class_names)

    # load the anchors
    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    # An explicit CLI freeze level wins; otherwise 0 when weights are given
    # and 1 when they are not.
    if args.freeze_level is not None:
        freeze_level = args.freeze_level
    else:
        freeze_level = 0 if args.weights_path else 1

    # callbacks for the training process
    tensorboard = TensorBoard(log_dir=log_dir,
                              histogram_freq=0,
                              write_graph=False,
                              write_grads=False,
                              write_images=False,
                              update_freq='batch')
    checkpoint_path = os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5')
    checkpoint = ModelCheckpoint(checkpoint_path,
                                 monitor='val_loss',
                                 mode='min',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  mode='min',
                                  patience=10,
                                  verbose=1,
                                  cooldown=0,
                                  min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=50,
                                   verbose=1,
                                   mode='min')
    terminate_on_nan = TerminateOnNaN()
    callbacks = [
        tensorboard, checkpoint, reduce_lr, early_stopping, terminate_on_nan
    ]

    # Load the dataset (annotation file path comes from the CLI). Train and
    # validation samples share one list: first num_train entries are train.
    dataset = get_dataset(args.annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        num_val = int(len(dataset) * args.val_split)
        num_train = len(dataset) - num_val

    # multiscale interval; -1 is passed through when multiscale is off
    rescale_interval = args.rescale_interval if args.multiscale else -1

    # model input shape check
    input_shape = args.model_image_size
    height_ok = input_shape[0] % 32 == 0
    width_ok = input_shape[1] % 32 == 0
    assert height_ok and width_ok, 'model_image_size should be multiples of 32'

    # Choose the data generator and model builder by model kind.
    if num_anchors in (9, 6):
        # 9 anchors: YOLOv3, 6 anchors: Tiny YOLOv3
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper
        tiny_version = num_anchors == 6  # not consumed later in this function
    elif num_anchors == 5:
        # YOLOv2 uses 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper
        tiny_version = False
    else:
        raise ValueError('Unsupported anchors number')

    # optional online evaluation on the validation slice
    if args.eval_online:
        callbacks.append(
            EvalCallBack(args.model_type,
                         dataset[num_train:],
                         anchors,
                         class_names,
                         args.model_image_size,
                         args.model_pruning,
                         log_dir,
                         eval_epoch_interval=args.eval_epoch_interval,
                         save_eval_checkpoint=args.save_eval_checkpoint,
                         elim_grid_sense=args.elim_grid_sense))

    # optional train/val data shuffle callback
    if args.data_shuffle:
        callbacks.append(DatasetShuffleCallBack(dataset))

    # model pruning config
    steps_for_pruning = np.ceil(1.0 * num_train / args.batch_size).astype(
        np.int32)
    pruning_end_step = steps_for_pruning * args.total_epoch
    if args.model_pruning:
        callbacks = callbacks + [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0),
        ]

    # optimizer without decay for the first stage
    optimizer = get_optimizer(args.optimizer,
                              args.learning_rate,
                              decay_type=None)

    # Keyword arguments shared by the multi-GPU and single-device builds.
    build_kwargs = dict(weights_path=args.weights_path,
                        freeze_level=freeze_level,
                        optimizer=optimizer,
                        label_smoothing=args.label_smoothing,
                        elim_grid_sense=args.elim_grid_sense,
                        model_pruning=args.model_pruning,
                        pruning_end_step=pruning_end_step)

    multi_gpu = args.gpu_num >= 2
    if multi_gpu:
        # e.g. ["/gpu:0", "/gpu:1"]
        devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)]
        strategy = tf.distribute.MirroredStrategy(devices=devices_list)
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        with strategy.scope():
            # multi-GPU train model
            model = get_train_model(args.model_type, anchors, num_classes,
                                    **build_kwargs)
    else:
        # normal train model
        model = get_train_model(args.model_type, anchors, num_classes,
                                **build_kwargs)

    model.summary()

    def make_train_batches():
        # slice taken at call time, as in each fit call
        return data_generator(dataset[:num_train],
                              args.batch_size,
                              input_shape,
                              anchors,
                              num_classes,
                              args.enhance_augment,
                              rescale_interval,
                              multi_anchor_assign=args.multi_anchor_assign)

    def make_val_batches():
        return data_generator(dataset[num_train:],
                              args.batch_size,
                              input_shape,
                              anchors,
                              num_classes,
                              multi_anchor_assign=args.multi_anchor_assign)

    def unfreeze_and_recompile(target, new_optimizer):
        # mark every layer trainable, then recompile to apply the change
        for layer in target.layers:
            layer.trainable = True
        target.compile(optimizer=new_optimizer,
                       loss={'yolo_loss': lambda y_true, y_pred: y_pred})

    train_steps = max(1, num_train // args.batch_size)
    val_steps = max(1, num_val // args.batch_size)
    stage_banner = ('Train on {} samples, val on {} samples, '
                    'with batch size {}, input_shape {}.')

    # Stage 1 (init epoch .. transfer epoch): train some epochs before
    # unfreezing, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print(stage_banner.format(num_train, num_val, args.batch_size,
                              input_shape))
    model.fit_generator(make_train_batches(),
                        steps_per_epoch=train_steps,
                        validation_data=make_val_batches(),
                        validation_steps=val_steps,
                        epochs=epochs,
                        initial_epoch=initial_epoch,
                        workers=1,
                        use_multiprocessing=False,
                        max_queue_size=10,
                        callbacks=callbacks)

    # wait 2 seconds before the next stage
    time.sleep(2)

    if args.decay_type:
        # Learning-rate decay is applied only for the second stage, so the
        # plateau callback is dropped and the optimizer rebuilt.
        callbacks.remove(reduce_lr)
        remaining_epochs = (args.total_epoch - args.init_epoch -
                            args.transfer_epoch)
        decay_steps = train_steps * remaining_epochs
        optimizer = get_optimizer(args.optimizer,
                                  args.learning_rate,
                                  decay_type=args.decay_type,
                                  decay_steps=decay_steps)

    # Stage 2: unfreeze the whole network for further tuning.
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    if multi_gpu:
        with strategy.scope():
            unfreeze_and_recompile(model, optimizer)
    else:
        unfreeze_and_recompile(model, optimizer)

    print(stage_banner.format(num_train, num_val, args.batch_size,
                              input_shape))

    # Train the remaining epochs (transfer epoch .. total epoch).
    # Original author's note: if this stage is not needed or takes too long,
    # total_epoch can be set equal to the transfer epoch so that the training
    # below is skipped; adjust to the machine at hand.
    model.fit_generator(make_train_batches(),
                        steps_per_epoch=train_steps,
                        validation_data=make_val_batches(),
                        validation_steps=val_steps,
                        epochs=args.total_epoch,
                        initial_epoch=epochs,
                        workers=1,
                        use_multiprocessing=False,
                        max_queue_size=10,
                        callbacks=callbacks)

    # finally store the model
    if args.model_pruning:
        model = sparsity.strip_pruning(model)
    model.save(os.path.join(log_dir, 'trained_final.h5'))
def main(args):
    """Train a YOLO-family model in two stages and save it under ``logs/000``.

    The train-model builder and data generator are chosen from the prefix of
    ``args.model_type``. Stage one fits from ``args.init_epoch`` to
    ``args.init_epoch + args.transfer_epoch``. Before stage two the optimizer
    is rebuilt when ``args.decay_type`` or ``args.average_type`` is set, every
    layer is marked trainable and the model is recompiled; stage two then fits
    up to ``args.total_epoch``.

    Args:
        args: option namespace. Attributes read here: ``annotation_file``,
            ``val_annotation_file``, ``val_split``, ``classes_path``,
            ``anchors_path``, ``weights_path``, ``freeze_level``,
            ``multiscale``, ``rescale_interval``, ``model_image_size``,
            ``model_type``, ``model_pruning``, ``eval_online``,
            ``eval_epoch_interval``, ``save_eval_checkpoint``,
            ``elim_grid_sense``, ``data_shuffle``, ``batch_size``,
            ``total_epoch``, ``init_epoch``, ``transfer_epoch``,
            ``optimizer``, ``learning_rate``, ``decay_type``,
            ``average_type``, ``label_smoothing``, ``enhance_augment``,
            ``multi_anchor_assign`` and ``gpu_num``.

    Raises:
        AssertionError: if a side of ``args.model_image_size`` is not a
            multiple of 32.
        ValueError: if ``args.model_type`` matches none of the known prefixes.
    """
    log_dir = os.path.join('logs', '000')

    class_names = get_classes(args.classes_path)
    num_classes = len(class_names)

    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)  # kept from the anchor-count based dispatch

    # An explicit CLI freeze level wins; otherwise 0 when weights are given
    # and 1 when they are not.
    if args.freeze_level is not None:
        freeze_level = args.freeze_level
    else:
        freeze_level = 0 if args.weights_path else 1

    # callbacks for the training process
    tensorboard = TensorBoard(log_dir=log_dir,
                              histogram_freq=0,
                              write_graph=False,
                              write_grads=False,
                              write_images=False,
                              update_freq='batch')
    checkpoint_path = os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5')
    checkpoint = ModelCheckpoint(checkpoint_path,
                                 monitor='val_loss',
                                 mode='min',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  mode='min',
                                  patience=10,
                                  verbose=1,
                                  cooldown=0,
                                  min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=50,
                                   verbose=1,
                                   mode='min')
    terminate_on_nan = TerminateOnNaN()
    callbacks = [
        tensorboard, checkpoint, reduce_lr, early_stopping, terminate_on_nan
    ]

    # Train and validation samples live in one list: the first num_train
    # entries are used for training, the rest for validation.
    dataset = get_dataset(args.annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        num_val = int(len(dataset) * args.val_split)
        num_train = len(dataset) - num_val

    # multiscale interval; -1 is passed through when multiscale is off
    rescale_interval = args.rescale_interval if args.multiscale else -1

    # model input shape check
    input_shape = args.model_image_size
    height_ok = input_shape[0] % 32 == 0
    width_ok = input_shape[1] % 32 == 0
    assert height_ok and width_ok, 'model_image_size should be multiples of 32'

    # Pick the model builder and data generator from the model type prefix.
    model_type = args.model_type
    if model_type.startswith(('scaled_yolo4_', 'yolo5_')):
        # Scaled-YOLOv4 & YOLOv5 entrance, uses the yolo5 submodule
        # TODO: create new yolo5 data generator to apply YOLOv5 anchor assignment
        get_train_model = get_yolo5_train_model
        data_generator = yolo5_data_generator_wrapper
        tiny_version = False
    elif model_type.startswith(
            ('yolo3_', 'yolo4_', 'tiny_yolo3_', 'tiny_yolo4_')):
        # YOLOv3 & v4 entrance (9 anchors) and their tiny variants (6 anchors)
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper
        # not consumed later in this function
        tiny_version = model_type.startswith('tiny_')
    elif model_type.startswith(('yolo2_', 'tiny_yolo2_')):
        # YOLOv2 & Tiny YOLOv2 use 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper
        tiny_version = False
    else:
        raise ValueError('Unsupported model type')

    # optional online evaluation on the validation slice
    if args.eval_online:
        callbacks.append(
            EvalCallBack(args.model_type,
                         dataset[num_train:],
                         anchors,
                         class_names,
                         args.model_image_size,
                         args.model_pruning,
                         log_dir,
                         eval_epoch_interval=args.eval_epoch_interval,
                         save_eval_checkpoint=args.save_eval_checkpoint,
                         elim_grid_sense=args.elim_grid_sense))

    # optional train/val data shuffle callback
    if args.data_shuffle:
        callbacks.append(DatasetShuffleCallBack(dataset))

    # model pruning config
    steps_for_pruning = np.ceil(1.0 * num_train / args.batch_size).astype(
        np.int32)
    pruning_end_step = steps_for_pruning * args.total_epoch
    if args.model_pruning:
        callbacks = callbacks + [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0),
        ]

    # optimizer without averaging or decay for the first stage
    optimizer = get_optimizer(args.optimizer,
                              args.learning_rate,
                              average_type=None,
                              decay_type=None)

    # Keyword arguments shared by the multi-GPU and single-device builds.
    build_kwargs = dict(weights_path=args.weights_path,
                        freeze_level=freeze_level,
                        optimizer=optimizer,
                        label_smoothing=args.label_smoothing,
                        elim_grid_sense=args.elim_grid_sense,
                        model_pruning=args.model_pruning,
                        pruning_end_step=pruning_end_step)

    multi_gpu = args.gpu_num >= 2
    if multi_gpu:
        # e.g. ["/gpu:0", "/gpu:1"]
        devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)]
        strategy = tf.distribute.MirroredStrategy(devices=devices_list)
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        with strategy.scope():
            # multi-GPU train model
            model = get_train_model(args.model_type, anchors, num_classes,
                                    **build_kwargs)
    else:
        # normal train model
        model = get_train_model(args.model_type, anchors, num_classes,
                                **build_kwargs)

    model.summary()

    def make_train_batches():
        # slice taken at call time, as in each fit call
        return data_generator(dataset[:num_train],
                              args.batch_size,
                              input_shape,
                              anchors,
                              num_classes,
                              args.enhance_augment,
                              rescale_interval,
                              multi_anchor_assign=args.multi_anchor_assign)

    def make_val_batches():
        return data_generator(dataset[num_train:],
                              args.batch_size,
                              input_shape,
                              anchors,
                              num_classes,
                              multi_anchor_assign=args.multi_anchor_assign)

    def unfreeze_and_recompile(target, new_optimizer):
        # mark every layer trainable, then recompile to apply the change
        for layer in target.layers:
            layer.trainable = True
        target.compile(optimizer=new_optimizer,
                       loss={'yolo_loss': lambda y_true, y_pred: y_pred})

    train_steps = max(1, num_train // args.batch_size)
    val_steps = max(1, num_val // args.batch_size)
    stage_banner = ('Train on {} samples, val on {} samples, '
                    'with batch size {}, input_shape {}.')

    # Stage 1: train some epochs before unfreezing, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print(stage_banner.format(num_train, num_val, args.batch_size,
                              input_shape))
    model.fit_generator(make_train_batches(),
                        steps_per_epoch=train_steps,
                        validation_data=make_val_batches(),
                        validation_steps=val_steps,
                        epochs=epochs,
                        initial_epoch=initial_epoch,
                        workers=1,
                        use_multiprocessing=False,
                        max_queue_size=10,
                        callbacks=callbacks)

    # wait 2 seconds before the next stage
    time.sleep(2)

    if args.decay_type or args.average_type:
        # Rebuild the optimizer to apply learning-rate decay or the weights
        # averager; this happens only for the second stage.
        if args.decay_type:
            callbacks.remove(reduce_lr)

        if args.average_type in ('ema', 'swa'):
            # The weights averager needs tensorflow-addons, which requires
            # TF 2.x and has its own version compatibility constraints, so
            # it is imported only when requested.
            import tensorflow_addons as tfa
            callbacks.remove(checkpoint)
            avg_checkpoint = tfa.callbacks.AverageModelCheckpoint(
                filepath=os.path.join(
                    log_dir,
                    'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                update_weights=True,
                monitor='val_loss',
                mode='min',
                verbose=1,
                save_weights_only=False,
                save_best_only=True,
                period=1)
            callbacks.append(avg_checkpoint)

        remaining_epochs = (args.total_epoch - args.init_epoch -
                            args.transfer_epoch)
        decay_steps = train_steps * remaining_epochs
        optimizer = get_optimizer(args.optimizer,
                                  args.learning_rate,
                                  average_type=args.average_type,
                                  decay_type=args.decay_type,
                                  decay_steps=decay_steps)

    # Stage 2: unfreeze the whole network for further tuning.
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    if multi_gpu:
        with strategy.scope():
            unfreeze_and_recompile(model, optimizer)
    else:
        unfreeze_and_recompile(model, optimizer)

    print(stage_banner.format(num_train, num_val, args.batch_size,
                              input_shape))
    model.fit_generator(make_train_batches(),
                        steps_per_epoch=train_steps,
                        validation_data=make_val_batches(),
                        validation_steps=val_steps,
                        epochs=args.total_epoch,
                        initial_epoch=epochs,
                        workers=1,
                        use_multiprocessing=False,
                        max_queue_size=10,
                        callbacks=callbacks)

    # finally store the model
    if args.model_pruning:
        model = sparsity.strip_pruning(model)
    model.save(os.path.join(log_dir, 'trained_final.h5'))
def main(args):
    """Train a YOLO model in two stages with a configurable unfreeze fraction.

    Stage one fits from ``args.init_epoch`` to ``args.init_epoch +
    args.transfer_epoch``. Before stage two, the last
    ``int(layer_count * args.unfreeze_level)`` layers of the model are marked
    trainable and the model is recompiled; stage two then fits up to
    ``args.total_epoch``. Logs, checkpoints and ``trained_final.h5`` are
    written to the log directory resolved below.

    The number of layers to unfreeze is clamped to ``[0, layer_count]``.
    Without the clamp, an ``unfreeze_level`` above 1 produced a negative start
    index (layers addressed from the end, or ``IndexError`` once it dropped
    below ``-layer_count``), and a negative level reported a negative layer
    count.

    Args:
        args: option namespace. Attributes read here: ``annotation_file``,
            ``val_annotation_file``, ``val_split``, ``classes_path``,
            ``anchors_path``, ``log_directory``, ``weights_path``,
            ``freeze_level``, ``unfreeze_level``, ``multiscale``,
            ``rescale_interval``, ``model_image_size``, ``model_type``,
            ``model_pruning``, ``eval_online``, ``eval_epoch_interval``,
            ``save_eval_checkpoint``, ``elim_grid_sense``, ``data_shuffle``,
            ``batch_size``, ``total_epoch``, ``init_epoch``,
            ``transfer_epoch``, ``optimizer``, ``learning_rate``,
            ``decay_type``, ``label_smoothing``, ``enhance_augment``,
            ``multi_anchor_assign``, ``show_history`` and ``gpu_num``.

    Raises:
        AssertionError: if a side of ``args.model_image_size`` is not a
            multiple of 32.
        ValueError: if the anchor count is not 9, 6 or 5.
    """
    class_names = get_classes(args.classes_path)
    num_classes = len(class_names)

    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    # Resolve the log directory. os.path.join raises TypeError when
    # log_directory is not a path-like value (e.g. None); in that case a
    # timestamped folder name is generated instead.
    try:
        log_dir = os.path.join('logs', args.log_directory)
    except TypeError:
        date_now = datetime.now()
        log_dir_folder_name = f'{date_now.strftime("%Y_%m_%d_%H%M%S")}_{args.model_type}_TransferEp_{args.transfer_epoch}_TotalEP_{args.total_epoch}'
        log_dir = os.path.realpath(os.path.join('logs', log_dir_folder_name))

    # An explicit CLI freeze level wins; otherwise 0 when weights are given
    # and 1 when they are not.
    if args.freeze_level is not None:
        freeze_level = args.freeze_level
    else:
        freeze_level = 0 if args.weights_path else 1

    # fraction of layers to unfreeze for fine tuning
    unfreeze_level = args.unfreeze_level

    # callbacks for the training process
    tensorboard = TensorBoard(log_dir=log_dir,
                              histogram_freq=0,
                              write_graph=False,
                              write_grads=False,
                              write_images=False,
                              update_freq='batch')
    checkpoint = ModelCheckpoint(
        filepath=log_dir + os.sep + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        mode='min',
        verbose=1,
        save_weights_only=False,
        save_best_only=True,
        period=1,
    )
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        mode='min',
        patience=10,
        verbose=1,
        cooldown=0,
        min_lr=1e-10,
    )
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=50,
                                   verbose=1,
                                   mode='min')
    terminate_on_nan = TerminateOnNaN()
    callbacks = [
        tensorboard, checkpoint, reduce_lr, early_stopping, terminate_on_nan
    ]

    # Train and validation samples live in one list: the first num_train
    # entries are used for training, the rest for validation.
    dataset = get_dataset(args.annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        num_val = int(len(dataset) * args.val_split)
        num_train = len(dataset) - num_val

    # multiscale interval; -1 is passed through when multiscale is off
    rescale_interval = args.rescale_interval if args.multiscale else -1

    # model input shape check
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'model_image_size should be multiples of 32'

    # Pick the model builder and data generator from the anchor count.
    if num_anchors in (9, 6):
        # 9 anchors: YOLOv3, 6 anchors: Tiny YOLOv3
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper
    elif num_anchors == 5:
        # YOLOv2 uses 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper
    else:
        raise ValueError('Unsupported anchors number')

    # optional online evaluation on the validation slice
    if args.eval_online:
        callbacks.append(
            EvalCallBack(
                model_type=args.model_type,
                annotation_lines=dataset[num_train:],
                anchors=anchors,
                class_names=class_names,
                model_image_size=args.model_image_size,
                model_pruning=args.model_pruning,
                log_dir=log_dir,
                eval_epoch_interval=args.eval_epoch_interval,
                save_eval_checkpoint=args.save_eval_checkpoint,
                elim_grid_sense=args.elim_grid_sense,
            ))

    # optional train/val data shuffle callback
    if args.data_shuffle:
        callbacks.append(DatasetShuffleCallBack(dataset))

    # model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(np.int32) * args.total_epoch
    if args.model_pruning:
        callbacks = callbacks + [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0),
        ]

    # optimizer without decay for the first stage
    optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None)

    # Keyword arguments shared by the multi-GPU and single-device builds.
    build_kwargs = dict(
        model_type=args.model_type,
        anchors=anchors,
        num_classes=num_classes,
        weights_path=args.weights_path,
        freeze_level=freeze_level,
        optimizer=optimizer,
        label_smoothing=args.label_smoothing,
        elim_grid_sense=args.elim_grid_sense,
        model_pruning=args.model_pruning,
        pruning_end_step=pruning_end_step,
    )

    multi_gpu = args.gpu_num >= 2
    if multi_gpu:
        # e.g. ["/gpu:0", "/gpu:1"]
        devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)]
        strategy = tf.distribute.MirroredStrategy(devices=devices_list)
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        with strategy.scope():
            # multi-GPU train model
            model = get_train_model(**build_kwargs)
    else:
        # normal train model
        model = get_train_model(**build_kwargs)

    if args.show_history:
        model.summary()

    # Needed by the fine-tune stage regardless of show_history.
    layers_count = len(model.layers)
    print(f'Total layers: {layers_count}')

    def make_train_generator():
        # The YOLO data augmentation generator; slice taken at call time.
        return data_generator(
            annotation_lines=dataset[:num_train],
            batch_size=args.batch_size,
            input_shape=input_shape,
            anchors=anchors,
            num_classes=num_classes,
            enhance_augment=args.enhance_augment,
            rescale_interval=rescale_interval,
            multi_anchor_assign=args.multi_anchor_assign,
        )

    def make_val_generator():
        # Validation generator.
        return data_generator(
            annotation_lines=dataset[num_train:],
            batch_size=args.batch_size,
            input_shape=input_shape,
            anchors=anchors,
            num_classes=num_classes,
            multi_anchor_assign=args.multi_anchor_assign,
        )

    def unfreeze_tail_and_recompile(first_index, new_optimizer):
        # Mark layers from first_index to the end trainable, then recompile
        # to apply the change.
        for layer in model.layers[first_index:]:
            layer.trainable = True
        model.compile(optimizer=new_optimizer,
                      loss={'yolo_loss': lambda y_true, y_pred: y_pred})

    train_steps = max(1, num_train // args.batch_size)
    val_steps = max(1, num_val // args.batch_size)

    # Stage 1: transfer training with the frozen layers, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val, args.batch_size, input_shape))
    model.fit(
        make_train_generator(),
        steps_per_epoch=train_steps,
        validation_data=make_val_generator(),
        validation_steps=val_steps,
        epochs=epochs,
        initial_epoch=initial_epoch,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks,
    )

    # wait 2 seconds before the next stage
    time.sleep(2)

    if args.decay_type:
        # Learning-rate decay is applied only for the second stage, so the
        # plateau callback is dropped and the optimizer rebuilt.
        callbacks.remove(reduce_lr)
        decay_steps = train_steps * (args.total_epoch - args.init_epoch - args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=args.decay_type, decay_steps=decay_steps)

    # Stage 2: unfreeze the tail of the network for further tuning.
    # NOTE: more GPU memory is required after unfreezing the body
    # Clamp so an out-of-range unfreeze_level can neither wrap around to
    # negative indices nor run past the first layer.
    fine_tune_layers = min(max(int(layers_count * unfreeze_level), 0), layers_count)
    first_trainable = layers_count - fine_tune_layers
    print(f"Unfreeze {unfreeze_level * 100}% of layers and continue training, to fine-tune.")
    print(f"Unfroze {fine_tune_layers} layers of {layers_count}")

    if multi_gpu:
        with strategy.scope():
            unfreeze_tail_and_recompile(first_trainable, optimizer)
    else:
        unfreeze_tail_and_recompile(first_trainable, optimizer)

    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val, args.batch_size, input_shape))

    # Fine-tuning: more memory is used; the learning rate decays when
    # args.decay_type is set.
    model.fit(
        make_train_generator(),
        steps_per_epoch=train_steps,
        validation_data=make_val_generator(),
        validation_steps=val_steps,
        epochs=args.total_epoch,
        initial_epoch=epochs,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks,
    )

    # finally store the model
    if args.model_pruning:
        model = sparsity.strip_pruning(model)
    model.save(os.path.join(log_dir, 'trained_final.h5'))
def _select_segmentation_loss(args, train_generator, num_classes):
    """Choose the training loss and Keras ``sample_weight_mode`` from CLI options.

    Args:
        args: parsed CLI namespace; reads ``weighted_type``, ``loss`` and
            ``dataset_path``.
        train_generator: training data generator, only handed to
            ``calculate_weigths_labels`` when no cached class weights exist.
        num_classes: number of classes including the background class.

    Returns:
        A ``(losses, sample_weight_mode)`` tuple to pass to ``model.compile``.

    Raises:
        ValueError: if ``args.weighted_type`` or ``args.loss`` is not one of
            the supported values.
    """
    weighted_type = args.weighted_type

    if weighted_type == 'balanced':
        # Reuse cached class weights from the dataset dir when present,
        # otherwise compute them from the training generator.
        classes_weights_path = os.path.join(args.dataset_path, 'classes_weights.txt')
        if os.path.isfile(classes_weights_path):
            weights = load_class_weights(classes_weights_path)
        else:
            weights = calculate_weigths_labels(train_generator, num_classes,
                                               save_path=args.dataset_path)
        losses = WeightedSparseCategoricalCrossEntropy(weights)
        sample_weight_mode = None
    elif weighted_type == 'adaptive':
        losses = sparse_crossentropy
        sample_weight_mode = 'temporal'
    elif weighted_type is None:
        losses = sparse_crossentropy
        sample_weight_mode = None
    else:
        raise ValueError('invalid weighted_type {}'.format(args.weighted_type))

    if args.loss == 'focal':
        # Focal loss overrides whatever the weighted type selected above.
        warnings.warn("Focal loss doesn't support weighted class balance, will ignore related config")
        losses = softmax_focal_loss
        sample_weight_mode = None
    elif args.loss != 'crossentropy':
        # 'crossentropy' keeps the weighted type setting; anything else is invalid.
        raise ValueError('invalid loss type {}'.format(args.loss))

    return losses, sample_weight_mode


def _unfreeze_all_and_recompile(model, optimizer, compile_kwargs):
    """Mark every layer of ``model`` trainable and recompile to apply the change."""
    for layer in model.layers:
        layer.trainable = True
    model.compile(optimizer=optimizer, **compile_kwargs)


def main(args):
    """Train a DeepLabv3+ segmentation model in two stages and save the result.

    Stage 1 ("transfer training") trains from ``args.init_epoch`` for
    ``args.transfer_epoch`` epochs with the freeze level given by
    ``args.freeze_level``. Stage 2 sets every layer trainable, recompiles and
    continues up to ``args.total_epoch``. The final model is written to
    ``logs/000/trained_final.h5``.

    Args:
        args: parsed CLI namespace. Attributes read here: ``classes_path``,
            ``dataset_file``, ``val_dataset_file``, ``val_split``,
            ``dataset_path``, ``batch_size``, ``model_input_shape``,
            ``weighted_type``, ``loss``, ``eval_online``, ``model_pruning``,
            ``eval_epoch_interval``, ``save_eval_checkpoint``, ``optimizer``,
            ``learning_rate``, ``decay_type``, ``gpu_num``, ``model_type``,
            ``output_stride``, ``freeze_level``, ``weights_path``,
            ``init_epoch``, ``transfer_epoch`` and ``total_epoch``.

    Raises:
        ValueError: on an unsupported ``weighted_type`` or ``loss`` option.
    """
    # Kept as a literal with trailing slash: it is also handed to EvalCallBack.
    log_dir = 'logs/000/'

    # get class info, add background class to match model & GT
    class_names = get_classes(args.classes_path)
    assert len(class_names) < 254, 'PNG image label only support less than 254 classes.'
    class_names = ['background'] + class_names
    num_classes = len(class_names)

    # callbacks for training process; all of them track the validation Jaccard
    # metric, where higher is better (mode='max')
    monitor = 'Jaccard'
    val_monitor = 'val_{}'.format(monitor)
    tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False,
                              write_grads=False, write_images=False, update_freq='batch')
    checkpoint = ModelCheckpoint(
        os.path.join(log_dir, 'ep{epoch:03d}-loss{loss:.3f}-Jaccard{Jaccard:.3f}-val_loss{val_loss:.3f}-val_Jaccard{val_Jaccard:.3f}.h5'),
        monitor=val_monitor,
        mode='max',
        verbose=1,
        save_weights_only=False,
        save_best_only=True,
        period=1)
    reduce_lr = ReduceLROnPlateau(monitor=val_monitor, factor=0.5, mode='max',
                                  patience=5, verbose=1, cooldown=0, min_lr=1e-6)
    early_stopping = EarlyStopping(monitor=val_monitor, min_delta=0, patience=100,
                                   verbose=1, mode='max')
    terminate_on_nan = TerminateOnNaN()
    callbacks = [tensorboard, checkpoint, reduce_lr, early_stopping, terminate_on_nan]

    # get train&val dataset; validation samples always sit at dataset[num_train:]
    dataset = get_data_list(args.dataset_file)
    if args.val_dataset_file:
        val_dataset = get_data_list(args.val_dataset_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        num_val = int(len(dataset) * args.val_split)
        num_train = len(dataset) - num_val

    # prepare train&val data generator (only the training one augments)
    resize_shape = args.model_input_shape[::-1]
    train_generator = SegmentationGenerator(args.dataset_path, dataset[:num_train],
                                            args.batch_size, num_classes,
                                            resize_shape=resize_shape,
                                            crop_shape=None,
                                            weighted_type=args.weighted_type,
                                            augment=True,
                                            do_ahisteq=False)
    valid_generator = SegmentationGenerator(args.dataset_path, dataset[num_train:],
                                            args.batch_size, num_classes,
                                            resize_shape=resize_shape,
                                            crop_shape=None,
                                            weighted_type=args.weighted_type,
                                            augment=False,
                                            do_ahisteq=False)

    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(args.dataset_path, dataset[num_train:], class_names,
                                     args.model_input_shape, args.model_pruning, log_dir,
                                     eval_epoch_interval=args.eval_epoch_interval,
                                     save_eval_checkpoint=args.save_eval_checkpoint)
        callbacks.append(eval_callback)

    # prepare optimizer; learning rate decay is only applied in the fine-tune stage
    optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None)

    # prepare loss according to loss type & weighted type
    losses, sample_weight_mode = _select_segmentation_loss(args, train_generator, num_classes)

    # Everything except the optimizer is identical for both compile passes.
    compile_kwargs = {
        'sample_weight_mode': sample_weight_mode,
        'loss': losses,
        'metrics': {'pred_mask': Jaccard},
    }

    def build_and_compile():
        # Shared by the single- and multi-GPU paths so they cannot drift apart.
        net = get_deeplabv3p_model(args.model_type, num_classes, args.model_input_shape,
                                   args.output_stride, args.freeze_level,
                                   weights_path=args.weights_path)
        net.compile(optimizer=optimizer, **compile_kwargs)
        return net

    # support multi-gpu training
    strategy = None
    if args.gpu_num >= 2:
        devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)]
        strategy = tf.distribute.MirroredStrategy(devices=devices_list)
        print ('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        with strategy.scope():
            model = build_and_compile()
    else:
        model = build_and_compile()

    model.summary()

    def run_stage(start_epoch, end_epoch):
        # `callbacks` is read at call time, so the removal of reduce_lr between
        # the two stages is honoured.
        print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val, args.batch_size, args.model_input_shape))
        model.fit_generator(generator=train_generator,
                            steps_per_epoch=len(train_generator),
                            validation_data=valid_generator,
                            validation_steps=len(valid_generator),
                            epochs=end_epoch,
                            initial_epoch=start_epoch,
                            verbose=1,
                            workers=1,
                            use_multiprocessing=False,
                            max_queue_size=10,
                            callbacks=callbacks)

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    run_stage(initial_epoch, epochs)

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type:
        # rebuild optimizer to apply learning rate decay, only after
        # unfreeze all layers; the plateau callback is dropped at the same time
        callbacks.remove(reduce_lr)
        steps_per_epoch = max(1, len(train_generator))
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer, args.learning_rate,
                                  decay_type=args.decay_type, decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    if strategy is not None:
        with strategy.scope():
            _unfreeze_all_and_recompile(model, optimizer, compile_kwargs)
    else:
        _unfreeze_all_and_recompile(model, optimizer, compile_kwargs)

    run_stage(epochs, args.total_epoch)

    # Finally store model
    model.save(os.path.join(log_dir, 'trained_final.h5'))