def evaluate_model(args, model, input_shape):
    # eval data generator
    eval_datagen = ImageDataGenerator(preprocessing_function=preprocess)
    eval_generator = eval_datagen.flow_from_directory(
        args.val_data_path,
        target_size=input_shape,
        batch_size=args.batch_size)

    # get optimizer
    optimizer = get_optimizer(args.optimizer,
                              args.learning_rate,
                              average_type=None,
                              decay_type=None)

    # start evaluation
    model.compile(optimizer=optimizer,
                  metrics=['accuracy', 'top_k_categorical_accuracy'],
                  loss='categorical_crossentropy')

    print('Evaluate on {} samples, with batch size {}.'.format(
        eval_generator.samples, args.batch_size))
    scores = model.evaluate_generator(eval_generator,
                                      steps=eval_generator.samples //
                                      args.batch_size,
                                      max_queue_size=10,
                                      workers=1,
                                      use_multiprocessing=False,
                                      verbose=1)

    print('Evaluate loss:', scores[0])
    print('Top-1 accuracy:', scores[1])
    print('Top-k accuracy:', scores[2])
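
A note on the helper used above: `get_optimizer` is project-specific and not shown in these snippets. The sketch below illustrates what such a helper might look like, assuming it maps an optimizer name and an optional decay schedule onto a tf.keras optimizer; the names, branches, and defaults are illustrative, not the repository's actual implementation.

from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.optimizers.schedules import ExponentialDecay, PolynomialDecay


def get_optimizer(optim_type, learning_rate, average_type=None, decay_type=None, decay_steps=1000):
    # Hypothetical sketch of the helper; the real project code may differ.
    if decay_type == 'exponential':
        learning_rate = ExponentialDecay(initial_learning_rate=learning_rate,
                                         decay_steps=decay_steps, decay_rate=0.9)
    elif decay_type == 'polynomial':
        learning_rate = PolynomialDecay(initial_learning_rate=learning_rate,
                                        decay_steps=decay_steps,
                                        end_learning_rate=learning_rate / 100)

    optim_type = optim_type.lower()
    if optim_type == 'adam':
        optimizer = Adam(learning_rate=learning_rate)
    elif optim_type == 'rmsprop':
        optimizer = RMSprop(learning_rate=learning_rate)
    elif optim_type == 'sgd':
        optimizer = SGD(learning_rate=learning_rate, momentum=0.9)
    else:
        raise ValueError('Unsupported optimizer type: {}'.format(optim_type))

    # average_type ('ema'/'swa') would wrap the optimizer with a tensorflow_addons
    # averager; omitted here to keep the sketch dependency-free.
    return optimizer
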
Example #2
def main(args):
    annotation_file = args.annotation_file
    log_dir = os.path.join('logs', '000')
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)

    print('classes_path =', classes_path)
    print('class_names = ', class_names)
    print('num_classes = ', num_classes)

    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1

    if args.freeze_level is not None:
        freeze_level = args.freeze_level

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir,
                          histogram_freq=0,
                          write_graph=False,
                          write_grads=False,
                          write_images=False,
                          update_freq='batch')
    checkpoint = ModelCheckpoint(os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                 monitor='val_loss',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  patience=10,
                                  verbose=1,
                                  cooldown=0,
                                  min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=50,
                                   verbose=1)
    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan
    ]

    # get train&val dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        print('num_train = ', num_train)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # assign multiscale interval
    if args.multiscale:
        rescale_interval = args.rescale_interval
    else:
        rescale_interval = -1  #Doesn't rescale

    # model input shape check
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0
            and input_shape[1] % 32 == 0), 'Multiples of 32 required'

    # get different model type & train&val data generator
    if num_anchors == 9:
        # YOLOv3 use 9 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)

        tiny_version = False
    elif num_anchors == 6:
        # Tiny YOLOv3 use 6 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)

        tiny_version = True
    elif num_anchors == 5:
        # YOLOv2 use 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        #val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)

        tiny_version = False
    else:
        raise ValueError('Unsupported anchors number')

    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(
            args.model_type,
            dataset[num_train:],
            anchors,
            class_names,
            args.model_image_size,
            args.model_pruning,
            log_dir,
            eval_epoch_interval=args.eval_epoch_interval,
            save_eval_checkpoint=args.save_eval_checkpoint)
        callbacks.append(eval_callback)

    # prepare train/val data shuffle callback
    if args.data_shuffle:
        shuffle_callback = DatasetShuffleCallBack(dataset)
        callbacks.append(shuffle_callback)

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(
        np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)
        ]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer,
                              args.learning_rate,
                              decay_type=None)

    # get train model
    model = get_train_model(args.model_type,
                            anchors,
                            num_classes,
                            weights_path=args.weights_path,
                            freeze_level=freeze_level,
                            optimizer=optimizer,
                            label_smoothing=args.label_smoothing,
                            model_pruning=args.model_pruning,
                            pruning_end_step=pruning_end_step)
    # support multi-gpu training
    template_model = None
    if args.gpu_num >= 2:
        # keep the template model for saving result
        template_model = model
        model = multi_gpu_model(model, gpus=args.gpu_num)
        # recompile multi gpu model
        model.compile(optimizer=optimizer,
                      loss={
                          'yolo_loss': lambda y_true, y_pred: y_pred
                      })
    model.summary()

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(num_train, num_val, args.batch_size, input_shape))
    #model.fit_generator(train_data_generator,
    model.fit_generator(
        data_generator(dataset[:num_train], args.batch_size, input_shape,
                       anchors, num_classes, args.enhance_augment),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(dataset[num_train:], args.batch_size,
                                       input_shape, anchors, num_classes),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=epochs,
        initial_epoch=initial_epoch,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type:
        # rebuild optimizer to apply learning rate decay, only after
        # unfreeze all layers
        callbacks.remove(reduce_lr)
        steps_per_epoch = max(1, num_train // args.batch_size)
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch -
                                         args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer,
                                  args.learning_rate,
                                  decay_type=args.decay_type,
                                  decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    for i in range(len(model.layers)):
        model.layers[i].trainable = True
    model.compile(optimizer=optimizer,
                  loss={
                      'yolo_loss': lambda y_true, y_pred: y_pred
                  })  # recompile to apply the change

    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(num_train, num_val, args.batch_size, input_shape))
    #model.fit_generator(train_data_generator,
    model.fit_generator(
        data_generator(dataset[:num_train], args.batch_size, input_shape,
                       anchors, num_classes, args.enhance_augment,
                       rescale_interval),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(dataset[num_train:], args.batch_size,
                                       input_shape, anchors, num_classes),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=args.total_epoch,
        initial_epoch=epochs,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Finally store model
    if args.model_pruning:
        if template_model is not None:
            template_model = sparsity.strip_pruning(template_model)
        else:
            model = sparsity.strip_pruning(model)

    if template_model is not None:
        template_model.save(os.path.join(log_dir, 'trained_final.h5'))
    else:
        model.save(os.path.join(log_dir, 'trained_final.h5'))
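
The `get_classes` and `get_anchors` helpers used by these YOLO examples are small file parsers that are not shown. A plausible sketch, assuming the usual keras-yolo3 text formats (one class name per line; all anchors as comma-separated values on one line) — the real helpers may differ:

import numpy as np


def get_classes(classes_path):
    # assumed format: one class name per line
    with open(classes_path) as f:
        return [line.strip() for line in f if line.strip()]


def get_anchors(anchors_path):
    # assumed format: "w1,h1, w2,h2, ..." on a single line
    with open(anchors_path) as f:
        anchors = [float(x) for x in f.readline().split(',')]
    return np.array(anchors).reshape(-1, 2)
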
Example #3
def main(args):
    log_dir = 'logs/'
    class_names = get_classes(args.classes_path)
    num_classes = len(class_names)
    if args.matchpoint_path:
        matchpoints = get_matchpoints(args.matchpoint_path)
    else:
        matchpoints = None

    # choose model type
    if args.tiny:
        num_channels = 128
        #input_size = (192, 192)
    else:
        num_channels = 256
        #input_size = (256, 256)

    input_size = args.model_image_size

    # get train/val dataset
    train_dataset = hourglass_dataset(args.dataset_path,
                                      class_names,
                                      input_size=input_size,
                                      is_train=True,
                                      matchpoints=matchpoints)
    val_dataset = hourglass_dataset(args.dataset_path,
                                    class_names,
                                    input_size=input_size,
                                    is_train=False)

    train_gen = train_dataset.generator(args.batch_size,
                                        args.num_stacks,
                                        sigma=1,
                                        is_shuffle=True,
                                        rot_flag=True,
                                        scale_flag=True,
                                        h_flip_flag=True,
                                        v_flip_flag=True)

    model_type = get_model_type(args.num_stacks, args.mobile, args.tiny,
                                input_size)

    # callbacks for training process
    tensorboard = TensorBoard(log_dir=log_dir,
                              histogram_freq=0,
                              write_graph=False,
                              write_grads=False,
                              write_images=False,
                              update_freq='batch')
    eval_callback = EvalCallBack(log_dir, val_dataset, class_names, input_size,
                                 model_type)
    terminate_on_nan = TerminateOnNaN()
    callbacks = [tensorboard, eval_callback, terminate_on_nan]

    # prepare optimizer
    #optimizer = RMSprop(lr=5e-4)
    optimizer = get_optimizer(args.optimizer,
                              args.learning_rate,
                              decay_type=None)

    # get train model, doesn't specify input size
    model = get_hourglass_model(num_classes,
                                args.num_stacks,
                                num_channels,
                                mobile=args.mobile)
    print(
        'Create {} Stacked Hourglass model with stack number {}, channel number {}. train input size {}'
        .format('Mobile' if args.mobile else '', args.num_stacks, num_channels,
                input_size))
    model.summary()

    if args.weights_path:
        model.load_weights(args.weights_path,
                           by_name=True)  #, skip_mismatch=True)
        print('Load weights {}.'.format(args.weights_path))

    # support multi-gpu training
    template_model = None
    if args.gpu_num >= 2:
        # keep the template model for saving result
        template_model = model
        model = multi_gpu_model(model, gpus=args.gpu_num)

    model.compile(optimizer=optimizer, loss=mean_squared_error)

    # start training
    model.fit_generator(generator=train_gen,
                        steps_per_epoch=train_dataset.get_dataset_size() //
                        args.batch_size,
                        epochs=args.total_epoch,
                        initial_epoch=args.init_epoch,
                        workers=1,
                        use_multiprocessing=False,
                        max_queue_size=10,
                        callbacks=callbacks)

    if template_model is not None:
        template_model.save(os.path.join(log_dir, 'trained_final.h5'))
    else:
        model.save(os.path.join(log_dir, 'trained_final.h5'))

    return
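
The `args` namespace consumed by this hourglass example comes from a command-line parser that is not included. Below is a minimal argparse sketch covering the attributes read above; option names mirror those attributes, while defaults and help texts are illustrative only.

import argparse


def parse_hourglass_args():
    parser = argparse.ArgumentParser(description='Train a Stacked Hourglass keypoint model')
    parser.add_argument('--dataset_path', type=str, required=True)
    parser.add_argument('--classes_path', type=str, required=True)
    parser.add_argument('--matchpoint_path', type=str, default=None)
    parser.add_argument('--model_image_size', type=int, nargs=2, default=[256, 256],
                        help='input height and width, e.g. --model_image_size 256 256')
    parser.add_argument('--num_stacks', type=int, default=2)
    parser.add_argument('--mobile', action='store_true')
    parser.add_argument('--tiny', action='store_true')
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--optimizer', type=str, default='rmsprop')
    parser.add_argument('--learning_rate', type=float, default=5e-4)
    parser.add_argument('--weights_path', type=str, default=None)
    parser.add_argument('--gpu_num', type=int, default=1)
    parser.add_argument('--init_epoch', type=int, default=0)
    parser.add_argument('--total_epoch', type=int, default=100)
    return parser.parse_args()
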
Example #4
def main(args):
    # data annotation file path
    annotation_file = args.annotation_file

    # path where the resulting logs and weights will be saved
    log_dir = os.path.join('logs', '000')

    # classes file path
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)

    # load the anchors
    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1

    if args.freeze_level is not None:
        freeze_level = args.freeze_level

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir,
                          histogram_freq=0,
                          write_graph=False,
                          write_grads=False,
                          write_images=False,
                          update_freq='batch')
    checkpoint = ModelCheckpoint(os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                 monitor='val_loss',
                                 mode='min',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  mode='min',
                                  patience=10,
                                  verbose=1,
                                  cooldown=0,
                                  min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=50,
                                   verbose=1,
                                   mode='min')
    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan
    ]

    # load the dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # assign multiscale interval
    if args.multiscale:
        rescale_interval = args.rescale_interval
    else:
        rescale_interval = -1  #Doesn't rescale

    # model input shape check
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0 and input_shape[1] % 32
            == 0), 'model_image_size should be multiples of 32'

    # pick data generator & model builder according to the model type
    if num_anchors == 9:
        # YOLOv3 use 9 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        tiny_version = False
    elif num_anchors == 6:
        # Tiny YOLOv3 use 6 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        tiny_version = True
    elif num_anchors == 5:
        # YOLOv2 use 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper

        tiny_version = False
    else:
        raise ValueError('Unsupported anchors number')

    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(
            args.model_type,
            dataset[num_train:],
            anchors,
            class_names,
            args.model_image_size,
            args.model_pruning,
            log_dir,
            eval_epoch_interval=args.eval_epoch_interval,
            save_eval_checkpoint=args.save_eval_checkpoint,
            elim_grid_sense=args.elim_grid_sense)
        callbacks.append(eval_callback)

    # prepare train/val data shuffle callback
    if args.data_shuffle:
        shuffle_callback = DatasetShuffleCallBack(dataset)
        callbacks.append(shuffle_callback)

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(
        np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)
        ]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer,
                              args.learning_rate,
                              decay_type=None)

    # support multi-gpu training
    if args.gpu_num >= 2:
        # devices_list=["/gpu:0", "/gpu:1"]
        devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)]
        strategy = tf.distribute.MirroredStrategy(devices=devices_list)
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        with strategy.scope():
            # get multi-gpu train model
            model = get_train_model(args.model_type,
                                    anchors,
                                    num_classes,
                                    weights_path=args.weights_path,
                                    freeze_level=freeze_level,
                                    optimizer=optimizer,
                                    label_smoothing=args.label_smoothing,
                                    elim_grid_sense=args.elim_grid_sense,
                                    model_pruning=args.model_pruning,
                                    pruning_end_step=pruning_end_step)

    else:
        # get normal train model
        model = get_train_model(args.model_type,
                                anchors,
                                num_classes,
                                weights_path=args.weights_path,
                                freeze_level=freeze_level,
                                optimizer=optimizer,
                                label_smoothing=args.label_smoothing,
                                elim_grid_sense=args.elim_grid_sense,
                                model_pruning=args.model_pruning,
                                pruning_end_step=pruning_end_step)

    model.summary()

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(num_train, num_val, args.batch_size, input_shape))

    # For better performance, run transfer learning with frozen layers for the first epochs (init_epoch ~ transfer_epoch)
    model.fit_generator(
        data_generator(dataset[:num_train],
                       args.batch_size,
                       input_shape,
                       anchors,
                       num_classes,
                       args.enhance_augment,
                       rescale_interval,
                       multi_anchor_assign=args.multi_anchor_assign),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(
            dataset[num_train:],
            args.batch_size,
            input_shape,
            anchors,
            num_classes,
            multi_anchor_assign=args.multi_anchor_assign),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=epochs,
        initial_epoch=initial_epoch,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type:
        # rebuild optimizer to apply learning rate decay, only after
        # unfreeze all layers
        callbacks.remove(reduce_lr)
        steps_per_epoch = max(1, num_train // args.batch_size)
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch -
                                         args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer,
                                  args.learning_rate,
                                  decay_type=args.decay_type,
                                  decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    if args.gpu_num >= 2:
        with strategy.scope():
            for i in range(len(model.layers)):
                model.layers[i].trainable = True
            model.compile(optimizer=optimizer,
                          loss={
                              'yolo_loss': lambda y_true, y_pred: y_pred
                          })  # recompile to apply the change

    else:
        for i in range(len(model.layers)):
            model.layers[i].trainable = True
        model.compile(optimizer=optimizer,
                      loss={
                          'yolo_loss': lambda y_true, y_pred: y_pred
                      })  # recompile to apply the change

    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(num_train, num_val, args.batch_size, input_shape))

    # After transfer learning, train the remaining epochs (transfer_epoch ~ total_epoch)
    # If this stage is unnecessary or takes too long, you can set total_epoch equal to transfer_epoch
    # and skip the training below; adjust to your machine's capability
    model.fit_generator(
        data_generator(dataset[:num_train],
                       args.batch_size,
                       input_shape,
                       anchors,
                       num_classes,
                       args.enhance_augment,
                       rescale_interval,
                       multi_anchor_assign=args.multi_anchor_assign),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(
            dataset[num_train:],
            args.batch_size,
            input_shape,
            anchors,
            num_classes,
            multi_anchor_assign=args.multi_anchor_assign),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=args.total_epoch,
        initial_epoch=epochs,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Finally store model
    if args.model_pruning:
        model = sparsity.strip_pruning(model)
    model.save(os.path.join(log_dir, 'trained_final.h5'))
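
The `loss={'yolo_loss': lambda y_true, y_pred: y_pred}` compile calls above work because the training model computes the detection loss inside the graph and exposes it as an output named 'yolo_loss'; the Keras "loss" then simply forwards that output. Below is a stripped-down, runnable illustration of the pattern with a toy body and a dummy in-graph loss (the real yolo_loss decodes boxes, matches anchors, and combines box/objectness/class terms):

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, Lambda
from tensorflow.keras.models import Model

# toy "detection body": one conv head standing in for a real YOLO backbone
image_input = Input(shape=(32, 32, 3))
raw_pred = Conv2D(8, 3, padding='same')(image_input)

# ground-truth placeholder with the same shape as the prediction
y_true_input = Input(shape=(32, 32, 8))


def dummy_loss(tensors):
    # loss computed inside the graph, returned per sample with shape (batch, 1)
    pred, target = tensors
    per_sample = tf.reduce_mean(tf.square(pred - target), axis=[1, 2, 3])
    return tf.expand_dims(per_sample, axis=-1)


loss_output = Lambda(dummy_loss, name='yolo_loss')([raw_pred, y_true_input])
train_model = Model([image_input, y_true_input], loss_output)

# the Keras "loss" just forwards the model output, which already is the loss value
train_model.compile(optimizer='adam',
                    loss={'yolo_loss': lambda y_true, y_pred: y_pred})

# dummy fit: the y_true argument of the identity loss is ignored
images = np.zeros((2, 32, 32, 3), dtype='float32')
targets = np.zeros((2, 32, 32, 8), dtype='float32')
train_model.fit([images, targets], np.zeros((2, 1), dtype='float32'), epochs=1, verbose=0)
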
def train(args, model, input_shape, strategy):
    log_dir = os.path.join('logs', '000')

    # callbacks for training process
    checkpoint = ModelCheckpoint(os.path.join(
        log_dir,
        'ep{epoch:03d}-val_loss{val_loss:.3f}-val_accuracy{val_accuracy:.3f}-val_top_k_categorical_accuracy{val_top_k_categorical_accuracy:.3f}.h5'
    ),
                                 monitor='val_accuracy',
                                 mode='max',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=1)
    logging = TensorBoard(log_dir=log_dir,
                          histogram_freq=0,
                          write_graph=False,
                          write_grads=False,
                          write_images=False,
                          update_freq='batch')
    terminate_on_nan = TerminateOnNaN()
    learn_rates = [0.05, 0.01, 0.005, 0.001, 0.0005]
    lr_scheduler = LearningRateScheduler(
        lambda epoch: learn_rates[min(epoch // 30, len(learn_rates) - 1)])  # drop LR every 30 epochs, clamp at the last value
    checkpoint_clean = CheckpointCleanCallBack(log_dir, max_val_keep=3)

    callbacks = [
        logging, checkpoint, lr_scheduler, terminate_on_nan, checkpoint_clean
    ]

    # data generator
    train_datagen = ImageDataGenerator(
        preprocessing_function=preprocess,
        #featurewise_center=False,
        #samplewise_center=False,
        #featurewise_std_normalization=False,
        #samplewise_std_normalization=False,
        #zca_whitening=False,
        #zca_epsilon=1e-06,
        zoom_range=0.25,
        brightness_range=[0.5, 1.5],
        channel_shift_range=0.1,
        shear_range=0.2,
        rotation_range=30,
        width_shift_range=0.05,
        height_shift_range=0.05,
        vertical_flip=True,
        horizontal_flip=True,
        #rescale=1./255,
        #validation_split=0.1,
        fill_mode='constant',
        cval=0.,
        data_format=None,
        dtype=None)

    test_datagen = ImageDataGenerator(preprocessing_function=preprocess)

    train_generator = train_datagen.flow_from_directory(
        args.train_data_path,
        target_size=input_shape,
        batch_size=args.batch_size,
        color_mode='rgb',
        classes=None,
        class_mode='categorical',
        shuffle=True,
        #save_to_dir='check',
        #save_prefix='augmented_',
        #save_format='jpg',
        interpolation='nearest')

    test_generator = test_datagen.flow_from_directory(
        args.val_data_path,
        target_size=input_shape,
        batch_size=args.batch_size,
        color_mode='rgb',
        classes=None,
        class_mode='categorical',
        shuffle=True,
        #save_to_dir='check',
        #save_prefix='augmented_',
        #save_format='jpg',
        interpolation='nearest')

    # get optimizer
    if args.decay_type:
        callbacks.remove(lr_scheduler)
    steps_per_epoch = max(1, train_generator.samples // args.batch_size)
    decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch)
    optimizer = get_optimizer(args.optimizer,
                              args.learning_rate,
                              average_type=None,
                              decay_type=args.decay_type,
                              decay_steps=decay_steps)

    # get loss
    losses = CategoricalCrossentropy(label_smoothing=args.label_smoothing)

    # model compile
    if strategy:
        with strategy.scope():
            model.compile(optimizer=optimizer,
                          metrics=['accuracy', 'top_k_categorical_accuracy'],
                          loss=losses)
    else:
        model.compile(optimizer=optimizer,
                      metrics=['accuracy', 'top_k_categorical_accuracy'],
                      loss=losses)

    # start training
    print('Train on {} samples, val on {} samples, with batch size {}.'.format(
        train_generator.samples, test_generator.samples, args.batch_size))
    model.fit_generator(
        train_generator,
        steps_per_epoch=train_generator.samples // args.batch_size,
        epochs=args.total_epoch,
        workers=cpu_count() -
        1,  # parallelize image data feeding but leave one CPU core idle
        initial_epoch=args.init_epoch,
        use_multiprocessing=True,
        max_queue_size=10,
        validation_data=test_generator,
        validation_steps=test_generator.samples // args.batch_size,
        callbacks=callbacks)

    # Finally store model
    model.save(os.path.join(log_dir, 'trained_final.h5'))
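
Both data generators above pass a `preprocess` function to ImageDataGenerator that is not defined in this snippet. A common choice for ImageNet-style classifiers is to rescale pixels to [-1, 1], as sketched below; this is an assumption, not necessarily the repository's actual preprocessing.

import numpy as np


def preprocess(x):
    # hypothetical preprocessing: rescale RGB pixel values to the range [-1, 1]
    x = np.asarray(x, dtype=np.float32)
    return x / 127.5 - 1.0
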
Example #6
def main(args):
    annotation_file = args.annotation_file
    log_dir = os.path.join('logs', '000')
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)

    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1

    if args.freeze_level is not None:
        freeze_level = args.freeze_level

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir,
                          histogram_freq=0,
                          write_graph=False,
                          write_grads=False,
                          write_images=False,
                          update_freq='batch')
    checkpoint = ModelCheckpoint(os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                 monitor='val_loss',
                                 mode='min',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  mode='min',
                                  patience=10,
                                  verbose=1,
                                  cooldown=0,
                                  min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=50,
                                   verbose=1,
                                   mode='min')
    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan
    ]

    # get train&val dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # assign multiscale interval
    if args.multiscale:
        rescale_interval = args.rescale_interval
    else:
        rescale_interval = -1  #Doesn't rescale

    # model input shape check
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0 and input_shape[1] % 32
            == 0), 'model_image_size should be multiples of 32'

    # get different model type & train&val data generator
    if args.model_type.startswith(
            'scaled_yolo4_') or args.model_type.startswith('yolo5_'):
        # Scaled-YOLOv4 & YOLOv5 entrance: uses the yolo5 submodule, but for now still the yolo3 data generator
        # TODO: create new yolo5 data generator to apply YOLOv5 anchor assignment
        get_train_model = get_yolo5_train_model
        data_generator = yolo5_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo5DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign)
        #val_data_generator = Yolo5DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign)

        tiny_version = False
    elif args.model_type.startswith('yolo3_') or args.model_type.startswith(
            'yolo4_'):
        #if num_anchors == 9:
        # YOLOv3 & v4 entrance, use 9 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign)
        #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign)

        tiny_version = False
    elif args.model_type.startswith(
            'tiny_yolo3_') or args.model_type.startswith('tiny_yolo4_'):
        #elif num_anchors == 6:
        # Tiny YOLOv3 & v4 entrance, use 6 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign)
        #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign)

        tiny_version = True
    elif args.model_type.startswith('yolo2_') or args.model_type.startswith(
            'tiny_yolo2_'):
        #elif num_anchors == 5:
        # YOLOv2 & Tiny YOLOv2 use 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        #val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)

        tiny_version = False
    else:
        raise ValueError('Unsupported model type')

    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(
            args.model_type,
            dataset[num_train:],
            anchors,
            class_names,
            args.model_image_size,
            args.model_pruning,
            log_dir,
            eval_epoch_interval=args.eval_epoch_interval,
            save_eval_checkpoint=args.save_eval_checkpoint,
            elim_grid_sense=args.elim_grid_sense)
        callbacks.append(eval_callback)

    # prepare train/val data shuffle callback
    if args.data_shuffle:
        shuffle_callback = DatasetShuffleCallBack(dataset)
        callbacks.append(shuffle_callback)

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(
        np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)
        ]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer,
                              args.learning_rate,
                              average_type=None,
                              decay_type=None)

    # support multi-gpu training
    if args.gpu_num >= 2:
        # devices_list=["/gpu:0", "/gpu:1"]
        devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)]
        strategy = tf.distribute.MirroredStrategy(devices=devices_list)
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        with strategy.scope():
            # get multi-gpu train model
            model = get_train_model(args.model_type,
                                    anchors,
                                    num_classes,
                                    weights_path=args.weights_path,
                                    freeze_level=freeze_level,
                                    optimizer=optimizer,
                                    label_smoothing=args.label_smoothing,
                                    elim_grid_sense=args.elim_grid_sense,
                                    model_pruning=args.model_pruning,
                                    pruning_end_step=pruning_end_step)

    else:
        # get normal train model
        model = get_train_model(args.model_type,
                                anchors,
                                num_classes,
                                weights_path=args.weights_path,
                                freeze_level=freeze_level,
                                optimizer=optimizer,
                                label_smoothing=args.label_smoothing,
                                elim_grid_sense=args.elim_grid_sense,
                                model_pruning=args.model_pruning,
                                pruning_end_step=pruning_end_step)

    model.summary()

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(num_train, num_val, args.batch_size, input_shape))
    #model.fit_generator(train_data_generator,
    model.fit_generator(
        data_generator(dataset[:num_train],
                       args.batch_size,
                       input_shape,
                       anchors,
                       num_classes,
                       args.enhance_augment,
                       rescale_interval,
                       multi_anchor_assign=args.multi_anchor_assign),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(
            dataset[num_train:],
            args.batch_size,
            input_shape,
            anchors,
            num_classes,
            multi_anchor_assign=args.multi_anchor_assign),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=epochs,
        initial_epoch=initial_epoch,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type or args.average_type:
        # rebuild optimizer to apply learning rate decay or weights averager,
        # only after unfreeze all layers
        if args.decay_type:
            callbacks.remove(reduce_lr)

        if args.average_type == 'ema' or args.average_type == 'swa':
            # weights averager need tensorflow-addons,
            # which request TF 2.x and have version compatibility
            import tensorflow_addons as tfa
            callbacks.remove(checkpoint)
            avg_checkpoint = tfa.callbacks.AverageModelCheckpoint(
                filepath=os.path.join(
                    log_dir,
                    'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                update_weights=True,
                monitor='val_loss',
                mode='min',
                verbose=1,
                save_weights_only=False,
                save_best_only=True,
                period=1)
            callbacks.append(avg_checkpoint)

        steps_per_epoch = max(1, num_train // args.batch_size)
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch -
                                         args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer,
                                  args.learning_rate,
                                  average_type=args.average_type,
                                  decay_type=args.decay_type,
                                  decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    if args.gpu_num >= 2:
        with strategy.scope():
            for i in range(len(model.layers)):
                model.layers[i].trainable = True
            model.compile(optimizer=optimizer,
                          loss={
                              'yolo_loss': lambda y_true, y_pred: y_pred
                          })  # recompile to apply the change

    else:
        for i in range(len(model.layers)):
            model.layers[i].trainable = True
        model.compile(optimizer=optimizer,
                      loss={
                          'yolo_loss': lambda y_true, y_pred: y_pred
                      })  # recompile to apply the change

    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(num_train, num_val, args.batch_size, input_shape))
    #model.fit_generator(train_data_generator,
    model.fit_generator(
        data_generator(dataset[:num_train],
                       args.batch_size,
                       input_shape,
                       anchors,
                       num_classes,
                       args.enhance_augment,
                       rescale_interval,
                       multi_anchor_assign=args.multi_anchor_assign),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(
            dataset[num_train:],
            args.batch_size,
            input_shape,
            anchors,
            num_classes,
            multi_anchor_assign=args.multi_anchor_assign),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=args.total_epoch,
        initial_epoch=epochs,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Finally store model
    if args.model_pruning:
        model = sparsity.strip_pruning(model)
    model.save(os.path.join(log_dir, 'trained_final.h5'))
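
Several of these examples append a `DatasetShuffleCallBack(dataset)` when data shuffling is enabled. Its implementation is not included; a plausible sketch, inferred only from the name and usage, is a Keras callback that reshuffles the annotation list in place at the end of every epoch.

import random
from tensorflow.keras.callbacks import Callback


class DatasetShuffleCallBack(Callback):
    def __init__(self, dataset):
        super(DatasetShuffleCallBack, self).__init__()
        # reference to the annotation list built by get_dataset()
        self.dataset = dataset

    def on_epoch_end(self, epoch, logs=None):
        # reshuffle the annotation list in place (assumed behavior)
        random.shuffle(self.dataset)
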
def main(args):
    annotation_file = args.annotation_file
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)

    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    log_dir_path = args.log_directory
    try:
        log_dir = os.path.join('logs', log_dir_path)
    except TypeError:
        date_now = datetime.now()
        log_dir_folder_name = f'{date_now.strftime("%Y_%m_%d_%H%M%S")}_{args.model_type}_TransferEp_{args.transfer_epoch}_TotalEP_{args.total_epoch}'

        log_dir = os.path.realpath(os.path.join(
            'logs',
            log_dir_folder_name
        ))

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1

    if args.freeze_level is not None:
        freeze_level = args.freeze_level

    # Fraction of layers (0~1) to unfreeze during fine-tuning
    unfreeze_level = args.unfreeze_level

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False,
                          update_freq='batch')
    checkpoint = ModelCheckpoint(
        filepath=log_dir + os.sep + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        mode='min',
        verbose=1,
        save_weights_only=False,
        save_best_only=True,
        period=1
    )
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5, mode='min',
        patience=10,
        verbose=1,
        cooldown=0,
        min_lr=1e-10
    )
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1, mode='min')
    terminate_on_nan = TerminateOnNaN()

    callbacks = [logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan]

    # get train&val dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # assign multiscale interval
    if args.multiscale:
        rescale_interval = args.rescale_interval
    else:
        rescale_interval = -1  # Doesn't rescale

    # model input shape check
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'model_image_size should be multiples of 32'

    # get different model type & train&val data generator
    if num_anchors == 9:
        # YOLOv3 use 9 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        # train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign)
        # val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign)

        tiny_version = False
    elif num_anchors == 6:
        # Tiny YOLOv3 use 6 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        # train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign)
        # val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign)

        tiny_version = True
    elif num_anchors == 5:
        # YOLOv2 use 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper

        # tf.keras.Sequence style data generator
        # train_data_generator = Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        # val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)

        tiny_version = False
    else:
        raise ValueError('Unsupported anchors number')

    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(
            model_type=args.model_type,
            annotation_lines=dataset[num_train:],
            anchors=anchors,
            class_names=class_names,
            model_image_size=args.model_image_size,
            model_pruning=args.model_pruning,
            log_dir=log_dir,
            eval_epoch_interval=args.eval_epoch_interval,
            save_eval_checkpoint=args.save_eval_checkpoint,
            elim_grid_sense=args.elim_grid_sense
        )
        callbacks.append(eval_callback)

    # prepare train/val data shuffle callback
    if args.data_shuffle:
        shuffle_callback = DatasetShuffleCallBack(dataset)
        callbacks.append(shuffle_callback)

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [sparsity.UpdatePruningStep(), sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None)

    # support multi-gpu training
    if args.gpu_num >= 2:
        # devices_list=["/gpu:0", "/gpu:1"]
        devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)]
        strategy = tf.distribute.MirroredStrategy(devices=devices_list)
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        with strategy.scope():
            # get multi-gpu train model
            model = get_train_model(
                model_type=args.model_type,
                anchors=anchors,
                num_classes=num_classes,
                weights_path=args.weights_path,
                freeze_level=freeze_level,
                optimizer=optimizer,
                label_smoothing=args.label_smoothing,
                elim_grid_sense=args.elim_grid_sense,
                model_pruning=args.model_pruning,
                pruning_end_step=pruning_end_step
            )

    else:
        # get normal train model
        model = get_train_model(
            model_type=args.model_type,
            anchors=anchors,
            num_classes=num_classes,
            weights_path=args.weights_path,
            freeze_level=freeze_level,
            optimizer=optimizer,
            label_smoothing=args.label_smoothing,
            elim_grid_sense=args.elim_grid_sense,
            model_pruning=args.model_pruning,
            pruning_end_step=pruning_end_step
        )

    if args.show_history:
        model.summary()

    layers_count = len(model.layers)
    print(f'Total layers: {layers_count}')

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val,
                                                                                               args.batch_size,
                                                                                               input_shape))
    # model.fit_generator(train_data_generator,
    """
    Transfer training stage: train with frozen layers
    """
    model.fit(
        data_generator(
            annotation_lines=dataset[:num_train],
            batch_size=args.batch_size,
            input_shape=input_shape,
            anchors=anchors,
            num_classes=num_classes,
            enhance_augment=args.enhance_augment,
            rescale_interval=rescale_interval,
            multi_anchor_assign=args.multi_anchor_assign
        ),
        steps_per_epoch=max(1, num_train // args.batch_size),
        # validation_data=val_data_generator,
        validation_data=data_generator(
            annotation_lines=dataset[num_train:],
            batch_size=args.batch_size,
            input_shape=input_shape,
            anchors=anchors,
            num_classes=num_classes,
            multi_anchor_assign=args.multi_anchor_assign
        ),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=epochs,
        initial_epoch=initial_epoch,
        # verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks
    )

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type:
        # rebuild optimizer to apply learning rate decay, only after
        # unfreeze all layers
        callbacks.remove(reduce_lr)
        steps_per_epoch = max(1, num_train // args.batch_size)
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=args.decay_type,
                                  decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    fine_tune_layers = int(layers_count * unfreeze_level)
    print(f"Unfreeze {unfreeze_level * 100}% of layers and continue training, to fine-tune.")
    print(f"Unfroze {fine_tune_layers} layers of {layers_count}")

    if args.gpu_num >= 2:
        with strategy.scope():
            for i in range(layers_count - fine_tune_layers, layers_count):
                model.layers[i].trainable = True
            model.compile(optimizer=optimizer,
                          loss={'yolo_loss': lambda y_true, y_pred: y_pred})  # recompile to apply the change

    else:
        for i in range(layers_count - fine_tune_layers, layers_count):
            model.layers[i].trainable = True
        model.compile(optimizer=optimizer,
                      loss={'yolo_loss': lambda y_true, y_pred: y_pred})  # recompile to apply the change

    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val,
                                                                                               args.batch_size,
                                                                                               input_shape))
    """
    Fine-tuning stage: more GPU memory will be used, and the learning rate (LR) will be decayed if decay_type is set
    """
    # model.fit_generator(train_data_generator,
    model.fit(
        # The YOLO data augmentation generator tool
        data_generator(
            annotation_lines=dataset[:num_train],
            batch_size=args.batch_size,
            input_shape=input_shape,
            anchors=anchors,
            num_classes=num_classes,
            enhance_augment=args.enhance_augment,
            rescale_interval=rescale_interval,
            multi_anchor_assign=args.multi_anchor_assign
        ),
        steps_per_epoch=max(1, num_train // args.batch_size),
        # validation_data=val_data_generator,
        # Validation generator
        validation_data=data_generator(
            annotation_lines=dataset[num_train:],
            batch_size=args.batch_size,
            input_shape=input_shape,
            anchors=anchors,
            num_classes=num_classes,
            multi_anchor_assign=args.multi_anchor_assign
        ),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=args.total_epoch,
        initial_epoch=epochs,
        # verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks
    )

    # Finally store model
    if args.model_pruning:
        model = sparsity.strip_pruning(model)
    model.save(os.path.join(log_dir, 'trained_final.h5'))
Example #8
def main(args):
    model_type = "yolo3_darknet_spp"  # yolo3_darknet_spp, yolo3_darknet
    current_dir = os.path.dirname(__file__) + "/"
    print("current_dir == ", current_dir)
    annotation_file = current_dir + "sample/trainval/train.txt"
    val_annotation_file = current_dir + "sample/trainval/val.txt"

    classes_path = current_dir + "sample/trainval/train_classes.txt"
    anchors_path = current_dir + "sample/trainval/yolo_anchors.txt"
    weights_path = current_dir + "weights/yolov3-spp.h5"
    load_weights_path = None  # None or "{weights path}"
    is_one_stage_train = True
    learning_rate_1 = 1e-4
    learning_rate_2 = 1e-5
    epoch_1 = args.max_epochs_1
    epoch_2 = args.max_epochs_2
    batch_size_1 = args.batch_size_1
    batch_size_2 = args.batch_size_2
    freeze_level = 2
    model_image_size = (416, 416)
    val_split = 0.1
    label_smoothing = 0
    enhance_augment = None  # enhance data augmentation type (None/mosaic)
    rescale_interval = 0  # rescale input size every N batches; 0 disables multi-scale training

    log_dir = os.path.join('logs', '20200602')

    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    anchors = get_anchors(anchors_path)

    logging = TensorBoard(log_dir=log_dir, update_freq='batch')
    checkpoint = ModelCheckpoint(os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                 monitor='val_loss',
                                 verbose=1,
                                 save_weights_only=True,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.1,
                                  patience=10,
                                  verbose=1,
                                  cooldown=0,
                                  min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=50,
                                   verbose=1)
    # terminate_on_nan = TerminateOnNaN()

    callbacks = [
        logging, checkpoint, reduce_lr, early_stopping,
        ModertFileToObs(log_dir, args)
    ]
    # callbacks = [logging, checkpoint, reduce_lr]

    # get train&val dataset
    dataset = get_dataset(annotation_file)

    dataset = [current_dir + d for d in dataset]
    if val_annotation_file != "":
        val_dataset = get_dataset(val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

        # num_val = 100
        # num_train = 200

    # model input shape check
    input_shape = model_image_size
    assert (input_shape[0] % 32 == 0
            and input_shape[1] % 32 == 0), 'Multiples of 32 required'
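    # YOLOv3 downsamples the input by a total stride of 32, so both input
    # dimensions must be multiples of 32 to keep the feature map grids aligned.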

    get_train_model = get_yolo3_train_model
    data_generator = yolo3_data_generator_wrapper

    # get train model
    model = get_train_model(model_type,
                            anchors,
                            num_classes,
                            input_shape,
                            weights_path=weights_path,
                            freeze_level=freeze_level,
                            label_smoothing=label_smoothing)

    if load_weights_path:
        model.load_weights(load_weights_path)
        print("reload weights: {}".format(load_weights_path))

    if is_one_stage_train:
        model.compile(optimizer=get_optimizer(learning_rate_1),
                      loss={
                          'yolo_loss': lambda y_true, y_pred: y_pred
                      })
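        # The YOLO loss is computed inside the model graph and exposed as the
        # 'yolo_loss' output, so the compiled loss is just a pass-through.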

        print(
            'Stage 1: train on {} samples, val on {} samples, with batch size {}, '
            'input_shape {}.'.format(num_train, num_val, batch_size_1,
                                     input_shape))
        model.fit_generator(
            data_generator(dataset[:num_train], batch_size_1, input_shape,
                           anchors, num_classes, enhance_augment),
            steps_per_epoch=max(1, num_train // batch_size_1),
            validation_data=data_generator(dataset[num_train:], batch_size_1,
                                           input_shape, anchors, num_classes),
            validation_steps=max(1, num_val // batch_size_1),
            epochs=epoch_1,
            initial_epoch=0,
            workers=1,
            use_multiprocessing=False,
            max_queue_size=10,
            callbacks=callbacks)

        model.save_weights(os.path.join(log_dir, 'trained_weights_stage_1.h5'))

    # second stage: always unfreeze the whole network and fine-tune
    if True:
        print("Unfreeze and continue training, to fine-tune.")
        for i in range(len(model.layers)):
            model.layers[i].trainable = True

        model.compile(optimizer=get_optimizer(learning_rate_2),
                      loss={
                          'yolo_loss': lambda y_true, y_pred: y_pred
                      })
        print(
            'Stage 2: train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
            .format(num_train, num_val, batch_size_2, input_shape))

        model.fit_generator(data_generator(dataset[:num_train], batch_size_2,
                                           input_shape, anchors, num_classes,
                                           enhance_augment, rescale_interval),
                            steps_per_epoch=max(1, num_train // batch_size_2),
                            validation_data=data_generator(
                                dataset[num_train:], batch_size_2, input_shape,
                                anchors, num_classes),
                            validation_steps=max(1, num_val // batch_size_2),
                            epochs=epoch_2,
                            initial_epoch=epoch_1,
                            workers=1,
                            use_multiprocessing=False,
                            max_queue_size=10,
                            callbacks=callbacks)
        model.save_weights(os.path.join(log_dir, 'trained_weights_final.h5'))

    gen_model_dir(log_dir, args, classes_path, anchors_path)
Example #9
def main():
    parser = argparse.ArgumentParser(
        description='train a simple CNN classifier with PyTorch')
    log_dir = os.path.join('logs', '000')

    # Model definition options
    parser.add_argument(
        '--model_type',
        type=str,
        required=False,
        default='mobilenetv2',
        help=
        'backbone model type: mobilenetv3/v2/simple_cnn, default=%(default)s')
    parser.add_argument(
        '--model_input_shape',
        type=str,
        required=False,
        default='224x224',
        help="model image input shape as <height>x<width>, default=%(default)s"
    )
    parser.add_argument(
        '--head_conv_channel',
        type=int,
        required=False,
        default=128,
        help="channel number for head part convolution, default=%(default)s")
    parser.add_argument('--weights_path',
                        type=str,
                        required=False,
                        default=None,
                        help="Pretrained model/weights file for fine tune")

    # Data options
    parser.add_argument('--train_data_path',
                        type=str,
                        required=True,
                        help='path to train image dataset')
    parser.add_argument('--val_data_path',
                        type=str,
                        required=True,
                        help='path to validation image dataset')
    # Training settings
    parser.add_argument('--batch_size',
                        type=int,
                        required=False,
                        default=64,
                        help="batch size for train, default=%(default)s")
    parser.add_argument(
        '--optimizer',
        type=str,
        required=False,
        default='adam',
        choices=['adam', 'rmsprop', 'sgd'],
        help="optimizer for training (adam/rmsprop/sgd), default=%(default)s")
    parser.add_argument('--learning_rate',
                        type=float,
                        required=False,
                        default=1e-3,
                        help="Initial learning rate, default=%(default)s")
    parser.add_argument(
        '--decay_type',
        type=str,
        required=False,
        default=None,
        choices=[None, 'cosine', 'plateau', 'exponential', 'step'],
        help="Learning rate decay type, default=%(default)s")
    parser.add_argument('--weight_decay',
                        type=float,
                        required=False,
                        default=5e-4,
                        help="Weight decay for optimizer, default=%(default)s")

    parser.add_argument(
        '--init_epoch',
        type=int,
        required=False,
        default=0,
        help=
        "Initial training epochs for fine tune training, default=%(default)s")
    parser.add_argument(
        '--transfer_epoch',
        type=int,
        required=False,
        default=5,
        help=
        "Transfer training (from Imagenet) stage epochs, default=%(default)s")
    parser.add_argument('--total_epoch',
                        type=int,
                        required=False,
                        default=100,
                        help="Total training epochs, default=%(default)s")
    #parser.add_argument('--gpu_num', type=int, required=False, default=1,
    #help='Number of GPU to use, default=%(default)s')
    parser.add_argument('--no_cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')

    args = parser.parse_args()
    height, width = args.model_input_shape.split('x')
    args.model_input_shape = (int(height), int(width))

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    torch.manual_seed(1)

    # prepare train&val dataset loader
    train_loader = get_dataloader(args.train_data_path,
                                  args.model_input_shape,
                                  args.batch_size,
                                  use_cuda=use_cuda,
                                  mode='train')
    val_loader = get_dataloader(args.val_data_path,
                                args.model_input_shape,
                                args.batch_size,
                                use_cuda=use_cuda,
                                mode='val')

    # get tensorboard summary writer
    summary_writer = SummaryWriter(os.path.join(log_dir, 'tensorboard'))

    # check if classes match on train & val dataset
    assert train_loader.dataset.classes == val_loader.dataset.classes, 'class mismatch between train & val dataset'
    num_classes = len(train_loader.dataset.classes)
    print('Classes:', train_loader.dataset.classes)

    # get train model
    model = Classifier(args.model_type, num_classes,
                       args.head_conv_channel).to(device)
    summary(model, input_size=(3, ) + args.model_input_shape)

    if args.weights_path:
        model.load_state_dict(
            torch.load(args.weights_path, map_location=device))
        print('Load weights {}.'.format(args.weights_path))

    optimizer = get_optimizer(args.optimizer, model, args.learning_rate,
                              args.weight_decay)

    # Freeze feature extractor part for transfer learning
    print('Freeze feature extractor part.')
    for child in model.features.children():
        for param in child.parameters():
            param.requires_grad = False
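    # NOTE: the optimizer above holds references to all parameters; frozen ones
    # simply receive no gradients, so only the un-frozen head is updated here.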

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = args.init_epoch + args.transfer_epoch
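    # e.g. with the defaults init_epoch=0, transfer_epoch=5, total_epoch=100:
    # epochs 0-4 train with the frozen backbone, epochs 5-99 fine-tune everything.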
    print("Transfer training stage")
    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(len(train_loader.dataset), len(val_loader.dataset),
                args.batch_size, args.model_input_shape))

    # Transfer train loop
    for epoch in range(initial_epoch, epochs):
        print('Epoch %d/%d' % (epoch, epochs))
        train(args, epoch, model, device, train_loader, optimizer, None,
              summary_writer)
        validate(args, epoch, epoch * len(train_loader), model, device,
                 val_loader, log_dir, summary_writer)
        checkpoint_clean(log_dir, max_keep=5)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    for child in model.children():
        for param in child.parameters():
            param.requires_grad = True

    # apply learning rate decay only after unfreeze all layers
    # NOTE: PyTorch apply learning rate scheduler for every epoch, not batch
    #steps_per_epoch = max(1, len(train_loader.dataset)//args.batch_size)
    #decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch)
    decay_steps = args.total_epoch - args.init_epoch - args.transfer_epoch
    lr_scheduler = get_lr_scheduler(args.decay_type, optimizer, decay_steps)
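    # A minimal sketch (an assumption, not the project's get_lr_scheduler helper)
    # of an equivalent per-epoch scheduler built directly with torch.optim:
    #
    #   lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    #       optimizer, T_max=decay_steps)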

    # Fine tune train loop
    for epoch in range(epochs, args.total_epoch):
        print('Epoch %d/%d' % (epoch, args.total_epoch))
        train(args, epoch, model, device, train_loader, optimizer,
              lr_scheduler, summary_writer)
        validate(args, epoch, epoch * len(train_loader), model, device,
                 val_loader, log_dir, summary_writer)
        checkpoint_clean(log_dir, max_keep=5)

    # Finally store model
    torch.save(model, os.path.join(log_dir, 'trained_final.pth'))
Example #10
def main(args):
    log_dir = 'logs/000/'
    # get class info, add background class to match model & GT
    class_names = get_classes(args.classes_path)
    assert len(class_names) < 254, 'PNG image labels only support fewer than 254 classes.'
    class_names = ['background'] + class_names
    num_classes = len(class_names)

    # callbacks for training process
    monitor = 'Jaccard'

    tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False, update_freq='batch')
    checkpoint = ModelCheckpoint(os.path.join(log_dir, 'ep{epoch:03d}-loss{loss:.3f}-Jaccard{Jaccard:.3f}-val_loss{val_loss:.3f}-val_Jaccard{val_Jaccard:.3f}.h5'),
        monitor='val_{}'.format(monitor),
        mode='max',
        verbose=1,
        save_weights_only=False,
        save_best_only=True,
        period=1)

    reduce_lr = ReduceLROnPlateau(monitor='val_{}'.format(monitor), factor=0.5, mode='max',
                patience=5, verbose=1, cooldown=0, min_lr=1e-6)
    early_stopping = EarlyStopping(monitor='val_{}'.format(monitor), min_delta=0, patience=100, verbose=1, mode='max')
    terminate_on_nan = TerminateOnNaN()

    callbacks=[tensorboard, checkpoint, reduce_lr, early_stopping, terminate_on_nan]


    # get train&val dataset
    dataset = get_data_list(args.dataset_file)
    if args.val_dataset_file:
        val_dataset = get_data_list(args.val_dataset_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset)*val_split)
        num_train = len(dataset) - num_val

    # prepare train&val data generator
    train_generator = SegmentationGenerator(args.dataset_path, dataset[:num_train],
                                            args.batch_size,
                                            num_classes,
                                            resize_shape=args.model_input_shape[::-1],
                                            crop_shape=None,
                                            weighted_type=args.weighted_type,
                                            augment=True,
                                            do_ahisteq=False)

    valid_generator = SegmentationGenerator(args.dataset_path, dataset[num_train:],
                                            args.batch_size,
                                            num_classes,
                                            resize_shape=args.model_input_shape[::-1],
                                            crop_shape=None,
                                            weighted_type=args.weighted_type,
                                            augment=False,
                                            do_ahisteq=False)


    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(args.dataset_path, dataset[num_train:], class_names, args.model_input_shape, args.model_pruning, log_dir, eval_epoch_interval=args.eval_epoch_interval, save_eval_checkpoint=args.save_eval_checkpoint)
        callbacks.append(eval_callback)

    # prepare optimizer
    #optimizer = Adam(lr=7e-4, epsilon=1e-8, decay=1e-6)
    optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None)

    # prepare loss according to loss type & weighted type
    if args.weighted_type == 'balanced':
        classes_weights_path = os.path.join(args.dataset_path, 'classes_weights.txt')
        if os.path.isfile(classes_weights_path):
            weights = load_class_weights(classes_weights_path)
        else:
            weights = calculate_weigths_labels(train_generator, num_classes, save_path=args.dataset_path)
        losses = WeightedSparseCategoricalCrossEntropy(weights)
        sample_weight_mode = None
    elif args.weighted_type == 'adaptive':
        losses = sparse_crossentropy
        sample_weight_mode = 'temporal'
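        # 'temporal' sample_weight_mode lets the generator pass a per-pixel
        # weight map with each batch, which Keras applies to the loss.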
    elif args.weighted_type is None:
        losses = sparse_crossentropy
        sample_weight_mode = None
    else:
        raise ValueError('invalid weighted_type {}'.format(args.weighted_type))

    if args.loss == 'focal':
        warnings.warn("Focal loss doesn't support weighted class balance, will ignore related config")
        losses = softmax_focal_loss
        sample_weight_mode = None
    elif args.loss == 'crossentropy':
        # using crossentropy will keep the weighted type setting
        pass
    else:
        raise ValueError('invalid loss type {}'.format(args.loss))

    # prepare metric
    #metrics = {'pred_mask' : [Jaccard, sparse_accuracy_ignoring_last_label]}
    metrics = {'pred_mask' : Jaccard}

    # support multi-gpu training
    if args.gpu_num >= 2:
        # devices_list=["/gpu:0", "/gpu:1"]
        devices_list=["/gpu:{}".format(n) for n in range(args.gpu_num)]
        strategy = tf.distribute.MirroredStrategy(devices=devices_list)
        print ('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        with strategy.scope():
            # get multi-gpu train model
            model = get_deeplabv3p_model(args.model_type, num_classes, args.model_input_shape, args.output_stride, args.freeze_level, weights_path=args.weights_path)
            # compile model
            model.compile(optimizer=optimizer, sample_weight_mode=sample_weight_mode,
                          loss=losses, metrics=metrics)
    else:
        # get normal train model
        model = get_deeplabv3p_model(args.model_type, num_classes, args.model_input_shape, args.output_stride, args.freeze_level, weights_path=args.weights_path)
        # compile model
        model.compile(optimizer=optimizer, sample_weight_mode=sample_weight_mode,
                      loss=losses, metrics=metrics)
    model.summary()

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val, args.batch_size, args.model_input_shape))
    model.fit_generator(generator=train_generator,
                        steps_per_epoch=len(train_generator),
                        validation_data=valid_generator,
                        validation_steps=len(valid_generator),
                        epochs=epochs,
                        initial_epoch=initial_epoch,
                        verbose=1,
                        workers=1,
                        use_multiprocessing=False,
                        max_queue_size=10,
                        callbacks=callbacks)

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type:
        # rebuild optimizer to apply learning rate decay, only after
        # unfreeze all layers
        callbacks.remove(reduce_lr)
        steps_per_epoch = max(1, len(train_generator))
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=args.decay_type, decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    if args.gpu_num >= 2:
        with strategy.scope():
            for i in range(len(model.layers)):
                model.layers[i].trainable = True
            model.compile(optimizer=optimizer, sample_weight_mode=sample_weight_mode,
                          loss=losses, metrics=metrics)  # recompile to apply the change

    else:
        for i in range(len(model.layers)):
            model.layers[i].trainable = True
        model.compile(optimizer=optimizer, sample_weight_mode=sample_weight_mode,
                      loss=losses, metrics=metrics)  # recompile to apply the change

    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val, args.batch_size, args.model_input_shape))
    model.fit_generator(generator=train_generator,
                        steps_per_epoch=len(train_generator),
                        validation_data=valid_generator,
                        validation_steps=len(valid_generator),
                        epochs=args.total_epoch,
                        initial_epoch=epochs,
                        verbose=1,
                        workers=1,
                        use_multiprocessing=False,
                        max_queue_size=10,
                        callbacks=callbacks)

    # Finally store model
    model.save(os.path.join(log_dir, 'trained_final.h5'))
Example #11
def main(args):
    log_dir = 'logs/000'

    # get class info
    if args.classes_path:
        class_names = get_classes(args.classes_path)
    else:
        class_names = None

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir,
                          histogram_freq=0,
                          write_graph=False,
                          write_grads=False,
                          write_images=False,
                          update_freq='batch')
    checkpoint = ModelCheckpoint(os.path.join(
        log_dir,
        'ep{epoch:03d}-loss{loss:.3f}-acc{acc:.3f}-val_loss{val_loss:.3f}-val_acc{val_acc:.3f}.h5'
    ),
                                 monitor='val_acc',
                                 mode='max',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_acc',
                                  mode='max',
                                  factor=0.5,
                                  patience=10,
                                  verbose=1,
                                  cooldown=0,
                                  min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_acc',
                                   mode='max',
                                   min_delta=0,
                                   patience=50,
                                   verbose=1)
    terminate_on_nan = TerminateOnNaN()
    checkpoint_clean = CheckpointCleanCallBack(log_dir, max_keep=5)
    #learn_rates = [0.05, 0.01, 0.005, 0.001, 0.0005]
    #lr_scheduler = LearningRateScheduler(lambda epoch: learn_rates[epoch // 30])

    callbacks = [
        logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan,
        checkpoint_clean
    ]

    # prepare train&val data generator
    train_generator = get_data_generator(args.train_data_path,
                                         args.model_input_shape,
                                         args.batch_size,
                                         class_names,
                                         mode='train')
    val_generator = get_data_generator(args.val_data_path,
                                       args.model_input_shape,
                                       args.batch_size,
                                       class_names,
                                       mode='val')

    # check if classes match on train & val dataset
    assert train_generator.class_indices == val_generator.class_indices, 'class mismatch between train & val dataset'
    if not class_names:
        class_names = list(train_generator.class_indices.keys())
    print('Classes:', class_names)

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer,
                              args.learning_rate,
                              average_type=None,
                              decay_type=None)

    # get train model
    model, backbone_len = get_model(args.model_type, len(class_names),
                                    args.model_input_shape,
                                    args.head_conv_channel, args.weights_path)
    model.summary()

    # Freeze backbone part for transfer learning
    for i in range(backbone_len):
        model.layers[i].trainable = False
    print('Freeze the first {} layers of total {} layers.'.format(
        backbone_len, len(model.layers)))
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(train_generator.samples, val_generator.samples,
                args.batch_size, args.model_input_shape))
    model.fit_generator(
        train_generator,
        steps_per_epoch=train_generator.samples // args.batch_size,
        validation_data=val_generator,
        validation_steps=val_generator.samples // args.batch_size,
        epochs=epochs,
        initial_epoch=initial_epoch,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type:
        # rebuild optimizer to apply learning rate decay, only after
        # unfreeze all layers
        callbacks.remove(reduce_lr)
        steps_per_epoch = max(1, train_generator.samples // args.batch_size)
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch -
                                         args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer,
                                  args.learning_rate,
                                  average_type=None,
                                  decay_type=args.decay_type,
                                  decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    for i in range(len(model.layers)):
        model.layers[i].trainable = True
    print("Unfreeze and continue training, to fine-tune.")
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit_generator(
        train_generator,
        steps_per_epoch=train_generator.samples // args.batch_size,
        validation_data=val_generator,
        validation_steps=val_generator.samples // args.batch_size,
        epochs=args.total_epoch,
        initial_epoch=epochs,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Finally store model
    model.save(os.path.join(log_dir, 'trained_final.h5'))
Example #12
num_epochs = args.num_epochs
batch_size = args.batch_size
decay_epochs = args.decay_epochs
initial_learning_rate = args.initial_learning_rate
end_learning_rate = args.end_learning_rate
weight_decay = args.weight_decay
warmup_proportion = args.warmup_proportion
num_train_steps = int(
    train_df.shape[0] * (1 - 1 / num_folds) / batch_size * decay_epochs)
num_warmup_steps = int(num_train_steps * warmup_proportion)
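# e.g. with num_folds=5 each fold trains on ~4/5 of train_df, so roughly
# 0.8 * len(train_df) / batch_size steps per epoch over decay_epochs epochs,
# of which the first warmup_proportion are linear warmup steps.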
dropout_rate = args.dropout_rate
rnn_units = args.rnn_units
num_hidden_states = args.num_hidden_states

optimizer = model_utils.get_optimizer(initial_learning_rate, end_learning_rate,
                                      weight_decay, num_train_steps,
                                      num_warmup_steps)

loss_fn = model_utils.get_loss_function(from_logits=False)

transformer.Model.NUM_HIDDEN_STATES = num_hidden_states
transformer.Model.DROPOUT_RATE = dropout_rate
transformer.Model.RNN_UNITS = rnn_units

config = dataset.Config.from_pretrained(dataset.PATH,
                                        output_hidden_states=True)
model = transformer.Model.from_pretrained(dataset.PATH, config=config)
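# output_hidden_states=True makes the transformer return every layer's hidden
# states, which the custom head combines (NUM_HIDDEN_STATES of them).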

kfold = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)

for fold_num, (train_idx, valid_idx) in enumerate(