Example #1
    def __init__(self, lr, lr_mode, lr_interval, lr_value, total_epochs,
                 steps_per_epoch, initial_epoch):

        super(OptionalLearningRateSchedule, self).__init__()
        self.lr = lr
        self.lr_mode = lr_mode
        self.lr_interval = lr_interval
        self.lr_value = lr_value
        self.total_epochs = total_epochs
        self.steps_per_epoch = steps_per_epoch
        self.initial_epoch = initial_epoch

        if self.lr_mode == 'exponential':
            decay_epochs = [int(e) for e in self.lr_interval.split(',')]
            lr_values = [
                self.lr * (self.lr_value**k)
                for k in range(len(decay_epochs) + 1)
            ]
            self.lr_scheduler = PiecewiseConstantDecay(decay_epochs, lr_values)

        elif self.lr_mode == 'cosine':
            self.lr_scheduler = CosineDecay(self.lr, self.total_epochs)

        elif self.lr_mode == 'constant':
            self.lr_scheduler = lambda x: self.lr

        else:
            raise ValueError(self.lr_mode)
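The snippet above omits the schedule's __call__ method. A minimal sketch of how the per-mode lr_scheduler might be consumed, assuming the optimizer step is mapped to an epoch index via steps_per_epoch (hypothetical, not part of the original source):

    def __call__(self, step):
        # Hypothetical sketch: convert the optimizer step to an epoch index and
        # delegate to the scheduler selected in __init__.
        epoch = step // self.steps_per_epoch + self.initial_epoch
        return self.lr_scheduler(epoch)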
Example #2
def get_lr_scheduler(learning_rate, decay_type, decay_steps):
    if decay_type:
        decay_type = decay_type.lower()

    if decay_type is None:
        lr_scheduler = learning_rate
    elif decay_type == 'cosine':
        lr_scheduler = CosineDecay(
            initial_learning_rate=learning_rate,
            decay_steps=decay_steps,
            alpha=0.2)  # use 0.2*learning_rate as final minimum learning rate
    elif decay_type == 'exponential':
        lr_scheduler = ExponentialDecay(initial_learning_rate=learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=0.9)
    elif decay_type == 'polynomial':
        lr_scheduler = PolynomialDecay(initial_learning_rate=learning_rate,
                                       decay_steps=decay_steps,
                                       end_learning_rate=learning_rate / 100)
    elif decay_type == 'piecewise_constant':
        # apply a piecewise constant lr schedule, including a warmup stage
        boundaries = [500, int(decay_steps * 0.9), decay_steps]
        values = [
            0.001, learning_rate, learning_rate / 10., learning_rate / 100.
        ]
        lr_scheduler = PiecewiseConstantDecay(boundaries=boundaries,
                                              values=values)
    else:
        raise ValueError('Unsupported lr decay type')

    return lr_scheduler
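A brief usage sketch (the call values below are illustrative): whatever get_lr_scheduler returns, a plain float or a LearningRateSchedule, can be passed directly to a Keras optimizer.

from tensorflow.keras.optimizers import Adam

# Hypothetical usage with an assumed base learning rate and step budget.
lr_scheduler = get_lr_scheduler(learning_rate=1e-3, decay_type='cosine', decay_steps=10000)
optimizer = Adam(learning_rate=lr_scheduler)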
Example #3
def get_cosine_decay_scheduler(initial_learning_rate: float,
                               decay_steps,
                               alpha: float = 0.0):
    return LearningRateScheduler(CosineDecay(
        initial_learning_rate,
        decay_steps,
        alpha=alpha,
    ),
                                 verbose=True)
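A short usage sketch, assuming a compiled Keras model named model and arrays x_train/y_train (all hypothetical). Since LearningRateScheduler passes the epoch index to the schedule, decay_steps is measured in epochs here.

# Hypothetical usage: anneal the learning rate to zero (alpha=0.0) over 50 epochs.
lr_callback = get_cosine_decay_scheduler(initial_learning_rate=1e-3, decay_steps=50)
model.fit(x_train, y_train, epochs=50, callbacks=[lr_callback])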
Example #4
    def __init__(self, args, steps_per_epoch, initial_epoch):
        super(OptionalLearningRateSchedule, self).__init__()
        self.args = args
        self.steps_per_epoch = steps_per_epoch
        self.initial_epoch = initial_epoch

        if self.args.lr_mode == 'cosine':
            self.lr_scheduler = CosineDecay(self.args.lr, self.args.epochs)

        elif self.args.lr_mode == 'constant':
            self.lr_scheduler = lambda x: self.args.lr
Example #5
def PickLearnRateDecay(option, ParamDic={}):
    """
    Build a changing learning rate object.

    It gets the type of changing learning rate needed and the parameters that the users gives.
    It adds all the other default parameters and return the object.

    :param option: string. The type of learning rate desired. Supports:
                                                                        CosineDecay
                                                                        CosineDecayRestarts
                                                                        LinearCosineDecay
    :param ParamDic: dictionary. If a parameter is in the dictionary then take that value, if not then use defaults.
    :return: A keras learning_rate_schedule object according to the request in the option parameter.
    """

    # Default parameters
    DefltInitLearning = 0.001  # 0.0001
    DefltSteps = 40
    DefltAlpha = 0.2
    DefltTMul = 0.2
    DefltMmul = 1.0
    defltNumPer = 0.5
    defltBeta = 0.001

    # Find parameter values.
    # If a value is in the dictionary, take it; otherwise use the default.

    ILR = ParamDic['initial_learning_rate'] if 'initial_learning_rate' in ParamDic else DefltInitLearning
    DS = ParamDic['decay_steps'] if 'decay_steps' in ParamDic else DefltSteps
    Alp = ParamDic['alpha'] if 'alpha' in ParamDic else DefltAlpha
    tMul = ParamDic['t_mul'] if 't_mul' in ParamDic else DefltTMul
    mMul = ParamDic['m_mul'] if 'm_mul' in ParamDic else DefltMmul
    NPer = ParamDic['num_periods'] if 'num_periods' in ParamDic else defltNumPer
    Beta = ParamDic['beta'] if 'beta' in ParamDic else defltBeta

    # pick the right decay according to option
    if option == 'CosineDecay':
        lr_decayed = CosineDecay(initial_learning_rate=ILR, decay_steps=DS, alpha=Alp, name='CosineDecay')
    elif option == 'CosineDecayRestarts':
        lr_decayed = CosineDecayRestarts(initial_learning_rate=ILR, first_decay_steps=DS, t_mul=tMul, m_mul=mMul,
                                         alpha=Alp, name='CosineDecayRestarts')
    elif option == 'LinearCosineDecay':
        lr_decayed = LinearCosineDecay(initial_learning_rate=ILR, decay_steps=DS, num_periods=NPer, alpha=Alp,
                                       beta=Beta, name='LinearCosineDecay')
    else:
        print('Incorrect parameter:' + str(option))
        return

    return lr_decayed
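A quick usage sketch (values are illustrative): keys omitted from ParamDic fall back to the defaults listed above.

# Hypothetical usage: override only the initial learning rate and the step count.
lr_decayed = PickLearnRateDecay('CosineDecayRestarts',
                                {'initial_learning_rate': 0.01, 'decay_steps': 100})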
Example #6
def get_lr_scheduler(learning_rate, decay_type, decay_steps):
    if decay_type:
        decay_type = decay_type.lower()

    if decay_type is None:
        lr_scheduler = learning_rate
    elif decay_type == 'cosine':
        lr_scheduler = CosineDecay(initial_learning_rate=learning_rate, decay_steps=decay_steps)
    elif decay_type == 'exponential':
        lr_scheduler = ExponentialDecay(initial_learning_rate=learning_rate, decay_steps=decay_steps, decay_rate=0.9)
    elif decay_type == 'polynomial':
        lr_scheduler = PolynomialDecay(initial_learning_rate=learning_rate, decay_steps=decay_steps, end_learning_rate=learning_rate/100)
    else:
        raise ValueError('Unsupported lr decay type')

    return lr_scheduler
Example #7
def zeroshot_train(t_depth,
                   t_width,
                   t_wght_path,
                   s_depth,
                   s_width,
                   seed=42,
                   savedir=None,
                   dataset='cifar10',
                   sample_per_class=0):

    set_seed(seed)

    train_name = '%s_T-%d-%d_S-%d-%d_seed_%d' % (dataset, t_depth, t_width,
                                                 s_depth, s_width, seed)
    if sample_per_class > 0:
        train_name += "-m%d" % sample_per_class
    log_filename = train_name + '_training_log.csv'

    # save dir
    if not savedir:
        savedir = 'zeroshot_' + train_name
    full_savedir = os.path.join(os.getcwd(), savedir)
    mkdir(full_savedir)

    log_filepath = os.path.join(full_savedir, log_filename)
    logger = CustomizedCSVLogger(log_filepath)

    # Teacher
    teacher = WideResidualNetwork(t_depth,
                                  t_width,
                                  input_shape=Config.input_dim,
                                  dropout_rate=0.0,
                                  output_activations=True,
                                  has_softmax=False)

    teacher.load_weights(t_wght_path)
    teacher.trainable = False

    # Student
    student = WideResidualNetwork(s_depth,
                                  s_width,
                                  input_shape=Config.input_dim,
                                  dropout_rate=0.0,
                                  output_activations=True,
                                  has_softmax=False)

    if sample_per_class > 0:
        s_decay_steps = Config.n_outer_loop * Config.n_s_in_loop + Config.n_outer_loop
    else:
        s_decay_steps = Config.n_outer_loop * Config.n_s_in_loop

    s_optim = Adam(learning_rate=CosineDecay(Config.student_init_lr,
                                             decay_steps=s_decay_steps))
    # ---------------------------------------------------------------------------
    # Generator
    generator = NavieGenerator(input_dim=Config.z_dim)
    g_optim = Adam(learning_rate=CosineDecay(Config.generator_init_lr,
                                             decay_steps=Config.n_outer_loop *
                                             Config.n_g_in_loop))
    # ---------------------------------------------------------------------------
    # Test data
    if dataset == 'cifar10':
        (x_train, y_train_lbl), (x_test, y_test) = get_cifar10_data()
    elif dataset == 'fashion_mnist':
        (x_train, y_train_lbl), (x_test, y_test) = get_fashion_mnist_data()
    else:
        raise ValueError("Only Cifar-10 and Fashion-MNIST supported !!")
    test_data_loader = tf.data.Dataset.from_tensor_slices(
        (x_test, y_test)).batch(200)
    # ---------------------------------------------------------------------------
    # Train data (if using train data)
    train_dataflow = None
    if sample_per_class > 0:
        # sample first
        x_train, y_train_lbl = \
            balance_sampling(x_train, y_train_lbl, data_per_class=sample_per_class)
        datagen = ImageDataGenerator(width_shift_range=4,
                                     height_shift_range=4,
                                     horizontal_flip=True,
                                     vertical_flip=False,
                                     rescale=None,
                                     fill_mode='reflect')
        datagen.fit(x_train)
        y_train = to_categorical(y_train_lbl)
        train_dataflow = datagen.flow(x_train,
                                      y_train,
                                      batch_size=Config.batch_size,
                                      shuffle=True)

    # Generator loss metrics
    g_loss_met = tf.keras.metrics.Mean()

    # Student loss metrics
    s_loss_met = tf.keras.metrics.Mean()

    #
    n_cls_t_pred_metric = tf.keras.metrics.Mean()
    n_cls_s_pred_metric = tf.keras.metrics.Mean()

    max_g_grad_norm_metric = tf.keras.metrics.Mean()
    max_s_grad_norm_metric = tf.keras.metrics.Mean()

    # checkpoint
    chkpt_dict = {
        'teacher': teacher,
        'student': student,
        'generator': generator,
        's_optim': s_optim,
        'g_optim': g_optim,
    }
    # Saving checkpoint
    ckpt = tf.train.Checkpoint(**chkpt_dict)
    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              os.path.join(savedir, 'chpt'),
                                              max_to_keep=2)
    # ==========================================================================
    # if a checkpoint exists, restore the latest checkpoint.
    start_iter = 0
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!!')
        with open(os.path.join(savedir, 'chpt', 'iteration'), 'r') as f:
            start_iter = int(f.read())
        logger = CustomizedCSVLogger(log_filepath, append=True)

    for iter_ in range(start_iter, Config.n_outer_loop):
        iter_stime = time.time()

        max_s_grad_norm = 0
        max_g_grad_norm = 0
        # sample from the latent space to generate images
        z_val = tf.random.normal([Config.batch_size, Config.z_dim])

        # Generator training
        loss = 0
        for ng in range(Config.n_g_in_loop):
            loss, g_grad_norm = train_gen(generator, g_optim, z_val, teacher,
                                          student)
            max_g_grad_norm = max(max_g_grad_norm, g_grad_norm.numpy())
            g_loss_met(loss)

        # ==========================================================================
        # Student training
        loss = 0
        pseudo_imgs, t_logits, t_acts = prepare_train_student(
            generator, z_val, teacher)
        for ns in range(Config.n_s_in_loop):
            # pseudo_imgs, t_logits, t_acts = prepare_train_student(generator, z_val, teacher)
            loss, s_grad_norm, s_logits = train_student(
                pseudo_imgs, s_optim, t_logits, t_acts, student)
            max_s_grad_norm = max(max_s_grad_norm, s_grad_norm.numpy())

            n_cls_t_pred = len(np.unique(np.argmax(t_logits, axis=-1)))
            n_cls_s_pred = len(np.unique(np.argmax(s_logits, axis=-1)))
            # logging
            s_loss_met(loss)
            n_cls_t_pred_metric(n_cls_t_pred)
            n_cls_s_pred_metric(n_cls_s_pred)
        # ==========================================================================
        # train on real labelled samples, if any were provided
        if train_dataflow:
            x_batch_train, y_batch_train = next(train_dataflow)
            t_logits, t_acts = forward(teacher, x_batch_train, training=False)
            loss = train_student_with_labels(student, s_optim, x_batch_train,
                                             t_logits, t_acts, y_batch_train)
        # ==========================================================================

        # --------------------------------------------------------------------
        iter_etime = time.time()
        max_g_grad_norm_metric(max_g_grad_norm)
        max_s_grad_norm_metric(max_s_grad_norm)
        # --------------------------------------------------------------------
        is_last_epoch = (iter_ == Config.n_outer_loop - 1)

        if iter_ != 0 and (iter_ % Config.print_freq == 0 or is_last_epoch):
            n_cls_t_pred_avg = n_cls_t_pred_metric.result().numpy()
            n_cls_s_pred_avg = n_cls_s_pred_metric.result().numpy()
            time_per_epoch = iter_etime - iter_stime

            s_loss = s_loss_met.result().numpy()
            g_loss = g_loss_met.result().numpy()
            max_g_grad_norm_avg = max_g_grad_norm_metric.result().numpy()
            max_s_grad_norm_avg = max_s_grad_norm_metric.result().numpy()

            # build ordered dict
            row_dict = OrderedDict()

            row_dict['time_per_epoch'] = time_per_epoch
            row_dict['epoch'] = iter_
            row_dict['generator_loss'] = g_loss
            row_dict['student_kd_loss'] = s_loss
            row_dict['n_cls_t_pred_avg'] = n_cls_t_pred_avg
            row_dict['n_cls_s_pred_avg'] = n_cls_s_pred_avg
            row_dict['max_g_grad_norm_avg'] = max_g_grad_norm_avg
            row_dict['max_s_grad_norm_avg'] = max_s_grad_norm_avg

            if sample_per_class > 0:
                s_optim_iter = iter_ * (Config.n_s_in_loop + 1)
            else:
                s_optim_iter = iter_ * Config.n_s_in_loop
            row_dict['s_optim_lr'] = s_optim.learning_rate(
                s_optim_iter).numpy()
            row_dict['g_optim_lr'] = g_optim.learning_rate(iter_).numpy()

            pprint.pprint(row_dict)
        # ======================================================================
        if iter_ != 0 and (iter_ % Config.log_freq == 0 or is_last_epoch):
            # calculate acc
            test_accuracy = evaluate(test_data_loader, student).numpy()
            row_dict['test_acc'] = test_accuracy
            logger.log_with_order(row_dict)
            print('Test Accuracy: ', test_accuracy)

            # checkpointing
            ckpt_save_path = ckpt_manager.save()
            print('Saving checkpoint for epoch {} at {}'.format(
                iter_ + 1, ckpt_save_path))
            with open(os.path.join(savedir, 'chpt', 'iteration'), 'w') as f:
                f.write(str(iter_ + 1))

            s_loss_met.reset_states()
            g_loss_met.reset_states()
            max_g_grad_norm_metric.reset_states()
            max_s_grad_norm_metric.reset_states()

        if iter_ != 0 and (iter_ % 5000 == 0 or is_last_epoch):
            generator.save_weights(
                join(full_savedir, "generator_i{}.h5".format(iter_)))
            student.save_weights(
                join(full_savedir, "student_i{}.h5".format(iter_)))
Example #8
def train(model, train_dataset, val_dataset):
    flag_utils.log_flag()
    """Configure the model for transfer learning"""
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny()
        else:
            model_pretrained = YoloV3()
        model_pretrained.load_weights(FLAGS.weights, by_name=True)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            yolo_utils.freeze_all(model.get_layer('yolo_darknet'),
                                  until_layer=FLAGS.freeze)

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    yolo_utils.freeze_all(l, until_layer=FLAGS.freeze)

    else:
        # All other transfer require matching classes
        model.load_weights(weight_path=FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            yolo_utils.freeze_all(darknet, until_layer=FLAGS.freeze)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            yolo_utils.freeze_all(model, until_layer=FLAGS.freeze)

    lr_scheduler = CosineDecay(initial_learning_rate=FLAGS.learning_rate,
                               decay_steps=FLAGS.decay_steps,
                               alpha=FLAGS.lr_alpha)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_scheduler)
    loss = [
        YoloLoss(model.anchors[mask], classes=model.num_classes)
        for mask in model.anchor_masks
    ]
    """Start training"""
    if FLAGS.fit == 'eager_tf':
        run_eager_fit(model, train_dataset, val_dataset, optimizer, loss)
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))
        checkpoint_path = os_path.join(
            os_path.dirname(FLAGS.weights), "checkpoints",
            'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5')
        os_path.make_dir(os_path.dirname(checkpoint_path))
        plt_his = PlotHistory()
        callbacks = [
            # ReduceLROnPlateau(verbose=1, patience=FLAGS.lr_patience),
            EarlyStopping(patience=FLAGS.early_stop, verbose=1),
            ModelCheckpoint(checkpoint_path,
                            monitor='val_loss',
                            verbose=0,
                            save_best_only=False,
                            save_weights_only=True,
                            mode='auto',
                            save_freq='epoch'),
            # TensorBoard(log_dir='../logs'),
            LogHistory(),
            plt_his
        ]
        try:
            history = model.fit(train_dataset,
                                epochs=FLAGS.epochs,
                                callbacks=callbacks,
                                validation_data=val_dataset)
        except Exception as e:
            raise e
        finally:
            plt_his.plot(save_path=os_path.join(
                os_path.dirname(FLAGS.weights),
                f"{datetime.datetime.now().strftime('%Y%m%d-%H%M')}.png"))
Example #9
        ops=best_enc['module_operations'],
        data_format=model_config['data_format'])

    batch_size = 250
    # model_config['num_stacks'] = 0

    print(model_config)

    inputs = tf.keras.layers.Input(x_train.shape[1:], batch_size)
    net_outputs = build_keras_model(model_spec, inputs,
        model_spec.ops, model_config)
    model = tf.keras.Model(inputs=inputs, outputs=net_outputs)

    num_train_imgs = int(x_train.shape[0]*(1-val_split))
    decay_steps = int(epochs * num_train_imgs / batch_size)
    cos_decay = CosineDecay(model_config['learning_rate'], decay_steps)
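    # Illustrative arithmetic (numbers assumed, not from this snippet): with 50,000
    # training images and val_split = 0.1, num_train_imgs = 45,000; at batch_size = 250
    # and epochs = 100, decay_steps = int(100 * 45000 / 250) = 18000, so the cosine
    # schedule reaches its final value exactly at the end of training.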

    model.compile(
        loss='categorical_crossentropy',
        optimizer=RMSprop(cos_decay),
        metrics=['accuracy'])
    plot_model(model, to_file='model_nas_plot.png',
        show_shapes=True, show_layer_names=True)

    # model.summary()
    # print(len(model.layers), "layers")
    # print("batch_size:", batch_size)
    # print(model_config['data_format'])
    # print(x_train.shape)

    # # Prepare model saving directory.
Example #10
            )
        ))
        input_shape = (32, 32, input_shape[2])
    model.add(arch(weights=None, classes=n_class, input_shape=input_shape,
                   include_top=False))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dropout(0.3))
    model.add(keras.layers.Dense(2 ** (n_class - 1), activation='relu'))
    model.add(keras.layers.Dropout(0.3))
    model.add(keras.layers.Dense(2 ** (n_class - 2), activation='relu'))
    model.add(keras.layers.Dense(n_class, activation='softmax'))

model.summary()

optim = Adam(learning_rate=args.lr)
model.compile(loss='categorical_crossentropy', optimizer=optim,
              metrics=['accuracy'])

# callbacks
checkpoint = ModelCheckpoint(model_path, monitor='val_accuracy', verbose=1,
                             save_best_only=True, mode='max')
lrate = LearningRateScheduler(CosineDecay(0.0001, args.epochs))
callbacks = [checkpoint, lrate]

model.fit(x_train, to_categorical(y_train, n_class),
          batch_size=args.batch_size,
          epochs=args.epochs,
          validation_data=(x_val, to_categorical(y_val, n_class)),
          callbacks=callbacks)