def __init__(self, lr, lr_mode, lr_interval, lr_value, total_epochs,
             steps_per_epoch, initial_epoch):
    super(OptionalLearningRateSchedule, self).__init__()
    self.lr = lr
    self.lr_mode = lr_mode
    self.lr_interval = lr_interval
    self.lr_value = lr_value
    self.total_epochs = total_epochs
    self.steps_per_epoch = steps_per_epoch
    self.initial_epoch = initial_epoch

    if self.lr_mode == 'exponential':
        decay_epochs = [int(e) for e in self.lr_interval.split(',')]
        lr_values = [self.lr * (self.lr_value ** k)
                     for k in range(len(decay_epochs) + 1)]
        self.lr_scheduler = PiecewiseConstantDecay(decay_epochs, lr_values)
    elif self.lr_mode == 'cosine':
        self.lr_scheduler = CosineDecay(self.lr, self.total_epochs)
    elif self.lr_mode == 'constant':
        self.lr_scheduler = lambda x: self.lr
    else:
        raise ValueError(self.lr_mode)
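A minimal sketch (not taken from the original source) of how a schedule like this might use the fields stored above: since it keeps steps_per_epoch and initial_epoch while its decay objects are parameterized in epochs, its __call__ would plausibly convert the optimizer step into a fractional epoch before delegating to the wrapped scheduler. The method below is an assumption for illustration only.

# Assumed companion method on the same class; not the author's implementation.
def __call__(self, step):
    # Map the global optimizer step to an epoch index, then query the wrapped
    # schedule (or the constant lambda) with it.
    lr_epoch = step / self.steps_per_epoch + self.initial_epoch
    return self.lr_scheduler(lr_epoch)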
def get_lr_scheduler(learning_rate, decay_type, decay_steps):
    if decay_type:
        decay_type = decay_type.lower()

    if decay_type is None:
        # no decay: use the constant learning rate directly
        lr_scheduler = learning_rate
    elif decay_type == 'cosine':
        lr_scheduler = CosineDecay(
            initial_learning_rate=learning_rate,
            decay_steps=decay_steps,
            alpha=0.2)  # use 0.2 * learning_rate as the final minimum learning rate
    elif decay_type == 'exponential':
        lr_scheduler = ExponentialDecay(
            initial_learning_rate=learning_rate,
            decay_steps=decay_steps,
            decay_rate=0.9)
    elif decay_type == 'polynomial':
        lr_scheduler = PolynomialDecay(
            initial_learning_rate=learning_rate,
            decay_steps=decay_steps,
            end_learning_rate=learning_rate / 100)
    elif decay_type == 'piecewise_constant':
        # piecewise constant lr schedule, including a warmup stage
        boundaries = [500, int(decay_steps * 0.9), decay_steps]
        values = [0.001, learning_rate, learning_rate / 10., learning_rate / 100.]
        lr_scheduler = PiecewiseConstantDecay(boundaries=boundaries, values=values)
    else:
        raise ValueError('Unsupported lr decay type')

    return lr_scheduler
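An illustrative call, assuming the snippet's own schedule imports are in scope; the epoch and step counts below are placeholder values, not taken from the source. It shows that the returned schedule (or plain float, for no decay) can be handed directly to a Keras optimizer.

from tensorflow.keras.optimizers import Adam

# e.g. roughly 100 epochs at 500 steps per epoch (illustrative numbers)
lr_schedule = get_lr_scheduler(learning_rate=1e-3,
                               decay_type='cosine',
                               decay_steps=100 * 500)
optimizer = Adam(learning_rate=lr_schedule)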
def get_cosine_decay_scheduler(initial_learning_rate: float, decay_steps, alpha: float = 0.0):
    return LearningRateScheduler(
        CosineDecay(initial_learning_rate, decay_steps, alpha=alpha),
        verbose=True)
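A hedged usage sketch: because the schedule is wrapped in a LearningRateScheduler callback, it is applied once per epoch rather than per optimizer step, so decay_steps is effectively measured in epochs here. model, x_train and y_train below are placeholders.

lr_callback = get_cosine_decay_scheduler(initial_learning_rate=1e-3,
                                         decay_steps=50,  # epochs, since the callback is epoch-wise
                                         alpha=0.1)
model.fit(x_train, y_train, epochs=50, callbacks=[lr_callback])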
def __init__(self, args, steps_per_epoch, initial_epoch):
    super(OptionalLearningRateSchedule, self).__init__()
    self.args = args
    self.steps_per_epoch = steps_per_epoch
    self.initial_epoch = initial_epoch

    if self.args.lr_mode == 'cosine':
        self.lr_scheduler = CosineDecay(self.args.lr, self.args.epochs)
    elif self.args.lr_mode == 'constant':
        self.lr_scheduler = lambda x: self.args.lr
def PickLearnRateDecay(option, ParamDic={}):
    """
    Build a decaying learning rate object.

    Takes the type of learning rate schedule required and the parameters the user
    gives, fills in defaults for everything else, and returns the schedule object.

    :param option: string. The type of learning rate schedule desired. Supports:
                   CosineDecay, CosineDecayRestarts, LinearCosineDecay.
    :param ParamDic: dictionary. If a parameter is present in the dictionary its
                     value is used; otherwise the default below is used.
    :return: A Keras learning_rate_schedule object matching the requested option.
    """
    # Default parameters
    DefltInitLearning = 0.001  # 0.0001
    DefltSteps = 40
    DefltAlpha = 0.2
    DefltTMul = 0.2
    DefltMmul = 1.0
    defltNumPer = 0.5
    defltBeta = 0.001

    # Resolve parameter values: take the value from the dictionary if present,
    # otherwise fall back to the default.
    ILR = ParamDic.get('initial_learning_rate', DefltInitLearning)
    DS = ParamDic.get('decay_steps', DefltSteps)
    Alp = ParamDic.get('alpha', DefltAlpha)
    tMul = ParamDic.get('t_mul', DefltTMul)
    mMul = ParamDic.get('m_mul', DefltMmul)
    NPer = ParamDic.get('num_periods', defltNumPer)
    Beta = ParamDic.get('beta', defltBeta)

    # Pick the right decay according to option
    if option == 'CosineDecay':
        lr_decayed = CosineDecay(initial_learning_rate=ILR,
                                 decay_steps=DS,
                                 alpha=Alp,
                                 name='CosineDecay')
    elif option == 'CosineDecayRestarts':
        lr_decayed = CosineDecayRestarts(initial_learning_rate=ILR,
                                         first_decay_steps=DS,
                                         t_mul=tMul,
                                         m_mul=mMul,
                                         alpha=Alp,
                                         name='CosineDecayRestarts')
    elif option == 'LinearCosineDecay':
        lr_decayed = LinearCosineDecay(initial_learning_rate=ILR,
                                       decay_steps=DS,
                                       num_periods=NPer,
                                       alpha=Alp,
                                       beta=Beta,
                                       name='LinearCosineDecay')
    else:
        print('Incorrect parameter: ' + str(option))
        return

    return lr_decayed
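Illustrative usage (the parameter values and the tf import are assumptions, not from the snippet): override only the fields you care about in ParamDic and let the helper fill in its defaults. Note that for the restart variant, 'decay_steps' is mapped to first_decay_steps inside the helper.

import tensorflow as tf

schedule = PickLearnRateDecay('CosineDecayRestarts',
                              ParamDic={'initial_learning_rate': 1e-3,
                                        'decay_steps': 1000})
optimizer = tf.keras.optimizers.SGD(learning_rate=schedule)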
def get_lr_scheduler(learning_rate, decay_type, decay_steps):
    if decay_type:
        decay_type = decay_type.lower()

    if decay_type is None:
        lr_scheduler = learning_rate
    elif decay_type == 'cosine':
        lr_scheduler = CosineDecay(initial_learning_rate=learning_rate,
                                   decay_steps=decay_steps)
    elif decay_type == 'exponential':
        lr_scheduler = ExponentialDecay(initial_learning_rate=learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=0.9)
    elif decay_type == 'polynomial':
        lr_scheduler = PolynomialDecay(initial_learning_rate=learning_rate,
                                       decay_steps=decay_steps,
                                       end_learning_rate=learning_rate / 100)
    else:
        raise ValueError('Unsupported lr decay type')

    return lr_scheduler
def zeroshot_train(t_depth, t_width, t_wght_path, s_depth, s_width, seed=42,
                   savedir=None, dataset='cifar10', sample_per_class=0):

    set_seed(seed)

    train_name = '%s_T-%d-%d_S-%d-%d_seed_%d' % (dataset, t_depth, t_width,
                                                 s_depth, s_width, seed)
    if sample_per_class > 0:
        train_name += "-m%d" % sample_per_class
    log_filename = train_name + '_training_log.csv'

    # save dir
    if not savedir:
        savedir = 'zeroshot_' + train_name
    full_savedir = os.path.join(os.getcwd(), savedir)
    mkdir(full_savedir)

    log_filepath = os.path.join(full_savedir, log_filename)
    logger = CustomizedCSVLogger(log_filepath)

    # Teacher
    teacher = WideResidualNetwork(t_depth, t_width,
                                  input_shape=Config.input_dim,
                                  dropout_rate=0.0,
                                  output_activations=True,
                                  has_softmax=False)
    teacher.load_weights(t_wght_path)
    teacher.trainable = False

    # Student
    student = WideResidualNetwork(s_depth, s_width,
                                  input_shape=Config.input_dim,
                                  dropout_rate=0.0,
                                  output_activations=True,
                                  has_softmax=False)

    if sample_per_class > 0:
        s_decay_steps = Config.n_outer_loop * Config.n_s_in_loop + Config.n_outer_loop
    else:
        s_decay_steps = Config.n_outer_loop * Config.n_s_in_loop

    s_optim = Adam(learning_rate=CosineDecay(Config.student_init_lr,
                                             decay_steps=s_decay_steps))
    # ---------------------------------------------------------------------------
    # Generator
    generator = NavieGenerator(input_dim=Config.z_dim)
    g_optim = Adam(learning_rate=CosineDecay(Config.generator_init_lr,
                                             decay_steps=Config.n_outer_loop * Config.n_g_in_loop))
    # ---------------------------------------------------------------------------
    # Test data
    if dataset == 'cifar10':
        (x_train, y_train_lbl), (x_test, y_test) = get_cifar10_data()
    elif dataset == 'fashion_mnist':
        (x_train, y_train_lbl), (x_test, y_test) = get_fashion_mnist_data()
    else:
        raise ValueError("Only Cifar-10 and Fashion-MNIST supported !!")
    test_data_loader = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(200)
    # ---------------------------------------------------------------------------
    # Train data (if using train data)
    train_dataflow = None
    if sample_per_class > 0:
        # sample first
        x_train, y_train_lbl = \
            balance_sampling(x_train, y_train_lbl, data_per_class=sample_per_class)
        datagen = ImageDataGenerator(width_shift_range=4,
                                     height_shift_range=4,
                                     horizontal_flip=True,
                                     vertical_flip=False,
                                     rescale=None,
                                     fill_mode='reflect')
        datagen.fit(x_train)
        y_train = to_categorical(y_train_lbl)
        train_dataflow = datagen.flow(x_train, y_train,
                                      batch_size=Config.batch_size,
                                      shuffle=True)

    # Generator loss metrics
    g_loss_met = tf.keras.metrics.Mean()

    # Student loss metrics
    s_loss_met = tf.keras.metrics.Mean()

    n_cls_t_pred_metric = tf.keras.metrics.Mean()
    n_cls_s_pred_metric = tf.keras.metrics.Mean()

    max_g_grad_norm_metric = tf.keras.metrics.Mean()
    max_s_grad_norm_metric = tf.keras.metrics.Mean()

    # checkpoint
    chkpt_dict = {
        'teacher': teacher,
        'student': student,
        'generator': generator,
        's_optim': s_optim,
        'g_optim': g_optim,
    }
    # Saving checkpoint
    ckpt = tf.train.Checkpoint(**chkpt_dict)
    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              os.path.join(savedir, 'chpt'),
                                              max_to_keep=2)

    # ==========================================================================
    # if a checkpoint exists, restore the latest checkpoint.
    start_iter = 0
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!!')
        with open(os.path.join(savedir, 'chpt', 'iteration'), 'r') as f:
            start_iter = int(f.read())
        logger = CustomizedCSVLogger(log_filepath, append=True)

    for iter_ in range(start_iter, Config.n_outer_loop):
        iter_stime = time.time()

        max_s_grad_norm = 0
        max_g_grad_norm = 0
        # sample from latent space to generate an image
        z_val = tf.random.normal([Config.batch_size, Config.z_dim])

        # Generator training
        loss = 0
        for ng in range(Config.n_g_in_loop):
            loss, g_grad_norm = train_gen(generator, g_optim, z_val, teacher, student)
            max_g_grad_norm = max(max_g_grad_norm, g_grad_norm.numpy())
            g_loss_met(loss)

        # ==========================================================================
        # Student training
        loss = 0
        pseudo_imgs, t_logits, t_acts = prepare_train_student(generator, z_val, teacher)
        for ns in range(Config.n_s_in_loop):
            # pseudo_imgs, t_logits, t_acts = prepare_train_student(generator, z_val, teacher)
            loss, s_grad_norm, s_logits = train_student(pseudo_imgs, s_optim,
                                                        t_logits, t_acts, student)
            max_s_grad_norm = max(max_s_grad_norm, s_grad_norm.numpy())

            n_cls_t_pred = len(np.unique(np.argmax(t_logits, axis=-1)))
            n_cls_s_pred = len(np.unique(np.argmax(s_logits, axis=-1)))

            # logging
            s_loss_met(loss)
            n_cls_t_pred_metric(n_cls_t_pred)
            n_cls_s_pred_metric(n_cls_s_pred)
        # ==========================================================================
        # train if provided n samples
        if train_dataflow:
            x_batch_train, y_batch_train = next(train_dataflow)
            t_logits, t_acts = forward(teacher, x_batch_train, training=False)
            loss = train_student_with_labels(student, s_optim, x_batch_train,
                                             t_logits, t_acts, y_batch_train)
        # ==========================================================================

        # --------------------------------------------------------------------
        iter_etime = time.time()
        max_g_grad_norm_metric(max_g_grad_norm)
        max_s_grad_norm_metric(max_s_grad_norm)
        # --------------------------------------------------------------------
        is_last_epoch = (iter_ == Config.n_outer_loop - 1)

        if iter_ != 0 and (iter_ % Config.print_freq == 0 or is_last_epoch):
            n_cls_t_pred_avg = n_cls_t_pred_metric.result().numpy()
            n_cls_s_pred_avg = n_cls_s_pred_metric.result().numpy()
            time_per_epoch = iter_etime - iter_stime

            s_loss = s_loss_met.result().numpy()
            g_loss = g_loss_met.result().numpy()
            max_g_grad_norm_avg = max_g_grad_norm_metric.result().numpy()
            max_s_grad_norm_avg = max_s_grad_norm_metric.result().numpy()

            # build ordered dict
            row_dict = OrderedDict()
            row_dict['time_per_epoch'] = time_per_epoch
            row_dict['epoch'] = iter_
            row_dict['generator_loss'] = g_loss
            row_dict['student_kd_loss'] = s_loss
            row_dict['n_cls_t_pred_avg'] = n_cls_t_pred_avg
            row_dict['n_cls_s_pred_avg'] = n_cls_s_pred_avg
            row_dict['max_g_grad_norm_avg'] = max_g_grad_norm_avg
            row_dict['max_s_grad_norm_avg'] = max_s_grad_norm_avg

            if sample_per_class > 0:
                s_optim_iter = iter_ * (Config.n_s_in_loop + 1)
            else:
                s_optim_iter = iter_ * Config.n_s_in_loop
            row_dict['s_optim_lr'] = s_optim.learning_rate(s_optim_iter).numpy()
            row_dict['g_optim_lr'] = g_optim.learning_rate(iter_).numpy()

            pprint.pprint(row_dict)
        # ======================================================================
        if iter_ != 0 and (iter_ % Config.log_freq == 0 or is_last_epoch):
            # calculate acc
            test_accuracy = evaluate(test_data_loader, student).numpy()
            row_dict['test_acc'] = test_accuracy
            logger.log_with_order(row_dict)
            print('Test Accuracy: ', test_accuracy)

            # checkpointing
            ckpt_save_path = ckpt_manager.save()
            print('Saving checkpoint for epoch {} at {}'.format(iter_ + 1, ckpt_save_path))
            with open(os.path.join(savedir, 'chpt', 'iteration'), 'w') as f:
                f.write(str(iter_ + 1))

            s_loss_met.reset_states()
            g_loss_met.reset_states()
            max_g_grad_norm_metric.reset_states()
            max_s_grad_norm_metric.reset_states()

        if iter_ != 0 and (iter_ % 5000 == 0 or is_last_epoch):
            generator.save_weights(join(full_savedir, "generator_i{}.h5".format(iter_)))
            student.save_weights(join(full_savedir, "student_i{}.h5".format(iter_)))
def train(model, train_dataset, val_dataset):
    flag_utils.log_flag()

    """Configure the model for transfer learning"""
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny()
        else:
            model_pretrained = YoloV3()
        model_pretrained.load_weights(FLAGS.weights, by_name=True)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            yolo_utils.freeze_all(model.get_layer('yolo_darknet'),
                                  until_layer=FLAGS.freeze)
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(model_pretrained.get_layer(l.name).get_weights())
                    yolo_utils.freeze_all(l, until_layer=FLAGS.freeze)
    else:
        # All other transfer types require matching classes
        model.load_weights(weight_path=FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            yolo_utils.freeze_all(darknet, until_layer=FLAGS.freeze)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            yolo_utils.freeze_all(model, until_layer=FLAGS.freeze)

    lr_scheduler = CosineDecay(initial_learning_rate=FLAGS.learning_rate,
                               decay_steps=FLAGS.decay_steps,
                               alpha=FLAGS.lr_alpha)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_scheduler)
    loss = [YoloLoss(model.anchors[mask], classes=model.num_classes)
            for mask in model.anchor_masks]

    """Start training"""
    if FLAGS.fit == 'eager_tf':
        run_eager_fit(model, train_dataset, val_dataset, optimizer, loss)
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        checkpoint_path = os_path.join(
            os_path.dirname(FLAGS.weights), "checkpoints",
            'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5')
        os_path.make_dir(os_path.dirname(checkpoint_path))

        plt_his = PlotHistory()
        callbacks = [
            # ReduceLROnPlateau(verbose=1, patience=FLAGS.lr_patience),
            EarlyStopping(patience=FLAGS.early_stop, verbose=1),
            ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=0,
                            save_best_only=False, save_weights_only=True,
                            mode='auto', save_freq='epoch'),
            # TensorBoard(log_dir='../logs'),
            LogHistory(),
            plt_his
        ]

        try:
            history = model.fit(train_dataset,
                                epochs=FLAGS.epochs,
                                callbacks=callbacks,
                                validation_data=val_dataset)
        except Exception as e:
            raise e
        finally:
            plt_his.plot(save_path=os_path.join(
                os_path.dirname(FLAGS.weights),
                f"{datetime.datetime.now().strftime('%Y%m%d-%H%M')}.png"))
    ops=best_enc['module_operations'],
    data_format=model_config['data_format'])

batch_size = 250
# model_config['num_stacks'] = 0
print(model_config)
inputs = tf.keras.layers.Input(x_train.shape[1:], batch_size)
net_outputs = build_keras_model(model_spec, inputs, model_spec.ops, model_config)
model = tf.keras.Model(inputs=inputs, outputs=net_outputs)

num_train_imgs = int(x_train.shape[0] * (1 - val_split))
decay_steps = int(epochs * num_train_imgs / batch_size)
cos_decay = CosineDecay(model_config['learning_rate'], decay_steps)

model.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(cos_decay),
    metrics=['accuracy'])

plot_model(model, to_file='model_nas_plot.png',
           show_shapes=True, show_layer_names=True)
# model.summary()
# print(len(model.layers), "layers")
# print("batch_size:", batch_size)
# print(model_config['data_format'])
# print(x_train.shape)

# Prepare model saving directory.
    )
))

input_shape = (32, 32, input_shape[2])
model.add(arch(weights=None, classes=n_class,
               input_shape=input_shape, include_top=False))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dropout(0.3))
model.add(keras.layers.Dense(2 ** (n_class - 1), activation='relu'))
model.add(keras.layers.Dropout(0.3))
model.add(keras.layers.Dense(2 ** (n_class - 2), activation='relu'))
model.add(keras.layers.Dense(n_class, activation='softmax'))
model.summary()

optim = Adam(learning_rate=args.lr)
model.compile(loss='categorical_crossentropy', optimizer=optim, metrics=['accuracy'])

# callbacks
checkpoint = ModelCheckpoint(model_path, monitor='val_accuracy', verbose=1,
                             save_best_only=True, mode='max')
lrate = LearningRateScheduler(CosineDecay(0.0001, args.epochs))
callbacks = [checkpoint, lrate]

model.fit(x_train, to_categorical(y_train, n_class),
          batch_size=args.batch_size,
          epochs=args.epochs,
          validation_data=(x_val, to_categorical(y_val, n_class)),
          callbacks=callbacks)