def apiConstructor():
    # get trainable variables
    train_vars = tf.trainable_variables()
    if scope is not None:
        opt_vars = [var for var in train_vars if scope in var.name]
    else:
        opt_vars = train_vars

    lr_method = get_lr_scheduler(lr_scheduler, lr)
    global_step = tf.train.get_or_create_global_step()
    momentum = tf.train.MomentumOptimizer(lr_method, mom)

    if clip_grad is not None:
        grads = momentum.compute_gradients(loss, var_list=opt_vars)
        clipped_grads = [(tf.clip_by_value(grad, -1.0 * clip_grad, 1.0 * clip_grad), var)
                         for grad, var in grads]
        train_op = momentum.apply_gradients(clipped_grads, global_step=global_step)
    else:
        train_op = momentum.minimize(loss, global_step=global_step, var_list=opt_vars)

    # set output
    self.set_output('output', train_op)
    self.set_output('global_step', global_step)
def apiConstructor():
    # get trainable variables
    train_vars = tf.trainable_variables()
    if scope is not None:
        opt_vars = [var for var in train_vars if scope in var.name]
    else:
        opt_vars = train_vars

    lr_method = get_lr_scheduler(lr_scheduler, lr)
    global_step = tf.train.get_or_create_global_step()
    adam = tf.train.AdamOptimizer(learning_rate=lr_method, beta1=beta1,
                                  beta2=beta2, epsilon=eps)

    if clip_grad is not None:
        grads = adam.compute_gradients(loss, var_list=opt_vars)
        clipped_grads = [(tf.clip_by_value(grad, -1.0 * clip_grad, 1.0 * clip_grad), var)
                         for grad, var in grads]
        train_op = adam.apply_gradients(clipped_grads, global_step=global_step)
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = adam.minimize(loss, global_step=global_step, var_list=opt_vars)

    # set output
    self.set_output('output', train_op)
    self.set_output('global_step', global_step)
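# A minimal, self-contained sketch (assumption: plain TF1 graph-mode API, not part of the
# DLMDL constructors above) of the same compute_gradients / clip_by_value / apply_gradients
# pattern used by apiConstructor when clip_grad is set. The function name and its arguments
# are placeholders for illustration; gradients that come back as None (variables the loss
# does not touch) are skipped before clipping.
import tensorflow as tf

def build_clipped_train_op(loss, lr=0.001, clip_grad=1.0):
    global_step = tf.train.get_or_create_global_step()
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    grads_and_vars = optimizer.compute_gradients(loss)
    # clip each gradient element-wise, dropping pairs whose gradient is None
    clipped = [(tf.clip_by_value(g, -clip_grad, clip_grad), v)
               for g, v in grads_and_vars if g is not None]
    # applying the clipped pairs also increments the global step
    return optimizer.apply_gradients(clipped, global_step=global_step)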
def compile_time_operation(self, learning_option, cluster):
    """
    define RMSProp optimizer given loss and weights/biases
    """
    # get attr
    # required field
    lr = self.get_attr('lr', default=None)
    if lr is None:
        raise Exception('[DLMDL ERROR]: {0} in {1} layer must be declared.'.format('lr', self.name))
    lr = float(lr)

    # optional field
    decay = float(self.get_attr('decay', default=0.9))
    mom = float(self.get_attr('mom', default=0.0))
    lr_scheduler = self.get_attr('lr_scheduler', default={})  # default will be set later

    lr_dic = get_lr_scheduler(lr_scheduler)

    opt_dic = {'type': 'RMSProp', 'base_lr': lr, 'momentum': mom, 'rms_decay': decay}

    # setting to learning option
    learning_option['opt_dic'] = opt_dic
    learning_option['lr_sched_dic'] = lr_dic
def compile_time_operation(self, learning_option, cluster):
    """
    define AdaDelta optimizer given loss and weights/biases
    refer: ADADELTA: An Adaptive Learning Rate Method
    """
    # get attr
    # required field
    lr = self.get_attr('lr', default=None)
    if lr is None:
        raise Exception('[DLMDL ERROR]: {0} in {1} layer must be declared.'.format('lr', self.name))
    lr = float(lr)

    # optional field
    rho = float(self.get_attr('rho', default=0.95))
    eps = float(self.get_attr('epsilon', default=10 ** -8))
    lr_scheduler = self.get_attr('lr_scheduler', default={})  # default will be set later

    lr_dic = get_lr_scheduler(lr_scheduler)

    opt_dic = {'type': 'AdaDelta', 'base_lr': lr, 'momentum': rho, 'delta': eps}

    # setting to learning option
    learning_option['opt_dic'] = opt_dic
    learning_option['lr_sched_dic'] = lr_dic
def compile_time_operation(self, learning_option, cluster):
    """
    define Adam optimizer given loss and weights/biases
    """
    # get attr
    # required field
    lr = self.get_attr('lr', default=None)
    if lr is None:
        raise Exception('[DLMDL ERROR]: {0} in {1} layer must be declared.'.format('lr', self.name))
    lr = float(lr)

    # optional field
    beta1 = float(self.get_attr('beta1', default=0.9))
    beta2 = float(self.get_attr('beta2', default=0.999))
    eps = float(self.get_attr('epsilon', default=10 ** -8))
    lr_scheduler = self.get_attr('lr_scheduler', default={})  # default will be set later

    lr_dic = get_lr_scheduler(lr_scheduler)

    opt_dic = {'type': 'Adam', 'base_lr': lr, 'momentum': beta1, 'momentum2': beta2, 'delta': eps}

    # setting to learning option
    learning_option['opt_dic'] = opt_dic
    learning_option['lr_sched_dic'] = lr_dic
def compile_time_operation(self, learning_option, cluster):
    """
    define stochastic gradient descent (SGD) optimizer given loss and weights/biases
    """
    # get attr
    # required field
    lr = self.get_attr('lr', default=None)
    if lr is None:
        raise Exception('[DLMDL ERROR]: {0} in {1} layer must be declared.'.format('lr', self.name))
    lr = float(lr)

    # optional field
    lr_scheduler = self.get_attr('lr_scheduler', default={})  # default will be set later

    lr_dic = get_lr_scheduler(lr_scheduler)

    opt_dic = {'type': 'SGD', 'base_lr': lr}

    # setting to learning option
    learning_option['opt_dic'] = opt_dic
    learning_option['lr_sched_dic'] = lr_dic
def compile_time_operation(self, learning_option, cluster):
    """
    define AdaGrad optimizer given loss and weights/biases
    refer: Adaptive Subgradient Methods for Online Learning and Stochastic Optimization,
           Journal of Machine Learning Research 2011
    """
    # get attr
    # required field
    lr = self.get_attr('lr', default=None)
    if lr is None:
        raise Exception('[DLMDL ERROR]: {0} in {1} layer must be declared.'.format('lr', self.name))
    lr = float(lr)

    # optional field
    lr_scheduler = self.get_attr('lr_scheduler', default={})  # default will be set later

    lr_dic = get_lr_scheduler(lr_scheduler)

    opt_dic = {'type': 'AdaGrad', 'base_lr': lr}

    # setting to learning option
    learning_option['opt_dic'] = opt_dic
    learning_option['lr_sched_dic'] = lr_dic
def create_lr_scheduler(args: argparse.Namespace,
                        resume_training: bool,
                        training_state_dir: str) -> lr_scheduler.LearningRateScheduler:
    """
    Create the learning rate scheduler.

    :param args: Arguments as returned by argparse.
    :param resume_training: When True, the scheduler will be loaded from disk.
    :param training_state_dir: Directory where the training state is stored.
    :return: The learning rate scheduler.
    """
    learning_rate_half_life = none_if_negative(args.learning_rate_half_life)
    # TODO: The loading for continuation of the scheduler is done separately from the other parts
    if not resume_training:
        lr_scheduler_instance = lr_scheduler.get_lr_scheduler(args.learning_rate_scheduler_type,
                                                              args.checkpoint_frequency,
                                                              learning_rate_half_life,
                                                              args.learning_rate_reduce_factor,
                                                              args.learning_rate_reduce_num_not_improved,
                                                              args.learning_rate_schedule,
                                                              args.learning_rate_warmup)
    else:
        with open(os.path.join(training_state_dir, C.SCHEDULER_STATE_NAME), "rb") as fp:
            lr_scheduler_instance = pickle.load(fp)
    return lr_scheduler_instance
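# A hedged sketch of the save-side counterpart to the resume branch of create_lr_scheduler
# above. It assumes the same training_state_dir layout and the C.SCHEDULER_STATE_NAME
# constant already referenced there; the helper name save_lr_scheduler is hypothetical and
# not part of the original module.
import os
import pickle

def save_lr_scheduler(lr_scheduler_instance, training_state_dir: str) -> None:
    # persist the scheduler so a later run with resume_training=True can pickle.load it back
    with open(os.path.join(training_state_dir, C.SCHEDULER_STATE_NAME), "wb") as fp:
        pickle.dump(lr_scheduler_instance, fp)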
def __init__(self, args):
    super(Trainer, self).__init__()
    self.args = args

    # create the experiment directory if it does not exist yet
    self.experiment_dir = args.experiment_dir
    if not osp.exists(self.experiment_dir):
        os.makedirs(self.experiment_dir)
        print("The experiment dir has been created: {}".format(self.experiment_dir))

    self.trainer_log = TrainerLog(args=args, append=True)
    self.ctx = set_ctx(args=args)
    self.check_point = CheckPoint(args=args, trainer_log=self.trainer_log, ctx=self.ctx)
    self.train_loader, self.test_loader = dataloader(args=args)

    self.lr_scheduler = None
    self.optimizer = None
    self.model = None

    if self.train_loader is not None:
        self.train_samples_num = self.train_loader._dataset.__len__()
        print("train dataset samples: {}".format(self.train_samples_num))
    self.test_samples_num = self.test_loader._dataset.__len__()
    print("test dataset samples: {}".format(self.test_samples_num))

    self.resume_epoch = 0
    if args.only_test is False:
        # training mode: set up logging, model, lr scheduler, optimizer and losses
        if args.use_tensorboard is True:
            from tensorboardX import SummaryWriter
            self.tb_writer = SummaryWriter(log_dir=osp.join(args.experiment_dir, 'tensorboard'))
        else:
            self.tb_writer = None

        if args.resume is True:
            # resume from a previously saved checkpoint
            self.checkpoint_epoch = args.checkpoint_epoch
            self.model = get_networks(args=args, ctx=self.ctx)
            self.resume_epoch = self.check_point.load_checkpoint_parameters(
                epoch=self.checkpoint_epoch, model=self.model)
        else:
            # train from scratch: initialize the classifier head
            self.model = get_networks(args=args, ctx=self.ctx)
            self.model.classifier.initialize(ctx=self.ctx)

        self.lr_scheduler = get_lr_scheduler(args=args, train_loader=self.train_loader)
        self.optimizer, self.trainer = set_optimizer(
            model=self.model, lr_scheduler=self.lr_scheduler, args=args)
        self.loss_functions = set_loss(args=args, tb_writer=self.tb_writer)
        self.current_epoch = None
    elif args.only_test is True:
        # test-only mode: load the checkpoint to evaluate
        self.checkpoint_epoch = args.checkpoint_epoch
        self.model = get_networks(args=args, ctx=self.ctx)
        self.epoch_test = args.epoch_test
        _ = self.check_point.load_checkpoint_parameters(
            epoch=self.checkpoint_epoch, model=self.model, epoch_test=self.epoch_test)

    if self.lr_scheduler is not None:
        self.trainer_log.print_use_lr_scheduler()
    if self.optimizer is not None and self.trainer is not None:
        self.trainer_log.print_use_optimizer()
    if self.model is not None:
        self.trainer_log.print_use_network()

    self.test_accuracy_metric = metric.Accuracy()
    self.epochs = args.epochs
    self.train_total = 0
    self.best_accuracy = None
    self.current_accuracy = None