def optimizer(self):
    lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
    tf.summary.scalar('learning_rate-summary', lr)
    opt = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
    if self.accum_grad != 1:
        opt = AccumGradOptimizer(opt, self.accum_grad)
    return opt
def optimizer(self): """ Sec 5.1: We use Nesterov momentum with m of 0.9. Sec 3: momentum correction Tensorflow's momentum optimizer does not need correction. """ lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False) tf.summary.scalar('learning_rate-summary', lr) opt = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True) if self.accum_grad != 1: opt = AccumGradOptimizer(opt, self.accum_grad) return opt
def train(self, batch_size=4, iter_size=1, learning_rate=1e-4, model_path='model/model.ckpt',
          start_step=0, max_steps=1000000, weight_decay=1e-4, pretrain=True, new_train=False,
          data_path='./ADEChallengeData2016/', crop_height=713, crop_width=713,
          print_every=10, eval_every=1000, flip=False, blur=False):
    # define loss
    self.create_loss_function(weight_decay=weight_decay)
    # learning rate, optimizer and train op
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    optimizer = AccumGradOptimizer(optimizer, iter_size)
    self.train_op = slim.learning.create_train_op(self.loss, optimizer)
    # sess
    gpu_options = tf.GPUOptions(allow_growth=True)
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    # init or restore
    if new_train:
        self.sess.run(tf.global_variables_initializer())
        if pretrain:
            saver = tf.train.Saver(var_list=self.vars)
            saver.restore(self.sess, self.pretrained_path)
            print('Using pretrained model:', self.pretrained_path)
        print('Initialize all parameters')
    else:
        saver = tf.train.Saver()
        saver.restore(self.sess, model_path)
        print('Restore from', model_path)
    saver = tf.train.Saver()
    # dataset
    ade20k = ADE20k(root_path=data_path, mode='train', crop_height=crop_height,
                    crop_width=crop_width, flip=flip, blur=blur)
    max_miou = 0
    for iter in range(start_step, max_steps):
        start_time = time.time()
        batch_x, batch_y = ade20k.get_batch_fast(batch_size=batch_size)
        feed_dict = {self.input_op: batch_x, self.label_op: batch_y, self.is_training: True}
        self.sess.run(self.train_op, feed_dict=feed_dict)
        if iter % print_every == 0:
            loss_value, cross_entropy_value = self.sess.run([self.loss, self.cross_entropy],
                                                            feed_dict=feed_dict)
            print('iter:', iter, 'loss:', round(loss_value, 3),
                  'cross_entropy:', round(cross_entropy_value, 3),
                  'time:', round((time.time() - start_time), 3), 's')
        if iter % eval_every == 0 and iter > 0:
            miou_value = self.eval(iter, data_path=data_path, restore_path=model_path)
            if miou_value > max_miou:
                saver.save(self.sess, 'model/model.ckpt')
                max_miou = miou_value
            print('Current Max mIoU:', max_miou)
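# Note (illustrative assumption, not from the snippet above): wrapping AdamOptimizer
# in AccumGradOptimizer(optimizer, iter_size) means gradients from iter_size
# consecutive minibatches are accumulated and applied in a single weight update,
# so each update effectively sees batch_size * iter_size examples (e.g. 4 * 8 = 32
# for batch_size=4, iter_size=8) while peak GPU memory stays that of a batch of 4.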
count_neg = tf.reduce_sum(Neg_Mask)
count_pos = tf.reduce_sum(Pos_Mask)
beta = count_neg / (count_neg + 1.1 * count_pos)
pos_weight = beta / (1 - beta)
cost = tf.nn.weighted_cross_entropy_with_logits(logits=pred, targets=y, pos_weight=pos_weight)
cost = tf.reduce_mean(cost * (1 - beta) * (Pos_Mask + Neg_Mask))
zero = tf.equal(count_pos, 0.0)
loss_context = tf.where(zero, 0.0, cost)
loss_context = tf.reduce_mean(loss_context) + regularizer
tf.summary.scalar('loss_context', loss_context)

learning_rate = tf.train.exponential_decay(args.learning_rate, global_steps, 40000, 0.1, staircase=True)
AdamOpt = tf.train.AdamOptimizer(learning_rate, 0.9)
# like caffe iter_size
AccumGradOpt = AccumGradOptimizer(AdamOpt, 2)
opt = AccumGradOpt.minimize(loss_context, var_list=vgg_vars + gen_vars)

merged = tf.summary.merge_all()
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
config = tf.ConfigProto(gpu_options=gpu_options)
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(write_version=tf.train.SaverDef.V1)
    train_writer = tf.summary.FileWriter('summary_drop', sess.graph)
    for i, k in enumerate(keys[:25]):
        sess.run(vgg.parameters[i].assign(weights[k]))
    # checkpoint = tf.train.get_checkpoint_state("models/BSDS/")