def recreate_bn_model(input_imgs_tensor, is_training=True, crop_size=(600, 800),
                      snapshot_dir='./snapshots/',
                      restore_from='./model/icnet_cityscapes_trainval_90k_bnnomerge.npy'):
    """Build the ICNet_BN + reclassifier graph and load its weights.

    The channels of ``input_imgs_tensor`` are split along axis 3, reversed
    (first/third channel swapped), cast to float32 and ``IMG_MEAN`` is
    subtracted before the tensor is fed to the network.

    Args:
        input_imgs_tensor: 4-D image tensor with exactly three channels on
            axis 3 (it is split into three along that axis).
        is_training: Forwarded to ``ICNet_BN``.
        crop_size: ``(height, width)`` to crop the prediction to, starting at
            the top-left corner, or ``None`` to skip cropping.
        snapshot_dir: Directory searched for a TensorFlow checkpoint.
        restore_from: ``.npy`` weights file used when ``snapshot_dir`` holds
            no checkpoint.

    Returns:
        ``(sess, pred)``: the session holding the loaded weights and the
        output tensor, which is named ``'output_2positiveclasses'``.
    """
    # Reverse the channel order on axis 3 before mean subtraction.
    img_r, img_g, img_b = tf.split(axis=3, num_or_size_splits=3,
                                   value=input_imgs_tensor)
    imgs = tf.cast(tf.concat(axis=3, values=[img_b, img_g, img_r]),
                   dtype=tf.float32)
    imgs = imgs - IMG_MEAN

    net = ICNet_BN({'data': imgs}, is_training=is_training, num_classes=19,
                   filter_scale=1)
    # Only the last of the three reclassifier outputs is used here.
    _, _, pred = extend_reclassifier(net)

    # Captured before the session is created; these are the variables a
    # checkpoint is expected to provide.
    restore_var = tf.global_variables()

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    ckpt = tf.train.get_checkpoint_state(snapshot_dir)
    if ckpt and ckpt.model_checkpoint_path:
        # Every variable comes from the checkpoint, so no initializer is run.
        print('restoring from', ckpt.model_checkpoint_path, file=sys.stderr)
        loader = tf.train.Saver(var_list=restore_var)
        loader.restore(sess, ckpt.model_checkpoint_path)
    else:
        # Initialise everything first, then overwrite with the .npy weights.
        print('restoring from', restore_from, file=sys.stderr)
        sess.run(tf.global_variables_initializer())
        net.load(restore_from, sess)

    if crop_size is not None:
        pred = tf.image.crop_to_bounding_box(pred, 0, 0,
                                             crop_size[0], crop_size[1])

    # Give the output a stable name so it can be looked up in the graph.
    pred = tf.identity(pred, name='output_2positiveclasses')
    return sess, pred
def main():
    """Evaluate a pretrained ICNet model and print the streaming mean IoU.

    Reads images through ``ImageReader``, runs the network one image at a
    time for ``num_steps`` steps and accumulates the mean IoU over all pixels
    whose label is ``<= num_classes - 1``. The weights to load are selected
    by ``args.model``.
    """
    args = get_arguments()
    print(args)

    coord = tf.train.Coordinator()

    tf.reset_default_graph()
    with tf.name_scope("create_inputs"):
        reader = ImageReader(DATA_DIRECTORY, DATA_LIST_PATH, input_size, None,
                             None, ignore_label, IMG_MEAN, coord)
        image, label = reader.image, reader.label
    # Add one batch dimension.
    image_batch = tf.expand_dims(image, dim=0)
    label_batch = tf.expand_dims(label, dim=0)

    # Create network.
    if args.model[-2:] == 'bn':
        net = ICNet_BN({'data': image_batch}, num_classes=num_classes)
    else:
        net = ICNet({'data': image_batch}, num_classes=num_classes)

    # Predictions: upsample the logits to the input size, then take the
    # arg-max over axis 3.
    raw_output = net.layers['conv6_cls']
    raw_output_up = tf.image.resize_bilinear(raw_output, size=input_size,
                                             align_corners=True)
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    raw_pred = tf.expand_dims(raw_output_up, dim=3)

    # mIoU: flatten, then keep only pixels whose label is a valid class index.
    pred_flatten = tf.reshape(raw_pred, [-1])
    raw_gt = tf.reshape(label_batch, [-1])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    pred = tf.gather(pred_flatten, indices)

    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(
        pred, gt, num_classes=num_classes)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    sess.run(tf.global_variables_initializer())
    # The streaming metric keeps its accumulators in local variables.
    sess.run(tf.local_variables_initializer())

    if args.model == 'train':
        print('Restore from train30k model...')
        net.load(model_train30k, sess)
    elif args.model == 'trainval':
        print('Restore from trainval90k model...')
        net.load(model_trainval90k, sess)
    elif args.model == 'train_bn':
        print('Restore from train30k bnnomerge model...')
        net.load(model_train30k_bn, sess)
    elif args.model == 'trainval_bn':
        print('Restore from trainval90k bnnomerge model...')
        net.load(model_trainval90k_bn, sess)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    try:
        for step in range(num_steps):
            # Fetch the predictions together with the metric update so both
            # come from the same dequeued image.
            sess.run([pred, update_op])

            if step > 0 and args.measure_time:
                calculate_time(sess, net, raw_pred)

            if step % 10 == 0:
                print('Finish {0}/{1}'.format(step, num_steps))

        # NOTE(review): the original indentation was lost; this final report
        # is assumed to sit after the loop -- confirm.
        print('step {0} mIoU: {1}'.format(step, sess.run(mIoU)))
    finally:
        # Always stop the reader threads and release the session, even when
        # evaluation fails part-way through.
        coord.request_stop()
        coord.join(threads)
        sess.close()
def main():
    """Create the model and start the training.

    Builds ICNet_BN with three cascade losses plus L2 weight decay, trains it
    with momentum SGD under a poly learning-rate policy, writes TensorBoard
    summaries every step and saves a checkpoint every
    ``args.save_pred_every`` steps. Training starts at ``START_STEP``.
    """
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    coord = tf.train.Coordinator()

    with tf.name_scope("create_inputs"):
        reader = ImageReader(DATA_DIR, DATA_LIST_PATH, input_size,
                             args.random_scale, args.random_mirror,
                             args.ignore_label, IMG_MEAN, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    net = ICNet_BN({'data': image_batch}, is_training=True,
                   num_classes=args.num_classes,
                   filter_scale=args.filter_scale)

    sub4_out = net.layers['sub4_out']
    sub24_out = net.layers['sub24_out']
    sub124_out = net.layers['conv6_cls']

    # Captured before the optimizer exists, so its slot variables are not
    # expected in the checkpoint that is restored below.
    restore_var = tf.global_variables()
    # beta/gamma are trained only when explicitly requested.
    all_trainable = [
        v for v in tf.trainable_variables()
        if ('beta' not in v.name and 'gamma' not in v.name)
        or args.train_beta_gamma
    ]

    # NOTE(review): the original indentation was lost; the extent of this
    # name scope (which prefixes the summary tags) is assumed -- confirm.
    with tf.name_scope('loss'):
        loss_sub4 = create_loss(sub4_out, label_batch, args.num_classes,
                                args.ignore_label)
        loss_sub24 = create_loss(sub24_out, label_batch, args.num_classes,
                                 args.ignore_label)
        loss_sub124 = create_loss(sub124_out, label_batch, args.num_classes,
                                  args.ignore_label)
        l2_losses = [
            args.weight_decay * tf.nn.l2_loss(v)
            for v in tf.trainable_variables() if 'weights' in v.name
        ]
        reduced_loss = (LAMBDA1 * loss_sub4 + LAMBDA2 * loss_sub24 +
                        LAMBDA3 * loss_sub124 + tf.add_n(l2_losses))

        tf.summary.scalar('sub4', loss_sub4)
        tf.summary.scalar('sub24', loss_sub24)
        tf.summary.scalar('sub124', loss_sub124)
        tf.summary.scalar('total_loss', reduced_loss)

    # Using Poly learning rate policy
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    # Gets moving_mean and moving_variance update operations from
    # tf.GraphKeys.UPDATE_OPS
    if args.update_mean_var == False:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        grads = tf.gradients(reduced_loss, all_trainable)
        train_op = opt_conv.apply_gradients(zip(grads, all_trainable))

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=20)

    summ = tf.summary.merge_all()
    tenboard_dir = tfboard_dir + str(LEARNING_RATE) + '_' + str(NUM_STEPS)
    writer = tf.summary.FileWriter(tenboard_dir)
    writer.add_graph(sess.graph)

    ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        # NOTE(review): the path found in ``ckpt`` is ignored and a fixed
        # snapshot keyed by START_STEP is restored instead. Kept as-is
        # because the loop below also starts at START_STEP -- confirm this
        # is intentional.
        load(loader, sess,
             './snapshots/3wDataSet/model.ckpt-' + str(START_STEP))
    else:
        print('Restore from pre-trained model...')
        net.load(args.restore_from, sess)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    fetches = [summ, reduced_loss, loss_sub4, loss_sub24, loss_sub124,
               train_op]
    try:
        # Iterate over training steps.
        for step in range(START_STEP, args.num_steps):
            start_time = time.time()

            s, loss_value, loss1, loss2, loss3, _ = sess.run(
                fetches, feed_dict={step_ph: step})
            # Checkpoint periodically; the summary is written on every step.
            if step % args.save_pred_every == 0:
                save(saver, sess, args.snapshot_dir, step)
            writer.add_summary(s, step)

            duration = time.time() - start_time
            print(
                'step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f} ({:.3f} sec/step)'
                .format(step, loss_value, loss1, loss2, loss3, duration))
    finally:
        # Stop the reader threads and flush pending summaries even when a
        # training step raises.
        coord.request_stop()
        coord.join(threads)
        writer.close()
        sess.close()
def main():
    """Run ICNet inference on a single image and save the colourised result.

    The image at ``args.img_path`` is loaded, preprocessed and passed through
    the network chosen by ``args.model``. The decoded prediction is written
    to ``args.save_dir`` under the file name returned by ``load_img``.
    """
    args = get_arguments()

    img, filename = load_img(args.img_path)
    shape = img.shape[0:2]

    x = tf.placeholder(dtype=tf.float32, shape=img.shape)
    img_tf = preprocess(x)
    img_tf, n_shape = check_input(img_tf)

    # Create network. Both the '*bn' presets and 'others' (checkpoint
    # restore) use the batch-norm variant.
    if args.model[-2:] == 'bn' or args.model == 'others':
        net = ICNet_BN({'data': img_tf}, num_classes=num_classes)
    else:
        net = ICNet({'data': img_tf}, num_classes=num_classes)

    raw_output = net.layers['conv6_cls']

    # Predictions: upsample to the shape returned by check_input, crop back
    # to the original image size, then take the arg-max over axis 3.
    raw_output_up = tf.image.resize_bilinear(raw_output, size=n_shape,
                                             align_corners=True)
    raw_output_up = tf.image.crop_to_bounding_box(raw_output_up, 0, 0,
                                                  shape[0], shape[1])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = decode_labels(raw_output_up, shape, num_classes)

    # Init tf Session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    try:
        sess.run(tf.global_variables_initializer())

        if args.model == 'train':
            print('Restore from train30k model...')
            net.load(model_train30k, sess)
        elif args.model == 'trainval':
            print('Restore from trainval90k model...')
            net.load(model_trainval90k, sess)
        elif args.model == 'train_bn':
            print('Restore from train30k bnnomerge model...')
            net.load(model_train30k_bn, sess)
        elif args.model == 'trainval_bn':
            print('Restore from trainval90k bnnomerge model...')
            net.load(model_trainval90k_bn, sess)
        else:
            ckpt = tf.train.get_checkpoint_state(snapshot_dir)
            if ckpt and ckpt.model_checkpoint_path:
                loader = tf.train.Saver(var_list=tf.global_variables())
                load(loader, sess, ckpt.model_checkpoint_path)
            else:
                # Previously silent: make it visible that the prediction
                # below comes from freshly initialised weights.
                print('No checkpoint found in {}; running with initial '
                      'weights.'.format(snapshot_dir))

        preds = sess.run(pred, feed_dict={x: img})
    finally:
        sess.close()

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    # Join instead of concatenating so a save_dir without a trailing
    # separator still writes inside the directory. Assumes ``filename`` is a
    # bare file name -- TODO confirm against load_img.
    misc.imsave(os.path.join(args.save_dir, filename), preds[0])
def main():
    """Create the model and start the training.

    Builds a training ICNet_BN and a second copy under the ``"val"`` variable
    scope that is fed from a separate validation reader. Two training modes
    exist, selected by ``args.cross_val``:

    * unset: train for ``args.num_steps`` steps, optionally validating every
      ``args.save_pred_every`` steps with early stopping after
      ``args.patience`` consecutive increases of the validation loss;
    * set: treat ``args.cross_val`` as the index (1-5) of the held-out data
      list, train on the other four and stop after four non-improving
      validations.

    Both modes stop when the elapsed time comes within 300 seconds of
    ``args.max_runtime`` hours.

    NOTE(review): the original indentation of this function was lost; the
    nesting below is reconstructed from syntax and a few spots (marked
    inline) should be confirmed against the real file.
    """
    args = get_arguments()
    print("SAVE TO " + args.snapshot_dir)
    # Data lists for the five cross-validation folds, keyed by fold number.
    datalists_epoch = {
        1: args.datalist_path_epoch1,
        2: args.datalist_path_epoch2,
        3: args.datalist_path_epoch3,
        4: args.datalist_path_epoch4,
        5: args.datalist_path_epoch5
    }
    if args.cross_val:
        # Hold out one fold for validation and train on the remaining four.
        val_epoch = int(args.cross_val)
        train_epochs = [1, 2, 3, 4, 5]
        train_epochs.remove(val_epoch)
        train_lists = [datalists_epoch[i] for i in train_epochs]
        val_lists = datalists_epoch[val_epoch]
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    # max_runtime is multiplied by 3600, i.e. it is given in hours.
    max_runtime = args.max_runtime
    max_time_seconds = 3600 * max_runtime
    # NOTE(review): epochs_until_val is never read in this function.
    epochs_until_val = 3

    # Select the module-level class weights by dataset name. An unrecognised
    # name leaves the global untouched.
    global dataset_class_weights
    if args.weights_for_dataset is None:
        dataset_class_weights = None
    elif args.weights_for_dataset == 'de_top15':
        dataset_class_weights = weights_detop15
    elif args.weights_for_dataset == 'eu_top25':
        dataset_class_weights = weights_eutop25
    elif args.weights_for_dataset == 'world2k':
        dataset_class_weights = weights_world2k
    elif args.weights_for_dataset == 'kaggle_dstl':
        dataset_class_weights = weights_kaggledstl
    elif args.weights_for_dataset == 'vaihingen':
        dataset_class_weights = weights_vaihingen
    elif args.weights_for_dataset == 'de_top15_nores':
        dataset_class_weights = weights_detop15_nores
    elif args.weights_for_dataset == 'eu_top25_nores':
        dataset_class_weights = weights_eutop25_nores
    elif args.weights_for_dataset == 'world2k_nores':
        dataset_class_weights = weights_world2k_nores

    coord = tf.train.Coordinator()

    if args.cross_val:
        with tf.name_scope("create_inputs"):
            reader = ImageReader(args.datadir, train_lists, input_size,
                                 args.random_scale, args.random_mirror,
                                 args.ignore_label, IMG_MEAN, coord)
            image_batch, label_batch = reader.dequeue(args.batch_size)
            # for validation
            # NOTE(review): the validation reader receives the same
            # random_scale / random_mirror arguments as the training reader.
            reader_val = ImageReader(args.datadir, val_lists, input_size,
                                     args.random_scale, args.random_mirror,
                                     args.ignore_label, IMG_MEAN, coord)
            image_batch_val, label_batch_val = reader_val.dequeue(
                args.batch_size)
    else:
        with tf.name_scope("create_inputs"):
            reader = ImageReader(args.datadir, args.datalist_path, input_size,
                                 args.random_scale, args.random_mirror,
                                 args.ignore_label, IMG_MEAN, coord)
            image_batch, label_batch = reader.dequeue(args.batch_size)
            # for validation
            reader_val = ImageReader(args.datadir, args.datalist_path_val,
                                     input_size, args.random_scale,
                                     args.random_mirror, args.ignore_label,
                                     IMG_MEAN, coord)
            image_batch_val, label_batch_val = reader_val.dequeue(
                args.batch_size)

    net = ICNet_BN({'data': image_batch},
                   is_training=True,
                   num_classes=args.num_classes,
                   filter_scale=args.filter_scale)
    # Second copy of the network with its own variables under "val".
    # NOTE(review): it is also built with is_training=True.
    with tf.variable_scope("val"):
        net_val = ICNet_BN({'data': image_batch_val},
                           is_training=True,
                           num_classes=args.num_classes,
                           filter_scale=args.filter_scale)

    sub4_out = net.layers['sub4_out']
    sub24_out = net.layers['sub24_out']
    sub124_out = net.layers['conv6_cls']

    # early stop variables
    # Kept as graph variables so they are stored in checkpoints by the saver
    # below (which covers all global variables).
    last_val_loss_tf = tf.Variable(10000.0, name="last_loss")
    steps_total_tf = tf.Variable(0, name="steps_total")
    val_increased_t_tf = tf.Variable(0, name="loss_increased_t")

    if args.not_restore_last:
        # Skip the output layers and everything in the validation copy.
        # NOTE(review): the filter is a substring test on the variable name,
        # so any variable whose name merely contains 'val' is skipped too.
        restore_var = [
            v for v in tf.global_variables()
            if 'conv6_cls' not in v.name and 'val' not in v.name
            and 'sub4_out' not in v.name and 'sub24_out' not in v.name
            and 'sub124_out' not in v.name
        ]
    else:
        # to load last layer, the line 78 in network.py has to be removed too and ignore_missing set to False
        # see https://github.com/hellochick/ICNet-tensorflow/issues/50 BCJuan
        # don't restore val vars
        # NOTE(review): this branch draws from trainable_variables(), so the
        # non-trainable early-stop variables above are not in restore_var.
        restore_var = [
            v for v in tf.trainable_variables() if 'val' not in v.name
        ]  # tf.global_variables()
    # don't train val variables
    all_trainable = [
        v for v in tf.trainable_variables()
        if (('beta' not in v.name and 'gamma' not in v.name)
            or args.train_beta_gamma) and 'val' not in v.name
    ]

    # Training losses: one per cascade output plus L2 decay on the training
    # network's 'weights' variables.
    loss_sub4 = create_loss(sub4_out, label_batch, args.num_classes,
                            args.ignore_label)
    loss_sub24 = create_loss(sub24_out, label_batch, args.num_classes,
                             args.ignore_label)
    loss_sub124 = create_loss(sub124_out, label_batch, args.num_classes,
                              args.ignore_label)

    l2_losses = [
        args.weight_decay * tf.nn.l2_loss(v)
        for v in tf.trainable_variables()
        if ('weights' in v.name and 'val' not in v.name)
    ]
    reduced_loss = LAMBDA1 * loss_sub4 + LAMBDA2 * loss_sub24 + LAMBDA3 * loss_sub124 + tf.add_n(
        l2_losses)

    ####################### Loss Calculation FOR VALIDATION
    # Same construction on the "val" copy of the network.

    sub4_out_val = net_val.layers['sub4_out']
    sub24_out_val = net_val.layers['sub24_out']
    sub124_out_val = net_val.layers['conv6_cls']

    loss_sub4_val = create_loss(sub4_out_val, label_batch_val,
                                args.num_classes, args.ignore_label)
    loss_sub24_val = create_loss(sub24_out_val, label_batch_val,
                                 args.num_classes, args.ignore_label)
    loss_sub124_val = create_loss(sub124_out_val, label_batch_val,
                                  args.num_classes, args.ignore_label)
    l2_losses_val = [
        args.weight_decay * tf.nn.l2_loss(v)
        for v in tf.trainable_variables()
        if ('weights' in v.name and 'val' in v.name)
    ]

    reduced_loss_val = LAMBDA1 * loss_sub4_val + LAMBDA2 * loss_sub24_val + LAMBDA3 * loss_sub124_val + tf.add_n(
        l2_losses_val)
    ####################### End Loss Calculation FOR VALIDATION

    # Using Poly learning rate policy
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    # Gets moving_mean and moving_variance update operations from tf.GraphKeys.UPDATE_OPS
    if args.update_mean_var == False:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        grads = tf.gradients(reduced_loss, all_trainable)
        train_op = opt_conv.apply_gradients(zip(grads, all_trainable))

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)
    # start time
    glob_start_time = time.time()

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)
    # When restore_from is not a .npy file it is treated as a checkpoint
    # directory; otherwise resume from snapshot_dir if a checkpoint exists.
    if '.npy' not in args.restore_from:
        ckpt = tf.train.get_checkpoint_state(args.restore_from)
    else:
        ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
    if ckpt and ckpt.model_checkpoint_path:
        vars_to_restore = get_tensors_in_checkpoint_file(
            file_name=ckpt.model_checkpoint_path)
        # thanks to https://stackoverflow.com/a/50216949/8862202
        # v.name[:-2] to transform 'conv1_1_3x3_s2/weights:0' to 'conv1_1_3x3_s2/weights'
        # Restore only the variables that are actually present in the
        # checkpoint.
        vars_to_restore = [
            v for v in restore_var
            if 'val' not in v.name and v.name[:-2] in vars_to_restore
        ]
        loader = tf.train.Saver(var_list=vars_to_restore)
        # NOTE(review): load_step is parsed but never used afterwards.
        load_step = int(
            os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('Restore from pre-trained model...')
        net.load(args.restore_from, sess)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    if args.reset_patience:
        # Reset the stored early-stopping counter.
        z = tf.assign(val_increased_t_tf, 0)
        sess.run(z)

    # NOTE(review): these prints are assumed to be unconditional -- confirm.
    print(sess.run(last_val_loss_tf))
    print(sess.run(steps_total_tf))
    print(sess.run(val_increased_t_tf))

    if not args.cross_val:
        val_epoch_len = len(reader_val.image_list)
        # NOTE(review): integer division gives 0 when the validation list is
        # shorter than one batch, which makes the average below divide by
        # zero.
        val_num_steps = val_epoch_len // args.batch_size
        # Iterate over training steps.
        # Early-stopping state is read back from the graph variables and the
        # step counter continues from the stored total.
        last_val_loss = sess.run(last_val_loss_tf)
        val_increased_t = sess.run(val_increased_t_tf)
        best_model_step = 0
        total_steps = sess.run(steps_total_tf)
        for step in range(total_steps, args.num_steps + total_steps):
            start_time = time.time()
            feed_dict = {step_ph: step}
            if step % args.save_pred_every == 0:

                # validating
                if args.validate:
                    print("validating: ")
                    # presumably copies the training weights into the "val"
                    # variables -- verify against print_assign_vars
                    print_assign_vars(sess)
                    print("Assigned vars for validation. ")
                    loss_sum = 0
                    for val_step in trange(val_num_steps,
                                           desc='validation',
                                           leave=True):
                        loss_value_v, loss1_v, loss2_v, loss3_v = sess.run(
                            [
                                reduced_loss_val, loss_sub4_val,
                                loss_sub24_val, loss_sub124_val
                            ],
                            feed_dict=feed_dict)
                        loss_sum = loss_sum + loss_value_v
                    loss_avg = loss_sum / val_num_steps

                    # NOTE(review): the ``else`` is assumed to pair with this
                    # outer ``if`` (reset the counter when the loss did not
                    # increase) -- confirm.
                    if loss_avg > last_val_loss:
                        val_increased_t = val_increased_t + 1
                        if val_increased_t >= args.patience:
                            # NOTE(review): the message says "4 validations"
                            # regardless of args.patience.
                            print(
                                "Terminated Training, Best Model (at step %d) saved 4 validations ago"
                                % best_model_step)
                            # Empty marker file signalling completion.
                            f = open("./FINISHED_ICNET", "w+")
                            f.close()
                            break
                    else:
                        val_increased_t = 0
                        best_model_step = step

                    print(
                        'VALIDATION COMPLETE step {:d}\tVal_Loss Increased {:d}/{:d} times\t total loss = {:.3f}'
                        ' last loss = {:.3f}'.format(step, val_increased_t,
                                                     args.patience, loss_avg,
                                                     last_val_loss))

                    # The comparison is against the previous validation, not
                    # the best one so far.
                    last_val_loss = loss_avg
                    # Write the early-stopping state back into the graph so
                    # the checkpoint saved below carries it.
                    # NOTE(review): tf.assign is called on every validation,
                    # adding new ops to the graph each time.
                    steps_assign = tf.assign(steps_total_tf, step)
                    last_val_assign = tf.assign(last_val_loss_tf,
                                                last_val_loss)
                    increased_assign = tf.assign(val_increased_t_tf,
                                                 val_increased_t)
                    print("loss avg " + str(loss_avg))
                    print(sess.run(steps_assign))
                    print(sess.run(last_val_assign))
                    print(sess.run(increased_assign))

                # Saving
                loss_value, loss1, loss2, loss3, _ = sess.run(
                    [
                        reduced_loss, loss_sub4, loss_sub24, loss_sub124,
                        train_op
                    ],
                    feed_dict=feed_dict)
                save(saver, sess, args.snapshot_dir, step)

                # check if max run time is already over
                # Stops 300 seconds before the limit.
                elapsed = time.time() - glob_start_time
                if (elapsed + 300) > max_time_seconds:
                    print("Training stopped: max run time elapsed")
                    # NOTE(review): raises if the marker file does not exist.
                    os.remove("./RUNNING_ICNET")
                    break
            else:
                loss_value, loss1, loss2, loss3, _ = sess.run(
                    [
                        reduced_loss, loss_sub4, loss_sub24, loss_sub124,
                        train_op
                    ],
                    feed_dict=feed_dict)
            duration = time.time() - start_time
            print(
                'step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f} ({:.3f} sec/step)'
                .format(step, loss_value, loss1, loss2, loss3, duration))
        train_duration = time.time() - glob_start_time
        print('Total training time: ' + str(train_duration))
    else:
        # Training with cross validation
        print("Training-Mode CROSS VALIDATION")
        val_epoch_len = len(reader_val.image_list)
        val_num_steps = val_epoch_len // args.batch_size
        print("Val epoch length %d, Num steps %d" %
              (val_epoch_len, val_num_steps))
        last_val_loss = math.inf
        val_not_imp_t = 0

        # train
        # The loop is bounded by a fixed large step count; it normally ends
        # through early stopping or the runtime limit.
        for step in range(1000000):
            feed_dict = {step_ph: step}
            train_start = time.time()
            loss_value, loss1, loss2, loss3, _ = sess.run(
                [reduced_loss, loss_sub4, loss_sub24, loss_sub124, train_op],
                feed_dict=feed_dict)
            duration_t = time.time() - train_start
            if args.print_steps:
                print(
                    'step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f} ({:.3f} sec/step)'
                    .format(step, loss_value, loss1, loss2, loss3,
                            duration_t))

            if step % args.save_pred_every == 0:
                # save and validate
                # SAVE previously trained model
                save(saver, sess, args.snapshot_dir, step)

                # Validate
                print("validating: ")
                start_time = time.time()
                print_assign_vars(sess)
                print("Assigned vars for validation. ")
                loss_sum = 0
                for val_step in trange(val_num_steps,
                                       desc='validation',
                                       leave=True):
                    loss_value_v, loss1_v, loss2_v, loss3_v = sess.run(
                        [
                            reduced_loss_val, loss_sub4_val, loss_sub24_val,
                            loss_sub124_val
                        ],
                        feed_dict=feed_dict)
                    loss_sum = loss_sum + loss_value_v
                duration = time.time() - start_time
                loss_avg = loss_sum / val_num_steps
                print(
                    'VALIDATION COMPLETE step {:d} \t total loss = {:.3f} \t duration = {:.3f}'
                    .format(step, loss_avg, duration))

                # Patience is fixed at four non-improving validations here.
                if loss_avg >= last_val_loss:
                    val_not_imp_t = val_not_imp_t + 1
                    if val_not_imp_t >= 4:
                        print(
                            "Terminated Training, Best Model saved 5 validations before"
                        )
                        f = open("./FINISHED_ICNET", "w+")
                        f.close()
                        break
                else:
                    val_not_imp_t = 0

                # NOTE(review): placement reconstructed -- this update is
                # assumed to be unconditional, mirroring the non-cross-val
                # branch; it may have belonged inside the ``else`` above.
                last_val_loss = loss_avg

                # check if max run time is already over
                # NOTE(review): assumed to sit inside the save/validate
                # block, as in the non-cross-val branch -- confirm.
                elapsed = time.time() - glob_start_time
                if (elapsed + 300) > max_time_seconds:
                    print("Training stopped: max run time elapsed")
                    os.remove("./RUNNING_ICNET")
                    break

    coord.request_stop()
    coord.join(threads)
def main():
    """Create the model and start the training.

    Builds ICNet_BN with three cascade losses plus L2 weight decay and trains
    it with momentum SGD. Image/label/prediction summaries and the scalar
    losses are written to ``args.snapshot_dir`` every
    ``args.save_pred_every`` steps, together with a checkpoint.

    Learning rate: when the module-level ``LR_SHEDULE`` is empty the poly
    policy driven by ``args.learning_rate`` is used. Otherwise ``LR_SHEDULE``
    is read as a ``{step: learning_rate}`` mapping (assumed from how the
    original code used its keys and values -- TODO confirm): the entry with
    the smallest step provides the initial rate and each later entry takes
    effect when training reaches its step.
    """
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    coord = tf.train.Coordinator()

    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    net = ICNet_BN({'data': image_batch}, is_training=True,
                   num_classes=args.num_classes)

    sub4_out = net.layers['sub4_out']
    sub24_out = net.layers['sub24_out']
    sub124_out = net.layers['conv6_cls']

    # Layers that are skipped on restore when --not-restore-last is set.
    fc_list = ['conv6_cls']

    all_trainable = [
        v for v in tf.trainable_variables()
        if ('beta' not in v.name and 'gamma' not in v.name)
        or args.train_beta_gamma
    ]
    restore_var = [
        v for v in tf.global_variables()
        if not any(f in v.name for f in fc_list) or not args.not_restore_last
    ]

    for v in restore_var:
        print(v.name)

    loss_sub4 = create_loss(sub4_out, label_batch, args.num_classes,
                            args.ignore_label, args.use_class_weights)
    loss_sub24 = create_loss(sub24_out, label_batch, args.num_classes,
                             args.ignore_label, args.use_class_weights)
    loss_sub124 = create_loss(sub124_out, label_batch, args.num_classes,
                              args.ignore_label, args.use_class_weights)

    l2_losses = [args.weight_decay * tf.nn.l2_loss(v)
                 for v in tf.trainable_variables() if 'weights' in v.name]

    loss = LAMBDA1 * loss_sub4 + LAMBDA2 * loss_sub24 + LAMBDA3 * loss_sub124
    reduced_loss = loss + tf.add_n(l2_losses)

    ##############################
    # visualization and summary
    ##############################

    # Processed predictions: for visualisation. Each cascade output is
    # upsampled to the input's spatial size and arg-maxed over axis 3.
    # Sub 4
    raw_output_up4 = tf.image.resize_bilinear(sub4_out, tf.shape(image_batch)[1:3,])
    raw_output_up4 = tf.argmax(raw_output_up4, dimension=3)
    pred4 = tf.expand_dims(raw_output_up4, dim=3)
    # Sub 24
    raw_output_up24 = tf.image.resize_bilinear(sub24_out, tf.shape(image_batch)[1:3,])
    raw_output_up24 = tf.argmax(raw_output_up24, dimension=3)
    pred24 = tf.expand_dims(raw_output_up24, dim=3)
    # Sub 124
    raw_output_up124 = tf.image.resize_bilinear(sub124_out, tf.shape(image_batch)[1:3,])
    raw_output_up124 = tf.argmax(raw_output_up124, dimension=3)
    pred124 = tf.expand_dims(raw_output_up124, dim=3)

    images_summary = tf.py_func(inv_preprocess, [image_batch, SAVE_NUM_IMAGES, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [label_batch, SAVE_NUM_IMAGES, args.num_classes], tf.uint8)

    # Only the full-resolution prediction is shown in the image summary; the
    # two coarser ones are built but not attached to any summary.
    preds_summary4 = tf.py_func(decode_labels, [pred4, SAVE_NUM_IMAGES, args.num_classes], tf.uint8)
    preds_summary24 = tf.py_func(decode_labels, [pred24, SAVE_NUM_IMAGES, args.num_classes], tf.uint8)
    preds_summary124 = tf.py_func(decode_labels, [pred124, SAVE_NUM_IMAGES, args.num_classes], tf.uint8)

    # Image, label and prediction concatenated along axis 2.
    total_images_summary = tf.summary.image(
        'images',
        tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary124]),
        max_outputs=SAVE_NUM_IMAGES)

    total_summary = [total_images_summary]

    loss_summary = tf.summary.scalar('Total_loss', reduced_loss)
    total_summary.append(loss_summary)

    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())
    ##############################
    ##############################

    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    # Work on a sorted copy so the module-level schedule is not consumed.
    lr_schedule = sorted(LR_SHEDULE.items())
    if not lr_schedule:
        # Using Poly learning rate policy
        base_lr = tf.constant(args.learning_rate)
        learning_rate = tf.scalar_mul(
            base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))
        lr_value_ph = lr_assign = None
        lr_changes = {}
    else:
        # trainable/dtype are passed by keyword: the second positional
        # argument of tf.Variable is ``trainable``, not the dtype.
        learning_rate = tf.Variable(lr_schedule[0][1], trainable=False,
                                    dtype=tf.float32)
        # One assign op, created once and fed with the new value; building a
        # new op per change would grow the graph and never execute.
        lr_value_ph = tf.placeholder(dtype=tf.float32, shape=())
        lr_assign = tf.assign(learning_rate, lr_value_ph)
        lr_changes = dict(lr_schedule[1:])

    lr_summary = tf.summary.scalar('Learning_rate', learning_rate)
    total_summary.append(lr_summary)

    # Gets moving_mean and moving_variance update operations from
    # tf.GraphKeys.UPDATE_OPS
    if args.update_mean_var == False:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        grads = tf.gradients(reduced_loss, all_trainable)
        train_op = opt_conv.apply_gradients(zip(grads, all_trainable))

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('Restore from pre-trained model...')
        net.load(args.restore_from, sess, ignore_layers=fc_list)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)
    summ_op = tf.summary.merge(total_summary)

    train_fetches = [reduced_loss, loss_sub4, loss_sub24, loss_sub124, train_op]
    try:
        # Iterate over training steps.
        for step in range(args.num_steps):
            start_time = time.time()

            # Apply a scheduled learning-rate change before this step runs.
            if step in lr_changes:
                sess.run(lr_assign, feed_dict={lr_value_ph: lr_changes[step]})

            feed_dict = {step_ph: step}
            if step % args.save_pred_every == 0:
                # Summaries are evaluated only on checkpoint steps.
                loss_value, loss1, loss2, loss3, _, summary = sess.run(
                    train_fetches + [summ_op], feed_dict=feed_dict)
                save(saver, sess, args.snapshot_dir, step)
                summary_writer.add_summary(summary, step)
            else:
                loss_value, loss1, loss2, loss3, _ = sess.run(
                    train_fetches, feed_dict=feed_dict)
            duration = time.time() - start_time
            print('step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f} ({:.3f} sec/step)'.format(step, loss_value, loss1, loss2, loss3, duration))
    finally:
        # Stop the reader threads and flush pending summaries even when a
        # training step raises.
        coord.request_stop()
        coord.join(threads)
        summary_writer.close()
        sess.close()
def main():
    """Create the model and start the training.

    Builds ICNet_BN with three cascade losses plus L2 weight decay and trains
    it for ``args.num_steps`` steps with momentum SGD under a poly
    learning-rate policy. A checkpoint is saved every
    ``args.save_pred_every`` steps. Training resumes from the latest
    checkpoint in ``args.snapshot_dir`` when one exists, otherwise from the
    pre-trained weights at ``args.restore_from``.
    """
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    coord = tf.train.Coordinator()

    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            ' ',
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    net = ICNet_BN({'data': image_batch}, is_training=True,
                   num_classes=args.num_classes)

    sub4_out = net.layers['sub4_out']
    sub24_out = net.layers['sub24_out']
    sub124_out = net.layers['conv6_cls']

    # Captured before the optimizer exists, so its slot variables are not
    # expected in a restored checkpoint.
    restore_var = tf.global_variables()
    # beta/gamma are trained only when explicitly requested.
    all_trainable = [
        v for v in tf.trainable_variables()
        if ('beta' not in v.name and 'gamma' not in v.name)
        or args.train_beta_gamma
    ]

    loss_sub4 = create_loss(sub4_out, label_batch, args.num_classes,
                            args.ignore_label)
    loss_sub24 = create_loss(sub24_out, label_batch, args.num_classes,
                             args.ignore_label)
    loss_sub124 = create_loss(sub124_out, label_batch, args.num_classes,
                              args.ignore_label)
    l2_losses = [args.weight_decay * tf.nn.l2_loss(v)
                 for v in tf.trainable_variables() if 'weights' in v.name]

    reduced_loss = (LAMBDA1 * loss_sub4 + LAMBDA2 * loss_sub24 +
                    LAMBDA3 * loss_sub124 + tf.add_n(l2_losses))

    # Using Poly learning rate policy
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    # Gets moving_mean and moving_variance update operations from
    # tf.GraphKeys.UPDATE_OPS
    if args.update_mean_var == False:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        grads = tf.gradients(reduced_loss, all_trainable)
        train_op = opt_conv.apply_gradients(zip(grads, all_trainable))

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)

    ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('Restore from pre-trained model...')
        net.load(args.restore_from, sess)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    fetches = [reduced_loss, loss_sub4, loss_sub24, loss_sub124, train_op]
    try:
        # Iterate over training steps.
        for step in range(args.num_steps):
            start_time = time.time()

            loss_value, loss1, loss2, loss3, _ = sess.run(
                fetches, feed_dict={step_ph: step})
            # The training step is the same on every iteration; only the
            # checkpoint is periodic.
            if step % args.save_pred_every == 0:
                save(saver, sess, args.snapshot_dir, step)

            duration = time.time() - start_time
            print('step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f} ({:.3f} sec/step)'.format(step, loss_value, loss1, loss2, loss3, duration))
    finally:
        # Always stop the reader threads and release the session, even when
        # a training step raises.
        coord.request_stop()
        coord.join(threads)
        sess.close()
def main():
    """Create the model and start the training.

    Builds ICNet_BN, extends it with ``bn_common.extend_reclassifier`` and
    trains the three reclassifier outputs with momentum SGD under a poly
    learning-rate policy. L2 weight decay is applied to every trainable
    variable whose name contains ``'weights'`` or ``'kernel'``. A checkpoint
    is saved every ``args.save_pred_every`` steps.
    """
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    # Optional "H,W" centre crop; None disables it.
    if args.center_crop_size is None:
        center_crop_size = None
    else:
        hc, wc = map(int, args.center_crop_size.split(','))
        center_crop_size = (hc, wc)

    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            DATA_DIR,
            DATA_LIST_PATH,
            input_size,
            center_crop_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    net = ICNet_BN({'data': image_batch}, is_training=True,
                   num_classes=args.num_classes,
                   filter_scale=args.filter_scale)
    sub4_recls, sub24_recls, sub124_recls = bn_common.extend_reclassifier(net)

    # Captured before the optimizer exists, so its slot variables are not
    # expected in a restored checkpoint.
    restore_var = tf.global_variables()
    # beta/gamma are trained only when explicitly requested.
    all_trainable = [
        v for v in tf.trainable_variables()
        if ('beta' not in v.name and 'gamma' not in v.name)
        or args.train_beta_gamma
    ]

    loss_sub4 = create_loss(sub4_recls, label_batch, args)
    loss_sub24 = create_loss(sub24_recls, label_batch, args)
    loss_sub124 = create_loss(sub124_recls, label_batch, args)

    l2_losses = [args.weight_decay * tf.nn.l2_loss(v)
                 for v in tf.trainable_variables()
                 if ('weights' in v.name) or ('kernel' in v.name)]

    reduced_loss = (LAMBDA1 * loss_sub4 + LAMBDA2 * loss_sub24 +
                    LAMBDA3 * loss_sub124 + tf.add_n(l2_losses))

    # Using Poly learning rate policy
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    # Gets moving_mean and moving_variance update operations from
    # tf.GraphKeys.UPDATE_OPS
    if args.update_mean_var == False:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        grads = tf.gradients(reduced_loss, all_trainable)
        train_op = opt_conv.apply_gradients(zip(grads, all_trainable))

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    sess.run(tf.global_variables_initializer())

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=99)

    ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('Restore from pre-trained model...')
        net.load(args.restore_from, sess)

    # Start queue threads. The reader is built without a coordinator here,
    # so one is created only for the queue runners.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    fetches = [reduced_loss, loss_sub4, loss_sub24, loss_sub124, train_op]
    try:
        # Iterate over training steps.
        for step in range(args.num_steps):
            start_time = time.time()

            loss_value, loss1, loss2, loss3, _ = sess.run(
                fetches, feed_dict={step_ph: step})
            # The training step is the same on every iteration; only the
            # checkpoint is periodic.
            if step % args.save_pred_every == 0:
                save(saver, sess, args.snapshot_dir, step)

            duration = time.time() - start_time
            print('step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f} ({:.3f} sec/step)'.format(step, loss_value, loss1, loss2, loss3, duration))
    finally:
        # Always stop the reader threads and release the session, even when
        # a training step raises.
        coord.request_stop()
        coord.join(threads)
        sess.close()