def train():
    """Train one Resnet per stratified fold and report train/val log-loss.

    Uses module-level ``configs``, ``BATCH_SIZE``, ``EPOCHS`` and the helper
    functions ``gen_flow_for_two_inputs`` / ``get_callbacks``. Each fold's best
    checkpoint is written by the callbacks to ``configs.MODEL_FILE.format(idx)``
    and reloaded for scoring.
    """
    data_loader = DataLoader(data_file=configs.TRAIN_FILE)
    # NOTE(review): `get_data` is accessed without parentheses — presumably a
    # property on DataLoader; confirm it is not a bound method being dropped.
    images, angles, labels = data_loader.get_data
    print('Image shape: {}'.format(images.shape))
    print('Angles shape: {}'.format(angles.shape))
    print('Labels shape: {}'.format(labels.shape))
    # 5 stratified folds, 16% held out for validation each time; fixed seed
    # keeps the splits reproducible across runs (so resuming matches folds).
    sss = StratifiedShuffleSplit(n_splits=5, test_size=0.16, random_state=1024)
    for idx, [train_ids, val_ids] in enumerate(sss.split(images, labels)):
        train_images, val_images = images[train_ids], images[val_ids]
        train_angles, val_angles = angles[train_ids], angles[val_ids]
        train_labels, val_labels = labels[train_ids], labels[val_ids]
        # Resume from a previously saved fold model when available; otherwise
        # build and compile a fresh Resnet.
        if os.path.isfile(configs.MODEL_FILE.format(idx)):
            model = load_model(configs.MODEL_FILE.format(idx))
        else:
            model = models.Resnet(input_shape=data_loader.image_shape)
            optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                             epsilon=1e-08, decay=0.002)
            model.compile(optimizer=optimizer, loss='binary_crossentropy',
                          metrics=['accuracy'])
        model.fit_generator(
            generator=gen_flow_for_two_inputs(train_images, train_angles,
                                              train_labels,
                                              batch_size=BATCH_SIZE),
            # 8x the raw sample count per epoch — presumably 8 augmented
            # variants per sample; confirm against gen_flow_for_two_inputs.
            steps_per_epoch=np.ceil(8 * len(train_ids) / BATCH_SIZE),
            epochs=EPOCHS,
            validation_data=([val_images, val_angles], val_labels),
            callbacks=get_callbacks(model_name=str(idx)),
            use_multiprocessing=False,
            workers=1)
        # Bug fix: message previously read 'Ford: {}'.
        print('Fold: {}'.format(idx))
        # Reload the best checkpoint written by the callbacks before scoring.
        model = load_model(configs.MODEL_FILE.format(idx))
        p = model.predict([train_images, train_angles],
                          batch_size=BATCH_SIZE, verbose=1)
        print('\nEvaluate loss on training data: {}'.format(
            metrics.log_loss(train_labels, p)), flush=True)
        p = model.predict([val_images, val_angles],
                          batch_size=BATCH_SIZE, verbose=1)
        print('\nEvaluate loss on validation data: {}'.format(
            metrics.log_loss(val_labels, p)), flush=True)
def __init__(self):
    """Parse CLI arguments and set up the model, loss and optimizer.

    Recognized flags: --lr, --bs, --gpu, --net, --verbose, --e.
    Builds one of four project networks, optionally moves it to CUDA,
    and prepares CrossEntropyLoss + SGD(momentum=0.9).
    """
    # Typo fix in the user-visible description: was 'Pytroch'.
    parser = argparse.ArgumentParser(description='Pytorch Cifar10 Training')
    parser.add_argument('--lr', default=0.01, type=float,
                        help='learning rate\n default:0.01')
    parser.add_argument('--bs', default=256, type=int,
                        help='batch size\n default:256')
    # NOTE(review): no type/action here, so any CLI value (even 'False') is a
    # truthy string and still enables the GPU path. Kept for compatibility.
    parser.add_argument('--gpu', default=True, help='use gpu as accelerator')
    parser.add_argument(
        '--net', default='vgg16',
        choices=['vgg16', 'resnet18', 'sevgg16', 'seresnet18'],
        help='provided network:vgg16, resnet18, sevgg16, seresnet18')
    parser.add_argument('--verbose', default=True,
                        help='print some useful info')
    parser.add_argument('--e', default=50, type=int,
                        help='the num of training epoch\n default:50')
    args = parser.parse_args()
    # parse and preparing params
    self.batch_size = args.bs
    self.epoches = args.e
    self.lr = args.lr
    self.use_gpu = args.gpu
    self.net = args.net
    self.model = None
    self.verbose = args.verbose
    if self.net == 'vgg16':
        self.model = models.VGGNet()
    elif self.net == 'resnet18':
        self.model = models.Resnet()
    elif self.net == 'sevgg16':
        self.model = models.SeVGGNet()
    elif self.net == 'seresnet18':
        self.model = models.SeResnet()
    # Bug fix: the original called self.model.to(self.gpu), but no `gpu`
    # attribute is ever assigned, so that line raised AttributeError.
    # Use an explicit torch.device (also exposed as self.device).
    self.device = torch.device(
        'cuda' if self.use_gpu and torch.cuda.is_available() else 'cpu')
    if self.use_gpu and torch.cuda.is_available():
        self.model.to(self.device)
    if self.verbose:
        print(self.model)
    self.loss_fn = nn.CrossEntropyLoss()
    self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr,
                               momentum=0.9)
# Data torch.manual_seed(args.seed) if device == 'cuda': cudnn.benchmark = True torch.cuda.manual_seed(args.seed) print('load data: ', args.dataset) train_loader, test_loader = data_loader.getDataSet(args.dataset, args.batch_size, args.test_batch_size, args.imageSize) # Model print('==> Building model..') net = models.Resnet() net = net.to(device) criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4) # Training def train(epoch): print('\nEpoch: %d' % epoch) net.train() train_loss = 0 correct = 0
def main():
    """Semi-supervised training loop: cross-entropy on labeled batches plus
    VAT (virtual adversarial training) loss on unlabeled batches, with
    checkpoint resume, per-epoch validation, and best-model saving.
    """
    best_val_acc = -1.0
    start_epoch = 1
    # Labeled training set (False -> training split per the dataset's API).
    trn_ds = loaders.SatClassificationDataset(LBL_DATA_DIR, SPLIT_CSV,
                                              POSITIVE_CLASS, False, trn_tfms)
    print('Train Samples:', len(trn_ds))
    trn_dl = DataLoader(trn_ds, BATCH_SIZE, shuffle=True,
                        num_workers=WORKERS)
    # Unlabeled pool consumed in lockstep with the labeled loader.
    unlbl_ds = loaders.UnlabeledDataset(UNLBL_DATA_DIR, IMAGE_SIZE)
    print('Unlabeled:', len(unlbl_ds))
    unlbl_dl = DataLoader(unlbl_ds, BATCH_SIZE, shuffle=True,
                          num_workers=WORKERS)
    val_ds = loaders.SatClassificationDataset(LBL_DATA_DIR, SPLIT_CSV,
                                              POSITIVE_CLASS, True, val_tfms)
    print('Val Samples:', len(val_ds))
    val_dl = DataLoader(val_ds, BATCH_SIZE, shuffle=False,
                        num_workers=WORKERS)
    model = models.Resnet(visionmodels.resnet50, 2)
    model.to(DEVICE)
    ce_loss_fn = nn.CrossEntropyLoss().to(DEVICE)
    vat_loss_fn = vat.VATLoss(IP, EPSILON, XI).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=LR)
    lr_sched = optim.lr_scheduler.StepLR(optimizer, LR_STEP, gamma=LR_DECAY)
    trn_metrics = BookKeeping(TENSORBOARD_LOGDIR, 'trn')
    val_metrics = BookKeeping(TENSORBOARD_LOGDIR, 'val')
    if not os.path.exists(WEIGHTS_SAVE_PATH):
        os.mkdir(WEIGHTS_SAVE_PATH)
    # Resume: the checkpoint stores whole optimizer/scheduler objects
    # (pickled via dill), not state_dicts — they are assigned directly.
    if LOAD_CHECKPOINT is not None:
        checkpoint = torch.load(LOAD_CHECKPOINT, pickle_module=dill)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer = checkpoint['optimizer']
        lr_sched = checkpoint['lr_scheduler']
        best_val_acc = checkpoint['best_metrics']
    for epoch in range(start_epoch, EPOCHS + 1):
        # Train
        t_pbar = tqdm(trn_dl,
                      desc=pbar_desc('train', epoch, EPOCHS, 0.0, -1.0, -1.0))
        ul_iter = iter(unlbl_dl)
        model.train()
        for (xs, ys) in t_pbar:
            try:
                xs_ul, ys_ul = next(ul_iter)
            except StopIteration:
                # Reset the iterator in case we've used
                # up all of the images
                ul_iter = iter(unlbl_dl)
                xs_ul, ys_ul = next(ul_iter)
            xs = xs.to(DEVICE)
            ys = ys.to(DEVICE)
            y_pred1 = model(xs)
            ce_loss = ce_loss_fn(y_pred1, ys)
            # VAT regularizer computed on the unlabeled batch only.
            xs_ul = xs_ul.to(DEVICE)
            vat_loss = vat_loss_fn(xs_ul, model, logits=True)
            total_loss = ce_loss + vat_loss
            acc = metrics.accuracy(y_pred1, ys)
            f1 = metrics.f1_score(y_pred1, ys)
            trn_metrics.update(ce=ce_loss.item(), vat=vat_loss.item(),
                               total=total_loss.item(), f1=f1.item(),
                               accuracy=acc.item())
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()
            t_pbar.set_description(
                pbar_desc('train', epoch, EPOCHS, total_loss.item(), acc, f1))
        # Final update to training bar
        avg_trn_metrics = trn_metrics.get_avg_losses()
        t_pbar.set_description(
            pbar_desc('train', epoch, EPOCHS, avg_trn_metrics['total'],
                      avg_trn_metrics['accuracy'], avg_trn_metrics['f1']))
        trn_metrics.update_tensorboard(epoch)
        # Validate
        # NOTE(review): no torch.no_grad() here — validation builds autograd
        # graphs and wastes memory; consider wrapping this loop. Left as-is.
        v_pbar = tqdm(val_dl,
                      desc=pbar_desc('valid', epoch, EPOCHS, 0.0, -1.0, -1.0))
        model.eval()
        for xs, ys in v_pbar:
            xs = xs.to(DEVICE)
            ys = ys.to(DEVICE)
            y_pred1 = model(xs)
            ce_loss = ce_loss_fn(y_pred1, ys)
            acc = metrics.accuracy(y_pred1, ys)
            f1 = metrics.f1_score(y_pred1, ys)
            # vat=0: VAT loss is not computed on validation data.
            val_metrics.update(ce=ce_loss.item(), vat=0,
                               total=ce_loss.item(), f1=f1.item(),
                               accuracy=acc.item())
            v_pbar.set_description(
                pbar_desc('valid', epoch, EPOCHS, ce_loss.item(), acc, f1))
        avg_val_metrics = val_metrics.get_avg_losses()
        avg_acc = avg_val_metrics['accuracy']
        # Save weights whenever the epoch-average validation accuracy improves.
        if avg_acc > best_val_acc:
            best_val_acc = avg_acc
            torch.save(
                model.state_dict(),
                f'{WEIGHTS_SAVE_PATH}/{EXP_NO:02d}-{MODEL_NAME}_epoch-{epoch:04d}_acc-{avg_acc:.3f}.pth'
            )
        # Final update to validation bar
        # NOTE(review): label says 'train' but this is the validation bar —
        # likely should read 'valid'.
        v_pbar.set_description(
            pbar_desc('train', epoch, EPOCHS, avg_val_metrics['total'],
                      avg_val_metrics['accuracy'], avg_val_metrics['f1']))
        val_metrics.update_tensorboard(epoch)
        # Update scheduler and save checkpoint
        lr_sched.step(epoch=epoch)
        save_checkpoint(epoch, model, best_val_acc, optimizer, lr_sched)
def train(args):
    """Load the requested dataset, build the requested model, and fit it.

    Python 2 code (note the `print` statement). Dispatches on args.dataset
    to set (n_dim, n_out, n_channels) and load the splits, then on
    args.model to construct one of the project's model classes, and finally
    calls model.fit on the training/validation data.
    """
    import models
    import numpy as np
    np.random.seed(1234)
    # --- dataset selection: each branch defines image size, class count,
    # channel count, and the train/val arrays -------------------------------
    if args.dataset == 'digits':
        n_dim, n_out, n_channels = 8, 10, 1
        X_train, y_train, X_val, y_val = data.load_digits()
    elif args.dataset == 'mnist':
        n_dim, n_out, n_channels = 28, 10, 1
        X_train, y_train, X_val, y_val, _, _ = data.load_mnist()
    elif args.dataset == 'svhn':
        n_dim, n_out, n_channels = 32, 10, 3
        X_train, y_train, X_val, y_val = data.load_svhn()
        X_train, y_train, X_val, y_val = data.prepare_dataset(
            X_train, y_train, X_val, y_val)
    elif args.dataset == 'cifar10':
        n_dim, n_out, n_channels = 32, 10, 3
        X_train, y_train, X_val, y_val = data.load_cifar10()
        X_train, y_train, X_val, y_val = data.prepare_dataset(
            X_train, y_train, X_val, y_val)
    elif args.dataset == 'random':
        # Synthetic 2-D noise; validation reuses the training data.
        n_dim, n_out, n_channels = 2, 2, 1
        X_train, y_train = data.load_noise(n=1000, d=n_dim)
        X_val, y_val = X_train, y_train
    else:
        raise ValueError('Invalid dataset name: %s' % args.dataset)
    print 'dataset loaded, dim:', X_train.shape
    # set up optimization params
    p = {'lr': args.lr, 'b1': args.b1, 'b2': args.b2}
    # --- model selection ---------------------------------------------------
    # Generative models (VAE/ADGM family) pick a 'bernoulli' likelihood for
    # binary-ish datasets (digits/mnist) and 'gaussian' otherwise.
    if args.model == 'softmax':
        model = models.Softmax(n_dim=n_dim, n_out=n_out,
                               n_superbatch=args.n_superbatch,
                               opt_alg=args.alg, opt_params=p)
    elif args.model == 'mlp':
        model = models.MLP(n_dim=n_dim, n_out=n_out,
                           n_superbatch=args.n_superbatch,
                           opt_alg=args.alg, opt_params=p)
    elif args.model == 'cnn':
        model = models.CNN(n_dim=n_dim, n_out=n_out, n_chan=n_channels,
                           model=args.dataset,
                           n_superbatch=args.n_superbatch,
                           opt_alg=args.alg, opt_params=p)
    elif args.model == 'kcnn':
        model = models.KCNN(n_dim=n_dim, n_out=n_out, n_chan=n_channels,
                            model=args.dataset,
                            n_superbatch=args.n_superbatch,
                            opt_alg=args.alg, opt_params=p)
    elif args.model == 'resnet':
        model = models.Resnet(n_dim=n_dim, n_out=n_out, n_chan=n_channels,
                              n_superbatch=args.n_superbatch,
                              opt_alg=args.alg, opt_params=p)
    elif args.model == 'vae':
        model = models.VAE(n_dim=n_dim, n_out=n_out, n_chan=n_channels,
                           n_batch=args.n_batch,
                           n_superbatch=args.n_superbatch,
                           opt_alg=args.alg, opt_params=p,
                           model='bernoulli'
                           if args.dataset in ('digits', 'mnist')
                           else 'gaussian')
    elif args.model == 'convvae':
        model = models.ConvVAE(n_dim=n_dim, n_out=n_out, n_chan=n_channels,
                               n_batch=args.n_batch,
                               n_superbatch=args.n_superbatch,
                               opt_alg=args.alg, opt_params=p,
                               model='bernoulli'
                               if args.dataset in ('digits', 'mnist')
                               else 'gaussian')
    elif args.model == 'convadgm':
        model = models.ConvADGM(n_dim=n_dim, n_out=n_out, n_chan=n_channels,
                                n_batch=args.n_batch,
                                n_superbatch=args.n_superbatch,
                                opt_alg=args.alg, opt_params=p,
                                model='bernoulli'
                                if args.dataset in ('digits', 'mnist')
                                else 'gaussian')
    elif args.model == 'sbn':
        model = models.SBN(n_dim=n_dim, n_out=n_out, n_chan=n_channels,
                           n_superbatch=args.n_superbatch,
                           opt_alg=args.alg, opt_params=p)
    elif args.model == 'adgm':
        model = models.ADGM(n_dim=n_dim, n_out=n_out, n_chan=n_channels,
                            n_batch=args.n_batch,
                            n_superbatch=args.n_superbatch,
                            opt_alg=args.alg, opt_params=p,
                            model='bernoulli'
                            if args.dataset in ('digits', 'mnist')
                            else 'gaussian')
    elif args.model == 'hdgm':
        model = models.HDGM(n_dim=n_dim, n_out=n_out, n_chan=n_channels,
                            n_batch=args.n_batch,
                            n_superbatch=args.n_superbatch,
                            opt_alg=args.alg, opt_params=p)
    elif args.model == 'dadgm':
        model = models.DADGM(n_dim=n_dim, n_out=n_out, n_chan=n_channels,
                             n_superbatch=args.n_superbatch,
                             opt_alg=args.alg, opt_params=p)
    elif args.model == 'dcgan':
        model = models.DCGAN(n_dim=n_dim, n_out=n_out, n_chan=n_channels,
                             n_superbatch=args.n_superbatch,
                             opt_alg=args.alg, opt_params=p)
    elif args.model == 'ssadgm':
        # Semi-supervised: split off args.n_labeled labeled examples; the
        # model keeps the labeled set and training proceeds on the unlabeled
        # remainder.
        X_train_lbl, y_train_lbl, X_train_unl, y_train_unl \
            = data.split_semisup(X_train, y_train, n_lbl=args.n_labeled)
        model = models.SSADGM(X_labeled=X_train_lbl, y_labeled=y_train_lbl,
                              n_out=n_out, n_superbatch=args.n_superbatch,
                              opt_alg=args.alg, opt_params=p)
        X_train, y_train = X_train_unl, y_train_unl
    else:
        raise ValueError('Invalid model')
    # train model
    model.fit(X_train, y_train, X_val, y_val,
              n_epoch=args.epochs, n_batch=args.n_batch,
              logname=args.logname)
# Output directory for this network's test results.
outf = 'test/' + args.network
if not os.path.isdir(outf):
    os.makedirs(outf)
device = torch.device('cuda:' + str(args.gpu)
                      if torch.cuda.is_available() else 'cpu')
print("Random Seed: ", args.seed)
torch.manual_seed(args.seed)
# NOTE(review): `device` is a torch.device (e.g. device('cuda:0')), so this
# string comparison is always False and torch.cuda.manual_seed never runs.
if device == 'cuda':
    torch.cuda.manual_seed(args.seed)
print('Load model')
if args.network == 'resnet':
    model = models.Resnet()
    # Deterministic network: a single evaluation pass suffices.
    args.eva_iter = 1
elif args.network == 'sdenet':
    model = models.SDENet_mnist(layer_depth=6, num_classes=10, dim=64)
elif args.network == 'mc_dropout':
    model = models.Resnet_dropout()
# Restore pre-trained weights before moving the model to the target device.
model.load_state_dict(torch.load(args.pre_trained_net))
model = model.to(device)
model_dict = model.state_dict()
print('load target data: ', args.dataset)
_, test_loader = data_loader.getDataSet(args.dataset, args.batch_size,
                                        args.test_batch_size, args.imageSize)
print('load non target data: ', args.out_dataset)
def main(args):
    """Evaluate a trained hdrnet-style model: run every input image through
    the restored graph and report average SSIM/PSNR against target images.

    Relies on module-level `width`/`heighth` globals for the placeholder
    shape — assumes all inputs share that resolution; TODO confirm.
    """
    setproctitle.setproctitle('hdrnet_run')
    inputs = get_input_list(args.input)
    # -------- Load params ---------------------------------------------
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        checkpoint_path = tf.train.latest_checkpoint(args.checkpoint_dir)
        if checkpoint_path is None:
            log.error('Could not find a checkpoint in {}'.format(
                args.checkpoint_dir))
            return
    # -------- Setup graph ---------------------------------------------
    tf.reset_default_graph()
    t_fullres_input = tf.placeholder(tf.float32, (1, width, heighth, 3))
    target = tf.placeholder(tf.float32, (1, width, heighth, 3))
    # Low-res branch: blur then bicubic downsample by args.scale.
    t_lowres_input = utils.blur(5, t_fullres_input)
    img_low = tf.image.resize_images(
        t_lowres_input, [width / args.scale, heighth / args.scale],
        method=tf.image.ResizeMethod.BICUBIC)
    # High-frequency branch extracted from the full-res input.
    img_high = utils.Getfilter(5, t_fullres_input)
    with tf.variable_scope('inference'):
        prediction = models.Resnet(img_low, img_high, t_fullres_input)
    ssim = MultiScaleSSIM(target, prediction)
    psnr = metrics.psnr(target, prediction)
    saver = tf.train.Saver()
    # NOTE(review): time.clock() is deprecated (removed in Python 3.8);
    # time.perf_counter() is the modern equivalent.
    start = time.clock()
    with tf.Session(config=config) as sess:
        log.info('Restoring weights from {}'.format(checkpoint_path))
        saver.restore(sess, checkpoint_path)
        SSIM = 0
        PSNR = 0
        for idx, input_path in enumerate(inputs):
            # Target path is derived from the third path component of the
            # input — assumes a fixed directory layout; TODO confirm.
            target_path = args.target + input_path.split('/')[2]
            log.info("Processing {}".format(input_path, target_path))
            im_input = cv2.imread(input_path, -1)  # -1 means read as is, no conversions.
            im_target = cv2.imread(target_path, -1)
            if im_input.shape[2] == 4:
                log.info("Input {} has 4 channels, dropping alpha".format(
                    input_path))
                im_input = im_input[:, :, :3]
                im_target = im_target[:, :, :3]
            im_input = np.flip(im_input, 2)  # OpenCV reads BGR, convert back to RGB.
            im_target = np.flip(im_target, 2)
            im_input = skimage.img_as_float(im_input)
            im_target = skimage.img_as_float(im_target)
            # Add the batch dimension expected by the placeholders.
            im_input = im_input[np.newaxis, :, :, :]
            im_target = im_target[np.newaxis, :, :, :]
            feed_dict = {t_fullres_input: im_input, target: im_target}
            ssim1, psnr1 = sess.run([ssim, psnr], feed_dict=feed_dict)
            SSIM = SSIM + ssim1
            PSNR = PSNR + psnr1
            # Cap the evaluation at ~1000 images.
            if idx >= 1000:
                break
        # NOTE(review): averages divide by a hard-coded 1000 even when fewer
        # images were processed — the reported numbers are wrong for small
        # input sets.
        print("SSIM:%s,PSNR:%s" % (SSIM / 1000, PSNR / 1000))
        end = time.clock()
        print("耗时%s秒" % str(end - start))
def main(args):
    """Run inference with a trained hdrnet-style model: restore the latest
    checkpoint, process every input image, and save 8-bit PNG outputs.

    Relies on module-level `width`/`heighth` globals for the placeholder
    shape — assumes all inputs share that resolution; TODO confirm.
    """
    setproctitle.setproctitle('hdrnet_run')
    inputs = get_input_list(args.input)
    # -------- Load params ---------------------------------------------
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        checkpoint_path = tf.train.latest_checkpoint(args.checkpoint_dir)
        if checkpoint_path is None:
            log.error('Could not find a checkpoint in {}'.format(
                args.checkpoint_dir))
            return
    # -------- Setup graph ---------------------------------------------
    tf.reset_default_graph()
    t_fullres_input = tf.placeholder(tf.float32, (1, width, heighth, 3))
    # Low-res branch: blur then bicubic downsample by args.scale.
    t_lowres_input = utils.blur(5, t_fullres_input)
    img_low = tf.image.resize_images(
        t_lowres_input, [width / args.scale, heighth / args.scale],
        method=tf.image.ResizeMethod.BICUBIC)
    # High-frequency branch extracted from the full-res input.
    img_high = utils.Getfilter(5, t_fullres_input)
    with tf.variable_scope('inference'):
        prediction = models.Resnet(img_low, img_high, t_fullres_input)
    # Clamp to [0, 1] and convert to 8-bit for saving.
    output = tf.cast(255.0 * tf.squeeze(tf.clip_by_value(prediction, 0, 1)),
                     tf.uint8)
    saver = tf.train.Saver()
    with tf.Session(config=config) as sess:
        log.info('Restoring weights from {}'.format(checkpoint_path))
        saver.restore(sess, checkpoint_path)
        for idx, input_path in enumerate(inputs):
            log.info("Processing {}".format(input_path))
            im_input = cv2.imread(input_path, -1)  # -1 means read as is, no conversions.
            if im_input.shape[2] == 4:
                log.info("Input {} has 4 channels, dropping alpha".format(
                    input_path))
                im_input = im_input[:, :, :3]
            im_input = np.flip(im_input, 2)  # OpenCV reads BGR, convert back to RGB.
            # HACK for HDR+.
            # NOTE(review): both branches are now identical (the white-level
            # rescaling experiments were commented out) — the condition only
            # controls whether the log message is printed.
            if im_input.dtype == np.uint16 and args.hdrp:
                log.info(
                    "Using HDR+ hack for uint16 input. Assuming input white level is 32767."
                )
                im_input = skimage.img_as_float(im_input)
            else:
                im_input = skimage.img_as_float(im_input)
            fname = os.path.splitext(os.path.basename(input_path))[0]
            output_path = os.path.join(args.output, fname + ".png")
            basedir = os.path.dirname(output_path)
            # Add the batch dimension expected by the placeholder.
            im_input = im_input[np.newaxis, :, :, :]
            feed_dict = {
                t_fullres_input: im_input
            }
            out_ = sess.run(output, feed_dict=feed_dict)
            if not os.path.exists(basedir):
                os.makedirs(basedir)
            skimage.io.imsave(output_path, out_)
def main(args, data_params):
    """Build and run the adversarial training graph for the hdrnet-style
    Resnet: generator loss plus two discriminators (filter/texture), EMA
    smoothing for the displayed metrics, and a tf.train.Supervisor loop
    with periodic PSNR evaluation.
    """
    procname = os.path.basename(args.checkpoint_dir)
    #setproctitle.setproctitle('hdrnet_{}'.format(procname))
    log.info('Preparing summary and checkpoint directory {}'.format(
        args.checkpoint_dir))
    if not os.path.exists(args.checkpoint_dir):
        os.makedirs(args.checkpoint_dir)
    tf.set_random_seed(1234)  # Make experiments repeatable
    # --- Train/Test datasets -----------------------------------------------
    # The pipeline class is selected by name from the dp module.
    data_pipe = getattr(dp, args.data_pipeline)
    with tf.variable_scope('train_data'):
        train_data_pipeline = data_pipe(
            args.data_dir,
            shuffle=True,
            batch_size=args.batch_size, nthreads=args.data_threads,
            fliplr=args.fliplr, flipud=args.flipud, rotate=args.rotate,
            random_crop=args.random_crop, params=data_params,
            output_resolution=args.output_resolution, scale=args.scale)
        train_samples = train_data_pipeline.samples
        # Three model inputs: high-frequency branch, and a blurred +
        # bicubic-downsampled low-res branch.
        train_samples['high_input'] = Getfilter(5, train_samples['image_input'])
        train_samples['lowres_input1'] = blur(5, train_samples['lowres_input'])
        train_samples['low_input'] = tf.image.resize_images(
            train_samples['lowres_input1'],
            [args.output_resolution[0] / args.scale,
             args.output_resolution[1] / args.scale],
            method=tf.image.ResizeMethod.BICUBIC)
    if args.eval_data_dir is not None:
        with tf.variable_scope('eval_data'):
            eval_data_pipeline = data_pipe(
                args.eval_data_dir,
                shuffle=False,
                batch_size=1, nthreads=1,
                fliplr=False, flipud=False, rotate=False,
                random_crop=False, params=data_params,
                output_resolution=args.output_resolution, scale=args.scale)
            # NOTE(review): this reads from train_data_pipeline, not
            # eval_data_pipeline — evaluation runs on training samples.
            # Likely a bug; confirm intent before fixing.
            eval_samples = train_data_pipeline.samples
    # -------------------------------------------------------------------
    # Random 0/1 swap vectors consumed by the discriminator losses; drawn
    # once with numpy, so they are constant across all training steps.
    swaps = np.reshape(np.random.randint(0, 2, args.batch_size),
                       [args.batch_size, 1])
    swaps = tf.convert_to_tensor(swaps)
    swaps = tf.cast(swaps, tf.float32)
    swaps1 = np.reshape(np.random.randint(0, 2, args.batch_size),
                        [args.batch_size, 1])
    swaps1 = tf.convert_to_tensor(swaps1)
    swaps1 = tf.cast(swaps1, tf.float32)
    # Training graph
    with tf.variable_scope('inference'):
        prediction = models.Resnet(train_samples['low_input'],
                                   train_samples['high_input'],
                                   train_samples['image_input'])
        # Composite generator loss plus per-discriminator accuracies.
        loss, loss_content, loss_color, loss_filter, loss_texture, loss_tv, \
            discim_accuracy, discim_accuracy1 = \
            metrics.l2_loss(train_samples['image_output'], prediction,
                            swaps, swaps1, args.batch_size)
        psnr = metrics.psnr(train_samples['image_output'], prediction)
        loss_ssim = MultiScaleSSIM(train_samples['image_output'], prediction)
    # Evaluation graph
    if args.eval_data_dir is not None:
        with tf.name_scope('eval'):
            # reuse=True shares weights with the training graph above.
            with tf.variable_scope('inference', reuse=True):
                eval_prediction = models.Resnet(
                    eval_samples['low_input'], eval_samples['high_input'],
                    eval_samples['image_input'])
                eval_psnr = metrics.psnr(eval_samples['image_output'],
                                         eval_prediction)
    # Optimizer
    # NOTE(review): operator precedence makes this `(not A) or B`, so every
    # variable matching discriminator1 is ALSO kept in model_vars — probably
    # intended to exclude both discriminators. Confirm before changing.
    model_vars = [v for v in tf.global_variables()
                  if not v.name.startswith("inference/l2_loss/discriminator")
                  or v.name.startswith("inference/l2_loss/discriminator1")]
    discriminator_vars = [
        v for v in tf.global_variables()
        if v.name.startswith("inference/l2_loss/discriminator")]
    discriminator_vars1 = [
        v for v in tf.global_variables()
        if v.name.startswith("inference/l2_loss/discriminator1")]
    global_step = tf.contrib.framework.get_or_create_global_step()
    with tf.name_scope('optimizer'):
        # Batch-norm (and similar) update ops must run before each step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        updates = tf.group(*update_ops, name='update_ops')
        log.info("Adding {} update ops".format(len(update_ops)))
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        if reg_losses and args.weight_decay is not None and args.weight_decay > 0:
            print("Regularization losses:")
            for rl in reg_losses:
                print(" ", rl.name)
            opt_loss = loss + args.weight_decay * sum(reg_losses)
        else:
            print("No regularization.")
            opt_loss = loss
        with tf.control_dependencies([updates]):
            opt = tf.train.AdamOptimizer(args.learning_rate)
            # Generator minimizes its loss; each discriminator maximizes its
            # own objective (hence the negated losses).
            minimize = opt.minimize(opt_loss, name='optimizer',
                                    global_step=global_step,
                                    var_list=model_vars)
            minimize1 = opt.minimize(-loss_filter, name='optimizer1',
                                     global_step=global_step,
                                     var_list=discriminator_vars)
            minimize2 = opt.minimize(-loss_texture, name='optimizer2',
                                     global_step=global_step,
                                     var_list=discriminator_vars1)
    # Average loss and psnr for display
    with tf.name_scope("moving_averages"):
        ema = tf.train.ExponentialMovingAverage(decay=0.99)
        update_ma = ema.apply([loss, loss_content, loss_color, loss_filter,
                               loss_texture, loss_tv, discim_accuracy,
                               discim_accuracy1, psnr, loss_ssim])
        # Rebind the names to their EMA-smoothed versions for logging.
        loss = ema.average(loss)
        loss_content = ema.average(loss_content)
        loss_color = ema.average(loss_color)
        loss_filter = ema.average(loss_filter)
        loss_texture = ema.average(loss_texture)
        loss_tv = ema.average(loss_tv)
        discim_accuracy = ema.average(discim_accuracy)
        discim_accuracy1 = ema.average(discim_accuracy1)
        psnr = ema.average(psnr)
        loss_ssim = ema.average(loss_ssim)
    # Training stepper operation
    train_op = tf.group(minimize, minimize1, minimize2, update_ma)
    # Save a few graphs to tensorboard
    summaries = [
        tf.summary.scalar('loss', loss),
        tf.summary.scalar('loss_content', loss_content),
        tf.summary.scalar('loss_color', loss_color),
        tf.summary.scalar('loss_filter', loss_filter),
        tf.summary.scalar('loss_texture', loss_texture),
        tf.summary.scalar('loss_tv', loss_tv),
        tf.summary.scalar('discim_accuracy', discim_accuracy),
        tf.summary.scalar('discim_accuracy1', discim_accuracy1),
        tf.summary.scalar('psnr', psnr),
        tf.summary.scalar('ssim', loss_ssim),
        tf.summary.scalar('learning_rate', args.learning_rate),
        tf.summary.scalar('batch_size', args.batch_size),
    ]
    log_fetches = {
        "loss_content": loss_content,
        "loss_color": loss_color,
        "loss_filter": loss_filter,
        "loss_texture": loss_texture,
        "loss_tv": loss_tv,
        "discim_accuracy": discim_accuracy,
        "discim_accuracy1": discim_accuracy1,
        "step": global_step,
        "loss": loss,
        "psnr": psnr,
        "loss_ssim": loss_ssim}
    # NOTE(review): `startswith("a" or "b")` evaluates the `or` FIRST — the
    # argument is always the first string, so the second prefix is ignored.
    # startswith accepts a tuple of prefixes; this likely wants
    # startswith(("...discriminator", "...discriminator1")). Confirm intent.
    model_vars = [v for v in tf.global_variables()
                  if not v.name.startswith("inference/l2_loss/discriminator" or
                                           "inference/l2_loss/discriminator1")]
    discriminator_vars = [
        v for v in tf.global_variables()
        if v.name.startswith("inference/l2_loss/discriminator")]
    discriminator_vars1 = [
        v for v in tf.global_variables()
        if v.name.startswith("inference/l2_loss/discriminator1")]
    # Train config
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # Do not canibalize the entire GPU
    # Discriminator vars are excluded from the saver (only model_vars are
    # checkpointed) and initialized locally instead.
    sv = tf.train.Supervisor(
        local_init_op=tf.initialize_variables(discriminator_vars),
        saver=tf.train.Saver(var_list=model_vars, max_to_keep=100),
        logdir=args.checkpoint_dir,
        save_summaries_secs=args.summary_interval,
        save_model_secs=args.checkpoint_interval)
    # Train loop
    with sv.managed_session(config=config) as sess:
        sv.loop(args.log_interval, log_hook, (sess, log_fetches))
        last_eval = time.time()
        while True:
            if sv.should_stop():
                log.info("stopping supervisor")
                break
            try:
                step, _ = sess.run([global_step, train_op])
                since_eval = time.time() - last_eval
                if args.eval_data_dir is not None and since_eval > args.eval_interval:
                    log.info("Evaluating on {} images at step {}".format(
                        eval_data_pipeline.nsamples, step))
                    p_ = 0
                    # NOTE(review): nsamples is overwritten with 3 AFTER the
                    # log line above reports the real count — evaluation
                    # averages over only 3 samples.
                    eval_data_pipeline.nsamples = 3
                    for it in range(eval_data_pipeline.nsamples):
                        p_ += sess.run(eval_psnr)
                    p_ /= eval_data_pipeline.nsamples
                    sv.summary_writer.add_summary(tf.Summary(value=[
                        tf.Summary.Value(tag="psnr/eval", simple_value=p_)]),
                        global_step=step)
                    log.info(" Evaluation PSNR = {:.1f} dB".format(p_))
                    last_eval = time.time()
            except tf.errors.AbortedError:
                log.error("Aborted")
                break
            except KeyboardInterrupt:
                break
        chkpt_path = os.path.join(args.checkpoint_dir, 'on_stop.ckpt')
        log.info("Training complete, saving chkpt {}".format(chkpt_path))
        sv.saver.save(sess, chkpt_path)
        sv.request_stop()