def run_style_transfer(input_img: T.Tensor, num_steps=1000, style_weight=10000.,
                       content_weight=1., tv_weight=0):
    content_img_resized = F.resize(content_img, 1024)
    input_img = input_img.detach()[None].permute(0, 3, 1, 2).contiguous()
    input_img = F.resize(input_img, 1024)
    input_img = T.nn.Parameter(input_img, requires_grad=True)
    vgg_loss = losses.StyleTransferLosses(vgg_weight_file, content_img_resized,
                                          style_img, px_content_layers,
                                          px_style_layers)
    vgg_loss.to(device).eval()
    optimizer = optim.Adam([input_img], lr=1e-3)
    logger.info('Optimizing pixel-wise canvas...')
    for _ in mon.iter_batch(range(num_steps)):
        optimizer.zero_grad()
        input = T.clamp(input_img, 0., 1.)
        content_score, style_score = vgg_loss(input)
        style_score *= style_weight
        content_score *= content_weight
        tv_score = 0. if not tv_weight else tv_weight * losses.tv_loss(input_img)
        loss = style_score + content_score + tv_score
        loss.backward(inputs=[input_img])
        optimizer.step()

        # Plot some statistics.
        mon.plot('pixel style loss', style_score)
        mon.plot('pixel content loss', content_score)
        if tv_weight:
            mon.plot('pixel tv loss', tv_score)
        if mon.iter % mon.print_freq == 0:
            mon.imwrite('pixel stylized', input)
    return T.clamp(input_img, 0., 1.)
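# NOTE: the body of losses.tv_loss is not shown in this file. The sketch
# below is a hypothetical stand-in, assuming the module's existing
# `import torch as T` alias, an NCHW batch, and an anisotropic
# total-variation penalty; the real helper may reduce or normalize
# differently.
def _tv_loss_sketch(img: T.Tensor) -> T.Tensor:
    # Absolute differences between vertically and horizontally adjacent
    # pixels, summed and averaged over the batch.
    dh = (img[:, :, 1:, :] - img[:, :, :-1, :]).abs().sum()
    dw = (img[:, :, :, 1:] - img[:, :, :, :-1]).abs().sum()
    return (dh + dw) / img.shape[0]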
def main(unused_argv=None):
    """Main training entry point; options are read from FLAGS."""
    # Unpack command-line arguments.
    train_dir = FLAGS.train_dir
    style_dataset = FLAGS.style_dataset
    model_name = FLAGS.model_name
    preprocess_size = [FLAGS.image_size, FLAGS.image_size]
    batch_size = FLAGS.batch_size
    n_epochs = FLAGS.n_epochs
    learn_rate = FLAGS.learning_rate
    content_weights = FLAGS.content_weights
    style_weights = FLAGS.style_weights
    num_pipe_buffer = FLAGS.num_pipe_buffer
    num_styles = FLAGS.num_styles
    train_steps = FLAGS.train_steps
    upsample_method = FLAGS.upsample_method

    # Setup input pipeline (delegate it to CPU to let GPU handle neural net).
    files = tf.train.match_filenames_once(train_dir + '/train-*')
    style_files = tf.train.match_filenames_once(style_dataset)
    print("style %s" % style_files)
    with tf.variable_scope('input_pipe'), tf.device('/cpu:0'):
        _, style_labels, style_grams = datapipe.style_batcher(
            style_files, batch_size, preprocess_size, n_epochs,
            num_pipe_buffer)
        batch_op = datapipe.batcher(files, batch_size, preprocess_size,
                                    n_epochs, num_pipe_buffer)

    # Set up the weights of the style and content layers.
    content_weights = ast.literal_eval(content_weights)
    style_weights = ast.literal_eval(style_weights)
    target_grams = []
    for name, val in style_weights.items():
        target_grams.append(style_grams[name])

    # Alter the names to include a namescope that we'll use + output suffix.
    loss_style_layers = []
    loss_style_weights = []
    loss_content_layers = []
    loss_content_weights = []
    for key, val in style_weights.items():
        loss_style_layers.append(key + ':0')
        loss_style_weights.append(val)
    for key, val in content_weights.items():
        loss_content_layers.append(key + ':0')
        loss_content_weights.append(val)

    # Load the image transformation network into the default graph.
    shape = [batch_size] + preprocess_size + [3]
    with tf.variable_scope('styleNet'):
        X = tf.placeholder(tf.float32, shape=shape, name='input')
        Y = transform(X, style_labels, num_styles, upsample_method)
        print(Y)

    # Connect vgg directly to the image transformation network.
    with tf.variable_scope('vgg'):
        vggnet = vgg16.vgg16(Y)

    # Get the Gram matrices' tensors for the style loss features.
    input_img_grams = losses.get_grams(loss_style_layers)

    # Get the tensors for content loss features.
    content_layers = losses.get_layers(loss_content_layers)

    # Create the loss function.
    content_targets = tuple(
        tf.placeholder(tf.float32,
                       shape=layer.get_shape(),
                       name='content_input_{}'.format(i))
        for i, layer in enumerate(content_layers))
    cont_loss = losses.content_loss(content_layers, content_targets,
                                    loss_content_weights)
    style_loss = losses.style_loss(input_img_grams, target_grams,
                                   loss_style_weights)
    tv_loss = losses.tv_loss(Y)
    loss = cont_loss + style_loss + tv_loss

    # We do not want to train VGG, so we must grab the subset.
    train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope='styleNet')

    # Setup step + optimizer.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learn_rate).minimize(
        loss, global_step, train_vars)

    if not os.path.exists('./models'):  # Dir to save final models to
        os.makedirs('./models')
    final_saver = tf.train.Saver(train_vars)

    # We must include local variables because of the batch pipeline.
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # Begin training.
    print('Starting training...')
    with tf.Session() as sess:
        # Initialization
        sess.run(init_op)
        vggnet.load_weights(vgg16.checkpoint_file(), sess)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            while not coord.should_stop():
                current_step = sess.run(global_step)
                batch = sess.run(batch_op)

                # Collect content targets.
                content_data = sess.run(content_layers, feed_dict={Y: batch})

                feed_dict = {X: batch, content_targets: content_data}
                _, loss_out = sess.run([optimizer, loss], feed_dict=feed_dict)
                if current_step % 10 == 0:
                    print(current_step, loss_out)

                # Break out once we reach the requested number of steps.
                if current_step == train_steps:
                    print('Done training.')
                    break
        except tf.errors.OutOfRangeError:
            print('Done training.')
        finally:
            # Save the model (the image transformation network) for later
            # usage in predict.py.
            final_saver.save(sess, 'models/' + model_name + '_final.ckpt',
                             write_meta_graph=False)
            coord.request_stop()
            coord.join(threads)
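# NOTE: losses.get_grams / utils.get_grams are defined elsewhere. Below is a
# plausible minimal sketch of the Gram computation they perform per layer,
# assuming NHWC activations with static shapes and normalization by the
# feature-map size; the real helpers may normalize differently.
def _gram_matrix_sketch(layer):
    # Flatten the spatial dimensions, then take inner products between
    # channels; one [c, c] Gram matrix per batch item.
    b, h, w, c = layer.get_shape().as_list()
    feats = tf.reshape(layer, [b, h * w, c])
    grams = tf.matmul(feats, feats, transpose_a=True)  # shape [b, c, c]
    return grams / (h * w * c)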
def main(args):
    """main

    :param args: argparse.Namespace object from argparse.parse_args().
    """
    # Unpack command-line arguments.
    train_dir = args.train_dir
    style_img_path = args.style_img_path
    model_name = args.model_name
    preprocess_size = args.preprocess_size
    batch_size = args.batch_size
    n_epochs = args.n_epochs
    run_name = args.run_name
    learn_rate = args.learn_rate
    loss_content_layers = args.loss_content_layers
    loss_style_layers = args.loss_style_layers
    content_weights = args.content_weights
    style_weights = args.style_weights
    num_steps_ckpt = args.num_steps_ckpt
    num_pipe_buffer = args.num_pipe_buffer
    num_steps_break = args.num_steps_break
    beta_val = args.beta
    style_target_resize = args.style_target_resize
    upsample_method = args.upsample_method

    # Load in the style image that will define the model.
    style_img = utils.imread(style_img_path)
    style_img = utils.imresize(style_img, style_target_resize)
    style_img = style_img[np.newaxis, :].astype(np.float32)

    # Alter the names to include a namescope that we'll use + output suffix.
    loss_style_layers = ['vgg/' + i + ':0' for i in loss_style_layers]
    loss_content_layers = ['vgg/' + i + ':0' for i in loss_content_layers]

    # Get target Gram matrices from the style image.
    with tf.variable_scope('vgg'):
        X_vgg = tf.placeholder(tf.float32, shape=style_img.shape, name='input')
        vggnet = vgg16.vgg16(X_vgg)
    with tf.Session() as sess:
        vggnet.load_weights('libs/vgg16_weights.npz', sess)
        print('Precomputing target style layers.')
        target_grams = sess.run(utils.get_grams(loss_style_layers),
                                feed_dict={X_vgg: style_img})

    # Clean up so we can re-create vgg connected to our image network.
    print('Resetting default graph.')
    tf.reset_default_graph()

    # Load the image transformation network into the default graph.
    shape = [batch_size] + preprocess_size + [3]
    with tf.variable_scope('img_t_net'):
        X = tf.placeholder(tf.float32, shape=shape, name='input')
        Y = create_net(X, upsample_method)

    # Connect vgg directly to the image transformation network.
    with tf.variable_scope('vgg'):
        vggnet = vgg16.vgg16(Y)

    # Get the Gram matrices' tensors for the style loss features.
    input_img_grams = utils.get_grams(loss_style_layers)

    # Get the tensors for content loss features.
    content_layers = utils.get_layers(loss_content_layers)

    # Create the loss function.
    content_targets = tuple(
        tf.placeholder(tf.float32,
                       shape=layer.get_shape(),
                       name='content_input_{}'.format(i))
        for i, layer in enumerate(content_layers))
    cont_loss = losses.content_loss(content_layers, content_targets,
                                    content_weights)
    style_loss = losses.style_loss(input_img_grams, target_grams,
                                   style_weights)
    tv_loss = losses.tv_loss(Y)
    beta = tf.placeholder(tf.float32, shape=[], name='tv_scale')
    loss = cont_loss + style_loss + beta * tv_loss
    with tf.name_scope('summaries'):
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('style_loss', style_loss)
        tf.summary.scalar('content_loss', cont_loss)
        tf.summary.scalar('tv_loss', beta * tv_loss)

    # Setup input pipeline (delegate it to CPU to let GPU handle neural net).
    files = tf.train.match_filenames_once(train_dir + '/train-*')
    with tf.variable_scope('input_pipe'), tf.device('/cpu:0'):
        batch_op = datapipe.batcher(files, batch_size, preprocess_size,
                                    n_epochs, num_pipe_buffer)

    # We do not want to train VGG, so we must grab the subset.
    train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope='img_t_net')

    # Setup step + optimizer.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learn_rate).minimize(
        loss, global_step, train_vars)

    # Setup subdirectory for this run's TensorBoard logs.
    if not os.path.exists('./summaries/train/'):
        os.makedirs('./summaries/train/')
    if run_name is None:
        current_dirs = [
            name for name in os.listdir('./summaries/train/')
            if os.path.isdir('./summaries/train/' + name)
        ]
        name = model_name + '0'
        count = 0
        while name in current_dirs:
            count += 1
            name = model_name + '{}'.format(count)
        run_name = name

    # Savers and summary writers
    if not os.path.exists('./training'):  # Dir we'll later save .ckpts to
        os.makedirs('./training')
    if not os.path.exists('./models'):  # Dir to save final models to
        os.makedirs('./models')
    saver = tf.train.Saver()
    final_saver = tf.train.Saver(train_vars)
    merged = tf.summary.merge_all()
    full_log_path = './summaries/train/' + run_name
    train_writer = tf.summary.FileWriter(full_log_path)

    # We must include local variables because of the batch pipeline.
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # Begin training.
    print('Starting training...')
    with tf.Session() as sess:
        # Initialization
        sess.run(init_op)
        vggnet.load_weights('libs/vgg16_weights.npz', sess)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            while not coord.should_stop():
                current_step = sess.run(global_step)
                batch = sess.run(batch_op)

                # Collect content targets.
                content_data = sess.run(content_layers, feed_dict={Y: batch})

                feed_dict = {
                    X: batch,
                    content_targets: content_data,
                    beta: beta_val
                }
                if current_step % num_steps_ckpt == 0:
                    # Save a checkpoint.
                    save_path = 'training/' + model_name + '.ckpt'
                    saver.save(sess, save_path, global_step=global_step)
                    summary, _, loss_out = sess.run([merged, optimizer, loss],
                                                    feed_dict=feed_dict)
                    train_writer.add_summary(summary, current_step)
                    print(current_step, loss_out)
                elif current_step % 10 == 0:
                    # Collect some diagnostic data for TensorBoard.
                    summary, _, loss_out = sess.run([merged, optimizer, loss],
                                                    feed_dict=feed_dict)
                    train_writer.add_summary(summary, current_step)

                    # Do some standard output.
                    print(current_step, loss_out)
                else:
                    _, loss_out = sess.run([optimizer, loss],
                                           feed_dict=feed_dict)

                # Break out once we reach the requested number of steps.
                if current_step == num_steps_break:
                    print('Done training.')
                    break
        except tf.errors.OutOfRangeError:
            print('Done training.')
        finally:
            # Save the model (the image transformation network) for later
            # usage in predict.py.
            final_saver.save(sess, 'models/' + model_name + '_final.ckpt')
            coord.request_stop()
            coord.join(threads)
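# NOTE: losses.content_loss is not shown in this file. A hedged sketch of the
# usual form, a weighted mean-squared error between current and target
# activations; the real reduction and weighting are assumptions.
def _content_loss_sketch(current_layers, target_layers, weights):
    total = tf.constant(0.0)
    for layer, target, w in zip(current_layers, target_layers, weights):
        total += w * tf.reduce_mean(tf.squared_difference(layer, target))
    return total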
def main(args):
    # Unpack command-line arguments.
    style_img_path = args.style_img_path
    cont_img_path = args.cont_img_path
    learn_rate = args.learn_rate
    loss_content_layers = args.loss_content_layers
    loss_style_layers = args.loss_style_layers
    content_weights = args.content_weights
    style_weights = args.style_weights
    num_steps_break = args.num_steps_break
    beta = args.beta
    style_target_resize = args.style_target_resize
    cont_target_resize = args.cont_target_resize
    output_img_path = args.output_img_path

    # Load in the style image that will define the model.
    style_img = utils.imread(style_img_path)
    style_img = utils.imresize(style_img, style_target_resize)
    style_img = style_img[np.newaxis, :].astype(np.float32)

    # Alter the names to include a namescope that we'll use + output suffix.
    loss_style_layers = ['vgg/' + i + ':0' for i in loss_style_layers]
    loss_content_layers = ['vgg/' + i + ':0' for i in loss_content_layers]

    # Get target Gram matrices from the style image.
    with tf.variable_scope('vgg'):
        X_vgg = tf.placeholder(tf.float32, shape=style_img.shape, name='input')
        vggnet = vgg16.vgg16(X_vgg)
    with tf.Session() as sess:
        vggnet.load_weights('libs/vgg16_weights.npz', sess)
        print('Precomputing target style layers.')
        target_grams = sess.run(utils.get_grams(loss_style_layers),
                                feed_dict={'vgg/input:0': style_img})

    # Clean up so we can re-create vgg at the size of the input content image
    # for training.
    print('Resetting default graph.')
    tf.reset_default_graph()

    # Read in + resize the content image.
    cont_img = utils.imread(cont_img_path)
    cont_img = utils.imresize(cont_img, cont_target_resize)
    cont_img = cont_img[np.newaxis, :].astype(np.float32)

    # Set up VGG and initialize it with a white-noise image that we'll
    # optimize.
    shape = cont_img.shape
    with tf.variable_scope('to_train'):
        white_noise = np.random.rand(shape[0], shape[1],
                                     shape[2], shape[3]) * 255.0
        white_noise = tf.constant(white_noise.astype(np.float32))
        X = tf.get_variable('input', dtype=tf.float32,
                            initializer=white_noise)
    with tf.variable_scope('vgg'):
        vggnet = vgg16.vgg16(X)

    # Get the Gram matrices' tensors for the style loss features.
    input_img_grams = utils.get_grams(loss_style_layers)

    # Get the tensors for content loss features.
    content_layers = utils.get_layers(loss_content_layers)

    # Get the target content features.
    with tf.Session() as sess:
        vggnet.load_weights('libs/vgg16_weights.npz', sess)
        print('Precomputing target content layers.')
        content_targets = sess.run(content_layers,
                                   feed_dict={'to_train/input:0': cont_img})

    # Create the loss function.
    cont_loss = losses.content_loss(content_layers, content_targets,
                                    content_weights)
    style_loss = losses.style_loss(input_img_grams, target_grams,
                                   style_weights)
    tv_loss = losses.tv_loss(X)
    loss = cont_loss + style_loss + beta * tv_loss

    # We do not want to train VGG, so we must grab the subset.
    train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope='to_train')

    # Setup step + optimizer.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learn_rate) \
        .minimize(loss, global_step, train_vars)

    # Initializer
    init_op = tf.global_variables_initializer()

    # Begin training.
    with tf.Session() as sess:
        sess.run(init_op)
        vggnet.load_weights('libs/vgg16_weights.npz', sess)

        current_step = 0
        while current_step < num_steps_break:
            current_step = sess.run(global_step)
            if current_step % 10 == 0:
                # Periodically print some diagnostics.
                _, loss_out = sess.run([optimizer, loss])
                print(current_step, loss_out)
            else:
                # optimizer.minimize(sess)
                _, loss_out = sess.run([optimizer, loss])

        # Upon finishing, get the X tensor (our image).
        img_out = sess.run(X)

    # Save it.
    img_out = np.squeeze(img_out)
    utils.imwrite(output_img_path, img_out)
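# NOTE: losses.style_loss is defined elsewhere. A minimal sketch of the
# Gram-based form used in Gatys-style transfer, assuming a weighted MSE
# between the generated image's Gram matrices and the precomputed
# target_grams; the real helper's weighting is an assumption.
def _style_loss_sketch(input_grams, target_grams, weights):
    total = tf.constant(0.0)
    for gram, target, w in zip(input_grams, target_grams, weights):
        total += w * tf.reduce_mean(tf.squared_difference(gram, target))
    return total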
def solve(Config):
    gc.enable()

    # Get the style features.
    style_features = losses.get_style_feature(Config)

    # Prepare some dirs for use.
    # tf.reset_default_graph()
    model_dir = Config.model_dir
    if not osp.exists(model_dir):
        os.mkdir(model_dir)

    # Construct the graph and model.
    # Prepare the dataset.
    images = Dataset(Config).imagedata_pipelines()

    # The training net.
    generated = model.inference_trainnet(images)

    # Preprocess the generated image, then concat it with the content images
    # so we only have to feed the vgg net once.
    preprocess_generated = preprocess(generated, Config)
    layer_infos = Vgg(Config.feature_path).build(
        tf.concat([preprocess_generated, images], 0))

    # Get the losses.
    content_loss = losses.content_loss(layer_infos, Config.content_layers)
    style_loss = losses.style_loss(layer_infos, Config.style_layers,
                                   style_features)
    tv_loss = losses.tv_loss(generated)
    loss = Config.style_weight * style_loss \
        + Config.content_weight * content_loss \
        + Config.tv_weight * tv_loss

    # Train op.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = tf.train.AdamOptimizer(Config.lr).minimize(
        loss, global_step=global_step)

    # Add summaries.
    with tf.name_scope('losses'):
        tf.summary.scalar('content_loss', content_loss)
        tf.summary.scalar('style_loss', style_loss)
        tf.summary.scalar('tv_loss', tv_loss)
    with tf.name_scope('weighted_losses'):
        tf.summary.scalar('weighted_content_loss',
                          content_loss * Config.content_weight)
        tf.summary.scalar('weighted_style_loss',
                          style_loss * Config.style_weight)
        tf.summary.scalar('weighted_tv_loss', tv_loss * Config.tv_weight)
    tf.summary.scalar('total_loss', loss)
    tf.summary.image('generated', generated)
    tf.summary.image('original', images)
    summary = tf.summary.merge_all()
    summary_path = osp.join(model_dir, 'summary')
    if not osp.exists(summary_path):
        os.mkdir(summary_path)
    writer = tf.summary.FileWriter(summary_path)

    # The saver/loader.
    saver = tf.train.Saver(tf.global_variables())
    restore = tf.train.latest_checkpoint(model_dir)

    # Begin the training work.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Initialize the variables.
        sess.run([
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])

        # Fine-tune from an existing checkpoint if requested.
        if Config.finetune:
            if restore:
                print('restoring model from {}'.format(restore))
                saver.restore(sess, restore)
            else:
                print('no model exists, training from scratch')

        # Start the data queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        print('begin training')
        start_time = time.time()
        local_time = time.time()
        for step in range(Config.max_iter + 1):
            _, loss_value = sess.run([train_op, loss])
            if step % Config.display == 0 or step == Config.max_iter:
                print("{}[iterations], train loss {}, time consumed {}s".format(
                    step, loss_value, time.time() - local_time))
                local_time = time.time()
            assert not np.isnan(loss_value), 'model diverged with loss nan'
            if step != 0 and (step % Config.snapshot == 0
                              or step == Config.max_iter):
                # Save a snapshot and summary so we can inspect the
                # generated images.
                print('adding summary and saving snapshot...')
                saver.save(sess, osp.join(model_dir, 'model.ckpt'),
                           global_step=step)
                summary_str = sess.run(summary)
                writer.add_summary(summary_str, global_step=step)
                writer.flush()
        coord.request_stop()
        coord.join(threads)
    print('done, consumed time {}s'.format(time.time() - start_time))
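# NOTE: the preprocess() helper used above is not shown. A hypothetical
# sketch of the usual VGG preprocessing, subtracting the standard ImageNet
# channel means from an NHWC RGB batch; whether the real helper also
# reorders channels (e.g. to BGR) is unknown.
_VGG_MEAN_RGB = [123.68, 116.779, 103.939]

def _preprocess_sketch(images):
    return images - tf.constant(_VGG_MEAN_RGB, dtype=tf.float32,
                                shape=[1, 1, 1, 3])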
def _create_tv_loss(self):
    # Total-variation regularizer on the first image in the batch.
    self.tv_loss = tv_loss(self.x[0])
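# NOTE: tv_loss here is an external helper. In TF1 a minimal equivalent can
# be built from tf.image.total_variation, which sums absolute differences of
# neighboring pixels; the real helper's normalization is an assumption.
def _tv_loss_sketch(img):
    # tf.image.total_variation accepts a 3-D image or a 4-D batch.
    return tf.reduce_sum(tf.image.total_variation(img))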