def content_loss(target, prediction, batch_size,
                 vgg_dir='/gdata/huangjie/hdrnet/vgg_pretrained/imagenet-vgg-verydeep-19.mat',
                 content_layer='relu5_4'):
    """Return the VGG feature-space loss between `prediction` and `target`.

    Both inputs are multiplied by 255, passed through `vgg.preprocess`, and
    fed to `vgg.net`; the loss is twice the L2 loss of the difference of the
    two `content_layer` activations, divided by the per-example activation
    size times `batch_size`.

    Args:
        target: reference image tensor (presumably scaled to [0, 1], given
            the `* 255` below -- confirm against the caller).
        prediction: generated image tensor, same layout as `target`.
        batch_size: number of examples in the batch, used for normalisation.
        vgg_dir: path to the pretrained VGG-19 `.mat` weights.  Defaults to
            the location that used to be hard-coded here.
        content_layer: name of the VGG layer whose activations are compared.

    Returns:
        A scalar tensor holding the normalised content loss.
    """
    enhanced_vgg = vgg.net(vgg_dir, vgg.preprocess(prediction * 255))
    dslr_vgg = vgg.net(vgg_dir, vgg.preprocess(target * 255))

    content_size = utils._tensor_size(dslr_vgg[content_layer]) * batch_size
    difference = enhanced_vgg[content_layer] - dslr_vgg[content_layer]
    # tf.nn.l2_loss computes sum(x ** 2) / 2, hence the factor of two.
    loss_content = 2 * tf.nn.l2_loss(difference) / content_size
    return tf.reduce_mean(loss_content)
def get_content_features(content_path, content_layers):
    """Evaluate the requested VGG layers for the content image.

    The image is read with `reader.get_image` at FLAGS.IMAGE_SIZE, given a
    leading batch axis, and pushed through `vgg.net` in a throwaway graph.

    Returns:
        The evaluated activations, one per entry of `content_layers` and in
        the same order, followed by the evaluated image as the last element.
    """
    with tf.Graph().as_default():
        loaded = reader.get_image(content_path, FLAGS.IMAGE_SIZE)
        image = tf.expand_dims(loaded, 0)
        net, _ = vgg.net(FLAGS.VGG_PATH, image)

        fetches = [net[name] for name in content_layers]
        fetches.append(image)

        with tf.Session() as sess:
            return sess.run(fetches)
def get_style_features(style_paths, style_layers):
    """Evaluate the Gram matrices of the style images.

    Every path in `style_paths` is read at
    round(FLAGS.IMAGE_SIZE * FLAGS.STYLE_SCALE), the images are packed into
    one batch, and `gram` is applied to each requested VGG layer.

    Returns:
        A list with one evaluated `gram(...)` result per entry of
        `style_layers`, in the same order.
    """
    with tf.Graph().as_default():
        size = int(round(FLAGS.IMAGE_SIZE * FLAGS.STYLE_SCALE))
        loaded = [reader.get_image(path, size) for path in style_paths]
        images = tf.pack(loaded)
        net, _ = vgg.net(FLAGS.VGG_PATH, images)

        grams = [gram(net[name]) for name in style_layers]

        with tf.Session() as sess:
            return sess.run(grams)
def get_style_features(style_paths, style_layers, net_type):
    """Evaluate the Gram matrices of the style images for a given network.

    Every path in `style_paths` is read at
    round(FLAGS.image_size * FLAGS.style_scale), the images are stacked into
    one batch, and `model.gram` is applied to each requested layer of the
    network built by `vgg.net(..., net_type)`.

    Returns:
        A list with one evaluated `model.gram(...)` result per entry of
        `style_layers`, in the same order.
    """
    with tf.Graph().as_default():
        size = int(round(FLAGS.image_size * FLAGS.style_scale))
        loaded = [reader.get_image(path, size) for path in style_paths]
        images = tf.stack(loaded)
        net, _ = vgg.net(FLAGS.vgg_path, images, net_type)

        grams = [model.gram(net[name], FLAGS.batch_size)
                 for name in style_layers]

        with tf.Session() as sess:
            return sess.run(grams)
def main(argv=None):
    """Optimise one image towards the configured content and style targets.

    All settings come from FLAGS.  The image variable starts either from
    random noise or from the content image, is updated with Adam for
    FLAGS.NUM_ITERATIONS steps (printing step, elapsed seconds, total,
    content and style loss each time), and is finally written to 'out.png'.
    `argv` is accepted but not used.
    """
    style_paths = FLAGS.STYLE_IMAGES.split(',')
    style_layers = FLAGS.STYLE_LAYERS.split(',')
    content_path = FLAGS.CONTENT_IMAGE
    content_layers = FLAGS.CONTENT_LAYERS.split(',')

    # Targets are evaluated up front; the content call also returns the image
    # itself as its last element.
    style_targets = get_style_features(style_paths, style_layers)
    fetched = get_content_features(content_path, content_layers)
    content_targets = fetched[:-1]
    content_pixels = fetched[-1]

    from_content = tf.constant(content_pixels)
    from_noise = tf.random_normal(content_pixels.shape)
    if FLAGS.RANDOM_INIT:
        initial = tf.Variable(from_noise)
    else:
        initial = tf.Variable(from_content)
    net, _ = vgg.net(FLAGS.VGG_PATH, initial)

    content_loss = 0
    for target, layer in zip(content_targets, content_layers):
        element_count = tf.size(target)
        squared_error = tf.nn.l2_loss(net[layer] - target)
        content_loss += squared_error / tf.to_float(element_count)
    content_loss = FLAGS.CONTENT_WEIGHT * content_loss / len(content_layers)

    style_loss = 0
    for target_gram, layer in zip(style_targets, style_layers):
        element_count = tf.size(target_gram)
        squared_error = tf.nn.l2_loss(gram(net[layer]) - target_gram)
        style_loss += squared_error / tf.to_float(element_count)
    style_loss = FLAGS.STYLE_WEIGHT * style_loss

    tv_loss = FLAGS.TV_WEIGHT * total_variation_loss(initial)
    total_loss = content_loss + style_loss + tv_loss

    optimizer = tf.train.AdamOptimizer(FLAGS.LEARNING_RATE)
    train_op = optimizer.minimize(total_loss)

    # Add the mean pixel back and clamp to uint8 before PNG encoding.
    restored = tf.squeeze(initial) + reader.mean_pixel
    output_image = tf.image.encode_png(tf.saturate_cast(restored, tf.uint8))

    fetches = [train_op, total_loss, content_loss, style_loss]
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        start_time = time.time()
        for step in range(FLAGS.NUM_ITERATIONS):
            _, loss_t, cl, sl = sess.run(fetches)
            elapsed = time.time() - start_time
            start_time = time.time()
            print(step, elapsed, loss_t, cl, sl)

        encoded = sess.run(output_image)
        with open('out.png', 'wb') as f:
            f.write(encoded)
def main(argv=None):
    """Optimise one image towards the configured content and style targets.

    All settings come from FLAGS.  The image variable starts either from
    random noise or from the content image, is updated with Adam for
    FLAGS.NUM_ITERATIONS steps (printing step, elapsed seconds, total,
    content and style loss each time), and is finally written to 'out.png'.
    `argv` is accepted but not used.
    """
    style_paths = FLAGS.STYLE_IMAGES.split(',')
    style_layers = FLAGS.STYLE_LAYERS.split(',')
    content_path = FLAGS.CONTENT_IMAGE
    content_layers = FLAGS.CONTENT_LAYERS.split(',')

    # Targets are evaluated up front; the content call also returns the image
    # itself as its last element.
    style_targets = get_style_features(style_paths, style_layers)
    fetched = get_content_features(content_path, content_layers)
    content_targets = fetched[:-1]
    content_pixels = fetched[-1]

    from_content = tf.constant(content_pixels)
    from_noise = tf.random_normal(content_pixels.shape)
    if FLAGS.RANDOM_INIT:
        initial = tf.Variable(from_noise)
    else:
        initial = tf.Variable(from_content)
    net, _ = vgg.net(FLAGS.VGG_PATH, initial)

    content_loss = 0
    for target, layer in zip(content_targets, content_layers):
        element_count = tf.size(target)
        squared_error = tf.nn.l2_loss(net[layer] - target)
        content_loss += squared_error / tf.to_float(element_count)
    content_loss = FLAGS.CONTENT_WEIGHT * content_loss / len(content_layers)

    style_loss = 0
    for target_gram, layer in zip(style_targets, style_layers):
        element_count = tf.size(target_gram)
        squared_error = tf.nn.l2_loss(gram(net[layer]) - target_gram)
        style_loss += squared_error / tf.to_float(element_count)
    style_loss = FLAGS.STYLE_WEIGHT * style_loss

    tv_loss = FLAGS.TV_WEIGHT * total_variation_loss(initial)
    total_loss = content_loss + style_loss + tv_loss

    optimizer = tf.train.AdamOptimizer(FLAGS.LEARNING_RATE)
    train_op = optimizer.minimize(total_loss)

    # Add the mean pixel back and clamp to uint8 before PNG encoding.
    restored = tf.squeeze(initial) + reader.mean_pixel
    output_image = tf.image.encode_png(tf.saturate_cast(restored, tf.uint8))

    fetches = [train_op, total_loss, content_loss, style_loss]
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        start_time = time.time()
        for step in range(FLAGS.NUM_ITERATIONS):
            _, loss_t, cl, sl = sess.run(fetches)
            elapsed = time.time() - start_time
            start_time = time.time()
            print(step, elapsed, loss_t, cl, sl)

        encoded = sess.run(output_image)
        with open('out.png', 'wb') as f:
            f.write(encoded)
def main():
    """Run style transfer from the command line, saving every iteration.

    Expects exactly four arguments in `sys.argv[1:]`:
    content_path, style_path, width, style_scale.  A positive `width`
    resizes the content image to that width (height follows the aspect
    ratio); a positive `style_scale` rescales the style image.

    The function then runs 100000 Adam steps with an exponentially decayed
    learning rate and, before each step, writes the current image to
    '%05d.jpg' numbered by the iteration.

    Raises:
        ValueError: if the argument count is wrong or `width` /
            `style_scale` cannot be parsed as numbers.
    """
    # `reduce` is a builtin on Python 2 only; functools has it on both.
    from functools import reduce

    content_path, style_path, width, style_scale = sys.argv[1:]
    width = int(width)
    style_scale = float(style_scale)

    content_image = imread(content_path)
    style_image = imread(style_path)

    if width > 0:
        aspect = float(content_image.shape[0]) / content_image.shape[1]
        new_shape = (int(math.floor(aspect * width)), width)
        content_image = sm.imresize(content_image, new_shape)
    if style_scale > 0:
        style_image = sm.imresize(style_image, style_scale)

    shape = (1,) + content_image.shape
    style_shape = (1,) + style_image.shape

    content_features = {}
    style_features = {}

    # Content activations, computed once in a throwaway CPU graph.
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session():
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(VGG_PATH, image)
        content_pre = np.array([vgg.preprocess(content_image, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
            feed_dict={image: content_pre})

    # Style Gram matrices (unnormalised here; the loss below normalises).
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session():
        image = tf.placeholder('float', shape=style_shape)
        net, _ = vgg.net(VGG_PATH, image)
        style_pre = np.array([vgg.preprocess(style_image, mean_pixel)])
        for layer_name in STYLE_LAYERS:
            features = net[layer_name].eval(feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            style_features[layer_name] = np.matmul(features.T, features)

    g = tf.Graph()
    with g.as_default():
        global_step = tf.Variable(0, trainable=False)

        # Start from the content image blended with Gaussian noise.
        noise = np.random.normal(size=shape,
                                 scale=np.std(content_image) * 0.1)
        content_pre = vgg.preprocess(content_image, mean_pixel)
        init = content_pre * (1 - NOISE_RATIO) + noise * NOISE_RATIO
        init = init.astype('float32')
        image = tf.Variable(init)
        net, _ = vgg.net(VGG_PATH, image)

        content_loss = tf.nn.l2_loss(
            net[CONTENT_LAYER] - content_features[CONTENT_LAYER])

        style_losses = []
        for layer_name in STYLE_LAYERS:
            layer = net[layer_name]
            # Distinct names so the CLI `width` above is not clobbered.
            _, layer_h, layer_w, number = [
                dim.value for dim in layer.get_shape()]
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats)
            style_gram = style_features[layer_name]
            normaliser = 4.0 * number ** 2 * (layer_h * layer_w) ** 2
            style_losses.append(tf.nn.l2_loss(gram - style_gram) / normaliser)
        style_loss = reduce(tf.add, style_losses) / len(style_losses)

        loss = ALPHA * content_loss + BETA * style_loss

        learning_rate = tf.train.exponential_decay(
            LEARNING_RATE_INITIAL, global_step,
            LEARNING_DECAY_STEPS, LEARNING_DECAY_BASE, staircase=True)
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(
            loss, global_step=global_step)

        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(100000):
                # Single-argument print() behaves the same on Python 2 and 3.
                print('i = %d' % i)
                snapshot = image.eval().reshape(shape[1:])
                imsave('%05d.jpg' % i, vgg.unprocess(snapshot, mean_pixel))
                train_step.run()
def stylize(network, initial, content, styles, iterations,
        content_weight, style_weight, style_blend_weights, tv_weight,
        learning_rate, print_iterations=None, checkpoint_iterations=None):
    """Stylize `content` with a mask-dependent style loss (experimental).

    Content activations and per-style Gram matrices are computed first in
    throwaway CPU graphs.  An image variable is then optimised with Adam
    against content, style and total-variation losses.  For the style loss
    the layer features are multiplied by a mask derived from the file
    'emma/emma_test_mask.jpg' (pixels > 100): style 0 uses the mask itself,
    every other style uses `mask < 1`.

    NOTE(review): this is Python 2 code (`print` statements, `xrange`,
    builtin `reduce`) and contains debugging prints throughout.

    :param network: passed to `vgg.net` as the network/weights argument.
    :param initial: starting image, or None to start from random noise.
    :param content: content image array; `(1,) + content.shape` is the
        shape of the optimised variable.
    :param styles: sequence of style image arrays.
    :param iterations: number of optimisation steps.
    :param style_blend_weights: indexed by style position; scales each
        style's summed layer losses.
    :param print_iterations: print losses every this many steps, or None.
    :param checkpoint_iterations: evaluate the loss and track the best
        image every this many steps, or None for last step only.
    :returns: `vgg.unprocess` of the best image recorded.
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                print 'Initial feature shape: ', features.shape
                # Flatten all leading axes so columns are the last axis.
                features = np.reshape(features, (-1, features.shape[3]))
                #mask = np.zeros_like(features)
                #mask[:49664/2, :] = 1
                #print 'Mask shape', mask.shape
                print 'Final features shape', features.shape
                #features = features*mask
                # Gram matrix normalised by the number of feature elements.
                gram = np.matmul(features.T, features) / features.size
                print 'Gram matrix shape: ', gram.shape
                style_features[i][layer] = gram
                #sys.exit()

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            # NOTE(review): `noise` is computed but never used below.
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                content_features[CONTENT_LAYER].size)
        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                print 'Height, width, number', height, width, number
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                #print tf.shape(feats).as_list()
                print 'Height', height
                print 'Weight', width
                print 'Number', number
                print 'Style features shape', style_features[i][style_layer].shape
                print style_layer
                # The mask file is re-read from disk for every style and
                # every layer.
                if style_layer == 'relu2_1':
                    mask = np.zeros((height*width, number), dtype=np.float32)
                    temp = imread('emma/emma_test_mask.jpg').astype(np.float32)
                    # Max over 2x2 blocks; assumes the mask file is exactly
                    # (2*height, 2*width) single-channel -- TODO confirm.
                    c = temp.reshape(height,2,width,2)
                    temp = c.max(axis=1).max(axis=2)
                    print temp.shape
                    maskt = np.reshape(temp, (height*width,))
                    maskt = maskt > 100
                    # Copy the thresholded mask into every feature column.
                    for d in xrange(number):
                        mask[:,d] = maskt
                    print 'Mask shape', mask.shape
                    #b = mask.reshape(height*width*2, 2, number/2,2)
                    #mask = b.max(axis=1).max(axis=2)
                    #print 'New mask shape', mask.shape
                else:
                    # Assumes the mask file already has height*width
                    # elements for this layer -- TODO confirm; the reshape
                    # raises otherwise.
                    mask = np.zeros((height*width, number), dtype=np.float32)
                    maskt = np.reshape(imread('emma/emma_test_mask.jpg').astype(np.float32), (height*width,))
                    maskt = maskt > 100
                    for d in xrange(number):
                        mask[:,d] = maskt
                    print 'Mask shape', mask.shape
                if i == 0:
                    # First style: keep features where the mask is set.
                    mask = tf.constant(mask)
                    print 'Mask shape', map(lambda i: i.value, mask.get_shape())
                    feats = tf.mul(feats,mask)
                    gram = tf.matmul(tf.transpose(feats), feats) / size
                    style_gram = style_features[i][style_layer]
                    style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
                else:
                    # Other styles: use the complement of the mask.
                    # NOTE(review): `mask2` is a boolean NumPy array here;
                    # confirm tf.mul accepts it against float features.
                    mask2 = mask < 1
                    feats2 = tf.mul(feats,mask2)
                    gram2 = tf.matmul(tf.transpose(feats2), feats2) / size
                    style_gram = style_features[i][style_layer]
                    style_losses.append(2 * tf.nn.l2_loss(gram2 - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        def print_progress(i, last=False):
            # Prints the four losses to stderr on every `print_iterations`-th
            # step, or whenever `last` is true; does nothing when
            # `print_iterations` is None.
            if print_iterations is not None:
                if i is not None and i % print_iterations == 0 or last:
                    print >> stderr, '  content loss: %g' % content_loss.eval()
                    print >> stderr, '    style loss: %g' % style_loss.eval()
                    print >> stderr, '       tv loss: %g' % tv_loss.eval()
                    print >> stderr, '    total loss: %g' % loss.eval()

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(iterations):
                print_progress(i)
                print >> stderr, 'Iteration %d/%d' % (i + 1, iterations)
                train_step.run()
                if (checkpoint_iterations is not None and i % checkpoint_iterations == 0) or i == iterations - 1:
                    # Keep the lowest-loss image seen at a checkpoint.
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                print_progress(None, i == iterations - 1)
                # Dump the best image so far every 10 iterations.
                if i % 10 == 0 and best is not None:
                    tmp_img = vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                    imsave("iter" + str(i) + ".jpg", tmp_img)
            # NOTE(review): `best` is still None here if `iterations` is 0.
            return vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
def stylize(network, initial, content, styles, iterations,
            content_weight, style_weight, style_blend_weights, tv_weight,
            learning_rate, print_iterations=None, checkpoint_iterations=None,
            print_image_iterations=False):
    """Stylize `content` using one or more style images.

    Content activations and per-style Gram matrices are computed first in
    throwaway CPU graphs.  An image variable is then optimised with Adam
    against the weighted sum of content, style and total-variation losses.

    Args:
        network: passed to `vgg.net` as the network/weights argument.
        initial: starting image array, or None to start from random noise.
        content: content image array; `(1,) + content.shape` is the shape
            of the optimised variable.
        styles: sequence of style image arrays.
        iterations: number of optimisation steps.
        content_weight, style_weight, tv_weight: loss term multipliers.
        style_blend_weights: indexed by style position; scales each style's
            summed layer losses.
        learning_rate: Adam learning rate.
        print_iterations: write the losses to stderr every this many steps
            (and on the last step); None or 0 disables it.
        checkpoint_iterations: evaluate the loss and track the best image
            every this many steps; None or 0 means last step only.
        print_image_iterations: when true, save the best image so far to
            'iteration_<i>.jpg' every 100 steps.

    Returns:
        `vgg.unprocess` of the lowest-loss image recorded at a checkpoint,
        or of the current image if no checkpoint recorded one.
    """
    # `reduce` is a builtin on Python 2 only; functools has it on both.
    from functools import reduce

    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session():
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
            feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i, style in enumerate(styles):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session():
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(style, mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                # Gram matrix normalised by the number of feature elements.
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (
            2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER])
            / content_features[CONTENT_LAYER].size)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = [
                    dim.value for dim in layer.get_shape()]
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(
                    2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += (style_weight * style_blend_weights[i]
                           * reduce(tf.add, style_losses))

        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])
        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1] - 1, :, :])
             / tv_y_size)
            + (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2] - 1, :])
               / tv_x_size))

        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        def print_progress(i, last=False):
            # Truthiness check: 0 disables printing instead of raising
            # ZeroDivisionError on the modulo below.
            if not print_iterations:
                return
            if (i is not None and i % print_iterations == 0) or last:
                stderr.write('  content loss: %g\n' % content_loss.eval())
                stderr.write('    style loss: %g\n' % style_loss.eval())
                stderr.write('       tv loss: %g\n' % tv_loss.eval())
                stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(iterations):
                last_step = i == iterations - 1
                print_progress(i)
                stderr.write('Iteration %d/%d\n' % (i + 1, iterations))
                train_step.run()
                if (checkpoint_iterations
                        and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                print_progress(None, last_step)
                # `best` stays None until the first checkpoint records an
                # image, so guard before dereferencing it.
                if (print_image_iterations and i % 100 == 0
                        and best is not None):
                    temp_image = vgg.unprocess(
                        best.reshape(shape[1:]), mean_pixel)
                    temp_output = 'iteration_' + str(i) + '.jpg'
                    imsave(temp_output, temp_image)

            # Nothing recorded (iterations == 0, or the loss never compared
            # below infinity): fall back to the current image.
            if best is None:
                best = image.eval()
            return vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
def stylize(network, initial, content, styles, iterations,
            content_weight, style_weight, style_blend_weights, tv_weight,
            learning_rate, print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images, yielding intermediate and final results.

    This is a generator.  It yields `(iteration, image)` tuples on every
    `checkpoint_iterations`-th step and on the last step; for the last step
    `iteration` is None.  The yielded image is always the lowest-loss image
    recorded so far, passed through `vgg.unprocess`.

    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # Content activations, evaluated once on the CPU.
    content_graph = tf.Graph()
    with content_graph.as_default(), content_graph.device('/cpu:0'), \
            tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
            feed_dict={image: content_pre})

    # One graph per style image: Gram matrix of every style layer.
    for idx in range(len(styles)):
        style_graph = tf.Graph()
        with style_graph.as_default(), style_graph.device('/cpu:0'), \
                tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[idx])
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(styles[idx], mean_pixel)])
            for layer in STYLE_LAYERS:
                activations = net[layer].eval(feed_dict={image: style_pre})
                flat = np.reshape(activations, (-1, activations.shape[3]))
                style_features[idx][layer] = (
                    np.matmul(flat.T, flat) / flat.size)

    # Optimise the image itself by backpropagation.
    with tf.Graph().as_default():
        if initial is None:
            # `noise` is not used afterwards; the draw is kept so NumPy's
            # global random state advances exactly as before.
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # Content term.
        content_target = content_features[CONTENT_LAYER]
        content_loss = content_weight * (
            2 * tf.nn.l2_loss(net[CONTENT_LAYER] - content_target)
            / content_target.size)

        # Style term: per-layer Gram differences, blended across styles.
        style_loss = 0
        for idx in range(len(styles)):
            per_layer = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = [
                    dim.value for dim in layer.get_shape()]
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                target_gram = style_features[idx][style_layer]
                per_layer.append(
                    2 * tf.nn.l2_loss(gram - target_gram) / target_gram.size)
            style_loss += (style_weight * style_blend_weights[idx]
                           * reduce(tf.add, per_layer))

        # Total-variation term.
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        vertical = tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:])
        horizontal = tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:])
        tv_loss = tv_weight * 2 * (
            (vertical / tv_y_size) + (horizontal / tv_x_size))

        loss = content_loss + style_loss + tv_loss
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        def print_progress(i, last=False):
            # Always logs the step and the time since the previous call;
            # logs the losses on the last step or every print_iterations.
            global timenow
            stderr.write('Iteration %d/%d, time: %dms\n' % (
                i + 1, iterations, current_milli_time() - timenow))
            timenow = current_milli_time()
            if last or (print_iterations and i % print_iterations == 0):
                stderr.write('  content loss: %g\n' % content_loss.eval())
                stderr.write('    style loss: %g\n' % style_loss.eval())
                stderr.write('       tv loss: %g\n' % tv_loss.eval())
                stderr.write('    total loss: %g\n' % loss.eval())

        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(iterations):
                last_step = (i == iterations - 1)
                print_progress(i, last=last_step)
                train_step.run()

                at_checkpoint = (checkpoint_iterations
                                 and i % checkpoint_iterations == 0)
                if at_checkpoint or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                    yield (
                        (None if last_step else i),
                        vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                    )
def main(argv=None):
    """Train the feed-forward style network, logging summaries.

    All settings come from FLAGS.  Checkpoints go to
    FLAGS.MODEL_PATH/<run id> and summaries to FLAGS.SUMMARY_PATH/<run id>,
    where the run id is FLAGS.NAME or a fresh UUID.  Training runs until the
    input queue raises OutOfRangeError; progress and summaries are written
    every 100 steps and a checkpoint every 10000 steps.  `argv` is accepted
    but not used.
    """
    run_id = FLAGS.NAME if FLAGS.NAME else str(uuid.uuid4())

    model_path = '%s/%s' % (FLAGS.MODEL_PATH, run_id)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    summary_path = '%s/%s' % (FLAGS.SUMMARY_PATH, run_id)
    if not os.path.exists(summary_path):
        os.makedirs(summary_path)

    style_paths = FLAGS.STYLE_IMAGES.split(',')
    style_layers = FLAGS.STYLE_LAYERS.split(',')
    content_layers = FLAGS.CONTENT_LAYERS.split(',')

    style_features_t = get_style_features(style_paths, style_layers)

    images = reader.image(FLAGS.BATCH_SIZE, FLAGS.IMAGE_SIZE,
                          FLAGS.TRAIN_IMAGES_PATH)
    generated = model.net(images - reader.mean_pixel, training=True)

    # Put both generated and training images in same batch through VGG net
    # for efficiency; each layer is split back into its two halves below.
    net, _ = vgg.net(
        FLAGS.VGG_PATH,
        tf.concat(0, [generated, images]) - reader.mean_pixel)

    content_loss = 0
    for layer in content_layers:
        generated_images, content_images = tf.split(0, 2, net[layer])
        size = tf.size(generated_images)
        content_loss += (tf.nn.l2_loss(generated_images - content_images)
                         / tf.to_float(size))

    style_loss = 0
    for style_grams, layer in zip(style_features_t, style_layers):
        generated_images, _ = tf.split(0, 2, net[layer])
        size = tf.size(generated_images)
        for style_gram in style_grams:
            style_loss += (tf.nn.l2_loss(gram(generated_images) - style_gram)
                           / tf.to_float(size))
    style_loss = style_loss / len(style_paths)

    tv_loss = total_variation_loss(generated)

    loss = (FLAGS.STYLE_WEIGHT * style_loss
            + FLAGS.CONTENT_WEIGHT * content_loss
            + FLAGS.TV_WEIGHT * tv_loss)

    global_step = tf.Variable(0, name="global_step", trainable=False)
    train_op = tf.train.AdamOptimizer(1e-3).minimize(
        loss, global_step=global_step)

    # Statistics
    with tf.name_scope('losses'):
        tf.scalar_summary('content loss', content_loss)
        tf.scalar_summary('style loss', style_loss)
        tf.scalar_summary('regularizer loss', tv_loss)
    with tf.name_scope('weighted_losses'):
        tf.scalar_summary('weighted content loss',
                          content_loss * FLAGS.CONTENT_WEIGHT)
        tf.scalar_summary('weighted style loss',
                          style_loss * FLAGS.STYLE_WEIGHT)
        tf.scalar_summary('weighted regularizer loss',
                          tv_loss * FLAGS.TV_WEIGHT)
        tf.scalar_summary('total loss', loss)
    tf.image_summary('original', images)
    tf.image_summary('generated', generated)
    summary = tf.merge_all_summaries()

    with tf.Session() as sess:
        writer = tf.train.SummaryWriter(summary_path, sess.graph)
        saver = tf.train.Saver(tf.all_variables())
        # Named `checkpoint` so the Python 2 builtin `file` is not shadowed.
        checkpoint = tf.train.latest_checkpoint(model_path)

        # Initialise first; a restore afterwards overwrites saved variables.
        sess.run([tf.initialize_all_variables(),
                  tf.initialize_local_variables()])
        if checkpoint:
            print('Restoring model from {}'.format(checkpoint))
            saver.restore(sess, checkpoint)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        start_time = time.time()
        try:
            while not coord.should_stop():
                _, loss_t, step = sess.run([train_op, loss, global_step])
                elapsed_time = time.time() - start_time
                start_time = time.time()
                if step % 100 == 0:
                    print(step, loss_t, elapsed_time)
                    summary_str = sess.run(summary)
                    writer.add_summary(summary_str, step)
                if step % 10000 == 0:
                    saver.save(sess, model_path + '/fast-style-model',
                               global_step=step)
        except tf.errors.OutOfRangeError:
            # The input queue is exhausted: save a final checkpoint.
            saver.save(sess, model_path + '/fast-style-model-done')
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()

        coord.join(threads)
def main(argv=None):
    """Either stylize a folder of images or train the style network.

    When FLAGS.CONTENT_IMAGES_PATH is set, the latest checkpoint in
    FLAGS.MODEL_PATH is restored and every batch of generated images is
    written to 'out<NNNN>.png'; the function then returns.  Otherwise the
    network is trained until the input queue raises OutOfRangeError, writing
    sample images every 100 steps and a checkpoint every 10000 steps.
    `argv` is accepted but not used.
    """
    if FLAGS.CONTENT_IMAGES_PATH:
        # ---- Evaluation mode -------------------------------------------
        content_images = reader.image(
            FLAGS.BATCH_SIZE,
            FLAGS.IMAGE_SIZE,
            FLAGS.CONTENT_IMAGES_PATH,
            epochs=1,
            shuffle=False,
            crop=False)
        generated_images = model.net(content_images / 255.)
        output_format = tf.saturate_cast(
            generated_images + reader.mean_pixel, tf.uint8)

        with tf.Session() as sess:
            checkpoint = tf.train.latest_checkpoint(FLAGS.MODEL_PATH)
            if not checkpoint:
                print('Could not find trained model in {}'.format(FLAGS.MODEL_PATH))
                return
            print('Using model from {}'.format(checkpoint))
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            written = 0
            start_time = time.time()
            try:
                while not coord.should_stop():
                    print(written)
                    batch = sess.run(output_format)
                    elapsed = time.time() - start_time
                    start_time = time.time()
                    print('Time for one batch: {}'.format(elapsed))
                    for raw_image in batch:
                        written += 1
                        misc.imsave('out{0:04d}.png'.format(written), raw_image)
            except tf.errors.OutOfRangeError:
                print('Done training -- epoch limit reached')
            finally:
                coord.request_stop()

            coord.join(threads)
        return

    # ---- Training mode -------------------------------------------------
    if not os.path.exists(FLAGS.MODEL_PATH):
        os.makedirs(FLAGS.MODEL_PATH)

    style_paths = FLAGS.STYLE_IMAGES.split(',')
    style_layers = FLAGS.STYLE_LAYERS.split(',')
    content_layers = FLAGS.CONTENT_LAYERS.split(',')

    style_features_t = get_style_features(style_paths, style_layers)

    images = reader.image(
        FLAGS.BATCH_SIZE, FLAGS.IMAGE_SIZE, FLAGS.TRAIN_IMAGES_PATH)
    generated = model.net(images / 255.)

    # Generated and original images share one VGG pass; each layer is split
    # back into its two halves below.
    both = tf.concat(0, [generated, images])
    net, _ = vgg.net(FLAGS.VGG_PATH, both)

    content_loss = 0
    for layer in content_layers:
        generated_half, content_half = tf.split(0, 2, net[layer])
        count = tf.size(generated_half)
        content_loss += (tf.nn.l2_loss(generated_half - content_half)
                         / tf.to_float(count))
    content_loss = content_loss / len(content_layers)

    style_loss = 0
    for layer_grams, layer in zip(style_features_t, style_layers):
        generated_half, _ = tf.split(0, 2, net[layer])
        count = tf.size(generated_half)
        for target_gram in layer_grams:
            gap = tf.reduce_sum(gram(generated_half) - target_gram, 0)
            style_loss += tf.nn.l2_loss(gap) / tf.to_float(count)
    style_loss = style_loss / len(style_layers)

    loss = (FLAGS.STYLE_WEIGHT * style_loss
            + FLAGS.CONTENT_WEIGHT * content_loss
            + FLAGS.TV_WEIGHT * total_variation_loss(generated))

    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(1e-3)
    train_op = optimizer.minimize(loss, global_step=global_step)

    output_format = tf.saturate_cast(
        tf.concat(0, [generated, images]) + reader.mean_pixel, tf.uint8)

    with tf.Session() as sess:
        saver = tf.train.Saver(tf.all_variables())
        checkpoint = tf.train.latest_checkpoint(FLAGS.MODEL_PATH)
        if checkpoint:
            print('Restoring model from {}'.format(checkpoint))
            saver.restore(sess, checkpoint)
        else:
            print('New model initilized')
            sess.run(tf.initialize_all_variables())

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        start_time = time.time()
        try:
            while not coord.should_stop():
                _, loss_t, step = sess.run([train_op, loss, global_step])
                elapsed_time = time.time() - start_time
                start_time = time.time()
                if step % 100 == 0:
                    print(step, loss_t, elapsed_time)
                    samples = sess.run(output_format)
                    for index, raw_image in enumerate(samples):
                        misc.imsave('out{}.png'.format(index), raw_image)
                if step % 10000 == 0:
                    saver.save(sess, FLAGS.MODEL_PATH + '/fast-style-model',
                               global_step=step)
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()

        coord.join(threads)
def optimize(content_targets, style_target, content_weight, style_weight,
             tv_weight, vgg_path, epochs=2, print_iterations=1000,
             batch_size=4, save_path='saver/fns.ckpt', slow=False,
             learning_rate=1e-3, debug=False):
    """Train the transform network (or, in slow mode, a single image).

    This is a generator.  It yields `(preds, losses, iterations, epoch)`
    on every reporting step, where `losses` is the tuple
    `(style_loss, content_loss, tv_loss, total_loss)`.

    Args:
        content_targets: sequence of image paths; trimmed to a multiple of
            `batch_size`.
        style_target: style image array.
        content_weight, style_weight, tv_weight: loss term multipliers.
        vgg_path: path handed to `vgg.net`.
        epochs: number of passes over `content_targets`.
        print_iterations: report every this many iterations (every this
            many epochs in slow mode); the very last batch always reports.
        batch_size: images per step; forced to 1 in slow mode.
        save_path: checkpoint path written on each report (fast mode only).
        slow: optimise a `tf.Variable` image directly instead of training
            `transform.net`.
        learning_rate: Adam learning rate.
        debug: print per-batch timing.
    """
    if slow:
        batch_size = 1
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod]

    style_features = {}

    batch_shape = (batch_size, 256, 256, 3)
    style_shape = (1,) + style_target.shape
    print(style_shape)

    # precompute style features
    with tf.Graph().as_default(), tf.device('/cpu:0'), tf.Session() as sess:
        style_image = tf.placeholder(tf.float32, shape=style_shape,
                                     name='style_image')
        style_image_pre = vgg.preprocess(style_image)
        net = vgg.net(vgg_path, style_image_pre)
        style_pre = np.array([style_target])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={style_image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

    with tf.Graph().as_default(), tf.Session() as sess:
        X_content = tf.placeholder(tf.float32, shape=batch_shape,
                                   name="X_content")
        X_pre = vgg.preprocess(X_content)

        # content features are a live tensor fed from the same batch
        content_features = {}
        content_net = vgg.net(vgg_path, X_pre)
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        if slow:
            preds = tf.Variable(
                tf.random_normal(X_content.get_shape()) * 0.256
            )
            preds_pre = preds
        else:
            preds = transform.net(X_content / 255.0)
            preds_pre = vgg.preprocess(preds)

        net = vgg.net(vgg_path, preds_pre)

        content_size = _tensor_size(content_features[CONTENT_LAYER]) * batch_size
        assert (_tensor_size(content_features[CONTENT_LAYER])
                == _tensor_size(net[CONTENT_LAYER]))
        content_loss = content_weight * (
            2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER])
            / content_size
        )

        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            bs, height, width, filters = [
                dim.value for dim in layer.get_shape()]
            size = height * width * filters
            feats = tf.reshape(layer, (bs, height * width, filters))
            feats_T = tf.transpose(feats, perm=[0, 2, 1])
            # Batched matmul: one Gram matrix per example.
            grams = tf.matmul(feats_T, feats) / size
            style_gram = style_features[style_layer]
            style_losses.append(
                2 * tf.nn.l2_loss(grams - style_gram) / style_gram.size)

        style_loss = (style_weight * functools.reduce(tf.add, style_losses)
                      / batch_size)

        # total variation denoising
        tv_y_size = _tensor_size(preds[:, 1:, :, :])
        tv_x_size = _tensor_size(preds[:, :, 1:, :])
        y_tv = tf.nn.l2_loss(
            preds[:, 1:, :, :] - preds[:, :batch_shape[1] - 1, :, :])
        x_tv = tf.nn.l2_loss(
            preds[:, :, 1:, :] - preds[:, :, :batch_shape[2] - 1, :])
        tv_loss = tv_weight * 2 * (x_tv / tv_x_size + y_tv / tv_y_size) / batch_size

        loss = content_loss + style_loss + tv_loss

        # overall loss
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.global_variables_initializer())

        # Build the Saver once.  Constructing it on every report adds a new
        # set of save/restore ops to the graph each time.  It is only used
        # in fast mode, so it is not built in slow mode.
        saver = None if slow else tf.train.Saver()

        import random
        uid = random.randint(1, 100)
        print("UID: %s" % uid)

        for epoch in range(epochs):
            num_examples = len(content_targets)
            iterations = 0
            while iterations * batch_size < num_examples:
                start_time = time.time()
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    X_batch[j] = get_img(img_p, (256, 256, 3)).astype(np.float32)

                iterations += 1
                assert X_batch.shape[0] == batch_size

                feed_dict = {X_content: X_batch}
                train_step.run(feed_dict=feed_dict)

                end_time = time.time()
                delta_time = end_time - start_time
                if debug:
                    print("UID: %s, batch time: %s" % (uid, delta_time))

                is_print_iter = int(iterations) % print_iterations == 0
                if slow:
                    is_print_iter = epoch % print_iterations == 0
                is_last = (epoch == epochs - 1
                           and iterations * batch_size >= num_examples)
                should_print = is_print_iter or is_last
                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {X_content: X_batch}
                    tup = sess.run(to_get, feed_dict=test_feed_dict)
                    _style_loss, _content_loss, _tv_loss, _loss, _preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _loss)
                    if slow:
                        _preds = vgg.unprocess(_preds)
                    else:
                        saver.save(sess, save_path)
                    yield (_preds, losses, iterations, epoch)
def stylize(
    network,
    initial,
    content,
    style,
    iterations,
    content_weight,
    style_weight,
    tv_weight,
    learning_rate,
    print_iter=None,
):
    """Optimise an image towards the content of `content` and the style of `style`.

    Content activations and style Gram matrices are extracted once from the
    network built by `vgg.net(network, ...)`.  A `tf.Variable` image is then
    trained with Adam to minimise
    ``content_weight * content_loss + style_weight * style_loss + tv_weight * tv_loss``.

    Args:
        network: forwarded unchanged as the first argument of `vgg.net`.
        initial: starting image, or None to start from random normal noise
            scaled by 256 / 1000.
        content: content image array; the optimised image has this shape plus
            a leading batch dimension of 1.
        style: style image array.
        iterations: number of optimiser steps.
        content_weight: weight of the content term.
        style_weight: weight of the style term.
        tv_weight: weight of the total-variation term.
        learning_rate: learning rate handed to `tf.train.AdamOptimizer`.
        print_iter: when not None, the individual losses are printed every
            `print_iter` iterations.

    Returns:
        The optimised image passed through `vgg.unprocess`, without the batch
        dimension.
    """
    # `reduce` is only a builtin on Python 2; the functools version works on both.
    from functools import reduce

    shape = (1,) + content.shape
    style_shape = (1,) + style.shape
    content_features = {}
    style_features = {}

    # Feed-forward pass 1: content activations at CONTENT_LAYER.
    g = tf.Graph()
    with g.as_default(), g.device("/cpu:0"), tf.Session() as sess:
        image = tf.placeholder("float", shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
            feed_dict={image: content_pre})

    # Feed-forward pass 2: one Gram matrix per style layer.
    g = tf.Graph()
    with g.as_default(), g.device("/cpu:0"), tf.Session() as sess:
        image = tf.placeholder("float", shape=style_shape)
        net, _ = vgg.net(network, image)
        style_pre = np.array([vgg.preprocess(style, mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={image: style_pre})
            # Flatten every axis except the last one.
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / (features.size)
            style_features[layer] = gram

    # Optimisation graph: the image itself is the trainable variable.
    with tf.Graph().as_default():
        if initial is None:
            initial = tf.random_normal(shape) * 256 / 1000
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype("float32")
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        content_loss = tf.nn.l2_loss(
            net[CONTENT_LAYER] - content_features[CONTENT_LAYER])

        style_losses = []
        for layer_name in STYLE_LAYERS:
            layer = net[layer_name]
            _, height, width, number = [dim.value for dim in layer.get_shape()]
            size = height * width * number
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats) / (size)
            style_gram = style_features[layer_name]
            style_losses.append(tf.nn.l2_loss(gram - style_gram))
        style_loss = reduce(tf.add, style_losses) / len(style_losses)

        # Total variation: squared differences between neighbouring rows and
        # neighbouring columns of the image.
        tv_loss = tf.nn.l2_loss(
            image[:, 1:, :, :] - image[:, : shape[1] - 1, :, :]
        ) + tf.nn.l2_loss(
            image[:, :, 1:, :] - image[:, :, : shape[2] - 1, :]
        )

        loss = (content_weight * content_loss
                + style_weight * style_loss
                + tv_weight * tv_loss)
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for step in range(iterations):
                if print_iter is not None and step % print_iter == 0:
                    print(" content loss: %g" % (content_loss.eval()))
                    print(" style loss: %g" % (style_loss.eval()))
                    print(" tv loss: %g" % (tv_loss.eval()))
                    print(" total loss: %g" % loss.eval())
                print("Iteration %d/%d" % (step + 1, iterations))
                train_step.run()
            # Must run while the session is still the default one.
            return vgg.unprocess(image.eval().reshape(shape[1:]), mean_pixel)
# Network parameters VGG_19_PATH = 'models/imagenet-vgg-verydeep-19.mat' LEARNING_RATE = 10 POOLING_FUNCTION = 'MAX' # Load images content_image = utils.read_image(CONTENT_PATH) style_image = utils.read_image(STYLE_PATH) g = tf.Graph() with g.device(DEVICE), g.as_default(), tf.Session() as sess: # 1. Compute content representation print("1. Computing content representation...") content_shape = (1,) + content_image.shape # add batch size dimension x = tf.placeholder(tf.float32, content_shape) net, activations, img_mean = vgg.net(VGG_19_PATH, x, pooling_function=POOLING_FUNCTION) # Pre-process image content_image_pp = utils.preprocess_image(content_image, img_mean) content_representation = activations[CONTENT_LAYER].eval(feed_dict={x: np.array([content_image_pp])}) # 2. Compute style Gram matrices print("2. Computing style Gram matrices...") style_shape = (1,) + style_image.shape # add batch size dimension x = tf.placeholder(tf.float32, style_shape) net, activations, _ = vgg.net(VGG_19_PATH, x, pooling_function=POOLING_FUNCTION) # Pre-process image style_image_pp = utils.preprocess_image(style_image, img_mean)
def main():
    """Command-line entry point: stylise one content image with one style image.

    Expects exactly four command-line arguments:
    ``CONTENT_PATH STYLE_PATH WIDTH STYLE_SCALE``.  When WIDTH > 0 the content
    image is resized to that width (height scaled proportionally); when
    STYLE_SCALE > 0 the style image is resized by that factor.

    The image is optimised with Adam for 100000 iterations.  The iteration
    index is printed every step; every 10th iteration the losses are printed
    and the current image is written to ``<iteration, 5 digits>.jpg``.
    """
    # `reduce` is only a builtin on Python 2; the functools version works on both.
    from functools import reduce

    if len(sys.argv) != 5:
        # A usage line instead of an opaque tuple-unpacking ValueError.
        sys.exit('usage: %s CONTENT_PATH STYLE_PATH WIDTH STYLE_SCALE'
                 % sys.argv[0])
    content_path, style_path, width, style_scale = sys.argv[1:]
    width = int(width)
    style_scale = float(style_scale)

    content_image = imread(content_path)
    style_image = imread(style_path)
    if width > 0:
        # Keep the aspect ratio: new height = old height / old width * width.
        new_shape = (int(math.floor(float(content_image.shape[0]) /
                                    content_image.shape[1] * width)), width)
        content_image = sm.imresize(content_image, new_shape)
    if style_scale > 0:
        style_image = sm.imresize(style_image, style_scale)

    shape = (1,) + content_image.shape
    style_shape = (1,) + style_image.shape
    content_features = {}
    style_features = {}

    # Feed-forward pass 1: content activations at CONTENT_LAYER.
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(VGG_PATH, image)
        content_pre = np.array([vgg.preprocess(content_image, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
            feed_dict={image: content_pre})

    # Feed-forward pass 2: one Gram matrix per style layer.
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=style_shape)
        net, _ = vgg.net(VGG_PATH, image)
        style_pre = np.array([vgg.preprocess(style_image, mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / (features.size)
            style_features[layer] = gram

    # Optimisation graph: the image itself is the trainable variable.
    with tf.Graph().as_default():
        init = tf.random_normal(shape) * 256 / 1000
        image = tf.Variable(init)
        net, _ = vgg.net(VGG_PATH, image)

        content_loss = tf.nn.l2_loss(
            net[CONTENT_LAYER] - content_features[CONTENT_LAYER])

        style_losses = []
        for layer_name in STYLE_LAYERS:
            layer = net[layer_name]
            # Distinct names so the command-line `width` is not overwritten.
            _, layer_height, layer_width, number = [
                dim.value for dim in layer.get_shape()]
            size = layer_height * layer_width * number
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats) / (size)
            style_gram = style_features[layer_name]
            style_losses.append(tf.nn.l2_loss(gram - style_gram))
        style_loss = reduce(tf.add, style_losses) / len(style_losses)

        # Total variation: squared differences between neighbouring rows and
        # neighbouring columns of the image.
        tv_loss = (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1]-1, :, :]) +
                   tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2]-1, :]))

        loss = ALPHA * content_loss + BETA * style_loss + TV_WEIGHT * tv_loss
        train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(100000):
                print('i = %d' % i)
                if i % 10 == 0:
                    print('\tcontent_loss = %15.0f' % content_loss.eval())
                    print('\tstyle_loss = %15.0f' % style_loss.eval())
                    print('\ttv_loss = %15.0f' % tv_loss.eval())
                    print('\tloss = %15.0f' % loss.eval())
                    imsave('%05d.jpg' % i, vgg.unprocess(
                        image.eval().reshape(shape[1:]), mean_pixel))
                train_step.run()
def _load_binary_mask(n_pixels):
    """Read 'bottle_mask.jpg' as a flat boolean array with `n_pixels` entries.

    An entry is True where the pixel value is greater than 100.  The reshape
    raises if the image does not hold exactly `n_pixels` values.
    """
    pixels = np.reshape(imread('bottle_mask.jpg').astype(np.float32),
                        (n_pixels,))
    return pixels > 100


def stylize(network, initial, content, styles, iterations,
            content_weight, style_weight, style_blend_weights, tv_weight,
            learning_rate, print_iterations=None, checkpoint_iterations=None):
    """Optimise an image towards `content` blended with several `styles`.

    Content activations and per-style Gram matrices are extracted once from
    the network built by `vgg.net(network, ...)`.  A `tf.Variable` image is
    then trained by gradient descent on content + style + total-variation
    loss.

    Masking is driven by the module-level `normal_flag` and the image file
    'bottle_mask.jpg':

    * ``normal_flag == 0``: style features are masked before the Gram matrix
      is taken.  Style 0 uses the mask, every other style uses its
      complement.  The mask image must have as many pixels as each style
      layer has spatial positions.  Gradients are applied unmasked.
    * otherwise: style features are unmasked and the image gradient is
      multiplied by the mask instead.  The mask image must have as many
      pixels as the content image.

    Args:
        network: forwarded unchanged as the first argument of `vgg.net`.
        initial: starting image, or None to start from random normal noise
            scaled by 0.256.
        content: content image array; the optimised image has this shape plus
            a leading batch dimension of 1.
        styles: sequence of style image arrays.
        iterations: number of optimiser steps.
        content_weight: weight of the content term.
        style_weight: weight applied to every style term.
        style_blend_weights: per-style weights, indexed like `styles`.
        tv_weight: weight of the total-variation term.
        learning_rate: learning rate of the gradient-descent optimiser.
        print_iterations: when not None, losses are written to stderr every
            `print_iterations` iterations and at the last checkpoint.
        checkpoint_iterations: when not None, the lowest-loss image seen so
            far is refreshed every `checkpoint_iterations` iterations (it is
            always refreshed on the final iteration).

    Returns:
        The lowest-loss image recorded, passed through `vgg.unprocess`,
        without the batch dimension.  If none was recorded (zero iterations,
        or the loss never compared below infinity), the current image is
        returned instead.
    """
    # `reduce` is only a builtin on Python 2; the functools version works on both.
    from functools import reduce

    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(network, image)
        content_pre = np.array([vgg.preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
            feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i, (style, style_shape) in enumerate(zip(styles, style_shapes)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shape)
            net, _ = vgg.net(network, image)
            style_pre = np.array([vgg.preprocess(style, mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                print('Initial feature shape:  %s' % (features.shape,))
                features = np.reshape(features, (-1, features.shape[3]))
                print('Final features shape %s' % (features.shape,))
                gram = np.matmul(features.T, features) / features.size
                print('Gram matrix shape:  %s' % (gram.shape,))
                style_features[i][layer] = gram

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([vgg.preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = vgg.net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
            net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
            content_features[CONTENT_LAYER].size)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = [
                    dim.value for dim in layer.get_shape()]
                print('Height, width, number %s %s %s'
                      % (height, width, number))
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                if normal_flag == 0:
                    # Same 0/1 column for every feature channel.
                    inside = _load_binary_mask(height * width)
                    mask = np.repeat(inside[:, np.newaxis], number,
                                     axis=1).astype(np.float32)
                    print('Mask shape %s' % (mask.shape,))
                    if i != 0:
                        # Every style after the first gets the complement.
                        mask = (mask < 1).astype(np.float32)
                    feats = tf.mul(feats, tf.constant(mask))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(
                    2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += (style_weight * style_blend_weights[i] *
                           reduce(tf.add, style_losses))

        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])
        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] - image[:, :shape[1]-1, :, :]) /
                tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] - image[:, :, :shape[2]-1, :]) /
                tv_x_size))

        # overall loss
        loss = content_loss + style_loss + tv_loss

        # Gradient mask, sized from the image itself so it always lines up
        # with the gradient of `image` (shape == `shape`).
        grad_mask = None
        if normal_flag != 0:
            print("general mask :")
            _, img_height, img_width, img_channels = shape
            flat = _load_binary_mask(img_height * img_width)
            print('Mask shape %s' % (flat.shape,))
            plane = flat.reshape((img_height, img_width))
            stacked = np.repeat(plane[:, :, np.newaxis], img_channels, axis=2)
            grad_mask = tf.constant(stacked.astype(np.float32),
                                    dtype=tf.float32)

        def capper(a, b, mask):
            """Return the (gradient, variable) pair with the gradient masked."""
            return tf.mul(a, mask), b

        # optimizer setup: compute the gradients explicitly so they can be
        # masked before being applied.
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        grads_and_vars = optimizer.compute_gradients(loss)
        if grad_mask is not None:
            grads_and_vars = [capper(grad, var, grad_mask)
                              for grad, var in grads_and_vars]
        train_step = optimizer.apply_gradients(grads_and_vars)

        def print_progress(i, last=False):
            """Write the current losses to stderr when due (or when `last`)."""
            if print_iterations is None:
                return
            if (i is not None and i % print_iterations == 0) or last:
                stderr.write(' content loss: %g\n' % content_loss.eval())
                stderr.write(' style loss: %g\n' % style_loss.eval())
                stderr.write(' tv loss: %g\n' % tv_loss.eval())
                stderr.write(' total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(iterations):
                last_step = i == iterations - 1
                print_progress(i)
                stderr.write('Iteration %d/%d\n' % (i + 1, iterations))
                train_step.run()
                if (checkpoint_iterations is not None and
                        i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                    print_progress(None, last_step)
                # Snapshot of the best image so far, every 10th iteration.
                if i % 10 == 0 and best is not None:
                    tmp_img = vgg.unprocess(best.reshape(shape[1:]),
                                            mean_pixel)
                    imsave("iter" + str(i) + ".jpg", tmp_img)
            if best is None:
                # Nothing was recorded; fall back to the current image
                # rather than failing on `None.reshape` below.
                best = image.eval()
        return vgg.unprocess(best.reshape(shape[1:]), mean_pixel)