def compute_layer_style_cost(a_S, a_G):
    m, n_H, n_W, n_C = a_S.get_shape().as_list()

    # unroll the activations from (1, n_H, n_W, n_C) to (n_C, n_H * n_W)
    a_S_unrolled = tf.reshape(tf.transpose(a_S), [n_C, n_H * n_W])
    a_G_unrolled = tf.reshape(tf.transpose(a_G), [n_C, n_H * n_W])

    # Gram matrices of the style and generated activations
    GS = gram_matrix(a_S_unrolled)
    GG = gram_matrix(a_G_unrolled)

    # squared Frobenius norm of the difference, normalized per the style-cost
    # formula (this normalization matches the 9.19028 check further below)
    J_style_layer = tf.reduce_sum(tf.square(tf.subtract(GS, GG))) / (4 * n_C**2 * (n_H * n_W)**2)

    return J_style_layer
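# compute_layer_style_cost assumes a gram_matrix helper that is not shown here.
# A minimal sketch, assuming the input A is already unrolled to (n_C, n_H * n_W),
# consistent with the GA[0][0] = 6.4223... check further below:
def gram_matrix(A):
    # G = A A^T: entry (i, j) is the correlation between filter maps i and j
    GA = tf.matmul(A, tf.transpose(A))
    return GA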
def update_loss(target, vgg, content_features, style_weights, style_grams,
                content_weight, style_weight):
    # for displaying the target image intermittently
    show_every = 400

    # iteration hyperparameters
    optimizer = optim.Adam([target], lr=0.004)
    steps = 2000  # variable; can be updated as needed

    for ii in range(1, steps + 1):
        target_features = get_features(target, vgg)

        # calculate the content loss
        content_loss = torch.mean((target_features['conv4_2'] - content_features['conv4_2'])**2)

        # calculate the style loss, iterating through a number of layers
        style_loss = 0
        for layer in style_weights:
            # get the "target" style representation for the layer
            target_feature = target_features[layer]
            target_gram = gram_matrix(target_feature)
            batch_size, d, h, w = target_feature.shape

            # get the "style" style representation for the layer
            style_gram = style_grams[layer]

            # the style loss for one layer, weighted
            layer_style_loss = style_weights[layer] * torch.mean((target_gram - style_gram)**2)

            # add to the style loss
            style_loss += layer_style_loss / (d * h * w)

        # calculate the total loss
        total_loss = content_weight * content_loss + style_weight * style_loss

        # update the target image
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # display intermediate images and print the loss
        if ii % show_every == 0:
            print('Total loss: ', total_loss.item())
            plt.imshow(im_convert(target))
            plt.show()
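# update_loss relies on a PyTorch gram_matrix helper for feature maps of
# shape (batch, d, h, w). A minimal sketch, assuming batch size 1:
def gram_matrix(tensor):
    # flatten each of the d feature maps into a row vector
    _, d, h, w = tensor.size()
    tensor = tensor.view(d, h * w)
    # G = F F^T captures pairwise correlations between feature maps
    gram = torch.mm(tensor, tensor.t())
    return gram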
def f():
    optimizer.zero_grad()
    features = vgg16(input_img)

    content_loss = F.mse_loss(features[2], content_features[2]) * content_weight

    style_loss = 0
    grams = [gram_matrix(x) for x in features]
    for a, b in zip(grams, style_grams):
        style_loss += F.mse_loss(a, b) * style_weight

    loss = style_loss + content_loss

    if run[0] % 50 == 0:
        print('Step {}: Style Loss: {:4f} Content Loss: {:4f}'.format(
            run[0], style_loss.item(), content_loss.item()))
    run[0] += 1

    loss.backward()
    return loss
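# Note: optim.LBFGS re-evaluates the objective several times per step to run
# its line search, which is why the loss/gradient computation above is wrapped
# in a closure; it is driven by passing the closure to the optimizer, as in
# optimizer.step(f) (see the training loop further below).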
tf.reset_default_graph()
with tf.Session() as test:
    tf.set_random_seed(1)
    a_C = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    a_G = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    J_content = compute_content_cost(a_C, a_G)
    print(J_content.eval())  # should be 6.76559

style_image = scipy.misc.imread('images/style/style_2.jpg')
# plt.imshow(style_image)
# plt.show()

tf.reset_default_graph()
with tf.Session() as test:
    tf.set_random_seed(1)
    A = tf.random_normal([3, 2 * 1], mean=1, stddev=4)
    GA = gram_matrix(A)
    print(GA.eval())  # [0][0] = 6.4223...

tf.reset_default_graph()
with tf.Session() as test:
    tf.set_random_seed(1)
    a_S = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    a_G = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    J_style_layer = compute_layer_style_cost(a_S, a_G)
    print(J_style_layer.eval())  # 9.19028

# equal weights for the five style layers
STYLE_LAYERS = [(1, 0.2), (2, 0.2), (3, 0.2), (4, 0.2), (5, 0.2)]

tf.reset_default_graph()
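# The first test above assumes a compute_content_cost helper. A minimal
# sketch matching the usual content-cost formula (and consistent with the
# 6.76559 check above):
def compute_content_cost(a_C, a_G):
    m, n_H, n_W, n_C = a_G.get_shape().as_list()
    # unroll both activations and take the normalized squared difference
    a_C_unrolled = tf.reshape(a_C, [m, n_H * n_W, n_C])
    a_G_unrolled = tf.reshape(a_G, [m, n_H * n_W, n_C])
    J_content = tf.reduce_sum(tf.square(tf.subtract(a_C_unrolled, a_G_unrolled))) / (4 * n_H * n_W * n_C)
    return J_content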
# display the images
from im_convert import im_convert

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
# content and style images shown side by side
ax1.imshow(im_convert(content))
ax2.imshow(im_convert(style))

# get the content and style features before forming the target image
from get_features import get_features

content_features = get_features(content, vgg)
style_features = get_features(style, vgg)

# calculate the Gram matrices for each layer of the style representation
from gram_matrix import gram_matrix

style_grams = {layer: gram_matrix(style_features[layer]) for layer in style_features}

# create the target image and prep it for change;
# to start off, we use a copy of our content image as the initial target
# and then iteratively change its style
target = content.clone().requires_grad_(True).to(device)

# set the weights for each style layer (the content and style weights are set separately)
style_weights = {'conv1_1': 1.,
                 'conv2_1': 0.75,
                 'conv3_1': 0.2,
                 'conv4_1': 0.2,
                 'conv5_1': 0.2}
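# im_convert and get_features are imported above but not shown. A plausible
# sketch, assuming vgg is the `features` module of a torchvision VGG19 with
# ImageNet normalization; the layer-index-to-name mapping below is an
# assumption:
def get_features(image, model, layers=None):
    if layers is None:
        layers = {'0': 'conv1_1', '5': 'conv2_1', '10': 'conv3_1',
                  '19': 'conv4_1', '21': 'conv4_2', '28': 'conv5_1'}
    features = {}
    x = image
    # run the image through the network, recording the named activations
    for name, layer in model._modules.items():
        x = layer(x)
        if name in layers:
            features[layers[name]] = x
    return features

def im_convert(tensor):
    # undo the ImageNet normalization and move channels last for plotting
    image = tensor.to('cpu').clone().detach()
    image = image.numpy().squeeze()
    image = image.transpose(1, 2, 0)
    image = image * np.array((0.229, 0.224, 0.225)) + np.array((0.485, 0.456, 0.406))
    return image.clip(0, 1)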
def run_style_transfer(content_path, style_path, num_iterations=1000,
                       content_weight=1e3, style_weight=1e-2):
    # We don't need to (or want to) train any layers of our model, so we set
    # their trainable attribute to False.
    model = get_model()
    for layer in model.layers:
        layer.trainable = False

    # Get the style and content feature representations (from our specified intermediate layers)
    style_features, content_features = get_feature_representations(model, content_path, style_path)
    gram_style_features = [gram_matrix(style_feature) for style_feature in style_features]

    # Set initial image
    init_image = load_and_process_img(content_path)
    init_image = tfe.Variable(init_image, dtype=tf.float32)

    # Create our optimizer
    opt = tf.train.AdamOptimizer(learning_rate=5, beta1=0.99, epsilon=1e-1)

    # For displaying intermediate images
    iter_count = 1

    # Store our best result
    best_loss, best_img = float('inf'), None

    # Create a nice config
    loss_weights = (style_weight, content_weight)
    cfg = {
        'model': model,
        'loss_weights': loss_weights,
        'init_image': init_image,
        'gram_style_features': gram_style_features,
        'content_features': content_features
    }

    # For displaying
    num_rows = 2
    num_cols = 5
    display_interval = num_iterations / (num_rows * num_cols)
    start_time = time.time()
    global_start = time.time()

    norm_means = np.array([103.939, 116.779, 123.68])
    min_vals = -norm_means
    max_vals = 255 - norm_means

    imgs = []
    for i in range(num_iterations):
        grads, all_loss = compute_grads(cfg)
        loss, style_score, content_score = all_loss
        opt.apply_gradients([(grads, init_image)])
        clipped = tf.clip_by_value(init_image, min_vals, max_vals)
        init_image.assign(clipped)
        end_time = time.time()

        if loss < best_loss:
            # Update best loss and best image from total loss.
            best_loss = loss
            best_img = deprocess_img(init_image.numpy())

        if i % display_interval == 0:
            start_time = time.time()

            # Use the .numpy() method to get the concrete numpy array
            plot_img = init_image.numpy()
            plot_img = deprocess_img(plot_img)
            imgs.append(plot_img)
            IPython.display.clear_output(wait=True)
            IPython.display.display_png(Image.fromarray(plot_img))
            print('Iteration: {}'.format(i))
            print('Total loss: {:.4e}, '
                  'style loss: {:.4e}, '
                  'content loss: {:.4e}, '
                  'time: {:.4f}s'.format(loss, style_score, content_score,
                                         time.time() - start_time))

    print('Total time: {:.4f}s'.format(time.time() - global_start))
    IPython.display.clear_output(wait=True)
    plt.figure(figsize=(14, 4))
    for i, img in enumerate(imgs):
        plt.subplot(num_rows, num_cols, i + 1)
        plt.imshow(img)
        plt.xticks([])
        plt.yticks([])

    return best_img, best_loss
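# run_style_transfer assumes a compute_grads helper. A minimal sketch using
# an eager-mode gradient tape; compute_loss is assumed to take the cfg dict
# and return (total_loss, style_score, content_score):
def compute_grads(cfg):
    with tf.GradientTape() as tape:
        all_loss = compute_loss(**cfg)
    # gradient of the total loss with respect to the image being optimized
    total_loss = all_loss[0]
    return tape.gradient(total_loss, cfg['init_image']), all_loss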
data_transform = transforms.Compose([
    transforms.CenterCrop(width),
    transforms.ToTensor(),
    tensor_normalizer,
])

dataset = torchvision.datasets.ImageFolder('/home/ypw/COCO/', transform=data_transform)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=b_size, shuffle=True)

# load the style image the same way the content images are read
style_img = read_image('style_img.jpg', width).to(device)

# compute the style features and their Gram matrices once, up front;
# detach them, since the style targets are constants during training
style_features = vgg16(style_img)
style_grams = [gram_matrix(x).detach() for x in style_features]

# train
optimizer = optim.Adam(transformnet.parameters(), lr=1e-3)
transformnet.train()
n_batch = len(data_loader)

for epoch in range(1):
    print('Epoch {}'.format(epoch + 1))
    smooth_content_loss = Smooth()
    smooth_style_loss = Smooth()
    smooth_tv_loss = Smooth()
    smooth_loss = Smooth()
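# Smooth is not defined in this snippet; a hypothetical running-average
# tracker with a plausible interface (the name is from the snippet above,
# but the methods and behavior below are assumptions):
class Smooth:
    def __init__(self):
        self.total = 0.0
        self.count = 0

    def update(self, value):
        # accumulate a running mean of the reported loss values
        self.total += value
        self.count += 1

    @property
    def mean(self):
        return self.total / max(self.count, 1)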
content_img = read_image('content_img.jpg', width).to(device)
style_img = read_image('style_img.jpg', width).to(device)
input_img = content_img.clone()

# plt.figure(figsize=(12, 6))
#
# plt.subplot(1, 2, 1)
# imshow(style_img, title='Style Image')
#
# plt.subplot(1, 2, 2)
# imshow(content_img, title='Content Image')

style_features = vgg16(style_img)
content_features = vgg16(content_img)
style_grams = [gram_matrix(x) for x in style_features]

# train
optimizer = optim.LBFGS([input_img.requires_grad_()])
style_weight = 1e6
content_weight = 1

run = [0]
while run[0] < 300:
    print('Iteration', run[0])

    # f is the closure defined above; L-BFGS re-evaluates it during each step
    optimizer.step(f)
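# read_image is assumed above but not shown; a plausible sketch that loads an
# image, crops it to `width`, and returns a batch tensor (the exact transform
# pipeline is an assumption):
def read_image(path, width):
    img = Image.open(path).convert('RGB')
    transform = transforms.Compose([
        transforms.Resize(width),
        transforms.CenterCrop(width),
        transforms.ToTensor(),
    ])
    # add a batch dimension so the tensor can be fed to VGG directly
    return transform(img).unsqueeze(0)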