def optimize(args):
    """Stylize a single image by optimizing its pixels directly (MXNet/Gluon).

    Implements the optimization approach of Gatys et al., "Image Style
    Transfer Using Convolutional Neural Networks" (CVPR). The output starts
    as a copy of the content image and is updated with Adam so that its
    VGG-16 features match the content image (second feature map) and the
    style image (Gram matrix of every feature map).

    Args:
        args: parsed options. Reads ``cuda``, ``content_image``,
            ``content_size``, ``style_image``, ``style_size``, ``lr``,
            ``iters``, ``content_weight``, ``style_weight``,
            ``log_interval`` and ``output_image``.
    """
    ctx = mx.gpu(0) if args.cuda else mx.cpu(0)

    # Content and style targets, loaded on the chosen context and passed
    # through the ImageNet-mean preprocessing helper.
    content_image = utils.subtract_imagenet_mean_preprocess_batch(
        utils.tensor_load_rgbimage(
            args.content_image, ctx, size=args.content_size, keep_asp=True))
    style_image = utils.subtract_imagenet_mean_preprocess_batch(
        utils.tensor_load_rgbimage(
            args.style_image, ctx, size=args.style_size))

    # Pre-trained VGG-16 used as a fixed feature extractor.
    vgg = net.Vgg16()
    utils.init_vgg_params(vgg, 'models', ctx=ctx)

    # Fixed targets: one content feature map, one Gram matrix per style map.
    f_xc_c = vgg(content_image)[1]
    gram_style = [net.gram_matrix(feat) for feat in vgg(style_image)]

    # The image being optimized is the only trainable parameter.
    output = Parameter('output', shape=content_image.shape)
    output.initialize(ctx=ctx)
    output.set_data(content_image)

    trainer = gluon.Trainer([output], 'adam', {'learning_rate': args.lr})
    mse_loss = gluon.loss.L2Loss()

    for step in range(args.iters):
        utils.imagenet_clamp_batch(output.data(), 0, 255)
        # (original note: fix BN for pre-trained vgg)
        with autograd.record():
            features_y = vgg(output.data())
            content_loss = 2 * args.content_weight * mse_loss(
                features_y[1], f_xc_c)
            style_loss = 0.
            for feat_y, gram_s in zip(features_y, gram_style):
                gram_y = net.gram_matrix(feat_y)
                style_loss = style_loss + 2 * args.style_weight * mse_loss(
                    gram_y, gram_s)
            total_loss = content_loss + style_loss
            total_loss.backward()

        trainer.step(1)
        if (step + 1) % args.log_interval == 0:
            print('loss:{:.2f}'.format(total_loss.asnumpy()[0]))

    # Undo the mean subtraction and write the result to disk.
    stylized = utils.add_imagenet_mean_batch(output.data())
    utils.tensor_save_bgrimage(stylized[0], args.output_image, args.cuda)
def optimize(args):
    """Stylize a single image by optimizing its pixels directly (PyTorch).

    Implements the optimization approach of Gatys et al., "Image Style
    Transfer Using Convolutional Neural Networks" (CVPR). The output starts
    from the preprocessed content image and is updated with Adam so that
    its VGG-16 features match the content image (second feature map) and
    the style image (Gram matrix of every feature map).

    Args:
        args: parsed options. Reads ``content_image``, ``content_size``,
            ``style_image``, ``style_size``, ``vgg_model_dir``, ``cuda``,
            ``lr``, ``iters``, ``content_weight``, ``style_weight``,
            ``log_interval`` and ``output_image``.
    """
    # load the content and style target
    content_image = utils.tensor_load_rgbimage(
        args.content_image, size=args.content_size, keep_asp=True)
    content_image = content_image.unsqueeze(0)
    content_image = Variable(utils.preprocess_batch(content_image),
                             requires_grad=False)
    content_image = utils.subtract_imagenet_mean_batch(content_image)

    style_image = utils.tensor_load_rgbimage(args.style_image,
                                             size=args.style_size)
    style_image = style_image.unsqueeze(0)
    style_image = Variable(utils.preprocess_batch(style_image),
                           requires_grad=False)
    style_image = utils.subtract_imagenet_mean_batch(style_image)

    # load the pre-trained vgg-16 and extract features
    vgg = Vgg16()
    utils.init_vgg16(args.vgg_model_dir)
    vgg.load_state_dict(
        torch.load(os.path.join(args.vgg_model_dir, "vgg16.weight")))
    if args.cuda:
        content_image = content_image.cuda()
        style_image = style_image.cuda()
        vgg.cuda()

    features_content = vgg(content_image)
    f_xc_c = Variable(features_content[1].data, requires_grad=False)
    features_style = vgg(style_image)
    # The style targets never change, so detach them once here instead of
    # re-wrapping them on every iteration.
    gram_style = [
        Variable(utils.gram_matrix(y).data, requires_grad=False)
        for y in features_style
    ]

    # init optimizer: the image itself is the only optimized tensor
    output = Variable(content_image.data, requires_grad=True)
    optimizer = Adam([output], lr=args.lr)
    mse_loss = torch.nn.MSELoss()

    # optimizing the images
    for e in range(args.iters):
        utils.imagenet_clamp_batch(output, 0, 255)
        optimizer.zero_grad()
        features_y = vgg(output)
        content_loss = args.content_weight * mse_loss(features_y[1], f_xc_c)
        style_loss = 0.
        for feat_y, gram_s in zip(features_y, gram_style):
            gram_y = utils.gram_matrix(feat_y)
            style_loss += args.style_weight * mse_loss(gram_y, gram_s)
        total_loss = content_loss + style_loss

        if (e + 1) % args.log_interval == 0:
            # ravel() makes this work for both a 1-element loss and a
            # 0-dim loss; indexing a 0-dim array with [0] raises IndexError.
            print(total_loss.data.cpu().numpy().ravel()[0])

        total_loss.backward()
        optimizer.step()

    # save the image
    output = utils.add_imagenet_mean_batch(output)
    utils.tensor_save_bgrimage(output.data[0], args.output_image, args.cuda)
def optimize(args):
    """Run optimization-based style transfer on one image (MXNet/Gluon).

    Method of Gatys et al., "Image Style Transfer Using Convolutional
    Neural Networks" (CVPR): the output image is initialised with the
    content image and its pixels are trained with Adam against a content
    loss (second VGG-16 feature map) plus a style loss (Gram matrices of
    all VGG-16 feature maps).

    Args:
        args: parsed options. Reads ``cuda``, ``content_image``,
            ``content_size``, ``style_image``, ``style_size``, ``lr``,
            ``iters``, ``content_weight``, ``style_weight``,
            ``log_interval`` and ``output_image``.
    """
    device_ctx = mx.gpu(0) if args.cuda else mx.cpu(0)

    # --- targets -----------------------------------------------------
    raw_content = utils.tensor_load_rgbimage(
        args.content_image, device_ctx, size=args.content_size, keep_asp=True)
    content_image = utils.subtract_imagenet_mean_preprocess_batch(raw_content)

    raw_style = utils.tensor_load_rgbimage(
        args.style_image, device_ctx, size=args.style_size)
    style_image = utils.subtract_imagenet_mean_preprocess_batch(raw_style)

    # --- feature extractor -------------------------------------------
    vgg = net.Vgg16()
    utils.init_vgg_params(vgg, 'models', ctx=device_ctx)

    content_target = vgg(content_image)[1]
    style_targets = [net.gram_matrix(fmap) for fmap in vgg(style_image)]

    # --- optimized image and optimizer -------------------------------
    output = Parameter('output', shape=content_image.shape)
    output.initialize(ctx=device_ctx)
    output.set_data(content_image)

    trainer = gluon.Trainer([output], 'adam', {'learning_rate': args.lr})
    mse_loss = gluon.loss.L2Loss()

    # --- optimization loop -------------------------------------------
    content_scale = 2 * args.content_weight
    style_scale = 2 * args.style_weight
    for iteration in range(1, args.iters + 1):
        utils.imagenet_clamp_batch(output.data(), 0, 255)
        # (original note: fix BN for pre-trained vgg)
        with autograd.record():
            features_y = vgg(output.data())
            content_loss = content_scale * mse_loss(
                features_y[1], content_target)
            style_loss = 0.
            for fmap, gram_s in zip(features_y, style_targets):
                style_loss = style_loss + style_scale * mse_loss(
                    net.gram_matrix(fmap), gram_s)
            total_loss = content_loss + style_loss
            total_loss.backward()

        trainer.step(1)
        if iteration % args.log_interval == 0:
            print('loss:{:.2f}'.format(total_loss.asnumpy()[0]))

    # --- save --------------------------------------------------------
    result = utils.add_imagenet_mean_batch(output.data())
    utils.tensor_save_bgrimage(result[0], args.output_image, args.cuda)
def optimize(args):
    """Optimize an image against a predicted vector field and a style image.

    The content target is the output of ``transformer_phi1`` applied to the
    grayscale content image. At every iteration the current output image is
    converted with ``utils.gray_bgr_batch`` and passed through
    ``transformer_phi2``; a ``CosineEmbeddingLoss`` (all-ones label) between
    the two transformer outputs forms the content term. The style term is
    the MSE between Gram matrices of VGG-16 features of the output and of
    the style image. The output starts from Gaussian noise and is updated
    with Adam.

    Args:
        args: parsed options. Reads ``content_image``, ``content_size``,
            ``style_image``, ``style_size``, ``vgg_model_dir``, ``cuda``,
            ``transformer_model_phi1_path``,
            ``transformer_model_phi2_path``, ``lr``, ``iters``,
            ``content_weight``, ``style_weight``, ``log_interval`` and
            ``output_image``.
    """
    content_image = utils.tensor_load_grayimage(args.content_image,
                                                size=args.content_size)
    content_image = content_image.unsqueeze(0)
    content_image = Variable(content_image, requires_grad=False)
    content_image = utils.subtract_imagenet_mean_batch_gray(content_image)

    style_image = utils.tensor_load_rgbimage(args.style_image,
                                             size=args.style_size)
    style_image = style_image.unsqueeze(0)
    style_image = Variable(utils.preprocess_batch(style_image),
                           requires_grad=False)
    style_image = utils.subtract_imagenet_mean_batch(style_image)

    # NOTE: earlier revisions built the target vector field analytically
    # (vortex lattice) or read it from an HDF5 file; it is now predicted by
    # transformer_phi1 below, so that experimental code was removed.

    # load the pre-trained vgg-16 and extract features
    # NOTE: utils.init_vgg16 is not called in this variant, so torch.load
    # needs the 'vgg16.weight' file to be present already.
    vgg = Vgg16()
    vgg.load_state_dict(
        torch.load(os.path.join(args.vgg_model_dir, 'vgg16.weight')))
    if args.cuda:
        style_image = style_image.cuda()
        vgg.cuda()
    features_style = vgg(style_image)
    # Style targets are constant: detach them once, outside the loop.
    gram_style = [
        Variable(utils.gram_matrix(y).data, requires_grad=False)
        for y in features_style
    ]

    # load the transformer net and extract the target vector field
    transformer_phi1 = TransformerNet()
    transformer_phi1.load_state_dict(
        torch.load(args.transformer_model_phi1_path))
    if args.cuda:
        content_image = content_image.cuda()
        transformer_phi1.cuda()
    vectors = transformer_phi1(content_image)
    vectors = Variable(vectors.data, requires_grad=False)

    # init optimizer
    # The output has the content image's size with 3 channels. It is
    # created on the device selected by args.cuda; a hard-coded "cuda"
    # device would break CPU-only runs.
    device = "cuda" if args.cuda else "cpu"
    output_size = list(content_image.data.size())
    output_size[1] = 3
    output = Variable(torch.randn(output_size, device=device),
                      requires_grad=True)
    optimizer = Adam([output], lr=args.lr)
    mse_loss = torch.nn.MSELoss()
    cosine_loss = torch.nn.CosineEmbeddingLoss()
    # NOTE(review): the label shape is hard-coded; presumably it has to
    # match the transformer output for CosineEmbeddingLoss - confirm.
    label = torch.ones(1, 128, 128, 128)
    if args.cuda:
        label = label.cuda()

    # optimize the images
    transformer_phi2 = TransformerNet()
    transformer_phi2.load_state_dict(
        torch.load(args.transformer_model_phi2_path))
    if args.cuda:
        transformer_phi2.cuda()

    tbar = trange(args.iters)
    for e in tbar:
        utils.imagenet_clamp_batch(output, 0, 255)
        optimizer.zero_grad()

        transformer_input = utils.gray_bgr_batch(output)
        transformer_y = transformer_phi2(transformer_input)
        content_loss = args.content_weight * cosine_loss(
            vectors, transformer_y, label)

        features_y = vgg(output)
        style_loss = 0
        for feat_y, gram_s in zip(features_y, gram_style):
            gram_y = utils.gram_matrix(feat_y)
            style_loss += args.style_weight * mse_loss(gram_y, gram_s)

        total_loss = content_loss + style_loss
        total_loss.backward()
        optimizer.step()

        tbar.set_description(str(total_loss.data.cpu().numpy().item()))
        if (e + 1) % args.log_interval == 0:
            print("iter: %d content_loss: %f style_loss %f"
                  % (e, content_loss.item(), style_loss.item()))

    # save the image
    output = utils.add_imagenet_mean_batch_device(output, args.cuda)
    utils.tensor_save_bgrimage(output.data[0], args.output_image, args.cuda)
def _vortex_lattice_field(size, vortex_spacing=0.5, extra_factor=2.):
    """Return a (size, size, 2) float32 field of superposed point vortices.

    Vortex centres lie on the lattice spanned by ``(1, 0) * vortex_spacing``
    and ``(cos(pi/3), sin(pi/3)) * vortex_spacing``; only centres strictly
    inside ``(-extra_factor, extra_factor)`` on both axes are kept. The
    field is sampled on a regular grid over ``[-1, 1] x [-1, 1]``. For each
    centre, channel 0 accumulates ``dy / r^2`` and channel 1 accumulates
    ``-dx / r^2``.
    """
    lattice_a = np.array([1, 0]) * vortex_spacing
    lattice_b = np.array([np.cos(np.pi / 3), np.sin(np.pi / 3)]) * vortex_spacing
    reach = int(2 * extra_factor / vortex_spacing)
    centres = [
        n * lattice_a + m * lattice_b
        for n in range(-reach, reach)
        for m in range(-reach, reach)
    ]
    centres = [(cx, cy) for (cx, cy) in centres
               if -extra_factor < cx < extra_factor
               and -extra_factor < cy < extra_factor]

    xs = np.linspace(-1, 1, size).astype(np.float32)[None, :]
    ys = np.linspace(-1, 1, size).astype(np.float32)[:, None]

    field = np.zeros((size, size, 2), dtype=np.float32)
    for cx, cy in centres:
        dx = xs - cx
        dy = ys - cy
        rsq = dx ** 2 + dy ** 2
        # A centre can coincide with a grid sample (e.g. odd sizes), where
        # rsq == 0 would give NaN/inf; skip that centre's contribution at
        # that one sample instead.
        off_centre = rsq > 0
        safe_rsq = np.where(off_centre, rsq, 1)
        field[..., 0] += np.where(off_centre, dy / safe_rsq, 0)
        field[..., 1] += np.where(off_centre, -dx / safe_rsq, 0)
    return field


def optimize(args):
    """Optimize an image so its Sobel response follows a vortex field.

    A synthetic vector field of size ``args.content_size`` is generated and
    used as the content target: at each iteration the output image is
    converted with ``utils.gray_bgr_batch``, passed through ``Sobel`` and
    compared with the field using ``CosineLoss``. The style term is the MSE
    between Gram matrices of VGG-16 features of the output and of the style
    image. The output starts from Gaussian noise scaled by 30 and is
    updated with Adam.

    Args:
        args: parsed options. Reads ``style_image``, ``style_size``,
            ``content_size``, ``vgg_model_dir``, ``cuda``, ``lr``,
            ``iters``, ``content_weight``, ``style_weight``,
            ``log_interval`` and ``output_image``.
    """
    style_image = utils.tensor_load_rgbimage(args.style_image,
                                             size=args.style_size)
    style_image = style_image.unsqueeze(0)
    style_image = Variable(utils.preprocess_batch(style_image),
                           requires_grad=False)
    style_image = utils.subtract_imagenet_mean_batch(style_image)

    # generate the vector field that we want to backward from
    vectors = _vortex_lattice_field(args.content_size)
    vectors = utils.tensor_load_vector_field(vectors)

    # load the pre-trained vgg-16 and extract features
    vgg = Vgg16()
    utils.init_vgg16(args.vgg_model_dir)
    vgg.load_state_dict(
        torch.load(os.path.join(args.vgg_model_dir, 'vgg16.weight')))
    if args.cuda:
        style_image = style_image.cuda()
        vgg.cuda()
    features_style = vgg(style_image)
    # Style targets are constant: detach them once, outside the loop.
    gram_style = [
        Variable(utils.gram_matrix(y).data, requires_grad=False)
        for y in features_style
    ]

    # load the sobel network
    sobel = Sobel()
    if args.cuda:
        vectors = vectors.cuda()
        sobel.cuda()

    # init optimizer
    # The output has the vector field's size with 3 channels. It is created
    # on the device selected by args.cuda; a hard-coded "cuda" device would
    # break CPU-only runs.
    device = "cuda" if args.cuda else "cpu"
    output_size = list(vectors.data.size())
    output_size[1] = 3
    output = Variable(torch.randn(output_size, device=device) * 30,
                      requires_grad=True)
    optimizer = Adam([output], lr=args.lr)
    cosine_loss = CosineLoss()
    mse_loss = torch.nn.MSELoss()

    # optimize the images
    tbar = trange(args.iters)
    for e in tbar:
        utils.imagenet_clamp_batch(output, 0, 255)
        optimizer.zero_grad()

        sobel_input = utils.gray_bgr_batch(output)
        sobel_y = sobel(sobel_input)
        content_loss = args.content_weight * cosine_loss(vectors, sobel_y)

        features_y = vgg(output)
        style_loss = 0
        for feat_y, gram_s in zip(features_y, gram_style):
            gram_y = utils.gram_matrix(feat_y)
            style_loss += args.style_weight * mse_loss(gram_y, gram_s)

        total_loss = content_loss + style_loss
        total_loss.backward()
        optimizer.step()

        if (e + 1) % args.log_interval == 0:
            # Report the unweighted losses.
            print("iter: %d content_loss: %f style_loss %f"
                  % (e, content_loss.item() / args.content_weight,
                     style_loss.item() / args.style_weight))
        tbar.set_description(str(total_loss.data.cpu().numpy().item()))

    # save the image
    output = utils.add_imagenet_mean_batch_device(output, args.cuda)
    utils.tensor_save_bgrimage(output.data[0], args.output_image, args.cuda)