def main():
    parser = argparse.ArgumentParser(description='Style Swap by Pytorch')
    parser.add_argument('--batch_size', '-b', type=int, default=4,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=3,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--patch_size', '-p', type=int, default=5,
                        help='Size of extracted patches from style features')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--learning_rate', '-lr', type=float, default=1e-4,
                        help='learning rate for Adam')
    parser.add_argument('--tv_weight', type=float, default=1e-6,
                        help='weight for total variation loss')
    parser.add_argument('--snapshot_interval', type=int, default=500,
                        help='Interval of snapshot to generate image')
    parser.add_argument('--train_content_dir', type=str, default='/data/chen/content',
                        help='content images directory for train')
    parser.add_argument('--train_style_dir', type=str, default='/data/chen/style',
                        help='style images directory for train')
    parser.add_argument('--test_content_dir', type=str, default='/data/chen/content',
                        help='content images directory for test')
    parser.add_argument('--test_style_dir', type=str, default='/data/chen/style',
                        help='style images directory for test')
    parser.add_argument('--save_dir', type=str, default='result',
                        help='save directory for result and loss')
    args = parser.parse_args()

    # create directories to save results
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    loss_dir = f'{args.save_dir}/loss'
    model_state_dir = f'{args.save_dir}/model_state'
    image_dir = f'{args.save_dir}/image'
    if not os.path.exists(loss_dir):
        os.mkdir(loss_dir)
        os.mkdir(model_state_dir)
        os.mkdir(image_dir)

    # set device on GPU if available, else CPU
    if torch.cuda.is_available() and args.gpu >= 0:
        device = torch.device(f'cuda:{args.gpu}')
        print(f'# CUDA available: {torch.cuda.get_device_name(args.gpu)}')
    else:
        device = torch.device('cpu')
    print(f'# Minibatch-size: {args.batch_size}')
    print(f'# epoch: {args.epoch}')
    print('')

    # prepare dataset and DataLoader
    train_dataset = PreprocessDataset(args.train_content_dir, args.train_style_dir)
    test_dataset = PreprocessDataset(args.test_content_dir, args.test_style_dir)
    iters = len(train_dataset)
    print(f'Length of train image pairs: {iters}')
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=True)
    test_iter = iter(test_loader)

    # set model and optimizer; only the decoder is trained
    encoder = VGGEncoder().to(device)
    decoder = Decoder().to(device)
    optimizer = Adam(decoder.parameters(), lr=args.learning_rate)

    # start training
    criterion = nn.MSELoss()
    loss_list = []
    for e in range(1, args.epoch + 1):
        print(f'Start {e} epoch')
        for i, (content, style) in tqdm(enumerate(train_loader, 1)):
            content = content.to(device)
            style = style.to(device)
            content_feature = encoder(content)
            style_feature = encoder(style)

            # style-swap each content/style pair in the batch independently
            style_swap_res = []
            for b in range(content_feature.shape[0]):
                c = content_feature[b].unsqueeze(0)
                s = style_feature[b].unsqueeze(0)
                cs = style_swap(c, s, args.patch_size, 1)
                style_swap_res.append(cs)
            style_swap_res = torch.cat(style_swap_res, 0)

            out_style_swap = decoder(style_swap_res)
            out_content = decoder(content_feature)
            out_style = decoder(style_feature)

            out_style_swap_latent = encoder(out_style_swap)
            out_content_latent = encoder(out_content)
            out_style_latent = encoder(out_style)

            image_reconstruction_loss = criterion(content, out_content) \
                + criterion(style, out_style)
            feature_reconstruction_loss = criterion(style_feature, out_style_latent) \
                + criterion(content_feature, out_content_latent) \
                + criterion(style_swap_res, out_style_swap_latent)
            tv_loss = TVloss(out_style_swap, args.tv_weight) \
                + TVloss(out_content, args.tv_weight) \
                + TVloss(out_style, args.tv_weight)
            loss = image_reconstruction_loss + feature_reconstruction_loss + tv_loss
            loss_list.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(f'[{e}/total {args.epoch} epoch], '
                  f'[{i}/total {round(iters / args.batch_size)} iteration]: {loss.item()}')

            if i % args.snapshot_interval == 0:
                content, style = next(test_iter)
                content = content.to(device)
                style = style.to(device)
                with torch.no_grad():
                    content_feature = encoder(content)
                    style_feature = encoder(style)
                    style_swap_res = []
                    for b in range(content_feature.shape[0]):
                        c = content_feature[b].unsqueeze(0)
                        s = style_feature[b].unsqueeze(0)
                        cs = style_swap(c, s, args.patch_size, 1)
                        style_swap_res.append(cs)
                    style_swap_res = torch.cat(style_swap_res, 0)
                    out_style_swap = decoder(style_swap_res)
                    out_content = decoder(content_feature)
                    out_style = decoder(style_feature)
                content = denorm(content, device)
                style = denorm(style, device)
                out_style_swap = denorm(out_style_swap, device)
                out_content = denorm(out_content, device)
                out_style = denorm(out_style, device)
                res = torch.cat([content, style, out_content, out_style, out_style_swap], dim=0)
                res = res.to('cpu')
                save_image(res, f'{image_dir}/{e}_epoch_{i}_iteration.png',
                           nrow=content_feature.shape[0])
        torch.save(decoder.state_dict(), f'{model_state_dir}/{e}_epoch.pth')

    plt.plot(range(len(loss_list)), loss_list)
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.title('train loss')
    plt.savefig(f'{loss_dir}/train_loss.png')
    with open(f'{loss_dir}/loss_log.txt', 'w') as f:
        for l in loss_list:
            f.write(f'{l}\n')
    print(f'Loss saved in {loss_dir}')
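# ---------------------------------------------------------------------------
# NOTE: `TVloss` is called above but not defined in this section. The sketch
# below is a plausible anisotropic total-variation penalty that matches the
# TVloss(images, weight) call signature; the formulation is an assumption,
# not necessarily the repo's actual implementation.
import torch


def TVloss(images, tv_weight):
    """Anisotropic total variation of an (N, C, H, W) batch, scaled by tv_weight."""
    # absolute differences between horizontally / vertically adjacent pixels
    diff_h = torch.abs(images[:, :, :, 1:] - images[:, :, :, :-1]).sum()
    diff_v = torch.abs(images[:, :, 1:, :] - images[:, :, :-1, :]).sum()
    # average over the batch so the weight is independent of batch size
    return tv_weight * (diff_h + diff_v) / images.shape[0]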
def main():
    parser = argparse.ArgumentParser(description='AdaIN Style Transfer by Pytorch')
    parser.add_argument('--batch_size', '-b', type=int, default=8,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--learning_rate', '-lr', type=float, default=5e-5,
                        help='learning rate for Adam')
    parser.add_argument('--snapshot_interval', type=int, default=1000,
                        help='Interval of snapshot to generate image')
    parser.add_argument('--train_content_dir', type=str, default='content',
                        help='content images directory for train')
    parser.add_argument('--train_style_dir', type=str, default='style',
                        help='style images directory for train')
    parser.add_argument('--test_content_dir', type=str, default='content',
                        help='content images directory for test')
    parser.add_argument('--test_style_dir', type=str, default='style',
                        help='style images directory for test')
    parser.add_argument('--save_dir', type=str, default='result',
                        help='save directory for result and loss')
    parser.add_argument('--reuse', default=None,
                        help='model state path to load for reuse')
    args = parser.parse_args()

    # create directories to save results
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    loss_dir = f'{args.save_dir}/loss'
    model_state_dir = f'{args.save_dir}/model_state'
    image_dir = f'{args.save_dir}/image'
    if not os.path.exists(loss_dir):
        os.mkdir(loss_dir)
        os.mkdir(model_state_dir)
        os.mkdir(image_dir)

    # set device on GPU if available, else CPU
    if torch.cuda.is_available() and args.gpu >= 0:
        device = torch.device(f'cuda:{args.gpu}')
        print(f'# CUDA available: {torch.cuda.get_device_name(args.gpu)}')
    else:
        device = torch.device('cpu')
    print(f'# Minibatch-size: {args.batch_size}')
    print(f'# epoch: {args.epoch}')
    print('')

    # prepare dataset and DataLoader
    train_dataset = PreprocessDataset(args.train_content_dir, args.train_style_dir)
    test_dataset = PreprocessDataset(args.test_content_dir, args.test_style_dir)
    iters = len(train_dataset)
    print(f'Length of train image pairs: {iters}')
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)
    test_iter = iter(test_loader)

    # set model and optimizer, optionally resuming from a saved state
    model = Model().to(device)
    if args.reuse is not None:
        model.load_state_dict(torch.load(args.reuse))
    optimizer = Adam(model.parameters(), lr=args.learning_rate)

    # start training
    loss_list = []
    for e in range(1, args.epoch + 1):
        print(f'Start {e} epoch')
        for i, (content, style) in tqdm(enumerate(train_loader, 1)):
            content = content.to(device)
            style = style.to(device)
            loss = model(content, style)
            loss_list.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(f'[{e}/total {args.epoch} epoch], '
                  f'[{i}/total {round(iters / args.batch_size)} iteration]: {loss.item()}')

            if i % args.snapshot_interval == 0:
                content, style = next(test_iter)
                content = content.to(device)
                style = style.to(device)
                with torch.no_grad():
                    out = model.generate(content, style)
                content = denorm(content, device)
                style = denorm(style, device)
                out = denorm(out, device)
                res = torch.cat([content, style, out], dim=0)
                res = res.to('cpu')
                save_image(res, f'{image_dir}/{e}_epoch_{i}_iteration.png',
                           nrow=args.batch_size)
        torch.save(model.state_dict(), f'{model_state_dir}/{e}_epoch.pth')

    plt.plot(range(len(loss_list)), loss_list)
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.title('train loss')
    plt.savefig(f'{loss_dir}/train_loss.png')
    with open(f'{loss_dir}/loss_log.txt', 'w') as f:
        for l in loss_list:
            f.write(f'{l}\n')
    print(f'Loss saved in {loss_dir}')
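# ---------------------------------------------------------------------------
# NOTE: the training scripts call a denorm() helper before saving snapshots,
# sometimes as denorm(images, device) and once as denorm(images). Assuming the
# datasets normalize with ImageNet statistics (the usual choice for VGG-based
# models, not confirmed here), a minimal sketch covering both call signatures:
import torch


def denorm(tensor, device=None):
    """Undo ImageNet-style normalization so tensors can be saved as images."""
    device = tensor.device if device is None else device
    std = torch.tensor([0.229, 0.224, 0.225], device=device).reshape(1, 3, 1, 1)
    mean = torch.tensor([0.485, 0.456, 0.406], device=device).reshape(1, 3, 1, 1)
    return torch.clamp(tensor * std + mean, 0, 1)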
def main():
    parser = argparse.ArgumentParser(description='AdaIN Style Transfer by Pytorch')
    parser.add_argument('--batch_size', '-b', type=int, default=12,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--learning_rate', '-lr', type=float, default=5e-5,
                        help='learning rate for Adam')
    parser.add_argument('--snapshot_interval', type=int, default=900,
                        help='Interval of snapshot to generate image')
    parser.add_argument('--train_content_dir', type=str, default='../content',
                        help='content images directory for train')
    parser.add_argument('--train_style_dir', type=str, default='../style',
                        help='style images directory for train')
    parser.add_argument('--test_content_dir', type=str, default='content',
                        help='content images directory for test')
    parser.add_argument('--test_style_dir', type=str, default='style',
                        help='style images directory for test')
    parser.add_argument('--save_dir', type=str, default='.',
                        help='save directory for result and loss')
    parser.add_argument('--reuse', default=None,
                        help='model state path to load for reuse')
    args = parser.parse_args()
    print(args.save_dir)

    # create directories to save results
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    loss_dir = f'{args.save_dir}/loss'
    model_state_dir = f'{args.save_dir}/model_state'
    image_dir = f'{args.save_dir}/image'
    if not os.path.exists(loss_dir):
        os.mkdir(loss_dir)
    if not os.path.exists(model_state_dir):
        os.mkdir(model_state_dir)
    if not os.path.exists(image_dir):
        os.mkdir(image_dir)

    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2"
    print(f'# Minibatch-size: {args.batch_size}')
    print(f'# epoch: {args.epoch}')
    print('')

    # prepare dataset and DataLoader
    train_dataset = PreprocessDataset(args.train_content_dir, args.train_style_dir)
    test_dataset = PreprocessDataset(args.test_content_dir, args.test_style_dir)
    iters = len(train_dataset)
    print(f'Length of train image pairs: {iters}')
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)
    test_iter = iter(test_loader)

    # set models: encoder/decoder/attention run data-parallel across three
    # GPUs; the image discriminator lives on cuda:1, where all of its inputs
    # are moved below
    device_ids = [0, 1, 2]
    # Re_encoder = nn.DataParallel(ReEncoder(), device_ids).cuda()
    vgg_encoder = nn.DataParallel(VGGEncoder(), device_ids).cuda()
    attn = nn.DataParallel(CoAttention(channel=512), device_ids).cuda()
    decoder = nn.DataParallel(Decoder(), device_ids).cuda()
    vggattn = nn.DataParallel(VGGAttn(), device_ids).cuda()
    D_img = Dimg().to('cuda:1')
    if args.reuse is not None:
        # NOTE: assumes the checkpoint belongs to vggattn; adjust if it was
        # saved from another module
        vggattn.load_state_dict(torch.load(args.reuse))

    # optimizer_Re_encoder = Adam(Re_encoder.parameters(), lr=args.learning_rate)
    optimizer_decoder = Adam(decoder.parameters(), lr=args.learning_rate)
    optimizer_attn = Adam(attn.parameters(), lr=args.learning_rate)
    optimizer_vggattn = Adam(filter(lambda p: p.requires_grad, vggattn.parameters()),
                             lr=args.learning_rate)
    optimizer_D_img = Adam(D_img.parameters(), lr=args.learning_rate)

    # start training (MSE_Loss, real_label and fake_label are assumed to be
    # defined at module level)
    loss_list_1 = []
    loss_list_2 = []
    loss_list_D_img = []
    lam = 10.0
    for e in range(1, args.epoch + 1):
        print(f'Start {e} epoch')
        for i, (content, style) in tqdm(enumerate(train_loader, 1)):
            content = content.cuda()
            style = style.cuda()

            # first pass: swap styles in both directions
            t_1 = vggattn(content, style, output_last_feature=True)
            t_2 = vggattn(style, content, output_last_feature=True)
            c1s2 = decoder(t_1)
            output_features_1 = vgg_encoder(images=c1s2, output_last_feature=True)
            output_middle_features_1 = vgg_encoder(images=c1s2, output_last_feature=False)
            style_middle_features_1 = vgg_encoder(images=style, output_last_feature=False)
            loss_c_1 = calc_content_loss(output_features_1, t_1)
            loss_s_1 = calc_style_loss(output_middle_features_1, style_middle_features_1)

            c2s1 = decoder(t_2)
            output_features_2 = vgg_encoder(images=c2s1, output_last_feature=True)
            output_middle_features_2 = vgg_encoder(images=c2s1, output_last_feature=False)
            style_middle_features_2 = vgg_encoder(images=content, output_last_feature=False)
            loss_c_2 = calc_content_loss(output_features_2, t_2)
            loss_s_2 = calc_style_loss(output_middle_features_2, style_middle_features_2)

            # adversarial term for the generator update
            D_content = D_img(content.to('cuda:1'))
            D_style = D_img(style.to('cuda:1'))
            D_c1s2 = D_img(c1s2.to('cuda:1'))
            D_c2s1 = D_img(c2s1.to('cuda:1'))
            D_loss = MSE_Loss(D_content, fake_label) + MSE_Loss(D_style, fake_label) \
                + MSE_Loss(D_c1s2, real_label) + MSE_Loss(D_c2s1, real_label)

            loss = loss_c_1 + lam * loss_s_1 + loss_c_2 + lam * loss_s_2 \
                + 0.01 * D_loss.to('cuda:0')
            loss_list_1.append(loss.sum().item())
            optimizer_vggattn.zero_grad()
            optimizer_decoder.zero_grad()
            loss.sum().backward(retain_graph=True)
            optimizer_decoder.step()
            optimizer_vggattn.step()

            # second pass: swap back to reconstruct the originals (cycle)
            t_1_c1s1 = vggattn(c1s2, c2s1, output_last_feature=True)
            t_2_c2s2 = vggattn(c2s1, c1s2, output_last_feature=True)
            c1s1 = decoder(t_1_c1s1)
            c2s2 = decoder(t_2_c2s2)

            output_features_c1s1 = vgg_encoder(images=c1s1, output_last_feature=True)
            output_middle_features_c1s1 = vgg_encoder(images=c1s1, output_last_feature=False)
            style_middle_features_c1s1 = vgg_encoder(images=content, output_last_feature=False)
            c_old = vgg_encoder(images=content, output_last_feature=True)
            loss_c_c1s1 = calc_content_loss(output_features_c1s1, c_old)  # compare with the original image
            loss_s_c1s1 = calc_style_loss(output_middle_features_c1s1,
                                          style_middle_features_c1s1)

            output_features_c2s2 = vgg_encoder(images=c2s2, output_last_feature=True)
            s_old = vgg_encoder(images=style, output_last_feature=True)
            output_middle_features_c2s2 = vgg_encoder(images=c2s2, output_last_feature=False)
            style_middle_features_c2s2 = vgg_encoder(images=style, output_last_feature=False)
            loss_c_c2s2 = calc_content_loss(output_features_c2s2, s_old)  # compare with the original image
            loss_s_c2s2 = calc_style_loss(output_middle_features_c2s2,
                                          style_middle_features_c2s2)

            mse_c1s1 = MSE_Loss(content, c1s1)
            mse_c2s2 = MSE_Loss(style, c2s2)
            loss = loss_c_c1s1 + lam * loss_s_c1s1 + loss_c_c2s2 + lam * loss_s_c2s2 \
                + 10 * mse_c1s1 + 10 * mse_c2s2
            loss_list_1.append(loss.sum().item())
            optimizer_vggattn.zero_grad()
            optimizer_decoder.zero_grad()
            loss.sum().backward(retain_graph=True)
            optimizer_decoder.step()
            optimizer_vggattn.step()

            # discriminator update: recompute on detached images so the
            # backward pass does not reuse the generator graph after its
            # parameters have been updated
            optimizer_D_img.zero_grad()
            D_content = D_img(content.to('cuda:1'))
            D_style = D_img(style.to('cuda:1'))
            D_c1s2 = D_img(c1s2.detach().to('cuda:1'))
            D_c2s1 = D_img(c2s1.detach().to('cuda:1'))
            D_loss = MSE_Loss(D_content, real_label) + MSE_Loss(D_style, real_label) \
                + MSE_Loss(D_c1s2, fake_label) + MSE_Loss(D_c2s1, fake_label)
            D_loss.backward()
            optimizer_D_img.step()

            print(f'[{e}/total {args.epoch} epoch], '
                  f'[{i}/total {round(iters / args.batch_size)} iteration]: {loss.sum().item()}')

            if i % args.snapshot_interval == 0:
                content = denorm(content)
                style = denorm(style)
                c1s2 = denorm(c1s2.detach())
                c2s1 = denorm(c2s1.detach())
                c1s1 = denorm(c1s1.detach())
                c2s2 = denorm(c2s2.detach())
                res = torch.cat([content, style, c1s2, c2s1, c1s1, c2s2], dim=0)
                res = res.to('cpu')
                save_image(res, f'{image_dir}/{e}_epoch_{i}_iteration.png',
                           nrow=args.batch_size)

        # torch.save(attn.state_dict(), f'{model_state_dir}/attn_{e}_epoch.pth')
        torch.save(vgg_encoder.state_dict(), f'{model_state_dir}/vgg_encoder_{e}_epoch.pth')
        torch.save(decoder.state_dict(), f'{model_state_dir}/decoder_{e}_epoch.pth')
        torch.save(D_img.state_dict(), f'{model_state_dir}/D_img_{e}_epoch.pth')
        torch.save(vggattn.state_dict(), f'{model_state_dir}/vggattn_{e}_epoch.pth')

    with open(f'{loss_dir}/loss_log.txt', 'w') as f:
        for l in loss_list_1:
            f.write(f'{l}\n')
    # plt.plot(range(len(loss_list_1)), loss_list_1)
    # plt.xlabel('iteration')
    # plt.ylabel('loss')
    # plt.title('train loss')
    # plt.savefig(f'{loss_dir}/train_loss.png')
    print(f'Loss saved in {loss_dir}')
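# ---------------------------------------------------------------------------
# NOTE: calc_content_loss / calc_style_loss are used above but not defined in
# this section. In AdaIN-style training the content loss is an MSE between
# feature maps and the style loss matches channel-wise mean/std across the
# encoder's intermediate layers; the sketch below assumes that formulation and
# that output_last_feature=False yields an iterable of feature maps.
import torch
import torch.nn.functional as F


def calc_mean_std(features, eps=1e-5):
    """Channel-wise mean and std over the spatial dims of an (N, C, H, W) map."""
    n, c = features.shape[:2]
    mean = features.reshape(n, c, -1).mean(dim=2).reshape(n, c, 1, 1)
    std = (features.reshape(n, c, -1).var(dim=2) + eps).sqrt().reshape(n, c, 1, 1)
    return mean, std


def calc_content_loss(out_features, target_features):
    # compare the stylized image's features with the attention/AdaIN target
    return F.mse_loss(out_features, target_features)


def calc_style_loss(out_middle_features, style_middle_features):
    # match channel-wise statistics at every intermediate encoder layer
    loss = 0
    for out_feat, style_feat in zip(out_middle_features, style_middle_features):
        out_mean, out_std = calc_mean_std(out_feat)
        style_mean, style_std = calc_mean_std(style_feat)
        loss += F.mse_loss(out_mean, style_mean) + F.mse_loss(out_std, style_std)
    return loss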
def train(**kwargs):
    opt = Config()
    for k, v in kwargs.items():
        setattr(opt, k, v)
    device = torch.device(opt.gpu_id)
    # start the server first: python -m visdom.server -p 2333
    vis = visdom.Visdom(port=2333, env='gin')

    train_dataset = PreprocessDataset('/data/lzd/train_data/content',
                                      '/data/lzd/train_data/style')
    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True)
    test_dataset = PreprocessDataset('/data/lzd/test_data/content',
                                     '/data/lzd/test_data/style')
    test_loader = DataLoader(test_dataset, batch_size=opt.test_bs, shuffle=False)
    test_iter = iter(test_loader)
    iters = len(train_dataset)
    print(f'Length of train image pairs: {iters}')

    # model = Model()
    # model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3])
    # model.to(device)
    model = Model().to(device)
    # the graph-attention module gets its own learning rate
    optimizer = Adam([
        {'params': model.decoder.parameters(), 'lr': opt.lr},
        {'params': model.gat.parameters(), 'lr': 0.0005},
    ], lr=opt.lr)

    iters_per_epoch = round(iters / opt.batch_size)
    for e in range(1, opt.epoch + 1):
        print(f'start {e} epoch:')
        for i, (content, style) in enumerate(train_loader, 1):
            content = content.to(device)  # [batch_size, 3, 256, 256]
            style = style.to(device)      # [batch_size, 3, 256, 256]
            loss_c, loss_s = model(content, style)
            loss = loss_c + 2 * loss_s
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(f'[{e}/{opt.epoch} epoch], '
                  f'[{i}/{iters_per_epoch}]: {loss_c.item()} and {loss_s.item()}')

            if i % opt.loss_interval == 0:
                step = (e - 1) * iters_per_epoch + i
                vis.line(Y=np.array([loss_c.item()]), X=np.array([step]),
                         win='loss_c', update='append',
                         opts=dict(xlabel='iteration', ylabel='Content loss',
                                   title='loss_c', legend=['Loss']))
                vis.line(Y=np.array([loss_s.item()]), X=np.array([step]),
                         win='loss_s', update='append',
                         opts=dict(xlabel='iteration', ylabel='Style loss',
                                   title='loss_s', legend=['Loss']))
                vis.line(Y=np.array([loss.item()]), X=np.array([step]),
                         win='loss', update='append',
                         opts=dict(xlabel='iteration', ylabel='Total loss',
                                   title='loss', legend=['Loss']))

            if i % opt.img_interval == 0:
                c, s = next(test_iter)
                c = c.to(device)
                s = s.to(device)
                with torch.no_grad():
                    out = model.generate(c, s)
                c = denorm(c, device)
                s = denorm(s, device)
                out = denorm(out, device)
                res = torch.cat([c, s, out], dim=0)
                vis.images(torch.clamp(res, 0, 1), win='image', nrow=opt.test_bs)
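# ---------------------------------------------------------------------------
# NOTE: train(**kwargs) reads its hyperparameters from a Config object that is
# not shown in this section. A minimal sketch covering only the attributes the
# loop touches; the values here are illustrative guesses, not the original
# defaults.
class Config:
    gpu_id = 'cuda:0'    # passed to torch.device()
    batch_size = 8       # training mini-batch size
    test_bs = 4          # test mini-batch size for snapshot rows
    lr = 1e-4            # base learning rate (decoder parameter group)
    epoch = 20           # number of sweeps over the dataset
    loss_interval = 100  # iterations between visdom loss-curve points
    img_interval = 500   # iterations between visdom image snapshots


# Any attribute can be overridden per run, e.g. train(batch_size=4, lr=5e-5).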