def make_z(n, img_num, z_dim=8, sample_type='random'):
    """Draw a batch of latent codes of shape (n, img_num, z_dim).

    Args:
        n: Number of domain-A images.
        img_num: Number of latent codes per image.
        z_dim: Dimensionality of each latent code.
        sample_type: 'random' for i.i.d. normal codes, 'interpolation'
            for codes produced by make_interpolation.

    Returns:
        A util.var-wrapped tensor of latent codes.

    Raises:
        ValueError: If sample_type is not 'random' or 'interpolation'
            (previously this fell through and raised UnboundLocalError).
    """
    if sample_type == 'random':
        # Fix: original used torch.randn(n, img_num, 8), silently ignoring z_dim.
        z = util.var(torch.randn(n, img_num, z_dim))
    elif sample_type == 'interpolation':
        z = util.var(make_interpolation(n=n, img_num=img_num, z_dim=z_dim))
    else:
        raise ValueError("sample_type must be 'random' or 'interpolation', got %r" % (sample_type,))
    return z
def make_z(n, img_num, z_dim=8, sample_type='random'):
    """Draw a batch of latent codes of shape (n, img_num, z_dim).

    Args:
        n: Number of domain-A images.
        img_num: Number of latent codes per image.
        z_dim: Dimensionality of each latent code.
        sample_type: 'random' for i.i.d. normal codes, 'interpolation'
            for codes produced by make_interpolation.

    Returns:
        A util.var-wrapped tensor of latent codes.

    Raises:
        ValueError: If sample_type is not 'random' or 'interpolation'
            (previously this fell through and raised UnboundLocalError).
    """
    if sample_type == 'random':
        # Fix: original used torch.randn(n, img_num, 8), silently ignoring z_dim.
        z = util.var(torch.randn(n, img_num, z_dim))
    elif sample_type == 'interpolation':
        z = util.var(make_interpolation(n=n, img_num=img_num, z_dim=z_dim))
    else:
        raise ValueError("sample_type must be 'random' or 'interpolation', got %r" % (sample_type,))
    return z
def mse_loss(score, target=1):
    """LSGAN-style MSE loss of `score` against a constant 0/1 label map.

    Args:
        score: Discriminator output tensor (any shape).
        target: 1 to pull the score toward "real", 0 toward "fake".

    Returns:
        Scalar MSE loss between `score` and an all-`target` label tensor.

    Raises:
        ValueError: If target is not 0 or 1 (previously this fell through
            and raised UnboundLocalError on `label`).
    """
    # Removed unused `dtype = type(score)` local from the original.
    if target == 1:
        label = util.var(torch.ones(score.size()), requires_grad=False)
    elif target == 0:
        label = util.var(torch.zeros(score.size()), requires_grad=False)
    else:
        raise ValueError("target must be 0 or 1, got %r" % (target,))
    criterion = nn.MSELoss()
    loss = criterion(score, label)
    return loss
def make_img(dloader, G, z, img_size=128, img_num=None):
    """Build a grid tensor: each row is one domain-A image followed by
    `img_num` generated domain-B images (one per latent code in `z`).

    Args:
        dloader: Data loader yielding (img, _) batches; only the first
            batch is used.
        G: Generator taking (image, z) and returning a fake image.
        z: Latent codes of shape (N, img_num, z_dim).
        img_size: Height/width of the (assumed square, 3-channel) images.
        img_num: Codes per image; defaults to z.size(1). Added because
            callers in this file pass img_num= as a keyword, which the
            original signature did not accept.

    Returns:
        Tensor of shape (N * (img_num + 1), 3, img_size, img_size),
        rescaled from [-1, 1] to [0, 1].
    """
    if torch.cuda.is_available():
        dtype = torch.cuda.FloatTensor
    else:
        dtype = torch.FloatTensor

    iter_dloader = iter(dloader)
    # Fix: `iter_dloader.next()` is Python-2-only; use the builtin next().
    img, _ = next(iter_dloader)

    if img_num is None:
        img_num = z.size(1)
    N = img.size(0)
    img = util.var(img.type(dtype))

    result_img = torch.FloatTensor(N * (img_num + 1), 3, img_size, img_size).type(dtype)

    for i in range(N):
        # The leftmost is domain A image(Edge image)
        result_img[i * (img_num + 1)] = img[i].data

        # Generate img_num images per a domain A image
        for j in range(img_num):
            img_ = img[i].unsqueeze(dim=0)
            z_ = z[i, j, :].unsqueeze(dim=0)
            out_img = G(img_, z_)
            result_img[i * (img_num + 1) + j + 1] = out_img.data

    # Map tanh output range [-1, 1] back to [0, 1] for saving/visualization.
    result_img = result_img / 2 + 0.5
    return result_img
def make_img(dloader, G, z, img_size=128, img_num=None):
    """Build a grid tensor: each row is one domain-A image followed by
    `img_num` generated domain-B images (one per latent code in `z`).

    Args:
        dloader: Data loader yielding (img, _) batches; only the first
            batch is used.
        G: Generator taking (image, z) and returning a fake image.
        z: Latent codes of shape (N, img_num, z_dim).
        img_size: Height/width of the (assumed square, 3-channel) images.
        img_num: Codes per image; defaults to z.size(1). Added because
            callers in this file pass img_num= as a keyword, which the
            original signature did not accept.

    Returns:
        Tensor of shape (N * (img_num + 1), 3, img_size, img_size),
        rescaled from [-1, 1] to [0, 1].
    """
    if torch.cuda.is_available():
        dtype = torch.cuda.FloatTensor
    else:
        dtype = torch.FloatTensor

    iter_dloader = iter(dloader)
    # Fix: `iter_dloader.next()` is Python-2-only; use the builtin next().
    img, _ = next(iter_dloader)

    if img_num is None:
        img_num = z.size(1)
    N = img.size(0)
    img = util.var(img.type(dtype))

    result_img = torch.FloatTensor(N * (img_num + 1), 3, img_size, img_size).type(dtype)

    for i in range(N):
        # The leftmost is domain A image(Edge image)
        result_img[i * (img_num + 1)] = img[i].data

        # Generate img_num images per a domain A image
        for j in range(img_num):
            img_ = img[i].unsqueeze(dim=0)
            z_ = z[i, j, :].unsqueeze(dim=0)
            out_img = G(img_, z_)
            result_img[i * (img_num + 1) + j + 1] = out_img.data

    # Map tanh output range [-1, 1] back to [0, 1] for saving/visualization.
    result_img = result_img / 2 + 0.5
    return result_img
def make_z(img_num, z_dim=8):
    """Sample `img_num` independent standard-normal latent codes.

    Returns a util.var-wrapped tensor of shape (img_num, z_dim).
    """
    return util.var(torch.randn(img_num, z_dim))
def __init__(self, root='data/edges2shoes', result_dir='result', weight_dir='weight', load_weight=False,
             batch_size=2, test_size=20, test_img_num=5, img_size=128, num_epoch=100, save_every=1000,
             lr=0.0002, beta_1=0.5, beta_2=0.999, lambda_kl=0.01, lambda_img=10, lambda_z=0.5, z_dim=8):
    """Set up data loaders, the four networks (two discriminators,
    generator, encoder), their Adam optimizers, a fixed test latent
    batch, and all training hyperparameters / bookkeeping paths.

    Args:
        root: Dataset root directory (paired A/B images).
        result_dir: Where intermediate result images are written.
        weight_dir: Where model weights are saved.
        load_weight: If True, train() resumes from saved weights.
        batch_size: Training batch size (2: one sample feeds cVAE-GAN,
            the other feeds cLR-GAN — see train()).
        test_size: Batch size of the validation loader / fixed_z rows.
        test_img_num: Generated samples per test image.
        img_size: Square image resolution.
        num_epoch: Total training epochs.
        save_every: Iteration interval for logging/saving.
        lr, beta_1, beta_2: Adam optimizer settings (shared by all nets).
        lambda_kl, lambda_img, lambda_z: Loss weights for the KL term,
            image reconstruction, and latent reconstruction.
        z_dim: Latent code dimensionality.
    """
    # Data type(Can use GPU or not?)
    self.dtype = torch.cuda.FloatTensor
    if torch.cuda.is_available() is False:
        self.dtype = torch.FloatTensor

    # Data loader for training
    self.dloader, dlen = data_loader(root=root, batch_size=batch_size, shuffle=True,
                                     img_size=img_size, mode='train')

    # Data loader for test
    self.t_dloader, _ = data_loader(root=root, batch_size=test_size, shuffle=False,
                                    img_size=img_size, mode='val')

    # Models
    # D_cVAE is discriminator for cVAE-GAN(encoded vector z).
    # D_cLR is discriminator for cLR-GAN(random vector z).
    # Both of D_cVAE and D_cLR has two discriminators which have different output size((14x14) and (30x30)).
    # Totally, we have four discriminators now.
    self.D_cVAE = model.Discriminator().type(self.dtype)
    self.D_cLR = model.Discriminator().type(self.dtype)
    self.G = model.Generator(z_dim=z_dim).type(self.dtype)
    self.E = model.Encoder(z_dim=z_dim).type(self.dtype)

    # Optimizers
    self.optim_D_cVAE = optim.Adam(self.D_cVAE.parameters(), lr=lr, betas=(beta_1, beta_2))
    self.optim_D_cLR = optim.Adam(self.D_cLR.parameters(), lr=lr, betas=(beta_1, beta_2))
    self.optim_G = optim.Adam(self.G.parameters(), lr=lr, betas=(beta_1, beta_2))
    self.optim_E = optim.Adam(self.E.parameters(), lr=lr, betas=(beta_1, beta_2))

    # Optiminzer lr scheduler (currently disabled)
    #self.optim_D_scheduler = optim.lr_scheduler.LambdaLR(self.optim_D, lr_lambda=lr_decay_rule)
    #self.optim_G_scheduler = optim.lr_scheduler.LambdaLR(self.optim_G, lr_lambda=lr_decay_rule)
    #self.optim_E_scheduler = optim.lr_scheduler.LambdaLR(self.optim_E, lr_lambda=lr_decay_rule)

    # fixed random_z for test — kept constant so intermediate result images
    # are comparable across epochs
    self.fixed_z = util.var(torch.randn(test_size, test_img_num, z_dim))

    # Some hyperparameters
    self.z_dim = z_dim
    self.lambda_kl = lambda_kl
    self.lambda_img = lambda_img
    self.lambda_z = lambda_z

    # Extra things
    self.result_dir = result_dir
    self.weight_dir = weight_dir
    self.load_weight = load_weight
    self.test_img_num = test_img_num
    self.img_size = img_size
    self.start_epoch = 0
    self.num_epoch = num_epoch
    self.save_every = save_every
def epoch_finished(self):
    """Run the PPO optimization phase over the rollout buffers collected
    by act(), then reset the timestep buffers.

    Returns the last minibatch's total loss (as a CPU tensor element).
    NOTE(review): `.data[0]` / `.cpu()[0]` indexing is the pre-0.4 PyTorch
    scalar idiom — assumes an old torch version; confirm before upgrading.
    """
    # Bootstrap value for the state after the last timestep.
    _, value = self.policy(var(stack_frames([self.last_state]), self.cuda))
    # Advantages from self._adv (presumably GAE or similar — semantics live
    # in _adv, not visible here), with the bootstrap value appended.
    advs = np.vstack(self._adv(self.rewards, np.append(self.values, value.cpu().data[0, 0]), self.dones))
    # Returns = value baseline + advantage.
    returns = np.vstack(self.values) + np.copy(advs)
    # Normalize advantages for stable policy-gradient scale.
    advs = (advs - advs.mean()) / (advs.std() + 1e-8)
    states = np.array(self.states, dtype=object)
    old_probs = np.vstack(self.old_probs)
    batch_range = list(range(self.batch_size))
    for n in range(self.opt_epochs):
        # Fresh shuffle of all timesteps each optimization epoch.
        idx = np.random.permutation(self.n_timesteps)
        for num_batch in range(self.n_batches):
            b_idx = idx[num_batch * self.batch_size: (num_batch + 1) * self.batch_size]
            b_advs = np.copy(advs[b_idx])
            b_returns = np.copy(returns[b_idx])
            b_old_probs = np.copy(old_probs[b_idx])
            b_states = stack_frames(states[b_idx])
            b_actions = np.copy(np.array(self.actions)[b_idx])
            self.optim.zero_grad()
            probs, values = self.policy(var(b_states, self.cuda))
            # Probability ratio r = pi_new(a|s) / pi_old(a|s), computed in log space
            # (old_probs stores log-probs from act()).
            r = torch.exp(torch.log(probs[batch_range, b_actions]) - np_var(b_old_probs, self.cuda).squeeze(1))
            A = np_var(b_advs, self.cuda).squeeze(1)
            # PPO clipped surrogate objective (eps from a schedule).
            L_policy = -torch.min(r * A, r.clamp(1 - self.eps_schedule.param, 1 + self.eps_schedule.param) * A).mean()
            # Squared-error value loss against the computed returns.
            L_value = (values - np_var(b_returns, self.cuda)).pow(2).mean()
            # Entropy bonus to discourage premature determinism.
            L_entropy = (-((probs * torch.log(probs)).sum(-1))).mean()
            L = self.value_coeff * L_value + L_policy - self.entropy_coeff * L_entropy
            L.backward()
            self.optim.step()
    self._reset_timesteps()
    return L.data.cpu()[0]
def act(self, t, state):
    """Sample an action for timestep `t`, recording the state, value
    estimate, chosen action, and its log-probability in the rollout
    buffers for the later PPO update.
    """
    self.states[t] = state
    action_probs, state_value = self.policy(var(stack_frames([state]), self.cuda))
    self.values[t] = state_value.cpu().data[0, 0]
    dist = Categorical(action_probs.cpu().data[0])
    chosen = dist.sample()
    self.actions[t] = chosen[0]
    self.old_probs[t] = dist.log_prob(chosen)[0]
    return chosen[0]
def train(self):
    """Main BicycleGAN training loop (cVAE-GAN + cLR-GAN).

    Each iteration: (1) update both discriminators with LSGAN losses,
    (2) update G and E with adversarial + KL + image-reconstruction
    losses, (3) update ONLY G with the latent-reconstruction loss.
    Logs and saves intermediate images/weights every `save_every` iters.
    NOTE(review): `.data[0]` scalar indexing assumes a pre-0.4 PyTorch.
    """
    if self.load_weight is True:
        self.load_pretrained()

    self.set_train_phase()

    for epoch in range(self.start_epoch, self.num_epoch):
        for iters, (img, ground_truth) in enumerate(self.dloader):
            # img : (2, 3, 128, 128) of domain A / ground_truth : (2, 3, 128, 128) of domain B
            img, ground_truth = util.var(img), util.var(ground_truth)

            # Separate data for cVAE_GAN and cLR_GAN: sample 0 goes to the
            # cVAE branch, sample 1 to the cLR branch.
            cVAE_data = {'img' : img[0].unsqueeze(dim=0), 'ground_truth' : ground_truth[0].unsqueeze(dim=0)}
            cLR_data = {'img' : img[1].unsqueeze(dim=0), 'ground_truth' : ground_truth[1].unsqueeze(dim=0)}

            ''' ----------------------------- 1. Train D ----------------------------- '''

            ############# Step 1. D loss in cVAE-GAN #############

            # Encoded latent vector: reparameterization trick, z = mu + std * eps.
            mu, log_variance = self.E(cVAE_data['ground_truth'])
            std = torch.exp(log_variance / 2)
            random_z = util.var(torch.randn(1, self.z_dim))
            encoded_z = (random_z * std) + mu

            # Generate fake image
            fake_img_cVAE = self.G(cVAE_data['img'], encoded_z)

            # Get scores and loss (two heads per discriminator).
            real_d_cVAE_1, real_d_cVAE_2 = self.D_cVAE(cVAE_data['ground_truth'])
            fake_d_cVAE_1, fake_d_cVAE_2 = self.D_cVAE(fake_img_cVAE)

            # mse_loss for LSGAN
            D_loss_cVAE_1 = mse_loss(real_d_cVAE_1, 1) + mse_loss(fake_d_cVAE_1, 0)
            D_loss_cVAE_2 = mse_loss(real_d_cVAE_2, 1) + mse_loss(fake_d_cVAE_2, 0)

            ############# Step 2. D loss in cLR-GAN #############

            # Random latent vector
            random_z = util.var(torch.randn(1, self.z_dim))

            # Generate fake image
            fake_img_cLR = self.G(cLR_data['img'], random_z)

            # Get scores and loss
            real_d_cLR_1, real_d_cLR_2 = self.D_cLR(cLR_data['ground_truth'])
            fake_d_cLR_1, fake_d_cLR_2 = self.D_cLR(fake_img_cLR)

            D_loss_cLR_1 = mse_loss(real_d_cLR_1, 1) + mse_loss(fake_d_cLR_1, 0)
            D_loss_cLR_2 = mse_loss(real_d_cLR_2, 1) + mse_loss(fake_d_cLR_2, 0)

            D_loss = D_loss_cVAE_1 + D_loss_cLR_1 + D_loss_cVAE_2 + D_loss_cLR_2

            # Update both discriminators on the combined loss.
            self.all_zero_grad()
            D_loss.backward()
            self.optim_D_cVAE.step()
            self.optim_D_cLR.step()

            ''' ----------------------------- 2. Train G & E ----------------------------- '''

            ############# Step 1. GAN loss to fool discriminator (cVAE_GAN and cLR_GAN) #############

            # Encoded latent vector (recomputed: the D step's graph was consumed).
            mu, log_variance = self.E(cVAE_data['ground_truth'])
            std = torch.exp(log_variance / 2)
            random_z = util.var(torch.randn(1, self.z_dim))
            encoded_z = (random_z * std) + mu

            # Generate fake image and get adversarial loss
            fake_img_cVAE = self.G(cVAE_data['img'], encoded_z)
            fake_d_cVAE_1, fake_d_cVAE_2 = self.D_cVAE(fake_img_cVAE)

            GAN_loss_cVAE_1 = mse_loss(fake_d_cVAE_1, 1)
            GAN_loss_cVAE_2 = mse_loss(fake_d_cVAE_2, 1)

            # Random latent vector
            random_z = util.var(torch.randn(1, self.z_dim))

            # Generate fake image and get adversarial loss
            fake_img_cLR = self.G(cLR_data['img'], random_z)
            fake_d_cLR_1, fake_d_cLR_2 = self.D_cLR(fake_img_cLR)

            GAN_loss_cLR_1 = mse_loss(fake_d_cLR_1, 1)
            GAN_loss_cLR_2 = mse_loss(fake_d_cLR_2, 1)

            G_GAN_loss = GAN_loss_cVAE_1 + GAN_loss_cVAE_2 + GAN_loss_cLR_1 + GAN_loss_cLR_2

            ############# Step 2. KL-divergence with N(0, 1) (cVAE-GAN) #############

            # Closed-form KL(N(mu, var) || N(0, 1)), weighted by lambda_kl.
            KL_div = self.lambda_kl * torch.sum(0.5 * (mu ** 2 + torch.exp(log_variance) - log_variance - 1))

            ############# Step 3. Reconstruction of ground truth image (|G(A, z) - B|) (cVAE-GAN) #############

            img_recon_loss = self.lambda_img * L1_loss(fake_img_cVAE, cVAE_data['ground_truth'])

            EG_loss = G_GAN_loss + KL_div + img_recon_loss
            self.all_zero_grad()
            # retain_graph=True: fake_img_cLR's graph is reused in step 3 below.
            EG_loss.backward(retain_graph=True)
            self.optim_E.step()
            self.optim_G.step()

            ''' ----------------------------- 3. Train ONLY G ----------------------------- '''

            ############ Step 1. Reconstrution of random latent code (|E(G(A, z)) - z|) (cLR-GAN) ############

            # This step should update ONLY G.
            mu_, log_variance_ = self.E(fake_img_cLR)
            z_recon_loss = L1_loss(mu_, random_z)

            G_alone_loss = self.lambda_z * z_recon_loss

            self.all_zero_grad()
            G_alone_loss.backward()
            self.optim_G.step()

            # NOTE(review): opened in 'w' mode every iteration and never closed —
            # this truncates the log each time and leaks file handles; consider
            # opening once outside the loop (or using `with`).
            log_file = open('log.txt', 'w')
            log_file.write(str(epoch))

            # Print error and save intermediate result image and weight
            if iters % self.save_every == 0:
                print('[Epoch : %d / Iters : %d] => D_loss : %f / G_GAN_loss : %f / KL_div : %f / img_recon_loss : %f / z_recon_loss : %f'\
                      %(epoch, iters, D_loss.data[0], G_GAN_loss.data[0], KL_div.data[0], img_recon_loss.data[0], G_alone_loss.data[0]))

                # Save intermediate result image
                if os.path.exists(self.result_dir) is False:
                    os.makedirs(self.result_dir)

                result_img = util.make_img(self.t_dloader, self.G, self.fixed_z,
                                           img_num=self.test_img_num, img_size=self.img_size)

                img_name = '{epoch}_{iters}.png'.format(epoch=epoch, iters=iters)
                img_path = os.path.join(self.result_dir, img_name)

                torchvision.utils.save_image(result_img, img_path, nrow=self.test_img_num+1)

                # Save intermediate weight
                if os.path.exists(self.weight_dir) is False:
                    os.makedirs(self.weight_dir)

                self.save_weight()

        # Save weight at the end of every epoch
        self.save_weight(epoch=epoch)
def train(self):
    """BicycleGAN training loop, paired-input variant: discriminators see
    the domain-A image concatenated (channel-wise) with the real or fake
    domain-B image, and fake pairs are detached during the D step.

    Per iteration: (1) update both discriminators, (2) update G and E,
    (3) update ONLY G via latent reconstruction. Logs and saves every
    `save_every` iterations.
    NOTE(review): `.data[0]` scalar indexing assumes a pre-0.4 PyTorch.
    """
    if self.load_weight is True:
        self.load_pretrained()

    self.set_train_phase()
    self.show_model()

    # Training Start!
    for epoch in range(self.start_epoch, self.num_epoch):
        for iters, (img, ground_truth) in enumerate(self.dloader):
            # img(2, 3, 128, 128) : Two images in Domain A. One for cVAE and another for cLR.
            # ground_truth(2, 3, 128, 128) : Two images Domain B. One for cVAE and another for cLR.
            img, ground_truth = util.var(img), util.var(ground_truth)

            # Separate data for cVAE_GAN(using encoded z) and cLR_GAN(using random z)
            cVAE_data = {'img' : img[0].unsqueeze(dim=0), 'ground_truth' : ground_truth[0].unsqueeze(dim=0)}
            cLR_data = {'img' : img[1].unsqueeze(dim=0), 'ground_truth' : ground_truth[1].unsqueeze(dim=0)}

            ''' ----------------------------- 1. Train D ----------------------------- '''

            ####################### < Step 1. D loss in cVAE-GAN > #######################

            # Encoded latent vector: reparameterization trick, z = mu + std * eps.
            mu, log_variance = self.E(cVAE_data['ground_truth'])
            std = torch.exp(log_variance / 2)
            random_z = util.var(torch.randn(1, self.z_dim))
            encoded_z = (random_z * std) + mu

            # Generate fake image
            fake_img_cVAE = self.G(cVAE_data['img'], encoded_z)

            # Discriminator input is the (A, B) pair concatenated on channels.
            real_pair_cVAE = torch.cat([cVAE_data['img'], cVAE_data['ground_truth']], dim=1)
            fake_pair_cVAE = torch.cat([cVAE_data['img'], fake_img_cVAE], dim=1)

            # detach(): D step must not backprop into G/E.
            real_d_cVAE_1, real_d_cVAE_2 = self.D_cVAE(real_pair_cVAE)
            fake_d_cVAE_1, fake_d_cVAE_2 = self.D_cVAE(fake_pair_cVAE.detach())

            D_loss_cVAE_1 = mse_loss(real_d_cVAE_1, 1) + mse_loss(fake_d_cVAE_1, 0)  # Small patch loss
            D_loss_cVAE_2 = mse_loss(real_d_cVAE_2, 1) + mse_loss(fake_d_cVAE_2, 0)  # Big patch loss

            ####################### < Step 2. D loss in cLR-GAN > #######################

            # Generate fake image
            # Generated img using 'cVAE' data will be used to train D_'cLR'
            fake_img_cLR = self.G(cVAE_data['img'], random_z)

            real_pair_cLR = torch.cat([cLR_data['img'], cLR_data['ground_truth']], dim=1)
            fake_pair_cLR = torch.cat([cVAE_data['img'], fake_img_cLR], dim=1)

            # A_cVAE = Domain A image for cVAE, A_cLR = Domain A image for cVAE
            # B_cVAE = Domain B image for cVAE, B_cLR = Domain B image for cVAE
            # D_cVAE has to discriminate [A_cVAE, B_cVAE] vs [A_cVAE, G(A_cVAE, encoded_z)]
            # D_cLR has to discriminate [A_cLR, B_cLR] vs [A_cVAE, G(A_cVAE, random_z)]
            # This helps to generate more diverse images
            real_d_cLR_1, real_d_cLR_2 = self.D_cLR(real_pair_cLR)
            fake_d_cLR_1, fake_d_cLR_2 = self.D_cLR(fake_pair_cLR.detach())

            D_loss_cLR_1 = mse_loss(real_d_cLR_1, 1) + mse_loss(fake_d_cLR_1, 0)  # Small patch loss
            D_loss_cLR_2 = mse_loss(real_d_cLR_2, 1) + mse_loss(fake_d_cLR_2, 0)  # Big patch loss

            D_loss = D_loss_cVAE_1 + D_loss_cVAE_2 + D_loss_cLR_1 + D_loss_cLR_2

            # Update D
            self.all_zero_grad()
            D_loss.backward()
            self.optim_D_cVAE.step()
            self.optim_D_cLR.step()

            ''' ----------------------------- 2. Train G & E ----------------------------- '''

            ########### < Step 1. GAN loss to fool discriminator (cVAE_GAN and cLR_GAN) > ###########

            # Encoded latent vector (recomputed: the D step's graph was consumed).
            mu, log_variance = self.E(cVAE_data['ground_truth'])
            std = torch.exp(log_variance / 2)
            random_z = util.var(torch.randn(1, self.z_dim))
            encoded_z = (random_z * std) + mu

            # Generate fake image
            fake_img_cVAE = self.G(cVAE_data['img'], encoded_z)
            fake_pair_cVAE = torch.cat([cVAE_data['img'], fake_img_cVAE], dim=1)

            # Fool D_cVAE
            fake_d_cVAE_1, fake_d_cVAE_2 = self.D_cVAE(fake_pair_cVAE)

            GAN_loss_cVAE_1 = mse_loss(fake_d_cVAE_1, 1)  # Small patch loss
            GAN_loss_cVAE_2 = mse_loss(fake_d_cVAE_2, 1)  # Big patch loss

            # Random latent vector and generate fake image
            random_z = util.var(torch.randn(1, self.z_dim))
            fake_img_cLR = self.G(cLR_data['img'], random_z)
            fake_pair_cLR = torch.cat([cLR_data['img'], fake_img_cLR], dim=1)

            # Fool D_cLR
            fake_d_cLR_1, fake_d_cLR_2 = self.D_cLR(fake_pair_cLR)

            GAN_loss_cLR_1 = mse_loss(fake_d_cLR_1, 1)  # Small patch loss
            GAN_loss_cLR_2 = mse_loss(fake_d_cLR_2, 1)  # Big patch loss

            G_GAN_loss = GAN_loss_cVAE_1 + GAN_loss_cVAE_2 + GAN_loss_cLR_1 + GAN_loss_cLR_2

            ################# < Step 2. KL-divergence with N(0, 1) (cVAE-GAN) > #################

            # See http://yunjey47.tistory.com/43 or Appendix B in the paper for details
            KL_div = self.lambda_kl * torch.sum(0.5 * (mu ** 2 + torch.exp(log_variance) - log_variance - 1))

            #### < Step 3. Reconstruction of ground truth image (|G(A, z) - B|) (cVAE-GAN) > ####

            img_recon_loss = self.lambda_img * L1_loss(fake_img_cVAE, cVAE_data['ground_truth'])

            EG_loss = G_GAN_loss + KL_div + img_recon_loss
            self.all_zero_grad()
            EG_loss.backward(retain_graph=True)  # retain_graph=True for the next step 3. Train ONLY G
            self.optim_E.step()
            self.optim_G.step()

            ''' ----------------------------- 3. Train ONLY G ----------------------------- '''

            ##### < Step 1. Reconstrution of random latent code (|E(G(A, z)) - z|) (cLR-GAN) > #####

            # This step should update only G.
            # See https://github.com/junyanz/BicycleGAN/issues/5 for details.
            mu, log_variance = self.E(fake_img_cLR)
            z_recon_loss = L1_loss(mu, random_z)

            z_recon_loss = self.lambda_z * z_recon_loss

            self.all_zero_grad()
            z_recon_loss.backward()
            self.optim_G.step()

            # NOTE(review): opened in 'w' mode every iteration and never closed —
            # this truncates the log each time and leaks file handles; consider
            # opening once outside the loop (or using `with`).
            log_file = open('log.txt', 'w')
            log_file.write(str(epoch))

            # Print error, save intermediate result image and weight
            if iters % self.save_every == 0:
                print('[Epoch : %d / Iters : %d] => D_loss : %f / G_GAN_loss : %f / KL_div : %f / img_recon_loss : %f / z_recon_loss : %f'\
                      %(epoch, iters, D_loss.data[0], G_GAN_loss.data[0], KL_div.data[0], img_recon_loss.data[0], z_recon_loss.data[0]))

                # Save intermediate result image
                if os.path.exists(self.result_dir) is False:
                    os.makedirs(self.result_dir)

                result_img = util.make_img(self.t_dloader, self.G, self.fixed_z,
                                           img_num=self.test_img_num, img_size=self.img_size)

                img_name = '{epoch}_{iters}.png'.format(epoch=epoch, iters=iters)
                img_path = os.path.join(self.result_dir, img_name)

                torchvision.utils.save_image(result_img, img_path, nrow=self.test_img_num+1)

                # Save intermediate weight
                if os.path.exists(self.weight_dir) is False:
                    os.makedirs(self.weight_dir)

                self.save_weight()

        # Save weight at the end of every epoch
        self.save_weight(epoch=epoch)
def main(args): dloader, dlen = data_loader(root=args.root, batch_size=1, shuffle=False, img_size=128, mode=args.mode) #data_file_path = os.path.join(args.root, args.mode) device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu") # Data type(Can use GPU or not?) torch.cuda.set_device(device) if torch.cuda.is_available() is True: dtype = torch.cuda.FloatTensor else: dtype = torch.FloatTensor if args.epoch is not None: weight_name = '{epoch}-G.pkl'.format(epoch=args.epoch) else: weight_name = 'G.pkl' weight_path = os.path.join(args.weight_dir, weight_name) G = model.Generator(z_dim=16).type(dtype) G.load_state_dict(torch.load(weight_path)) G.eval() weight_path2 = os.path.join(args.weight2_dir, weight_name) G2 = model.Generator(z_dim=2).type(dtype) G2.load_state_dict(torch.load(weight_path2)) G2.eval() if os.path.exists(args.result_dir) is False: os.makedirs(args.result_dir) # For example, img_name = random_55.png if args.epoch is None: args.epoch = 'latest' i = 0 for iters, (img, ground_truth, mask, file_name) in enumerate(dloader): img = util.var(img.type(dtype)) #mask = util.var(mask.type(dtype)) #one = torch.ones([1, 3, 128, 128]) #one = util.var(one.type(dtype)) z = make_z(img_num=args.img_num, z_dim=16) z2 = make_z(img_num=args.img_num, z_dim=2) for j in range(args.img_num): z_ = z[j, :].unsqueeze(dim=0) z2_ = z2[j, :].unsqueeze(dim=0) out_img = G(img, z_) out_img2 = G2(img, z2_) outs_img = out_img / 2 + 0.5 outs_img2 = out_img2 / 2 + 0.5 img_name = '{filenames}_{style}.png'.format(filenames=file_name[0], style=j) img_name2 = '{filenames}_{style}_1.png'.format( filenames=file_name[0], style=j) #print(img_name) #mask_name = '{filenames}_{style}.png'.format(filenames = filenames[i], style = j) img_path = os.path.join(args.result_dir, img_name) img_path2 = os.path.join(args.result_dir, img_name2) #mask_path = os.path.join(args.mask_dir, mask_name) # for FID SCORE #fileDir = 
'/home/leognha/Desktop/seg-model/MedicalImage_Project02_Segmentation/data/split1/train1.25' #pathDir = os.listdir(fileDir) #if img_name in pathDir: # torchvision.utils.save_image(outs_img2, img_path2) torchvision.utils.save_image(outs_img, img_path) torchvision.utils.save_image(outs_img2, img_path2) #torchvision.utils.save_image(mask_, mask_path) i = i + 1 print(i, 'in split3') print('origin number:', len(os.listdir(os.path.join(args.root, args.mode)))) print('agu number:', len(os.listdir(args.result_dir)))