def generate_animation(self, epoch):
    for i1 in range(self.code_dim):
        utils.generate_animation(
            self.save_path + '/visualization/code%02d' % i1, epoch)
    print("Animations saved")
def result_vary(self, epoch):
    image_num = 10
    row = 10
    k = 0
    i = 0
    # collect one validation image per class (assumes batch size 1 in valid_loader)
    for X, Y in self.valid_loader:
        if Y.numpy() == k:
            if i == 0:
                images = X
            else:
                images = torch.cat((images, X), 0)
            i += 1
            k += 1
        if i == image_num:
            break
    self.load(epoch)
    X = utils.to_var(images)
    mu, sigma = self.E(self.FC(X))
    # note: the loop variable below shadows the `epoch` argument; it indexes animation frames
    for epoch in range(0, 100):
        images = X
        for k in range((image_num - 1)):
            eps = utils.to_var(torch.randn(X.size(0), self.z_dim))
            X_rec = self.G(mu + eps * torch.exp(sigma / 2.0))
            images = torch.cat((images, X_rec), 0)
        if torch.cuda.is_available():
            images = images.cpu().data.numpy().transpose(0, 2, 3, 1)
        else:
            images = images.data.numpy().transpose(0, 2, 3, 1)
        # re-order into a row-major grid: one class per row, one draw per column
        new_images = []
        for i in range(image_num):
            k = i
            for _ in range(image_num):
                new_images.append(images[k])
                k += 10
        images = np.array(new_images)
        save_dir = os.path.join(self.root, self.result_dir, self.dataset,
                                self.model_name, str(self.args.seed_random))
        utils.save_images(images[:, :, :, :], [row, row],
                          os.path.join(save_dir, 'variational' + '_epoch%03d' % (epoch + 1) + '.png'))
    utils.generate_animation(save_dir + "/variational", 100)
    self.G.eval()
    self.E.eval()
    self.FC.eval()
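# A minimal standalone sketch of the reparameterized sampling used in
# result_vary() above: z = mu + eps * exp(sigma / 2) with eps ~ N(0, I),
# assuming `sigma` holds a log-variance, as the exp(sigma / 2.0) call suggests.
import torch

def sample_latent(mu, log_var):
    # one reparameterized draw per row of mu
    eps = torch.randn_like(mu)
    return mu + eps * torch.exp(log_var / 2.0)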
def latent_traversal(self, samples=None, epoch=0, save_path=None):
    if save_path is None:
        save_path = os.path.join(self.save_path, 'visualization')
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    for i0 in tnrange(self.model.hidden_dim, desc='latent traversal', leave=False):
        if samples is None:
            images = self.model.latent_traversal_dim(i0)  # list T, tensor B C H W
        else:
            images = self.model.latent_traversal_given_samples_dim(samples, i0)  # list T, tensor B C H W
        epoch_path = os.path.join(save_path, f'latent_traversal_{epoch}')
        if not os.path.exists(epoch_path):
            os.makedirs(epoch_path)
        utils.generate_animation(images, epoch_path, f'code{i0:03}')
    return self
def train(self):
    self.G.apply(self.G.weights_init)
    self.D.train()
    for classe in range(10):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []
        # self.G.apply(self.G.weights_init) does not work for instance
        if self.gpu_mode:
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), \
                Variable(torch.zeros(self.batch_size, 1).cuda())
        else:
            self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
                Variable(torch.zeros(self.batch_size, 1))
        self.D.train()
        self.data_loader_train = get_iter_dataset(self.dataset_train, self.list_class_train,
                                                  self.batch_size, classe)
        self.data_loader_valid = get_iter_dataset(self.dataset_valid, self.list_class_valid,
                                                  self.batch_size, classe)
        print('training class : ' + str(classe))
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            n_batch = 0.
            for iter, (x_, t_) in enumerate(self.data_loader_train):
                n_batch += 1
                z_ = torch.rand((self.batch_size, self.z_dim))
                if self.gpu_mode:
                    x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
                else:
                    x_, z_ = Variable(x_), Variable(z_)

                # update D network
                self.D_optimizer.zero_grad()
                D_real = self.D(x_)
                D_real_err = torch.mean(torch.abs(D_real - x_))
                G_ = self.G(z_)
                D_fake = self.D(G_)
                D_fake_err = torch.mean(torch.abs(D_fake - G_))
                D_loss = D_real_err - self.k * D_fake_err
                self.train_hist['D_loss'].append(D_loss.data[0])
                D_loss.backward()
                self.D_optimizer.step()

                # update G network
                self.G_optimizer.zero_grad()
                G_ = self.G(z_)
                D_fake = self.D(G_)
                D_fake_err = torch.mean(torch.abs(D_fake - G_))
                G_loss = D_fake_err
                self.train_hist['G_loss'].append(G_loss.data[0])
                G_loss.backward()
                self.G_optimizer.step()

                # convergence metric
                temp_M = D_real_err + torch.abs(self.gamma * D_real_err - D_fake_err)

                # operation for updating k
                temp_k = self.k + self.lambda_ * (self.gamma * D_real_err - D_fake_err)
                temp_k = temp_k.data[0]
                # self.k = temp_k.data[0]
                self.k = min(max(temp_k, 0), 1)
                self.M = temp_M.data[0]

                if ((iter + 1) % 100) == 0:
                    print("classe : [%1d] Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, M: %.8f, k: %.8f" %
                          (classe, (epoch + 1), (iter + 1), self.size_epoch,
                           D_loss.data[0], G_loss.data[0], self.M, self.k))
            self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
            self.visualize_results((epoch + 1), classe)
            self.save_G(classe)
            result_dir = self.result_dir + '/' + 'classe-' + str(classe)
            utils.generate_animation(result_dir + '/' + self.model_name, epoch + 1)
            utils.loss_plot(self.train_hist, result_dir, self.model_name)
            np.savetxt(os.path.join(result_dir, 'began_training_' + self.dataset + '.txt'),
                       np.transpose([self.train_hist['G_loss']]))
        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
def train(self):
    self.size_epoch = 1000

    if self.gpu_mode:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda(self.device)), \
            Variable(torch.zeros(self.batch_size, 1).cuda(self.device))
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
            Variable(torch.zeros(self.batch_size, 1))

    self.G.apply(self.G.weights_init)
    self.D.train()
    print('training start!!')
    start_time = time.time()
    for classe in range(10):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []
        # self.G.apply(self.G.weights_init) does not work for instance
        self.data_loader_train = get_iter_dataset(self.dataset_train, self.list_class_train,
                                                  self.batch_size, classe)
        self.data_loader_valid = get_iter_dataset(self.dataset_valid, self.list_class_valid,
                                                  self.batch_size, classe)
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            for iter, (x_, t_) in enumerate(self.data_loader_train):
                if x_.shape[0] != self.batch_size:
                    break
                z_ = torch.rand((self.batch_size, self.z_dim))
                if self.gpu_mode:
                    x_, z_ = Variable(x_.cuda(self.device)), Variable(z_.cuda(self.device))
                else:
                    x_, z_ = Variable(x_), Variable(z_)

                # update D network
                self.D_optimizer.zero_grad()
                D_real = self.D(x_)
                D_real_loss = self.BCELoss(D_real, self.y_real_)
                G_ = self.G(z_)
                D_fake = self.D(G_)
                D_fake_loss = self.BCELoss(D_fake, self.y_fake_)
                D_loss = D_real_loss + D_fake_loss
                self.train_hist['D_loss'].append(D_loss.data[0])
                D_loss.backward()
                self.D_optimizer.step()

                # update G network
                self.G_optimizer.zero_grad()
                G_ = self.G(z_)
                D_fake = self.D(G_)
                G_loss = self.BCELoss(D_fake, self.y_real_)
                self.train_hist['G_loss'].append(G_loss.data[0])
                G_loss.backward()
                self.G_optimizer.step()

                if ((iter + 1) % 100) == 0:
                    print("classe : [%1d] Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                          (classe, (epoch + 1), (iter + 1), len(self.data_loader_train),
                           D_loss.data[0], G_loss.data[0]))
            self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
            self.visualize_results((epoch + 1), classe)
            self.save_G(classe)
        utils.generate_animation(
            self.result_dir + '/' + 'classe-' + str(classe) + '/' + self.model_name, self.epoch)
        utils.loss_plot(self.train_hist, self.save_dir, self.model_name)
        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
              (np.mean(self.train_hist['per_epoch_time']), self.epoch,
               self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
    if self.gpu_mode:
        self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.save()
        self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, y_) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break
            z_ = torch.rand((self.batch_size, self.z_dim))
            y_vec_ = torch.zeros((self.batch_size, self.class_num)).scatter_(
                1, y_.type(torch.LongTensor).unsqueeze(1), 1)
            if self.gpu_mode:
                x_, z_, y_vec_ = x_.cuda(), z_.cuda(), y_vec_.cuda()

            # update D network
            self.D_optimizer.zero_grad()
            D_real, C_real = self.D(x_)
            D_real_loss = self.BCE_loss(D_real, self.y_real_)
            C_real_loss = self.CE_loss(C_real, torch.max(y_vec_, 1)[1])
            G_ = self.G(z_, y_vec_)
            D_fake, C_fake = self.D(G_)
            D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)
            C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1])
            D_loss = D_real_loss + C_real_loss + D_fake_loss + C_fake_loss
            self.train_hist['D_loss'].append(D_loss.item())
            D_loss.backward()
            self.D_optimizer.step()

            # update G network
            self.G_optimizer.zero_grad()
            G_ = self.G(z_, y_vec_)
            D_fake, C_fake = self.D(G_)
            G_loss = self.BCE_loss(D_fake, self.y_real_)
            C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1])
            G_loss += C_fake_loss
            self.train_hist['G_loss'].append(G_loss.item())
            G_loss.backward()
            self.G_optimizer.step()

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.item(), G_loss.item()))
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        with torch.no_grad():
            self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
    utils.generate_animation(
        self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
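# A minimal sketch of the one-hot construction used in train() above;
# `make_one_hot` is a hypothetical helper name, and `y` is assumed to be a
# 1-D tensor of integer class labels in [0, class_num).
import torch

def make_one_hot(y, class_num):
    return torch.zeros(y.size(0), class_num).scatter_(1, y.long().unsqueeze(1), 1)

# e.g. make_one_hot(torch.tensor([2, 0]), 3) -> [[0., 0., 1.], [1., 0., 0.]]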
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['E_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    # if torch.cuda.is_available():
    #     self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda())
    # else:
    #     self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1))
    # self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in trange(self.epoch, desc='epoch'):
        self.G_optimizer.param_groups[0]['lr'] = self.args.lrG / np.sqrt(epoch + 1)
        self.D_optimizer.param_groups[0]['lr'] = self.args.lrD / np.sqrt(epoch + 1)
        # reset training mode of G and E
        epoch_start_time = time.time()
        E_err = []
        D_err = []
        G_err = []
        # learning rate decay
        # if (epoch+1) % 20 == 0:
        #     self.G_optimizer.param_groups[0]['lr'] /= 2
        #     self.D_optimizer.param_groups[0]['lr'] /= 2
        #     self.E_optimizer.param_groups[0]['lr'] /= 2
        #     print("learning rate change!")
        # self.G_optimizer.param_groups[0]['lr'] /= np.sqrt(epoch+1)
        # self.D_optimizer.param_groups[0]['lr'] /= np.sqrt(epoch+1)
        # self.E_optimizer.param_groups[0]['lr'] /= np.sqrt(epoch+1)
        # print("learning rate change!")
        scale = min(1.0, (1.0 / 32.) * (epoch + 1))  # linear warm-up
        total_len = self.data_loader.dataset.__len__() // self.batch_size
        for iter, (X, _) in tqdm(enumerate(self.data_loader), total=total_len, desc='iteration'):
            X = utils.to_var(X)

            """Discriminator"""
            z = utils.to_var(torch.randn(self.batch_size, self.z_dim))
            X_hat = self.G(z)
            D_real = self.D(self.FC(X))
            D_fake = self.D(self.FC(X_hat))
            D_loss = -1.0 * torch.mean(D_real - torch.exp(D_fake - 1.0))
            self.train_hist['D_loss'].append(D_loss.data.item())
            D_err.append(D_loss.data.item())
            # Optimize
            D_loss.backward()
            # gradient clipping
            torch.nn.utils.clip_grad_value_(chain(self.D.parameters(), self.FC.parameters()), 1.0)
            # update
            self.D_optimizer.step()
            self.__reset_grad()

            """Generator"""
            # Use both Discriminator and Encoder to update Generator
            z = utils.to_var(torch.randn(self.batch_size, self.z_dim))
            X_hat = self.G(z)
            D_fake = self.D(self.FC(X_hat))
            z_mu, z_sigma = self.E(self.FC(X_hat))
            E_loss = torch.mean(
                torch.sum(0.5 * (z - z_mu) ** 2 * torch.exp(-z_sigma) +
                          0.5 * z_sigma + 0.919, dim=1))
            # G_loss = torch.mean( -1.0 * torch.exp(D_fake-1.0) )
            # G_loss = torch.mean( torch.exp(-D_fake) )  # deal with gradient vanish
            G_loss = -1.0 * torch.mean(D_fake)  # deal with gradient vanish 2
            # G_loss = torch.mean( (2.0-D_fake)*(D_fake >= 1).float() + torch.exp(1.0-D_fake)**(D_fake < 1).float() )
            # G_loss = torch.mean( (2.0-torch.exp(D_fake-1.0))*(D_fake >= 1).float() + torch.exp(1.0-D_fake)**(D_fake < 1).float() )
            total_loss = G_loss + scale * E_loss
            self.train_hist['G_loss'].append(G_loss.data.item())
            G_err.append(G_loss.data.item())
            E_err.append(E_loss.data.item())
            # Optimize
            total_loss.backward()
            # gradient clipping
            torch.nn.utils.clip_grad_value_(chain(self.G.parameters(), self.E.parameters()), 1.0)
            # update
            self.G_optimizer.step()
            self.E_optimizer.step()
            self.__reset_grad()

            """ Plot """
            if (iter + 1) == self.data_loader.dataset.__len__() // self.batch_size:
                # Print and plot every epoch
                print('Epoch-{}; D_loss: {:.4}; G_loss: {:.4}; E_loss: {:.4}\n'
                      .format(epoch, np.mean(D_err), np.mean(G_err), np.mean(E_err)))
                for iter, (X, _) in enumerate(self.valid_loader):
                    X = utils.to_var(X)
                    self.visualize_results(X, epoch + 1)
                    break
                break

        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        # Save model
        if (epoch + 1) % 5 == 0:
            self.save(epoch)

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    # self.save(epoch)
    # Generate animation of reconstructed plot
    utils.generate_animation(self.root + '/' + self.result_dir + '/' + self.dataset + '/' +
                             self.model_name + '/reconstructed', self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.root, self.save_dir, self.dataset, self.model_name),
                    self.model_name)
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['C_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    # self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
    # if self.gpu_mode:
    #     self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        self.C.train()
        epoch_start_time = time.time()
        for iter, (x_, y_) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break
            z_ = torch.rand((self.batch_size, self.z_dim))
            # convert class labels to one-hot vectors
            y_vec_ = torch.zeros((self.batch_size, self.class_num)).scatter_(
                1, y_.type(torch.LongTensor).unsqueeze(1), 1)
            i = 1
            while i < self.batch_size:
                if y_vec_[i][self.minority] == 1:
                    y_vec_[i][-1] = self.minority_label
                i += 1
            y_fill_ = y_vec_.unsqueeze(2).unsqueeze(3).expand(
                self.batch_size, self.class_num, self.input_size, self.input_size)
            if self.gpu_mode:
                x_, z_, y_vec_, y_fill_ = x_.cuda(), z_.cuda(), y_vec_.cuda(), y_fill_.cuda()

            # update D network
            self.D_optimizer.zero_grad()
            D_real = self.D(x_, y_fill_)
            D_real_loss = -torch.mean(D_real)
            G_ = self.G(z_, y_vec_)
            D_G_ = self.D(G_, y_fill_)
            D_G_loss = torch.mean(D_G_)
            C_ = self.C(x_)
            C_ = C_.unsqueeze(2).unsqueeze(3).expand(
                self.batch_size, self.class_num, self.input_size, self.input_size)
            D_C_ = self.D(x_, C_)
            D_C_loss = torch.mean(D_C_)
            D_loss = 2 * D_real_loss + D_G_loss + D_C_loss
            self.train_hist['D_loss'].append(D_loss.item())
            D_loss.backward()
            self.D_optimizer.step()

            # clipping D
            for p in self.D.parameters():
                p.data.clamp_(-self.c, self.c)

            # update G network
            self.G_optimizer.zero_grad()
            G_ = self.G(z_, y_vec_)
            D_G_ = self.D(G_, y_fill_)
            G_loss = -torch.mean(D_G_)
            self.train_hist['G_loss'].append(G_loss.item())
            G_loss.backward()
            self.G_optimizer.step()

            # update C network
            self.C_optimizer.zero_grad()
            C_ = self.C(x_)
            C_ = C_.unsqueeze(2).unsqueeze(3).expand(
                self.batch_size, self.class_num, self.input_size, self.input_size)
            D_C_ = self.D(x_, C_)
            C_loss = -torch.mean(D_C_)
            self.train_hist['C_loss'].append(C_loss.item())
            C_loss.backward()
            self.C_optimizer.step()

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, C_loss: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.item(), G_loss.item(), C_loss.item()))
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        with torch.no_grad():
            self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
    utils.generate_animation(
        self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
    if self.gpu_mode:
        self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, _) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break
            z_ = torch.rand((self.batch_size, self.z_dim))
            if self.gpu_mode:
                x_, z_ = x_.cuda(), z_.cuda()

            # update D network
            self.D_optimizer.zero_grad()
            D_real = self.D(x_)
            D_real_loss = -torch.mean(D_real)
            epsilon = torch.FloatTensor(self.batch_size, self.z_dim).normal_(0, 1).cuda()
            noise = self.mu + self.sigma * epsilon
            # noise.cuda()
            # G_ = self.G(z_)
            weighted_noise = torch.mul(noise, self.weight)
            weighted_noise = weighted_noise.cuda()  # the original bare `weighted_noise.cuda()` discarded its result
            G_ = self.G(weighted_noise)
            D_fake = self.D(G_)
            D_fake_loss = torch.mean(D_fake)
            mu_loss = 0.1 * (abs(0.1 - self.mu)).sum()
            sig_loss = 0.5 * (abs(1 - self.sigma)).sum()
            weight_loss = 0.01 * (abs(self.weight.sum() - 1))
            D_loss = D_real_loss + D_fake_loss  # + weight_loss + sig_loss + mu_loss
            D_loss.backward()
            self.D_optimizer.step()

            # clipping D
            for p in self.D.parameters():
                p.data.clamp_(-self.c, self.c)

            if ((iter + 1) % self.n_critic) == 0:
                # update G network
                self.G_optimizer.zero_grad()
                noise = self.mu + self.sigma * epsilon
                weighted_noise = torch.mul(noise, self.weight)
                G_ = self.G(weighted_noise)
                D_fake = self.D(G_)
                G_loss = -torch.mean(D_fake)
                self.train_hist['G_loss'].append(G_loss.item())
                G_loss.backward()
                self.G_optimizer.step()
                self.train_hist['D_loss'].append(D_loss.item())

            if ((iter + 1) % 10) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, weight: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.item(), G_loss.item(), self.weight.sum()))
                print(torch.mean(self.weight))
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        with torch.no_grad():
            self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
    utils.generate_animation(
        self.result_dir + '/' + 'cifar10' + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, 'cifar10', self.model_name),
                    self.model_name)
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    if self.gpu_mode:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), \
            Variable(torch.zeros(self.batch_size, 1).cuda())
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
            Variable(torch.zeros(self.batch_size, 1))

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, y_) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break
            if self.dataset == 'celebA':
                y_ = self.attr[y_]
                y_ = y_.view((self.batch_size, self.y_dim))
            z_ = torch.rand((self.batch_size, self.z_dim))
            if self.gpu_mode:
                x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
                y_ = Variable(y_.cuda())
            else:
                x_, z_ = Variable(x_), Variable(z_)

            # update D network
            self.D_optimizer.zero_grad()
            D_real, C_real = self.D(x_)
            D_real_loss = -torch.mean(D_real)
            # C_real_loss = self.CE_loss(C_real, y_)
            C_real_loss = self.BCE_loss(C_real, y_)

            if self.dataset == 'mnist':
                label = torch.zeros(self.batch_size, 10).cuda()
                label = label.scatter_(1, y_.data.view(self.batch_size, 1), 1)
                label = Variable(label)
            elif self.dataset == 'celebA':
                label = torch.zeros(self.batch_size, self.y_dim).cuda()
                label[:50, 0] = 1
                # label[24:75, 1] = 1
                label = Variable(label)
            G_ = self.G(z_, label)
            D_fake, C_fake = self.D(G_)
            D_fake_loss = torch.mean(D_fake)
            # C_fake_loss = self.CE_loss(C_fake, y_)
            C_fake_loss = self.BCE_loss(C_fake, label)

            # gradient penalty
            if self.gpu_mode:
                alpha = torch.rand(x_.size()).cuda()
            else:
                alpha = torch.rand(x_.size())
            x_hat = Variable(alpha * x_.data + (1 - alpha) * G_.data, requires_grad=True)
            pred_hat, _ = self.D(x_hat)
            if self.gpu_mode:
                gradients = grad(outputs=pred_hat, inputs=x_hat,
                                 grad_outputs=torch.ones(pred_hat.size()).cuda(),
                                 create_graph=True, retain_graph=True, only_inputs=True)[0]
            else:
                gradients = grad(outputs=pred_hat, inputs=x_hat,
                                 grad_outputs=torch.ones(pred_hat.size()),
                                 create_graph=True, retain_graph=True, only_inputs=True)[0]
            gradient_penalty = self.lambda_ * (
                (gradients.view(gradients.size()[0], -1).norm(2, 1) - 1) ** 2).mean()

            # D_loss = D_real_loss + D_fake_loss + gradient_penalty + self.lambda_cl * (C_real_loss + C_fake_loss)
            D_loss = D_real_loss + D_fake_loss + self.lambda_cl * (C_real_loss + C_fake_loss)
            D_loss.backward()
            self.D_optimizer.step()

            # clipping D
            for p in self.D.parameters():
                p.data.clamp_(-self.c, self.c)

            if ((iter + 1) % self.n_critic) == 0:
                # update G network
                self.G_optimizer.zero_grad()
                if self.dataset == 'mnist':
                    label = torch.zeros(self.batch_size, 10).cuda()
                    label = label.scatter_(1, y_.data.view(self.batch_size, 1), 1)
                    label = Variable(label)
                elif self.dataset == 'celebA':
                    label = torch.zeros(self.batch_size, self.y_dim).cuda()
                    label[:50, 0] = 1
                    # label[24:75, 1] = 1
                    label = Variable(label)
                G_ = self.G(z_, label)
                D_fake, C_fake = self.D(G_)
                # G_loss = -torch.mean(D_fake) + self.lambda_cl * self.CE_loss(C_fake, y_)
                G_loss = -torch.mean(D_fake) + self.lambda_cl * self.BCE_loss(C_fake, y_)
                self.train_hist['G_loss'].append(G_loss.data[0])
                G_loss.backward()
                self.G_optimizer.step()
                self.train_hist['D_loss'].append(D_loss.data[0])

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.data[0], G_loss.data[0]))
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
    utils.generate_animation(
        self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    if self.gpu_mode:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), \
            Variable(torch.zeros(self.batch_size, 1).cuda())
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
            Variable(torch.zeros(self.batch_size, 1))

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter_, x_ in enumerate(self.data_loader):
            if iter_ == self.data_loader.dataset.__len__() // self.batch_size:
                break
            # z_ = torch.rand((self.batch_size, self.z_dim))
            try:
                z_, _ = next(self.z_loader)
            except StopIteration:
                self.z_loader = iter(utils.get_mat_loader(self.batch_size))
                z_, _ = next(self.z_loader)
            if z_.shape[0] != x_.shape[0]:
                self.z_loader = iter(utils.get_mat_loader(self.batch_size))
                z_, _ = next(self.z_loader)
            if self.gpu_mode:
                x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
            else:
                x_, z_ = Variable(x_), Variable(z_)

            # update D network
            self.D_optimizer.zero_grad()
            D_real = self.D(x_)
            D_real_loss = self.MSE_loss(D_real, self.y_real_)
            G_ = self.G(z_)
            D_fake = self.D(G_)
            D_fake_loss = self.MSE_loss(D_fake, self.y_fake_)
            D_loss = D_real_loss + D_fake_loss
            self.train_hist['D_loss'].append(D_loss.data[0])
            D_loss.backward()
            self.D_optimizer.step()

            # update G network
            self.G_optimizer.zero_grad()
            G_ = self.G(z_)
            D_fake = self.D(G_)
            G_loss = self.MSE_loss(D_fake, self.y_real_)
            self.train_hist['G_loss'].append(G_loss.data[0])
            G_loss.backward()
            self.G_optimizer.step()

            if ((iter_ + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                      ((epoch + 1), (iter_ + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.data[0], G_loss.data[0]))
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        self.visualize_results((epoch + 1))
        latest_g_loss = np.mean(self.train_hist['G_loss'][-200:])
        latest_d_loss = np.mean(self.train_hist['D_loss'][-200:])
        print("g_loss = %f, d_loss = %f" % (latest_g_loss, latest_d_loss))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
    utils.generate_animation(
        self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
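# A minimal sketch of the restart-on-exhaustion pattern used for self.z_loader
# above, written as a generator; `make_loader` is a hypothetical zero-argument
# factory returning a fresh finite iterable (e.g. utils.get_mat_loader bound
# to a batch size).
def cycle_loader(make_loader):
    it = iter(make_loader())
    while True:
        try:
            yield next(it)
        except StopIteration:
            # loader exhausted: rebuild it and keep going
            it = iter(make_loader())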
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['info_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
    if self.gpu_mode:
        self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, y_) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break
            z_ = torch.rand((self.batch_size, self.z_dim))
            # for infogan
            if self.SUPERVISED == True:
                y_disc_ = torch.zeros((self.batch_size, self.len_discrete_code)).scatter_(
                    1, y_.type(torch.LongTensor).unsqueeze(1), 1)
            else:
                y_disc_ = torch.from_numpy(
                    np.random.multinomial(1, self.len_discrete_code * [float(1.0 / self.len_discrete_code)],
                                          size=[self.batch_size])).type(torch.FloatTensor)
            y_cont_ = torch.from_numpy(np.random.uniform(-1, 1, size=(self.batch_size, 2))).type(torch.FloatTensor)
            # if self.gpu_mode:
            #     x_, z_ = x_.cuda(), z_.cuda()
            if self.gpu_mode:
                x_, z_, y_disc_, y_cont_ = x_.cuda(), z_.cuda(), y_disc_.cuda(), y_cont_.cuda()

            # update D network
            self.D_optimizer.zero_grad()
            D_real, _, _ = self.D(x_)
            D_real_loss = self.BCE_loss(D_real, self.y_real_)
            # G_ = self.G(z_)
            # D_fake = self.D(G_)
            # D_fake_loss = torch.mean(D_fake)
            G_ = self.G(z_, y_cont_, y_disc_)
            D_fake, _, _ = self.D(G_)
            D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)
            D_loss = D_real_loss + D_fake_loss
            D_loss.backward(retain_graph=True)

            # clipping D
            for p in self.D.parameters():
                p.data.clamp_(-self.c, self.c)
            # inject noise into D's gradients before the optimizer step
            for p in self.D.parameters():
                samp = self.m.sample(sample_shape=p.grad.shape).cuda()
                p.grad += samp
            self.D_optimizer.step()

            if ((iter + 1) % self.n_critic) == 0:
                # update G network
                self.G_optimizer.zero_grad()
                G_ = self.G(z_, y_cont_, y_disc_)
                D_fake, D_cont, D_disc = self.D(G_)
                G_loss = self.BCE_loss(D_fake, self.y_real_)
                self.train_hist['G_loss'].append(G_loss.item())
                G_loss.backward(retain_graph=True)
                self.G_optimizer.step()
                self.train_hist['D_loss'].append(D_loss.item())

                # information loss
                disc_loss = self.CE_loss(D_disc, torch.max(y_disc_, 1)[1])
                cont_loss = self.MSE_loss(D_cont, y_cont_)
                info_loss = disc_loss + cont_loss
                self.train_hist['info_loss'].append(info_loss.item())
                info_loss.backward()
                self.info_optimizer.step()

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, info_loss: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.item(), G_loss.item(), info_loss.item()))
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        with torch.no_grad():
            self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
    utils.generate_animation(
        self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
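# A hedged sketch of the gradient-noise injection applied to D above, assuming
# `self.m` is a torch.distributions object such as Normal; `add_grad_noise`
# is a hypothetical helper name, and the CPU variant is shown for simplicity.
import torch
from torch.distributions import Normal

def add_grad_noise(params, sigma=0.01):
    m = Normal(torch.tensor(0.0), torch.tensor(sigma))
    for p in params:
        if p.grad is not None:
            # draw a noise tensor matching the gradient's shape and add it in place
            p.grad += m.sample(sample_shape=p.grad.shape)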
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    if self.gpu_mode:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), \
            Variable(torch.zeros(self.batch_size, 1).cuda())
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
            Variable(torch.zeros(self.batch_size, 1))

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, _) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break
            z_ = torch.rand((self.batch_size, self.z_dim))
            if self.gpu_mode:
                x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
            else:
                x_, z_ = Variable(x_), Variable(z_)

            # update D network
            self.D_optimizer.zero_grad()
            D_real = self.D(x_)
            D_real_err = torch.mean(torch.abs(D_real - x_))
            G_ = self.G(z_)
            D_fake = self.D(G_)
            D_fake_err = torch.mean(torch.abs(D_fake - G_))
            D_loss = D_real_err - self.k * D_fake_err
            self.train_hist['D_loss'].append(D_loss.data[0])
            D_loss.backward()
            self.D_optimizer.step()

            # update G network
            self.G_optimizer.zero_grad()
            G_ = self.G(z_)
            D_fake = self.D(G_)
            D_fake_err = torch.mean(torch.abs(D_fake - G_))
            G_loss = D_fake_err
            self.train_hist['G_loss'].append(G_loss.data[0])
            G_loss.backward()
            self.G_optimizer.step()

            # convergence metric
            temp_M = D_real_err + torch.abs(self.gamma * D_real_err - D_fake_err)

            # operation for updating k
            temp_k = self.k + self.lambda_ * (self.gamma * D_real_err - D_fake_err)
            temp_k = temp_k.data[0]
            # self.k = temp_k.data[0]
            self.k = min(max(temp_k, 0), 1)
            self.M = temp_M.data[0]

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, M: %.8f, k: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.data[0], G_loss.data[0], self.M, self.k))
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
    utils.generate_animation(
        self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
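# A standalone sketch of the BEGAN bookkeeping done inline in the two train()
# methods above: the proportional-control update of k and the convergence
# measure M, written over plain Python floats.
def began_update_k(k, d_real_err, d_fake_err, gamma, lambda_):
    balance = gamma * d_real_err - d_fake_err
    k = min(max(k + lambda_ * balance, 0.0), 1.0)  # clip k to [0, 1]
    M = d_real_err + abs(balance)                  # convergence measure
    return k, M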
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['C_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    if self.gpu_mode:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), \
            Variable(torch.zeros(self.batch_size, 1).cuda())
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
            Variable(torch.zeros(self.batch_size, 1))

    self.D.train()
    self.C.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter in range(len(self.data_X) // self.batch_size):
            x_ = self.data_X[iter * self.batch_size:(iter + 1) * self.batch_size]
            z_ = torch.rand((self.batch_size, self.z_dim))
            # z_ = torch.Tensor(self.batch_size, self.z_dim).normal_(0, 1)
            y_vec_ = self.data_Y[iter * self.batch_size:(iter + 1) * self.batch_size]
            if self.gpu_mode:
                x_, z_, y_vec_ = Variable(x_.cuda()), Variable(z_.cuda()), Variable(y_vec_.cuda())
            else:
                x_, z_, y_vec_ = Variable(x_), Variable(z_), Variable(y_vec_)

            # update D network
            self.D_optimizer.zero_grad()
            self.C_optimizer.zero_grad()
            D_real = self.D(x_)
            C_real = self.C(x_)
            D_real_loss = self.BCE_loss(D_real, self.y_real_)
            C_real_loss = self.CE_loss(C_real, torch.max(y_vec_, 1)[1])
            G_ = self.G(z_, y_vec_)
            D_fake = self.D(G_)
            C_fake = self.C(G_)
            D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)
            C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1])
            D_loss = D_real_loss + D_fake_loss
            C_loss = C_real_loss + C_fake_loss
            self.train_hist['D_loss'].append(D_loss.item())
            self.train_hist['C_loss'].append(C_loss.item())
            D_loss.backward(retain_graph=True)
            self.D_optimizer.step()
            C_loss.backward(retain_graph=True)
            self.C_optimizer.step()

            # update G network
            self.G_optimizer.zero_grad()
            G_ = self.G(z_, y_vec_)
            D_fake = self.D(G_)
            C_fake = self.C(G_)
            G_loss = self.BCE_loss(D_fake, self.y_real_)
            C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1])
            G_loss += C_fake_loss
            self.train_hist['G_loss'].append(G_loss.item())
            G_loss.backward(retain_graph=True)
            self.G_optimizer.step()

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, C_loss: %.8f" %
                      ((epoch + 1), (iter + 1), len(self.data_X) // self.batch_size,
                       D_loss.item(), G_loss.item(), C_loss.item()))
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        self.visualize_results((epoch + 1), fix=False)

        print('\n[INFO]: Test the classifier:')
        # self.C.eval()
        correct = 0
        nb_test = len(self.X_test)
        for iter in range(nb_test // self.batch_size):
            x_ = self.X_test[iter * self.batch_size:(iter + 1) * self.batch_size]
            y_vec_ = self.y_test_vec[iter * self.batch_size:(iter + 1) * self.batch_size]
            if self.gpu_mode:
                x_, y_vec_ = Variable(x_.cuda()), Variable(y_vec_.cuda())
            else:
                x_, y_vec_ = Variable(x_), Variable(y_vec_)
            outputs = self.C(x_)
            # C_real_loss = self.CE_loss(C_real, torch.max(y_vec_, 1)[1])
            # loss = self.CE_loss(outputs, torch.max(y_vec_, 1)[1])
            pred = outputs.data.max(1)[1]  # get the index of the max log-probability
            pred = pred.eq(torch.max(y_vec_, 1)[1].data).cpu().data.float()
            correct += pred.sum()
        print('Accuracy of the network on the test images: %.2f %%' % (100. * correct / nb_test))
        print('[INFO]: Testing finished!\n')

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
    utils.generate_animation(
        self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    if self.gpu_mode:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), \
            Variable(torch.zeros(self.batch_size, 1).cuda())
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
            Variable(torch.zeros(self.batch_size, 1))

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, _) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break
            z_ = torch.rand((self.batch_size, self.z_dim))
            if self.gpu_mode:
                x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
            else:
                x_, z_ = Variable(x_), Variable(z_)

            # update D network
            self.D_optimizer.zero_grad()
            D_real, D_real_code = self.D(x_)
            D_real_err = self.MSE_loss(D_real, x_)
            G_ = self.G(z_)
            D_fake, D_fake_code = self.D(G_)
            D_fake_err = self.MSE_loss(D_fake, G_.detach())
            if list(self.margin - D_fake_err.data)[0] > 0:
                D_loss = D_real_err + (self.margin - D_fake_err)
            else:
                D_loss = D_real_err
            self.train_hist['D_loss'].append(D_loss.data[0])
            D_loss.backward()
            self.D_optimizer.step()

            # update G network
            self.G_optimizer.zero_grad()
            G_ = self.G(z_)
            D_fake, D_fake_code = self.D(G_)
            D_fake_err = self.MSE_loss(D_fake, G_.detach())
            G_loss = D_fake_err + self.pt_loss_weight * self.pullaway_loss(D_fake_code)
            self.train_hist['G_loss'].append(G_loss.data[0])
            G_loss.backward()
            self.G_optimizer.step()

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.data[0], G_loss.data[0]))
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
    utils.generate_animation(
        self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
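# self.pullaway_loss is not defined in this file; a hedged sketch following the
# pulling-away term from the EBGAN paper (mean squared cosine similarity over
# distinct code pairs) would look roughly like this, not necessarily matching
# the exact implementation used above.
import torch

def pullaway_loss(embeddings):
    n = embeddings.size(0)
    e = embeddings.view(n, -1)
    e = e / (e.norm(2, dim=1, keepdim=True) + 1e-8)  # L2-normalize each code
    sim = torch.mm(e, e.t()) ** 2                    # squared cosine similarities
    return (sim.sum() - n) / (n * (n - 1))           # drop the diagonal (self-pairs)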
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    if self.gpu_mode:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), \
            Variable(torch.zeros(self.batch_size, 1).cuda())
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
            Variable(torch.zeros(self.batch_size, 1))

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, _) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break
            z_ = torch.rand((self.batch_size, self.z_dim))
            if self.gpu_mode:
                x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
            else:
                x_, z_ = Variable(x_), Variable(z_)

            # update D network
            self.D_optimizer.zero_grad()
            D_real = self.D(x_)
            D_real_loss = -torch.mean(D_real)
            G_ = self.G(z_)
            D_fake = self.D(G_)
            D_fake_loss = torch.mean(D_fake)

            # gradient penalty
            if self.gpu_mode:
                alpha = torch.rand(x_.size()).cuda()
            else:
                alpha = torch.rand(x_.size())
            x_hat = Variable(alpha * x_.data + (1 - alpha) * G_.data, requires_grad=True)
            pred_hat = self.D(x_hat)
            if self.gpu_mode:
                gradients = grad(outputs=pred_hat, inputs=x_hat,
                                 grad_outputs=torch.ones(pred_hat.size()).cuda(),
                                 create_graph=True, retain_graph=True, only_inputs=True)[0]
            else:
                gradients = grad(outputs=pred_hat, inputs=x_hat,
                                 grad_outputs=torch.ones(pred_hat.size()),
                                 create_graph=True, retain_graph=True, only_inputs=True)[0]
            gradient_penalty = self.lambda_ * (
                (gradients.view(gradients.size()[0], -1).norm(2, 1) - 1) ** 2).mean()

            D_loss = D_real_loss + D_fake_loss + gradient_penalty
            D_loss.backward()
            self.D_optimizer.step()

            if ((iter + 1) % self.n_critic) == 0:
                # update G network
                self.G_optimizer.zero_grad()
                G_ = self.G(z_)
                D_fake = self.D(G_)
                G_loss = -torch.mean(D_fake)
                self.train_hist['G_loss'].append(G_loss.data[0])
                G_loss.backward()
                self.G_optimizer.step()
                self.train_hist['D_loss'].append(D_loss.data[0])

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.data[0], G_loss.data[0]))
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
    utils.generate_animation(
        self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
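# The WGAN-GP penalty computed inline above, factored into a helper for
# clarity; `compute_gradient_penalty` is a hypothetical name, and the inputs
# are assumed to already live on the same device.
import torch
from torch.autograd import grad

def compute_gradient_penalty(D, x_real, x_fake, lambda_):
    # random interpolation between real and fake samples
    alpha = torch.rand(x_real.size(), device=x_real.device)
    x_hat = (alpha * x_real.detach() + (1 - alpha) * x_fake.detach()).requires_grad_(True)
    pred_hat = D(x_hat)
    gradients = grad(outputs=pred_hat, inputs=x_hat,
                     grad_outputs=torch.ones_like(pred_hat),
                     create_graph=True, retain_graph=True, only_inputs=True)[0]
    # penalize deviation of the per-sample gradient norm from 1
    return lambda_ * ((gradients.view(gradients.size(0), -1).norm(2, 1) - 1) ** 2).mean()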
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    if self.gpu_mode:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), \
            Variable(torch.zeros(self.batch_size, 1).cuda())
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
            Variable(torch.zeros(self.batch_size, 1))

    # self.D.train()
    print('train start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        # self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, _) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break
            z_ = torch.rand((self.batch_size, self.z_dim))
            if self.gpu_mode:
                x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
            else:
                x_, z_ = Variable(x_), Variable(z_)

            """Update D network"""
            self.D_optimizer.zero_grad()
            # train with real images
            y_hat_real = self.D(x_)  # forward pass
            D_real_loss = self.BCE_loss(y_hat_real, self.y_real_)
            generated_images_ = self.G(z_)
            y_hat_fake = self.D(generated_images_)
            D_fake_loss = self.BCE_loss(y_hat_fake, self.y_fake_)
            D_loss = D_fake_loss + D_real_loss
            self.train_hist['D_loss'].append(D_loss.data[0])
            D_loss.backward()
            self.D_optimizer.step()

            """Update generator network"""
            self.G_optimizer.zero_grad()
            generated_images_ = self.G(z_)
            y_hat_fake = self.D(generated_images_)
            G_loss = self.BCE_loss(y_hat_fake, self.y_real_)
            self.train_hist['G_loss'].append(G_loss.data[0])
            G_loss.backward()
            self.G_optimizer.step()

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.data[0], G_loss.data[0]))
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
    utils.generate_animation(
        self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
    if self.gpu_mode:
        self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, y_) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break
            z_ = torch.rand((self.batch_size, self.z_dim))
            y_vec_ = torch.zeros((self.batch_size, self.class_num)).scatter_(
                1, y_.type(torch.LongTensor).unsqueeze(1), 1)
            if self.gpu_mode:
                x_, z_, y_vec_ = x_.cuda(), z_.cuda(), y_vec_.cuda()

            # update D network
            self.D_optimizer.zero_grad()
            D_real, C_real = self.D(x_)
            D_real_loss = self.BCE_loss(D_real, self.y_real_)
            C_real_loss = self.CE_loss(C_real, torch.max(y_vec_, 1)[1])
            G_ = self.G(z_, y_vec_)
            D_fake, C_fake = self.D(G_)
            D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)
            C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1])
            D_loss = D_real_loss + self.alpha * C_real_loss + D_fake_loss + self.alpha * C_fake_loss
            self.train_hist['D_loss'].append(D_loss.item())
            D_loss.backward()
            self.D_optimizer.step()

            # update G network
            self.G_optimizer.zero_grad()
            z_.requires_grad = True
            G_ = self.G(z_, y_vec_)
            D_fake, C_fake = self.D(G_)
            G_loss = self.BCE_loss(D_fake, self.y_real_)
            C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1])
            G_loss += self.alpha * C_fake_loss

            # penalize global Lipschitz using gradient norm
            gradients = grad(outputs=G_, inputs=z_,
                             grad_outputs=torch.ones(G_.size()).cuda(),
                             create_graph=True, retain_graph=True, only_inputs=True)[0]
            # gradients = torch.zeros(G_.size()).cuda()
            reg_loss = (gradients.view(gradients.size()[0], -1).norm(2, 1) ** 2).mean()
            G_loss += self.lambda_ * reg_loss
            # # penalize local Lipschitz
            # reg_loss = 0
            # for j in range(self.n_repeat):
            #     v = f.normalize(torch.rand(self.batch_size, self.z_dim), p=2, dim=1)
            #     u = torch.rand(self.batch_size) + 1e-12  # avoid underflow
            #     unif_noise = (u ** (1/float(self.z_dim))).unsqueeze(1)*v
            #     unif_noise = unif_noise.cuda()
            #     G_neighbor_ = self.G(z_+unif_noise, y_vec_)
            #     dist_x = torch.sqrt(torch.sum((G_neighbor_.view(self.batch_size, -1) - G_.view(self.batch_size, -1))**2, dim=1))
            #     dist_z = torch.sqrt(torch.sum(unif_noise**2, dim=1))
            #     reg_loss += torch.mean(dist_x / dist_z)
            # G_loss += self.lambda_ * reg_loss / self.n_repeat
            self.train_hist['G_loss'].append(G_loss.item())
            G_loss.backward()
            self.G_optimizer.step()

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, reg_loss: %.4f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.item(), G_loss.item(), reg_loss.item()))
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        with torch.no_grad():
            self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
    utils.generate_animation(
        self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []
    self.train_hist['D_norm'] = []

    f = open("%s/results.txt" % self.log_dir, "w")
    f.write("d_loss,g_loss,d_norm\n")

    if self.gpu_mode:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), \
            Variable(torch.zeros(self.batch_size, 1).cuda())
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
            Variable(torch.zeros(self.batch_size, 1))

    # for iter, ((x1_,_), (x2_,_)) in enumerate(zip(self.data_loader, self.data_loader)):
    #     import pdb
    #     pdb.set_trace()
    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, _) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break
            z_ = torch.rand((self.batch_size, self.z_dim))
            if self.gpu_mode:
                x_, z_ = Variable(x_.cuda(), requires_grad=True), Variable(z_.cuda())
            else:
                x_, z_ = Variable(x_, requires_grad=True), Variable(z_)

            # update D network
            D_real = self.D(x_)
            # compute gradient penalty
            grad_wrt_x = grad(outputs=D_real, inputs=x_,
                              grad_outputs=torch.ones(D_real.size()).cuda(),
                              create_graph=True, retain_graph=True, only_inputs=True)[0]
            g_norm = ((grad_wrt_x.view(grad_wrt_x.size()[0], -1).norm(2, 1) - 1) ** 2).mean()
            self.train_hist['D_norm'].append(g_norm.data.item())
            self.D_optimizer.zero_grad()
            G_ = self.G(z_).detach()
            alpha = float(np.random.random())
            Xz = Variable(alpha * x_.data + (1. - alpha) * G_.data)
            D_Xz = self.D(Xz)
            D_loss = self.BCE_loss(D_Xz, alpha * self.y_real_)
            self.train_hist['D_loss'].append(D_loss.data.item())
            D_loss.backward()
            self.D_optimizer.step()

            # update G network
            self.G_optimizer.zero_grad()
            G_ = self.G(z_)
            D_fake = self.D(G_)
            G_loss = self.BCE_loss(D_fake, self.y_real_)
            self.train_hist['G_loss'].append(G_loss.data.item())
            G_loss.backward()
            self.G_optimizer.step()

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, D_norm: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.data.item(), G_loss.data.item(), g_norm.data.item()))
                f.write("%.8f,%.8f,%.8f\n" %
                        (D_loss.data.item(), G_loss.data.item(), g_norm.data.item()))
                f.flush()
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    f.close()
    self.save()
    utils.generate_animation(
        self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    if self.gpu_mode:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), \
            Variable(torch.zeros(self.batch_size, 1).cuda())
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
            Variable(torch.zeros(self.batch_size, 1))

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        epoch_start_time = time.time()
        self.G.train()
        for iter, (x_, _) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break
            z_ = torch.rand((self.batch_size, self.z_dim))
            if self.gpu_mode:
                x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
            else:
                x_, z_ = Variable(x_), Variable(z_)

            # update D network
            self.D_optimizer.zero_grad()
            D_real = self.D(x_)
            D_real_loss = self.BCE_loss(D_real, self.y_real_)
            G_ = self.G(z_)
            D_fake = self.D(G_)
            D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)

            """ DRAGAN Loss (Gradient penalty) """
            # This is borrowed from https://github.com/jfsantos/dragan-pytorch/blob/master/dragan.py
            if self.gpu_mode:
                alpha = torch.rand(x_.size()).cuda()
                x_hat = Variable(alpha * x_.data + (1 - alpha) *
                                 (x_.data + 0.5 * x_.data.std() * torch.rand(x_.size()).cuda()),
                                 requires_grad=True)
            else:
                alpha = torch.rand(x_.size())
                x_hat = Variable(alpha * x_.data + (1 - alpha) *
                                 (x_.data + 0.5 * x_.data.std() * torch.rand(x_.size())),
                                 requires_grad=True)
            pred_hat = self.D(x_hat)
            if self.gpu_mode:
                gradients = grad(outputs=pred_hat, inputs=x_hat,
                                 grad_outputs=torch.ones(pred_hat.size()).cuda(),
                                 create_graph=True, retain_graph=True, only_inputs=True)[0]
            else:
                gradients = grad(outputs=pred_hat, inputs=x_hat,
                                 grad_outputs=torch.ones(pred_hat.size()),
                                 create_graph=True, retain_graph=True, only_inputs=True)[0]
            gradient_penalty = self.lambda_ * (
                (gradients.view(gradients.size()[0], -1).norm(2, 1) - 1) ** 2).mean()

            D_loss = D_real_loss + D_fake_loss + gradient_penalty
            self.train_hist['D_loss'].append(D_loss.data[0])
            D_loss.backward()
            self.D_optimizer.step()

            # update G network
            self.G_optimizer.zero_grad()
            G_ = self.G(z_)
            D_fake = self.D(G_)
            G_loss = self.BCE_loss(D_fake, self.y_real_)
            self.train_hist['G_loss'].append(G_loss.data[0])
            G_loss.backward()
            self.G_optimizer.step()

            if ((iter + 1) % 10) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.data[0], G_loss.data[0]))
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
    utils.generate_animation(
        self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
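# A standalone sketch of the DRAGAN interpolate built inline above: real
# samples blended with a copy jittered by noise scaled to half the batch
# standard deviation; `dragan_perturb` is a hypothetical helper name, and
# the CPU variant is shown.
import torch

def dragan_perturb(x):
    alpha = torch.rand(x.size())
    noisy = x.detach() + 0.5 * x.detach().std() * torch.rand(x.size())
    return (alpha * x.detach() + (1 - alpha) * noisy).requires_grad_(True)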
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    if self.gpu_mode:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), \
            Variable(torch.zeros(self.batch_size, 1).cuda())
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
            Variable(torch.zeros(self.batch_size, 1))

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, _) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break
            z_ = torch.rand((self.batch_size, self.z_dim))
            if self.gpu_mode:
                x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
            else:
                x_, z_ = Variable(x_), Variable(z_)

            # update D network
            self.D_optimizer.zero_grad()
            D_real = self.D(x_)
            D_real_loss = -torch.mean(D_real)
            G_ = self.G(z_)
            D_fake = self.D(G_)
            D_fake_loss = torch.mean(D_fake)
            D_loss = D_real_loss + D_fake_loss
            D_loss.backward()
            self.D_optimizer.step()

            # clipping D
            for p in self.D.parameters():
                p.data.clamp_(-self.c, self.c)

            if ((iter + 1) % self.n_critic) == 0:
                # update G network
                self.G_optimizer.zero_grad()
                G_ = self.G(z_)
                D_fake = self.D(G_)
                G_loss = -torch.mean(D_fake)
                self.train_hist['G_loss'].append(G_loss.data[0])
                G_loss.backward()
                self.G_optimizer.step()
                self.train_hist['D_loss'].append(D_loss.data[0])

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.data[0], G_loss.data[0]))
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
    utils.generate_animation(
        self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
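# The WGAN weight clipping used in several train() methods above, as a tiny
# helper sketch; `clip_weights` is a hypothetical name.
def clip_weights(model, c):
    for p in model.parameters():
        p.data.clamp_(-c, c)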
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['E_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    if torch.cuda.is_available():
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), \
            Variable(torch.zeros(self.batch_size, 1).cuda())
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
            Variable(torch.zeros(self.batch_size, 1))

    # self.D.train()
    print('training start!!')
    start_time = time.time()
    self.load(149)
    for epoch in range(150, self.epoch):
        self.G_optimizer.param_groups[0]['lr'] = self.args.lrG / np.sqrt(epoch + 1)
        self.D_optimizer.param_groups[0]['lr'] = self.args.lrD / np.sqrt(epoch + 1)
        # reset training mode of G and E
        epoch_start_time = time.time()
        D_err = []
        G_err = []
        # learning rate decay
        # if (epoch+1) % 20 == 0:
        #     self.G_optimizer.param_groups[0]['lr'] /= 2
        #     self.D_optimizer.param_groups[0]['lr'] /= 2
        #     self.E_optimizer.param_groups[0]['lr'] /= 2
        #     print("learning rate change!")
        # self.G_optimizer.param_groups[0]['lr'] /= np.sqrt(epoch+1)
        # self.D_optimizer.param_groups[0]['lr'] /= np.sqrt(epoch+1)
        # self.E_optimizer.param_groups[0]['lr'] /= np.sqrt(epoch+1)
        # print("learning rate change!")
        for iter, (X, _) in enumerate(self.data_loader):
            X = utils.to_var(X)

            """Discriminator"""
            z = utils.to_var(torch.randn(self.batch_size, self.z_dim))
            X_hat = self.G(z)
            D_real = self.D(self.FC(X))
            D_fake = self.D(self.FC(X_hat))
            D_loss = self.BCE_loss(D_real, self.y_real_) + self.BCE_loss(D_fake, self.y_fake_)
            self.train_hist['D_loss'].append(D_loss.data[0])
            D_err.append(D_loss.data[0])
            # Optimize
            D_loss.backward()
            self.D_optimizer.step()
            self.__reset_grad()

            """Generator"""
            # Use both Discriminator and Encoder to update Generator
            z = utils.to_var(torch.randn(self.batch_size, self.z_dim))
            X_hat = self.G(z)
            D_fake = self.D(self.FC(X_hat))
            # E_loss = torch.mean(
            #     torch.mean(0.5 * (z - z_mu) ** 2 * torch.exp(-z_sigma) +
            #                0.5 * z_sigma + 0.919, 1))
            G_loss = self.BCE_loss(D_fake, self.y_real_)
            total_loss = G_loss
            self.train_hist['G_loss'].append(G_loss.data[0])
            G_err.append(G_loss.data[0])
            # E_err.append(E_loss.data[0])
            # Optimize
            total_loss.backward()
            self.G_optimizer.step()
            # self.E_optimizer.step()
            self.__reset_grad()

            """ Plot """
            if (iter + 1) == self.data_loader.dataset.__len__() // self.batch_size:
                # Print and plot every epoch
                print('Epoch-{}; D_loss: {:.4}; G_loss: {:.4}\n'
                      .format(epoch, np.mean(D_err), np.mean(G_err)))
                for iter, (X, _) in enumerate(self.valid_loader):
                    X = utils.to_var(X)
                    self.visualize_results(X, epoch + 1)
                    break
                break

        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        # Save model
        if (epoch + 1) % 5 == 0:
            self.save(epoch)

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    # self.save(epoch)
    # Generate animation of reconstructed plot
    utils.generate_animation(
        self.root + '/' + self.result_dir + '/' + self.dataset + '/' + self.model_name +
        '/reconstructed', self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.root, self.save_dir, self.dataset, self.model_name),
                    self.model_name)
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    self.D.train()
    self.E.train()
    print('training start!!')
    start_time = time.time()
    first_time = True
    self.accuracy_hist = []
    for epoch in range(self.epoch):
        self.G.train()  # check here!!
        epoch_start_time = time.time()

        # rebuild the optimizers each epoch with exponentially decayed learning rates
        decay = 0.98 ** epoch
        self.E_optimizer = optim.Adam(self.E.parameters(), lr=decay * 0.3 * self.args.lrD,
                                      betas=(self.args.beta1, self.args.beta2))
        self.G_optimizer = optim.Adam(self.G.parameters(), lr=decay * 3 * self.args.lrG,
                                      betas=(self.args.beta1, self.args.beta2))
        self.D_optimizer = optim.Adam(self.D.parameters(), lr=decay * self.args.lrD,
                                      betas=(self.args.beta1, self.args.beta2))

        for M_epoch in range(5):
            for iter, (batch_x, batch_y) in enumerate(self.train_loader):
                x_ = batch_x
                z_ = torch.rand((self.batch_size, self.z_dim))
                x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
                G_batch_size = batch_x.size()[0]
                if G_batch_size < self.batch_size:
                    break
                # x_ (batch, 1, 28, 28)
                # z_ (batch, 62)

                # update D network
                image_real = Variable(batch_x.cuda())
                self.E.eval()
                y_real = self.E(image_real)
                y_real = nn.Softmax()(y_real)
                y_real = y_real.data.cpu().numpy()

                self.D_optimizer.zero_grad()
                D_real = self.D(x_)
                if first_time:
                    # before E has been trained, fall back to a uniform class distribution
                    y_real = (1 / float(self.class_num)) * np.ones((G_batch_size, self.class_num))
                y_real = np.concatenate((y_real, 2 * np.ones((np.shape(y_real)[0], 1))), axis=1)
                ones = np.ones((np.shape(y_real)[0], np.shape(y_real)[1]))
                ones[:, -1] = 0
                ones = torch.FloatTensor(ones)
                ones = Variable(ones).cuda()
                y_real = torch.FloatTensor(y_real).cuda()
                D_real_loss = torch.nn.BCEWithLogitsLoss(weight=y_real)(D_real, ones)

                G_input, conditional_label = self.gen_cond_label(self.batch_size)
                G_ = self.G(G_input, 0)
                D_fake = self.D(G_)
                y_fake_1 = np.tile(np.zeros((self.class_num)), (self.batch_size, 1))
                y_fake_2 = np.tile(np.ones((1)), (self.batch_size, 1))
                y_fake = np.concatenate((y_fake_1, y_fake_2), axis=1)
                y_fake = Variable(torch.FloatTensor(y_fake).cuda())
                D_fake_loss = torch.nn.BCEWithLogitsLoss()(D_fake, y_fake)

                D_loss = D_real_loss + D_fake_loss
                self.train_hist['D_loss'].append(D_loss.data[0])
                D_loss.backward()
                self.D_optimizer.step()

                # update G network
                self.G_optimizer.zero_grad()
                G_input, conditional_label = self.gen_cond_label(self.batch_size)
                G_ = self.G(G_input, 0)
                D_fake = self.D(G_)
                G_y_real = np.concatenate((conditional_label.numpy(), np.tile([0], (self.batch_size, 1))), axis=1)
                G_y_real = Variable(torch.FloatTensor(G_y_real)).cuda()
                G_loss = torch.nn.BCEWithLogitsLoss()(D_fake, G_y_real)
                self.train_hist['G_loss'].append(G_loss.data[0])
                G_loss.backward()
                self.G_optimizer.step()

                if ((iter + 1) % 100) == 0:
                    print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                          ((epoch + 1), (iter + 1), len(self.data_X) // self.batch_size,
                           D_loss.data[0], G_loss.data[0]))

        self.E_training(200)
        first_time = False
        self.visualize_results((epoch + 1))
        self.compute_accuracy()
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        self.save()

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    self.save()
    utils.generate_animation(self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
                             self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
def train(self):
    vis = visdom.Visdom()
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []
    self.M = {}
    self.M['pre'] = []
    self.M['pre'].append(1)
    self.M['cur'] = []

    self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
    if self.gpu_mode:
        self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, _) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break

            z_ = torch.Tensor(self.batch_size, self.z_dim).uniform_(-1, 1)
            if self.gpu_mode:
                x_, z_ = x_.cuda(), z_.cuda()

            # update D network
            self.D_optimizer.zero_grad()

            D_real = self.D(x_)
            D_real_loss = torch.mean(torch.abs(D_real - x_))

            G_ = self.G(z_)
            D_fake = self.D(G_)
            D_fake_loss = torch.mean(torch.abs(D_fake - G_))

            D_loss = D_real_loss - self.k * D_fake_loss
            self.train_hist['D_loss'].append(D_loss.item())

            D_loss.backward()
            self.D_optimizer.step()

            # update G network
            self.G_optimizer.zero_grad()

            G_ = self.G(z_)
            D_fake = self.D(G_)
            D_fake_loss = torch.mean(torch.abs(D_fake - G_))

            G_loss = D_fake_loss
            self.train_hist['G_loss'].append(G_loss.item())

            G_loss.backward()
            self.G_optimizer.step()

            # convergence metric
            temp_M = D_real_loss + torch.abs(self.gamma * D_real_loss - G_loss)

            # operation for updating k
            temp_k = self.k + self.lambda_ * (self.gamma * D_real_loss - G_loss)
            temp_k = temp_k.item()
            self.k = min(max(temp_k, 0), 1)
            self.M['cur'] = temp_M.item()

            if (iter + 1) % 30 == 0:
                generated = G_.cpu().data.numpy() / 2 + 0.5
                batch_image = x_.cpu().data.numpy() / 2 + 0.5
                print('min image ', generated.min())
                print('max image ', generated.max())
                vis.images(generated, nrow=8, win='generated')
                vis.images(batch_image, nrow=8, win='original')
                print('convergence metric ', self.M['cur'])

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, M: %.8f, k: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.item(), G_loss.item(), self.M['cur'], self.k))

        # halve the learning rates whenever the convergence measure M stops improving
        if np.mean(self.M['pre']) < np.mean(self.M['cur']):
            pre_lr = self.G_optimizer.param_groups[0]['lr']
            self.G_optimizer.param_groups[0]['lr'] = max(self.G_optimizer.param_groups[0]['lr'] / 2.0,
                                                         self.lr_lower_boundary)
            self.D_optimizer.param_groups[0]['lr'] = max(self.D_optimizer.param_groups[0]['lr'] / 2.0,
                                                         self.lr_lower_boundary)
            print('M_pre: ' + str(np.mean(self.M['pre'])) + ', M_cur: ' + str(np.mean(self.M['cur'])) +
                  ', lr: ' + str(pre_lr) + ' --> ' + str(self.G_optimizer.param_groups[0]['lr']))
        else:
            print('M_pre: ' + str(np.mean(self.M['pre'])) + ', M_cur: ' + str(np.mean(self.M['cur'])))
        self.M['pre'] = self.M['cur']
        self.M['cur'] = []

        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        with torch.no_grad():
            self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")

    self.save()
    utils.generate_animation(self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
                             self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
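
# Illustrative sketch (not part of the original sources) of the BEGAN
# bookkeeping used above: k is adjusted by proportional control toward the
# equilibrium gamma * L(x) = L(G(z)), and M is the convergence measure
# (Berthelot et al., 2017). All names here are illustrative.
def began_update(k, d_real_loss, g_loss, gamma, lambda_k):
    balance = gamma * d_real_loss - g_loss           # distance from equilibrium
    k = min(max(k + lambda_k * balance, 0.0), 1.0)   # keep k in [0, 1]
    M = d_real_loss + abs(balance)                   # lower M means closer to convergence
    return k, M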
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
    if self.gpu_mode:
        self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

    self.D.train()
    print('training start!!')
    start_time = time.time()
    Q_stack = []
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, _) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break

            z_ = torch.rand((self.batch_size, self.z_dim))
            if self.gpu_mode:
                x_, z_ = x_.cuda(), z_.cuda()

            # update D network
            self.D_optimizer.zero_grad()

            D_real = self.D(x_)
            D_real_loss = self.BCE_loss(D_real, self.y_real_)

            G_ = self.G(z_)
            D_fake = self.D(G_)
            D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)

            D_loss = D_real_loss + D_fake_loss
            self.train_hist['D_loss'].append(D_loss.item())

            D_loss.backward()
            self.D_optimizer.step()

            # update G network
            self.G_optimizer.zero_grad()

            G_ = self.G(z_)
            D_fake = self.D(G_)

            ######## the following code is the practical implementation for the paper ########
            # perturb z along a random direction of length eps and measure how far the
            # generated images move: Q estimates a directional singular value of the
            # generator Jacobian.
            perturbation_del = torch.randn(self.batch_size, self.z_dim).cuda()
            eps = self.eps
            pertu_length = torch.norm(perturbation_del, dim=1, keepdim=True)
            perturbation_del = (perturbation_del / pertu_length) * eps
            z_prime = z_ + perturbation_del
            perturbed_images = self.G(z_) - self.G(z_prime)
            perturbed_latent_var = z_ - z_prime
            Q = torch.norm(perturbed_images.view(self.batch_size, -1), dim=1) / \
                torch.norm(perturbed_latent_var.view(self.batch_size, -1), dim=1)
            print(Q)

            # quadratic penalty on Q values that leave [eig_min, eig_max];
            # note that L is only logged below and is not added to G_loss here
            L_max = 0.0
            L_min = 0.0
            count_max = 0
            count_min = 0
            for i in range(self.batch_size):
                if Q[i] > self.eig_max:
                    L_max += (Q[i] - self.eig_max) ** 2
                    count_max += 1
                if Q[i] < self.eig_min:
                    L_min += (Q[i] - self.eig_min) ** 2
                    count_min += 1
            L = L_max + L_min
            #################### end of implementation for the paper ####################

            G_loss = self.BCE_loss(D_fake, self.y_real_)
            self.train_hist['G_loss'].append(G_loss.item())

            G_loss.backward()
            self.G_optimizer.step()

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.item(), G_loss.item()))
                print(L)
                print(count_max)
                print(count_min)

        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        with torch.no_grad():
            self.gen_mode(4, epoch)
            self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")

    self.save()
    utils.generate_animation(self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
                             self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
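
# Illustrative sketch (an assumption, not the repository's code): a vectorized
# equivalent of the per-sample penalty loop above. Q estimates a directional
# singular value of the generator Jacobian via finite differences, and values
# outside [eig_min, eig_max] are penalized quadratically.
import torch

def jacobian_clamp_penalty(G, z, eps, eig_min, eig_max):
    delta = torch.randn_like(z)
    delta = delta / delta.norm(dim=1, keepdim=True) * eps    # random direction of length eps
    q = (G(z) - G(z + delta)).flatten(1).norm(dim=1) / delta.norm(dim=1)
    return ((q - eig_max).clamp(min=0) ** 2 +                # penalize q > eig_max
            (eig_min - q).clamp(min=0) ** 2).sum()           # penalize q < eig_min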
def train_all_classes(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    self.data_loader_train = DataLoader(self.dataset_train, batch_size=self.batch_size)
    self.data_loader_valid = DataLoader(self.dataset_valid, batch_size=self.batch_size)

    print('training start!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        D_losses = []
        G_losses = []
        epoch_start_time = time.time()
        # for tours in range(int(50000 / self.num_examples)):  # we want to always see as many images
        for x_, y_ in self.data_loader_train:
            # train discriminator D
            self.D.zero_grad()
            batch_size = x_.size()[0]

            y_real_ = torch.ones(batch_size)
            y_fake_ = torch.zeros(batch_size)
            y_label_ = torch.zeros(batch_size, 10)
            y_label_.scatter_(1, y_.view(batch_size, 1), 1)

            x_ = x_.view(-1, 1, 28, 28)
            x_, y_label_, y_real_, y_fake_ = Variable(x_.cuda()), Variable(y_label_.cuda()), \
                                             Variable(y_real_.cuda()), Variable(y_fake_.cuda())
            D_result, c = self.D(x_, y_label_)
            D_real_loss = self.BCELoss(D_result, y_real_)

            z_ = torch.rand((batch_size, self.z_dim, 1, 1))
            y_ = (torch.rand(batch_size, 1) * 10).type(torch.LongTensor)
            z_ = Variable(z_.cuda())
            # note: the fake pass reuses the real batch's y_label_ as the condition;
            # the freshly sampled y_ above is never turned into a one-hot label
            G_result = self.G(z_, y_label_)

            D_result, c = self.D(G_result, y_label_)
            D_fake_loss = self.BCELoss(D_result, y_fake_)
            D_fake_score = D_result.data.mean()

            D_train_loss = D_real_loss + D_fake_loss
            D_train_loss.backward()
            self.D_optimizer.step()
            D_losses.append(D_train_loss.data[0])

            # train generator G
            self.G.zero_grad()
            z_ = torch.rand((batch_size, self.z_dim, 1, 1))
            y_ = (torch.rand(batch_size, 1) * 10).type(torch.LongTensor)
            z_ = Variable(z_.cuda())

            G_result = self.G(z_, y_label_)
            D_result, c = self.D(G_result, y_label_)
            G_train_loss = self.BCELoss(D_result, y_real_)
            G_train_loss.backward()
            self.G_optimizer.step()
            G_losses.append(G_train_loss.data[0])

        epoch_end_time = time.time()
        per_epoch_ptime = epoch_end_time - epoch_start_time
        print('[%d/%d] - ptime: %.2f, loss_d: %.3f, loss_g: %.3f' %
              ((epoch + 1), self.epoch, per_epoch_ptime,
               torch.mean(torch.FloatTensor(D_losses)),
               torch.mean(torch.FloatTensor(G_losses))))
        self.train_hist['D_loss'].append(torch.mean(torch.FloatTensor(D_losses)))
        self.train_hist['G_loss'].append(torch.mean(torch.FloatTensor(G_losses)))
        self.train_hist['per_epoch_time'].append(per_epoch_ptime)
        self.save()
        self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    utils.generate_animation(self.result_dir + '/' + self.model_name, self.epoch)
    utils.loss_plot(self.train_hist, self.save_dir, self.model_name)
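
# Illustrative helper (assumed names, not the repository's API) showing the
# one-hot label encoding that the scatter_ call above builds for the
# conditional D and G inputs.
import torch

def one_hot(labels, num_classes=10):
    """labels: LongTensor of shape (B,) -> FloatTensor of shape (B, num_classes)."""
    onehot = torch.zeros(labels.size(0), num_classes)
    onehot.scatter_(1, labels.view(-1, 1), 1.0)
    return onehot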
def train(self):
    self.G.apply(self.G.weights_init)
    print(' training start!! (no conditional)')
    start_time = time.time()
    for classe in range(10):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []

        # self.G.apply(self.G.weights_init) does not work for instance
        # re-instantiate the encoder for each class
        del self.E
        self.E = Encoder(self.z_dim, self.dataset, self.conditional)
        self.E_optimizer = optim.Adam(self.E.parameters(), lr=self.lr)  # lr=args.lrD, betas=(args.beta1, args.beta2)
        if self.gpu_mode:
            self.E.cuda(self.device)

        best = 100000
        self.data_loader_train = get_iter_dataset(self.dataset_train, self.list_class_train,
                                                  self.batch_size, classe)
        self.data_loader_valid = get_iter_dataset(self.dataset_valid, self.list_class_valid,
                                                  self.batch_size, classe)
        early_stop = 0.
        for epoch in range(self.epoch):
            epoch_start_time = time.time()
            self.E.train()
            self.G.train()
            sum_loss_train = 0.
            n_batch = 0.
            for iter, (x_, t_) in enumerate(self.data_loader_train):
                n_batch += 1
                x_ = Variable(x_)
                if self.gpu_mode:
                    x_ = x_.cuda(self.device)
                # VAE forward pass
                z_, mu, logvar = self.E(x_)
                recon_batch = self.G(z_)
                # train encoder and decoder jointly
                self.G_optimizer.zero_grad()
                self.E_optimizer.zero_grad()
                g_loss = self.loss_function(recon_batch, x_, mu, logvar)
                g_loss.backward()
                sum_loss_train += g_loss.data[0]
                self.G_optimizer.step()
                self.E_optimizer.step()
                self.train_hist['D_loss'].append(g_loss.data[0])
                self.train_hist['G_loss'].append(g_loss.data[0])
                if ((iter + 1) % 100) == 0:
                    print("classe : [%1d] Epoch: [%2d] [%4d/%4d] G_loss: %.8f, E_loss: %.8f" %
                          (classe, (epoch + 1), (iter + 1), self.size_epoch,
                           g_loss.data[0], g_loss.data[0]))
            sum_loss_train = sum_loss_train / np.float(n_batch)

            sum_loss_valid = 0.
            n_batch = 1.
            self.E.eval()
            self.G.eval()
            for iter, (x_, t_) in enumerate(self.data_loader_valid):
                n_batch += 1
                # keep only the samples of the current class
                mask_idx = torch.nonzero(t_ == classe)
                if mask_idx.dim() == 0:
                    continue
                x_ = torch.index_select(x_, 0, mask_idx[:, 0])
                t_ = torch.index_select(t_, 0, mask_idx[:, 0])
                if self.gpu_mode:
                    x_ = Variable(x_.cuda(self.device), volatile=True)
                else:
                    x_ = Variable(x_)
                # VAE forward pass
                z_, mu, logvar = self.E(x_)
                recon_batch = self.G(z_)
                G_loss = self.loss_function(recon_batch, x_, mu, logvar)
                sum_loss_valid += G_loss.data[0]
            sum_loss_valid = sum_loss_valid / np.float(n_batch)

            print("classe : [%1d] Epoch: [%2d] Train_loss: %.8f, Valid_loss: %.8f" %
                  (classe, (epoch + 1), sum_loss_train, sum_loss_valid))
            self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
            self.visualize_results((epoch + 1), classe)
            if sum_loss_valid < best:
                best = sum_loss_valid
                self.save_G(classe)
                early_stop = 0.
            # early stopping: stop if the validation loss has not improved
            # for 150 consecutive epochs
            if early_stop == 150:
                break
            else:
                early_stop += 1

        result_dir = self.result_dir + '/' + 'classe-' + str(classe)
        utils.generate_animation(result_dir + '/' + self.model_name, epoch + 1)
        utils.loss_plot(self.train_hist, result_dir, self.model_name)
        np.savetxt(os.path.join(result_dir, 'vae_training_' + self.dataset + '.txt'),
                   np.transpose([self.train_hist['G_loss']]))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
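
# `self.loss_function` is defined elsewhere in the repository. As an
# illustrative assumption, a standard VAE objective consistent with how it is
# called above (reconstruction term plus KL divergence, Kingma & Welling 2014)
# would look like this:
import torch
import torch.nn.functional as F

def vae_loss(recon_x, x, mu, logvar):
    # reconstruction term, summed over pixels and batch
    bce = F.binary_cross_entropy(recon_x.view(recon_x.size(0), -1),
                                 x.view(x.size(0), -1), reduction='sum')
    # KL( N(mu, sigma^2) || N(0, 1) ), summed over latent dims and batch
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return bce + kld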
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['info_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    if self.gpu_mode:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), \
                                     Variable(torch.zeros(self.batch_size, 1).cuda())
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
                                     Variable(torch.zeros(self.batch_size, 1))

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter in range(len(self.data_X) // self.batch_size):
            x_ = self.data_X[iter * self.batch_size:(iter + 1) * self.batch_size]
            z_ = torch.rand((self.batch_size, self.z_dim))
            if self.SUPERVISED == True:
                y_disc_ = self.data_Y[iter * self.batch_size:(iter + 1) * self.batch_size]
            else:
                y_disc_ = torch.from_numpy(
                    np.random.multinomial(1, self.len_discrete_code * [float(1.0 / self.len_discrete_code)],
                                          size=[self.batch_size])).type(torch.FloatTensor)
            y_cont_ = torch.from_numpy(np.random.uniform(-1, 1, size=(self.batch_size, 2))).type(torch.FloatTensor)

            if self.gpu_mode:
                x_, z_, y_disc_, y_cont_ = Variable(x_.cuda()), Variable(z_.cuda()), \
                                           Variable(y_disc_.cuda()), Variable(y_cont_.cuda())
            else:
                x_, z_, y_disc_, y_cont_ = Variable(x_), Variable(z_), Variable(y_disc_), Variable(y_cont_)

            # update D network
            self.D_optimizer.zero_grad()

            D_real, _, _ = self.D(x_)
            D_real_loss = self.BCE_loss(D_real, self.y_real_)

            G_ = self.G(z_, y_cont_, y_disc_)
            D_fake, _, _ = self.D(G_)
            D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)

            D_loss = D_real_loss + D_fake_loss
            self.train_hist['D_loss'].append(D_loss.data[0])

            D_loss.backward(retain_graph=True)
            self.D_optimizer.step()

            # update G network
            self.G_optimizer.zero_grad()

            G_ = self.G(z_, y_cont_, y_disc_)
            D_fake, D_cont, D_disc = self.D(G_)

            G_loss = self.BCE_loss(D_fake, self.y_real_)
            self.train_hist['G_loss'].append(G_loss.data[0])

            G_loss.backward(retain_graph=True)
            self.G_optimizer.step()

            # information loss
            disc_loss = self.CE_loss(D_disc, torch.max(y_disc_, 1)[1])
            cont_loss = self.MSE_loss(D_cont, y_cont_)
            info_loss = disc_loss + cont_loss
            self.train_hist['info_loss'].append(info_loss.data[0])

            info_loss.backward()
            self.info_optimizer.step()

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, info_loss: %.8f" %
                      ((epoch + 1), (iter + 1), len(self.data_X) // self.batch_size,
                       D_loss.data[0], G_loss.data[0], info_loss.data[0]))

        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")

    self.save()
    utils.generate_animation(self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
                             self.epoch)
    utils.generate_animation(self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name + '_cont',
                             self.epoch)
    self.loss_plot(self.train_hist,
                   os.path.join(self.save_dir, self.dataset, self.model_name),
                   self.model_name)
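
# Illustrative sketch (not from the original file) of how the unsupervised
# branch above samples the InfoGAN latent codes each iteration: a one-hot
# categorical code and a uniform continuous code.
import numpy as np
import torch

def sample_infogan_codes(batch_size, n_discrete, n_continuous=2):
    y_disc = torch.from_numpy(
        np.random.multinomial(1, [1.0 / n_discrete] * n_discrete,
                              size=batch_size)).float()          # one-hot rows
    y_cont = torch.from_numpy(
        np.random.uniform(-1, 1, size=(batch_size, n_continuous))).float()
    return y_disc, y_cont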
def train_all_classes(self):
    self.G.apply(self.G.weights_init)
    self.train_hist = {}
    self.train_hist['Train_loss'] = []
    self.train_hist['Valid_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []
    self.size_epoch = 1

    if self.gpu_mode:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda(self.device)), \
                                     Variable(torch.zeros(self.batch_size, 1).cuda(self.device))
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
                                     Variable(torch.zeros(self.batch_size, 1))

    self.E.train()
    print('training start!!')
    start_time = time.time()
    best = 1000000

    self.data_loader_train = DataLoader(self.dataset_train, batch_size=self.batch_size)
    self.data_loader_valid = DataLoader(self.dataset_valid, batch_size=self.batch_size)
    early_stop = 0
    for epoch in range(self.epoch):
        self.E.train()
        self.G.train()
        epoch_start_time = time.time()
        sum_loss_train = 0.
        n_batch = 0.
        for iter, (x_, t_) in enumerate(self.data_loader_train):
            y_onehot = torch.FloatTensor(t_.shape[0], 10)
            y_onehot.zero_()
            y_onehot.scatter_(1, t_[:, np.newaxis], 1.0)
            if self.gpu_mode:
                x_ = Variable(x_.cuda(self.device))
                if self.conditional:
                    y_onehot = Variable(y_onehot.cuda(self.device))
            else:
                x_ = Variable(x_)
            self.E_optimizer.zero_grad()
            self.G_optimizer.zero_grad()
            # VAE forward pass
            z_, mu, logvar = self.E(x_, y_onehot)
            recon_batch = self.G(z_, y_onehot)
            G_loss = self.loss_function(recon_batch, x_, mu, logvar)
            sum_loss_train += G_loss.data[0]
            G_loss.backward()
            self.E_optimizer.step()
            self.G_optimizer.step()
            n_batch += 1
            self.train_hist['Train_loss'].append(G_loss.data[0])
        sum_loss_train = sum_loss_train / np.float(n_batch)

        sum_loss_valid = 0.
        n_batch = 0.
        self.E.eval()
        self.G.eval()
        for iter, (x_, t_) in enumerate(self.data_loader_valid):
            n_batch += 1
            y_onehot = torch.FloatTensor(t_.shape[0], 10)
            y_onehot.zero_()
            y_onehot.scatter_(1, t_[:, np.newaxis], 1.0)
            if self.gpu_mode:
                x_ = Variable(x_.cuda(self.device))
                if self.conditional:
                    y_onehot = Variable(y_onehot.cuda(self.device))
            else:
                x_ = Variable(x_)
            # VAE forward pass
            z_, mu, logvar = self.E(x_, y_onehot)
            recon_batch = self.G(z_, y_onehot)
            G_loss = self.loss_function(recon_batch, x_, mu, logvar)
            sum_loss_valid += G_loss.data[0]
            self.train_hist['Valid_loss'].append(G_loss.data[0])
        sum_loss_valid = sum_loss_valid / np.float(n_batch)

        print("Epoch: [%2d] Train_loss: %.8f, Valid_loss: %.8f" %
              ((epoch + 1), sum_loss_train, sum_loss_valid))
        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        self.visualize_results((epoch + 1))
        if sum_loss_valid < best:
            best = sum_loss_valid
            self.save()
            early_stop = 0.
        # early stopping check: flag when the validation loss has not improved
        # for 150 consecutive epochs (the break itself is disabled)
        if early_stop == 150:
            # break
            print("I should stop")
        else:
            early_stop += 1

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")
    utils.generate_animation(self.result_dir + '/' + self.model_name, self.epoch)
    # utils.loss_plot(self.train_hist, os.path.join(self.save_dir, self.dataset, self.model_name,
    #                 'num_examples_' + str(self.num_examples)), self.model_name)
    np.savetxt(os.path.join(self.result_dir + '/cvae_training_' + self.dataset + '.txt'),
               np.transpose([self.train_hist['Train_loss']]))
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    if self.gpu_mode:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), \
                                     Variable(torch.zeros(self.batch_size, 1).cuda())
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
                                     Variable(torch.zeros(self.batch_size, 1))

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, _) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break

            z_ = torch.rand((self.batch_size, self.z_dim))
            if self.gpu_mode:
                x_, z_ = Variable(x_.cuda()), Variable(z_.cuda())
            else:
                x_, z_ = Variable(x_), Variable(z_)

            # update D network
            self.D_optimizer.zero_grad()

            D_real = self.D(x_)
            D_real_loss = self.BCE_loss(D_real, self.y_real_)

            G_ = self.G(z_)
            D_fake = self.D(G_)
            D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)

            D_loss = D_real_loss + D_fake_loss
            self.train_hist['D_loss'].append(D_loss.data[0])

            D_loss.backward()
            self.D_optimizer.step()

            # update G network
            self.G_optimizer.zero_grad()

            G_ = self.G(z_)
            D_fake = self.D(G_)
            G_loss = self.BCE_loss(D_fake, self.y_real_)
            self.train_hist['G_loss'].append(G_loss.data[0])

            G_loss.backward()
            self.G_optimizer.step()

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.data[0], G_loss.data[0]))

        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")

    self.save()
    utils.generate_animation(self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
                             self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['E_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    if torch.cuda.is_available():
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), \
                                     Variable(torch.zeros(self.batch_size, 1).cuda())
    else:
        self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), \
                                     Variable(torch.zeros(self.batch_size, 1))

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        # reset training mode of G and E
        self.G.train()
        self.E.train()
        epoch_start_time = time.time()
        for iter, (X, _) in enumerate(self.data_loader):
            X = utils.to_var(X)

            """Discriminator"""
            z = utils.to_var(torch.randn((self.batch_size, self.z_dim)).view(-1, self.z_dim, 1, 1))
            X_hat = self.G(z)
            D_real = self.D(X).squeeze().view(-1, 1)
            D_fake = self.D(X_hat).squeeze().view(-1, 1)
            D_loss = self.BCE_loss(D_real, self.y_real_) + self.BCE_loss(D_fake, self.y_fake_)
            self.train_hist['D_loss'].append(D_loss.data[0])
            # Optimize
            D_loss.backward()
            self.D_optimizer.step()
            self.__reset_grad()

            """Encoder"""
            z = utils.to_var(torch.randn((self.batch_size, self.z_dim)).view(-1, self.z_dim, 1, 1))
            X_hat = self.G(z)
            z_mu, z_sigma = self.E(X_hat)
            z_mu, z_sigma = z_mu.squeeze(), z_sigma.squeeze()
            # negative log-likelihood of z under the Gaussian N(z_mu, exp(z_sigma))
            E_loss = torch.mean(
                torch.mean(0.5 * (z - z_mu) ** 2 * torch.exp(-z_sigma) +
                           0.5 * z_sigma + 0.5 * np.log(2 * np.pi), 1))
            self.train_hist['E_loss'].append(E_loss.data[0])
            # Optimize
            E_loss.backward()
            self.E_optimizer.step()
            self.__reset_grad()

            """Generator"""
            # Use both Discriminator and Encoder to update Generator
            z = utils.to_var(torch.randn((self.batch_size, self.z_dim)).view(-1, self.z_dim, 1, 1))
            X_hat = self.G(z)
            D_fake = self.D(X_hat).squeeze().view(-1, 1)
            z_mu, z_sigma = self.E(X_hat)
            z_mu, z_sigma = z_mu.squeeze(), z_sigma.squeeze()
            mode_loss = torch.mean(
                torch.mean(0.5 * (z - z_mu) ** 2 * torch.exp(-z_sigma) +
                           0.5 * z_sigma + 0.5 * np.log(2 * np.pi), 1))
            G_loss = self.BCE_loss(D_fake, self.y_real_)
            total_loss = G_loss + mode_loss
            self.train_hist['G_loss'].append(G_loss.data[0])
            # Optimize
            total_loss.backward()
            self.G_optimizer.step()
            self.__reset_grad()

            """ Plot """
            if (iter + 1) == self.data_loader.dataset.__len__() // self.batch_size:
                # Print and plot every epoch
                print('Epoch-{}; D_loss: {:.4}; G_loss: {:.4}; E_loss: {:.4}\n'
                      .format(epoch, D_loss.data[0], G_loss.data[0], E_loss.data[0]))
                for iter, (X, _) in enumerate(self.valid_loader):
                    X = utils.to_var(X)
                    self.visualize_results(X, epoch + 1)
                    break
                break

        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)

        # Save model every 5 epochs
        if epoch % 5 == 0:
            self.save()

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save final training results")
    self.save()
    # Generate animation of reconstructed plot
    utils.generate_animation(
        self.root + '/' + self.result_dir + '/' + self.dataset + '/' + self.model_name + '/reconstructed',
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.root, self.save_dir, self.dataset, self.model_name),
                    self.model_name)
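
# Illustrative sketch (assumed names): the encoder and mode-seeking terms above
# are the negative log-likelihood of z under the diagonal Gaussian
# N(z_mu, exp(z_sigma)) predicted by E, averaged over latent dims and batch:
#   0.5 * (z - mu)^2 / sigma^2 + 0.5 * log sigma^2 + 0.5 * log(2*pi)
import numpy as np
import torch

def gaussian_nll(z, z_mu, z_logvar):
    return torch.mean(torch.mean(
        0.5 * (z - z_mu) ** 2 * torch.exp(-z_logvar)
        + 0.5 * z_logvar + 0.5 * np.log(2 * np.pi), dim=1))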
def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
    if self.gpu_mode:
        self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

    self.D.train()
    print('training start!!')
    print("all_iter:", self.data_loader.dataset.__len__() // self.batch_size)
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, _) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break

            # randomly sample the noise input of size (batch_size, z_dim) = (64, 62)
            z_ = torch.rand((self.batch_size, self.z_dim))
            if self.gpu_mode:
                x_, z_ = x_.cuda(), z_.cuda()  # x_ holds the real examples
            # print('x_:', x_.shape)

            # update D network
            self.D_optimizer.zero_grad()

            D_real = self.D(x_)
            D_real_loss = self.BCE_loss(D_real, self.y_real_)

            G_ = self.G(z_)
            D_fake = self.D(G_)
            D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)

            D_loss = D_real_loss + D_fake_loss
            self.train_hist['D_loss'].append(D_loss.item())

            D_loss.backward()
            self.D_optimizer.step()

            # update G network
            self.G_optimizer.zero_grad()

            G_ = self.G(z_)
            D_fake = self.D(G_)
            G_loss = self.BCE_loss(D_fake, self.y_real_)
            self.train_hist['G_loss'].append(G_loss.item())

            G_loss.backward()
            self.G_optimizer.step()

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.item(), G_loss.item()))

        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        with torch.no_grad():
            self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")

    self.save()
    utils.generate_animation(self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
                             self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)