def backward_D_OCR(self):
    # Real
    if self.real_z_mean is None:
        pred_real = self.netD(self.real.detach())
    else:
        pred_real = self.netD(**{'x': self.real.detach(), 'z': self.real_z_mean.detach()})
    # Fake
    pred_fake = self.netD(**{'x': self.fake.detach(), 'z': self.z.detach()})
    # Combined hinge loss for the discriminator
    self.loss_Dreal, self.loss_Dfake = loss_hinge_dis(pred_fake, pred_real, self.len_text_fake.detach(),
                                                      self.len_text.detach(), self.opt.mask_loss)
    self.loss_D = self.loss_Dreal + self.loss_Dfake
    # OCR (CTC) loss on real data; NaN entries are dropped before averaging
    self.pred_real_OCR = self.netOCR(self.real.detach())
    preds_size = torch.IntTensor([self.pred_real_OCR.size(0)] * self.opt.batch_size).detach()
    loss_OCR_real = self.OCR_criterion(self.pred_real_OCR, self.text_encode.detach(),
                                       preds_size, self.len_text.detach())
    self.loss_OCR_real = torch.mean(loss_OCR_real[~torch.isnan(loss_OCR_real)])
    # Total loss
    loss_total = self.loss_D + self.loss_OCR_real
    # Backward
    loss_total.backward()
    # Zero out NaN/Inf gradients in the OCR network so a single bad batch cannot derail training
    for param in self.netOCR.parameters():
        if param.grad is None:
            continue
        param.grad[torch.isnan(param.grad)] = 0
        param.grad[torch.isinf(param.grad)] = 0
    if self.opt.clip_grad > 0:
        clip_grad_norm_(self.netD.parameters(), self.opt.clip_grad)
    return loss_total
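
# The helpers referenced here (loss_hinge_dis, and loss_hinge_gen/toggle_grad further below)
# are defined elsewhere in the repository. The following is a minimal, hedged sketch of a
# BigGAN-style hinge discriminator loss with the signature assumed from the call above;
# the length arguments and mask_loss flag are kept for interface compatibility, but the
# optional length-based masking they control is not reproduced here.
import torch

def loss_hinge_dis_sketch(dis_fake, dis_real, len_text_fake, len_text, mask_loss):
    # Standard hinge loss for the discriminator:
    #   loss_real = E[relu(1 - D(real))], loss_fake = E[relu(1 + D(fake))]
    loss_real = torch.relu(1.0 - dis_real).mean()
    loss_fake = torch.relu(1.0 + dis_fake).mean()
    return loss_real, loss_fake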
def train_GD(self):
    self.netG.train()
    self.netD.train()
    self.optimizer_G.zero_grad()
    self.optimizer_D.zero_grad()
    # Split real images and labels into batch_size chunks, one per critic step
    x = torch.split(self.real, self.opt.batch_size)
    y = torch.split(self.label, self.opt.batch_size)
    counter = 0
    # Optionally toggle D and G's "requires_grad"
    if self.opt.toggle_grads:
        toggle_grad(self.netD, True)
        toggle_grad(self.netG, False)
    for step_index in range(self.opt.num_critic_train):
        self.optimizer_D.zero_grad()
        with torch.set_grad_enabled(False):
            self.forward()
        # Feed fake and real samples through D in a single batch
        D_input = torch.cat([self.fake, x[counter]], 0) if x is not None else self.fake
        D_class = torch.cat([self.label_fake, y[counter]], 0) if y is not None else self.label_fake
        # Get Discriminator output
        D_out = self.netD(D_input, D_class)
        if x is not None:
            # Split D's output back into fake and real predictions
            pred_fake, pred_real = torch.split(D_out, [self.fake.shape[0], x[counter].shape[0]])
        else:
            pred_fake = D_out
        # Combined hinge loss
        self.loss_Dreal, self.loss_Dfake = loss_hinge_dis(pred_fake, pred_real, self.len_text_fake.detach(),
                                                          self.len_text.detach(), self.opt.mask_loss)
        self.loss_D = self.loss_Dreal + self.loss_Dfake
        self.loss_D.backward()
        counter += 1
        self.optimizer_D.step()
    # Optionally toggle D and G's "requires_grad"
    if self.opt.toggle_grads:
        toggle_grad(self.netD, False)
        toggle_grad(self.netG, True)
    # Zero G's gradients by default before training G, for safety
    self.optimizer_G.zero_grad()
    self.forward()
    self.loss_G = loss_hinge_gen(self.netD(self.fake, self.label_fake),
                                 self.len_text_fake.detach(), self.opt.mask_loss)
    self.loss_G.backward()
    self.optimizer_G.step()
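
# Sketches of the two remaining helpers assumed by train_GD(): toggle_grad (behavior
# inferred from its name and typical BigGAN training loops) and loss_hinge_gen (signature
# taken from the call above). These are illustrative assumptions, not the repository's
# actual implementations.
def toggle_grad_sketch(model, on):
    # Switch gradient tracking on/off for every parameter of `model`.
    for param in model.parameters():
        param.requires_grad_(on)

def loss_hinge_gen_sketch(dis_fake, len_text_fake, mask_loss):
    # Standard hinge generator loss: L_G = -E[D(fake)]; optional length masking omitted.
    return (-dis_fake).mean()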
def backward_D(self):
    # Real
    pred_real = self.netD(**{'x': self.real.detach(), 'z': self.real_z_mean.detach()})
    # Fake
    pred_fake = self.netD(**{'x': self.fake.detach(), 'z': self.z.detach()})
    # Combined loss
    self.loss_Dreal, self.loss_Dfake = loss_hinge_dis(pred_fake, pred_real, self.len_text_fake.detach(),
                                                      self.len_text.detach(), self.opt.mask_loss)
    self.loss_D = self.loss_Dreal + self.loss_Dfake
    # backward
    self.loss_D.backward()
    # Abort if D's first conv layer shows zero or NaN gradients/weights
    if any(self.netD.infer_img.blocks[0][0].conv1.bias.grad == 0) or \
            any(torch.isnan(self.netD.infer_img.blocks[0][0].conv1.bias.grad)) or \
            any(torch.isnan(self.netD.infer_img.blocks[0][0].conv1.bias)):
        print('gradients of D are nan')
        sys.exit()
    if self.opt.clip_grad > 0:
        clip_grad_norm_(self.netD.parameters(), self.opt.clip_grad)
    return self.loss_D
def backward_D(self):
    # Real
    if self.real_z_mean is None:
        pred_real = self.netD(self.real.detach())
    else:
        pred_real = self.netD(**{'x': self.real.detach(), 'z': self.real_z_mean.detach()})
    # Fake
    pred_fake = self.netD(**{'x': self.fake.detach(), 'z': self.z.detach()})
    # Combined loss
    self.loss_Dreal, self.loss_Dfake = loss_hinge_dis(pred_fake, pred_real, self.len_text_fake.detach(),
                                                      self.len_text.detach(), self.opt.mask_loss)
    self.loss_D = self.loss_Dreal + self.loss_Dfake
    # backward
    self.loss_D.backward()
    if self.opt.clip_grad > 0:
        clip_grad_norm_(self.netD.parameters(), self.opt.clip_grad)
    return self.loss_D