    def backward_D_OCR(self):
        # Real
        if self.real_z_mean is None:
            pred_real = self.netD(self.real.detach())
        else:
            pred_real = self.netD(**{'x': self.real.detach(), 'z': self.real_z_mean.detach()})
        # Fake
        pred_fake = self.netD(**{'x': self.fake.detach(), 'z': self.z.detach()})
        # Combined loss
        self.loss_Dreal, self.loss_Dfake = loss_hinge_dis(pred_fake, pred_real, self.len_text_fake.detach(), self.len_text.detach(), self.opt.mask_loss)
        self.loss_D = self.loss_Dreal + self.loss_Dfake
        # OCR loss on real data
        self.pred_real_OCR = self.netOCR(self.real.detach())
        preds_size = torch.IntTensor([self.pred_real_OCR.size(0)] * self.opt.batch_size).detach()
        loss_OCR_real = self.OCR_criterion(self.pred_real_OCR, self.text_encode.detach(), preds_size, self.len_text.detach())
        self.loss_OCR_real = torch.mean(loss_OCR_real[~torch.isnan(loss_OCR_real)])
        # total loss
        loss_total = self.loss_D + self.loss_OCR_real

        # backward
        loss_total.backward()
        for param in self.netOCR.parameters():
            if param.grad is not None:
                # CTC can produce non-finite gradients; zero them out
                param.grad[~torch.isfinite(param.grad)] = 0
        if self.opt.clip_grad > 0:
            clip_grad_norm_(self.netD.parameters(), self.opt.clip_grad)
        return loss_total
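
    # --- Illustrative sketch (added; an assumption, not this repo's code) ---
    # `backward_D_OCR` above relies on `self.OCR_criterion` returning a
    # per-sample loss vector, which is why NaN entries are filtered before
    # averaging: CTC yields inf/NaN for alignments that are impossible (e.g.
    # a target longer than the prediction). A minimal criterion consistent
    # with that usage; `blank=0` is an assumption:
    def _sketch_build_OCR_criterion(self):
        return torch.nn.CTCLoss(blank=0, reduction='none', zero_infinity=False)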
    def train_GD(self):
        self.netG.train()
        self.netD.train()
        self.optimizer_G.zero_grad()
        self.optimizer_D.zero_grad()
        # Split real images and labels into batch_size chunks, one per D step
        x = torch.split(self.real, self.opt.batch_size)
        y = torch.split(self.label, self.opt.batch_size)
        counter = 0

        # Optionally toggle D and G's "requires_grad"
        if self.opt.toggle_grads:
            toggle_grad(self.netD, True)
            toggle_grad(self.netG, False)

        for step_index in range(self.opt.num_critic_train):
            self.optimizer_D.zero_grad()
            # Generate fakes without building a graph; only D is updated here
            with torch.no_grad():
                self.forward()
            # x and y come from torch.split above, so they are never None here
            D_input = torch.cat([self.fake, x[counter]], 0)
            D_class = torch.cat([self.label_fake, y[counter]], 0)
            # Get Discriminator output on the concatenated fake/real batch
            D_out = self.netD(D_input, D_class)
            pred_fake, pred_real = torch.split(
                D_out, [self.fake.shape[0], x[counter].shape[0]
                        ])  # D_fake, D_real
            # Combined loss
            self.loss_Dreal, self.loss_Dfake = loss_hinge_dis(
                pred_fake, pred_real, self.len_text_fake.detach(),
                self.len_text.detach(), self.opt.mask_loss)
            self.loss_D = self.loss_Dreal + self.loss_Dfake
            self.loss_D.backward()
            counter += 1
            self.optimizer_D.step()

        # Optionally toggle D and G's "requires_grad"
        if self.opt.toggle_grads:
            toggle_grad(self.netD, False)
            toggle_grad(self.netG, True)
        # Zero G's gradients by default before training G, for safety
        self.optimizer_G.zero_grad()
        self.forward()
        self.loss_G = loss_hinge_gen(self.netD(self.fake, self.label_fake),
                                     self.len_text_fake.detach(),
                                     self.opt.mask_loss)
        self.loss_G.backward()
        self.optimizer_G.step()
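
    # --- Illustrative sketches (added; assumptions, not this repo's code) ---
    # `train_GD` relies on helpers defined elsewhere; minimal versions of what
    # they are assumed to do, following the standard BigGAN-style recipe. The
    # masking by `len_text_fake`/`mask_loss` in the real helpers is omitted.
    @staticmethod
    def _sketch_toggle_grad(model, on):
        # Freeze/unfreeze all parameters so the network not being updated
        # does not accumulate gradients.
        for p in model.parameters():
            p.requires_grad_(on)

    @staticmethod
    def _sketch_loss_hinge_gen(pred_fake):
        # Generator hinge loss: raise D's score on generated samples.
        return -torch.mean(pred_fake)
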
    def backward_D(self):
        # Real
        if self.real_z_mean is None:
            pred_real = self.netD(self.real.detach())
        else:
            pred_real = self.netD(**{'x': self.real.detach(), 'z': self.real_z_mean.detach()})
        # Fake
        pred_fake = self.netD(**{'x': self.fake.detach(), 'z': self.z.detach()})
        # Combined loss
        self.loss_Dreal, self.loss_Dfake = loss_hinge_dis(pred_fake, pred_real, self.len_text_fake.detach(), self.len_text.detach(), self.opt.mask_loss)
        self.loss_D = self.loss_Dreal + self.loss_Dfake
        # backward
        self.loss_D.backward()
        # Sanity check on one early D parameter: abort if weights or gradients went NaN
        probe = self.netD.infer_img.blocks[0][0].conv1.bias
        if torch.isnan(probe.grad).any() or torch.isnan(probe).any():
            print('gradients of D are nan')
            sys.exit()
        if self.opt.clip_grad > 0:
            clip_grad_norm_(self.netD.parameters(), self.opt.clip_grad)
        return self.loss_D
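
    # --- Illustrative sketch (added; an assumption, not this repo's code) ---
    # All the backward passes above call `loss_hinge_dis`; a minimal version
    # of the standard discriminator hinge loss it is assumed to implement,
    # without the per-timestep masking controlled by `mask_loss`:
    @staticmethod
    def _sketch_loss_hinge_dis(pred_fake, pred_real):
        # Push real outputs above +1 and fake outputs below -1.
        loss_real = torch.mean(torch.relu(1.0 - pred_real))
        loss_fake = torch.mean(torch.relu(1.0 + pred_fake))
        return loss_real, loss_fake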