import random

import torch
import torch.nn.functional as F
from torch.autograd import grad

from apex import amp


def discriminator_trainloop(self, real, alpha):
    # Train D only: freeze G, unfreeze D.
    requires_grad(self.generator, False)
    requires_grad(self.discriminator, True)

    # The R1 penalty below needs gradients w.r.t. the real images.
    real.requires_grad = True
    self.optimizer_d.zero_grad()

    # Non-saturating logistic loss on real samples.
    d_real = self.discriminator(real, alpha=alpha)
    loss_real = F.softplus(-d_real).mean()
    with amp.scale_loss(loss_real, self.optimizer_d, loss_id=1) as scaled_loss_real:
        # retain_graph: the graph is reused below for the R1 penalty.
        scaled_loss_real.backward(retain_graph=True)

    # R1 gradient penalty: (gamma / 2) * E[ ||grad D(real)||^2 ] with gamma = 10.
    grad_real = grad(outputs=d_real.sum(), inputs=real, create_graph=True)[0]
    grad_penalty = (grad_real.view(grad_real.size(0), -1).norm(2, dim=1) ** 2).mean()
    grad_penalty = 10 / 2 * grad_penalty
    with amp.scale_loss(grad_penalty, self.optimizer_d, loss_id=1) as scaled_grad_penalty:
        scaled_grad_penalty.backward()

    # Mixing regularization: 90% of batches use two latent codes.
    if random.random() < 0.9:
        z = [torch.randn(real.size(0), self.nz).cuda(),
             torch.randn(real.size(0), self.nz).cuda()]
    else:
        z = torch.randn(real.size(0), self.nz).cuda()

    # Non-saturating logistic loss on fake samples.
    fake = self.generator(z, alpha=alpha)
    d_fake = self.discriminator(fake, alpha=alpha)
    loss_fake = F.softplus(d_fake).mean()
    # loss_id=1 keeps all discriminator losses on the same amp loss scale;
    # omitting it (as the original did here) falls back to loss_id=0,
    # which is shared with the generator's loss.
    with amp.scale_loss(loss_fake, self.optimizer_d, loss_id=1) as scaled_loss_fake:
        scaled_loss_fake.backward()

    self.optimizer_d.step()

    # Report the unscaled losses: the amp-scaled tensors include the dynamic
    # loss-scale factor and would distort the logged value.
    loss = loss_real + loss_fake + grad_penalty
    return loss.item(), (d_real.mean().item(), d_fake.mean().item())
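# Both train loops call a `requires_grad` helper that is not defined in this
# excerpt. A minimal sketch, assuming it simply toggles the autograd flag on
# every parameter of the given module (as in common StyleGAN reimplementations):
def requires_grad(model, flag=True):
    # Freeze or unfreeze all parameters so the frozen network neither
    # receives gradients nor spends memory accumulating them.
    for p in model.parameters():
        p.requires_grad = flag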
def generator_trainloop(self, batch_size, alpha):
    # Train G only: unfreeze G, freeze D.
    requires_grad(self.generator, True)
    requires_grad(self.discriminator, False)

    # Mixing regularization: 90% of batches use two latent codes.
    if random.random() < 0.9:
        z = [torch.randn(batch_size, self.nz).cuda(),
             torch.randn(batch_size, self.nz).cuda()]
    else:
        z = torch.randn(batch_size, self.nz).cuda()

    # Non-saturating logistic loss for the generator.
    fake = self.generator(z, alpha=alpha)
    d_fake = self.discriminator(fake, alpha=alpha)
    loss = F.softplus(-d_fake).mean()

    self.optimizer_g.zero_grad()
    with amp.scale_loss(loss, self.optimizer_g, loss_id=0) as scaled_loss:
        scaled_loss.backward()
    self.optimizer_g.step()

    return loss.item()
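# For context, a hedged sketch of how these two methods might be driven.
# Everything below is an assumption for illustration: the loader, the
# iteration budget, and the alpha schedule (alpha fades the newest
# resolution in from 0 to 1 during progressive growing) are hypothetical.
def train(self, loader, total_iters=10_000, fade_iters=5_000):
    for it, real in enumerate(loader):
        real = real.cuda()
        # Hypothetical linear fade-in for the current resolution stage.
        alpha = min(1.0, it / fade_iters)
        d_loss, (d_real, d_fake) = self.discriminator_trainloop(real, alpha)
        g_loss = self.generator_trainloop(real.size(0), alpha)
        if it % 100 == 0:
            print(f'{it}: d={d_loss:.3f} g={g_loss:.3f} '
                  f'D(real)={d_real:.3f} D(fake)={d_fake:.3f}')
        if it >= total_iters:
            break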