def test_mean(test_case):
    input = flow.Tensor(np.random.randn(2, 3), dtype=flow.float32)
    of_out = flow.mean(input, dim=1)
    np_out = np.mean(input.numpy(), axis=1)
    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))

    input = flow.Tensor(np.random.randn(2, 3), dtype=flow.float32)
    of_out = flow.mean(input, dim=0)
    np_out = np.mean(input.numpy(), axis=0)
    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))
def _test_mean(test_case, shape, device):
    input = flow.tensor(
        np.random.randn(*shape), dtype=flow.float32, device=flow.device(device)
    )
    of_out = flow.mean(input, dim=1)
    np_out = np.mean(input.numpy(), axis=1)
    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 0.0001, 0.0001))

    input = flow.tensor(
        np.random.randn(*shape), dtype=flow.float32, device=flow.device(device)
    )
    of_out = flow.mean(input, dim=0)
    np_out = np.mean(input.numpy(), axis=0)
    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 0.0001, 0.0001))
def forward(self, x):
    """
    x: N x C x T
    """
    if x.dim() != 3:
        raise RuntimeError(
            "{} accepts 3D tensor as input".format(self.__class__.__name__)
        )
    # N x 1 x 1
    mean = flow.mean(x, (1, 2), keepdim=True)
    var = flow.mean((x - mean) ** 2, (1, 2), keepdim=True)
    # N x C x T
    if self.elementwise_affine:
        x = self.gamma * (x - mean) / flow.sqrt(var + self.eps) + self.beta
    else:
        x = (x - mean) / flow.sqrt(var + self.eps)
    return x
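# A quick sanity check of the normalization math above (a sketch, not part
# of the original code): subtracting the per-sample mean over (C, T) and
# dividing by the standard deviation should leave each sample with zero
# mean over those axes.
import numpy as np
import oneflow as flow

x = flow.randn(2, 4, 100)  # N x C x T
mean = flow.mean(x, (1, 2), keepdim=True)
var = flow.mean((x - mean) ** 2, (1, 2), keepdim=True)
y = (x - mean) / flow.sqrt(var + 1e-05)
assert np.allclose(flow.mean(y, (1, 2)).numpy(), 0.0, atol=1e-4)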
def compare_loss(device_type, dim, reduction, cls, data_generator):
    x, y, x1, y1 = data_generator(dim, device_type, *get_sbp(device_type))
    reduce_loss_func = cls(reduction=reduction).to(device_type)
    none_loss_func = cls(reduction="none").to(device_type)
    loss_mean = reduce_loss_func(x, y)
    loss_none = (
        flow.mean(none_loss_func(x1, y1))
        if reduction == "mean"
        else flow.sum(none_loss_func(x1, y1))
    )
    loss_mean.backward()
    loss_none.backward()
    assert np.allclose(
        loss_none.to_local().numpy(),
        loss_mean.to_local().numpy(),
        rtol=1e-05,
        atol=1e-05,
    )
    assert np.allclose(
        loss_none.numpy(),
        loss_mean.numpy(),
        rtol=1e-05,
        atol=1e-05,
    )
    assert np.allclose(
        x.grad.to_local().numpy(),
        x1.grad.to_local().numpy(),
        rtol=1e-05,
        atol=1e-05,
    )
def _test_mean_negative_dim(test_case, shape, device):
    if len(shape) < 4:
        shape = (2, 3, 4, 5)
    input = flow.tensor(
        np.random.randn(*shape), dtype=flow.float32, device=flow.device(device)
    )
    of_out = flow.mean(input, dim=(-2, -1, -3))
    np_out = np.mean(input.numpy(), axis=(-2, -1, -3))
    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 0.0001, 0.0001))
def forward(self, input, target):
    prob, out = self._op(input, target, depth=input.shape[-1])
    if self.reduction == "mean":
        return flow.mean(out)
    elif self.reduction == "sum":
        return flow.sum(out)
    else:
        return out
def _test_mean_backward(test_case, shape, device):
    np_arr = np.random.randn(*shape)
    x = flow.tensor(
        np_arr, dtype=flow.float32, device=flow.device(device), requires_grad=True
    )
    y = flow.mean(x, dim=1)
    z = y.sum()
    z.backward()
    # d(sum(mean(x, dim=1)))/dx is 1/size(1) for every element of x
    np_grad = np.zeros(shape=np_arr.shape)
    np_grad[:] = 1 / x.size(1)
    test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-05, 1e-05))
def forward(self, out_labels, out_images, target_images):
    # Adversarial Loss
    adversarial_loss = flow.mean(1 - out_labels)
    # Perception Loss
    perception_loss = self.mse_loss(
        self.loss_network(out_images), self.loss_network(target_images)
    )
    # Image Loss
    image_loss = self.mse_loss(out_images, target_images)
    # TV Loss
    tv_loss = self.tv_loss(out_images)
    return (
        image_loss
        + 0.001 * adversarial_loss
        + 0.006 * perception_loss
        + 2e-8 * tv_loss
    )
def gradient_penalty(self, y, x):
    """Compute gradient penalty: (L2_norm(dy/dx) - 1)**2."""
    weight = flow.ones(y.size()).to(self.device)
    dydx = flow.autograd.grad(
        outputs=y,
        inputs=x,
        out_grads=weight,
        retain_graph=True,
        create_graph=True,
    )[0]
    dydx = dydx.view(dydx.size(0), -1)
    dydx_l2norm = flow.sqrt(flow.sum(dydx ** 2, dim=1))
    return flow.mean((dydx_l2norm - 1) ** 2)
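# A self-contained sketch of how a gradient penalty like the one above is
# typically used in WGAN-GP style training. The toy critic, shapes, and the
# lambda value of 10.0 are assumptions for illustration, not part of the
# original code.
import oneflow as flow

critic = flow.nn.Linear(8, 1)
real = flow.randn(4, 8)
fake = flow.randn(4, 8)
alpha = flow.rand(4, 1)
# random points on the lines between real and fake samples
x_hat = (alpha * real + (1 - alpha) * fake).detach()
x_hat.requires_grad = True
out = critic(x_hat)
weight = flow.ones(out.size())
dydx = flow.autograd.grad(
    outputs=out, inputs=x_hat, out_grads=weight, retain_graph=True, create_graph=True
)[0]
gp = flow.mean((flow.sqrt(flow.sum(dydx ** 2, dim=1)) - 1) ** 2)
loss_gp = 10.0 * gp  # lambda_gp = 10 is the conventional choice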
def forward(self):
    (
        labels,
        dense_fields,
        wide_sparse_fields,
        deep_sparse_fields,
    ) = self.train_dataloader()
    labels = labels.to("cuda").to(dtype=flow.float32)
    dense_fields = dense_fields.to("cuda")
    wide_sparse_fields = wide_sparse_fields.to("cuda")
    deep_sparse_fields = deep_sparse_fields.to("cuda")
    predicts = self.wdl_module(dense_fields, wide_sparse_fields, deep_sparse_fields)
    loss = self.loss(predicts, labels)
    reduce_loss = flow.mean(loss)
    return reduce_loss
def sisnr(self, x, s, eps=1e-8):
    """
    Arguments:
        x: separated signal, N x S tensor
        s: reference signal, N x S tensor
    Return:
        sisnr: N tensor
    """

    def l2norm(mat, keepdim=False):
        return flow.linalg.norm(mat, dim=-1, keepdim=keepdim)

    if x.shape != s.shape:
        raise RuntimeError(
            "Dimension mismatch when calculating si-snr, {} vs {}".format(
                x.shape, s.shape
            )
        )
    # zero-mean both signals along the time axis
    x_zm = x - flow.mean(x, dim=-1, keepdim=True)
    s_zm = s - flow.mean(s, dim=-1, keepdim=True)
    # project x onto the reference to get the target component t
    t = (
        flow.sum(x_zm * s_zm, dim=-1, keepdim=True)
        * s_zm
        / (l2norm(s_zm, keepdim=True) ** 2 + eps)
    )
    # 20 * log10(.) expressed via the natural log: ln(10) ~= 2.3025851
    res = 20 * flow.log(eps + l2norm(t) / (l2norm(x_zm - t) + eps)) / 2.3025851
    return res
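# For reference, the quantity computed above is the scale-invariant SNR
#     SI-SNR(x, s) = 20 * log10(||t|| / ||x_zm - t||),
#     t = <x_zm, s_zm> / ||s_zm||^2 * s_zm,
# where x_zm and s_zm are the zero-mean signals. The division by 2.3025851
# (ln 10) converts flow.log, a natural logarithm, into log10. Scaling x by
# any nonzero constant scales both t and the residual x_zm - t by the same
# factor, so the ratio, and hence the metric, is unchanged; that is what
# makes it scale-invariant.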
def build(self):
    (
        labels,
        dense_fields,
        wide_sparse_fields,
        deep_sparse_fields,
    ) = self.dataloader()
    labels = labels.to("cuda").to(dtype=flow.float32)
    dense_fields = dense_fields.to("cuda")
    wide_sparse_fields = wide_sparse_fields.to("cuda")
    deep_sparse_fields = deep_sparse_fields.to("cuda")
    logits = self.module(dense_fields, wide_sparse_fields, deep_sparse_fields)
    loss = self.bce_loss(logits, labels)
    reduce_loss = flow.mean(loss)
    reduce_loss.backward()
    return reduce_loss
def forward(self, input, target):
    input_shape_len = len(input.shape)
    if input_shape_len == 4:
        b, c, h, w = input.shape[0], input.shape[1], input.shape[2], input.shape[3]
        # N x C x H x W -> (N*H*W) x C so the loss op sees a 2D input
        input = flow.tmp.transpose(input, (0, 2, 3, 1))
        input = flow.tmp.reshape(input, shape=[-1, input.shape[3]])
        target = flow.tmp.flatten(target)
    prob, out = self._op(input, target, depth=input.shape[-1])
    if self.reduction == "mean":
        return flow.mean(out)
    elif self.reduction == "sum":
        return flow.sum(out)
    else:
        if input_shape_len == 4:
            out = flow.tmp.reshape(out, (b, h, w))
        return out
def forward(self, input, target, weight=None):
    assert (
        input.shape == target.shape
    ), "The Input shape must be the same as Target shape"
    _cross_entropy_loss = flow.negative(
        target * flow.log(input) + (1 - target) * flow.log(1 - input)
    )
    if weight is not None:
        assert (
            weight.shape == input.shape
        ), "The weight shape must be the same as Input shape"
        _weighted_loss = weight * _cross_entropy_loss
    else:
        _weighted_loss = _cross_entropy_loss
    if self.reduction == "mean":
        return flow.mean(_weighted_loss)
    elif self.reduction == "sum":
        return flow.sum(_weighted_loss)
    else:
        return _weighted_loss
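# A quick cross-check of the manual binary cross entropy above against
# OneFlow's built-in BCELoss (a sketch, not part of the original code):
import numpy as np
import oneflow as flow

input = flow.tensor(np.random.uniform(0.01, 0.99, (4, 3)), dtype=flow.float32)
target = flow.tensor(np.random.randint(0, 2, (4, 3)), dtype=flow.float32)
manual = flow.mean(
    flow.negative(target * flow.log(input) + (1 - target) * flow.log(1 - input))
)
builtin = flow.nn.BCELoss(reduction="mean")(input, target)
assert np.allclose(manual.numpy(), builtin.numpy(), rtol=1e-4, atol=1e-4)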
def forward(self, input: Tensor) -> Tensor:
    assert (
        len(input.shape) >= 3
    ), "The number of dimensions of the input tensor must be larger than 2"
    assert (
        input.shape[1] == self.num_channels
    ), "The number of channels of the input tensor must equal num_channels"
    origin_shape = input.shape
    # N x C x ... -> N x num_groups x (C/num_groups * spatial) for per-group stats
    reshape_to_1d = flow.reshape(input, shape=[origin_shape[0], self.num_groups, -1])
    mean = flow.mean(reshape_to_1d, dim=2, keepdim=True)
    variance = flow.var(reshape_to_1d, dim=2, unbiased=False, keepdim=True)
    normalized = (reshape_to_1d - mean) / flow.sqrt(variance + self.eps)
    normalized = flow.reshape(
        normalized, shape=[origin_shape[0], self.num_channels, -1]
    )
    if self.weight is not None:
        normalized = normalized * self.weight.reshape(1, self.num_channels, 1)
    if self.bias is not None:
        normalized = normalized + self.bias.reshape(1, self.num_channels, 1)
    res = flow.reshape(normalized, shape=tuple(input.shape))
    return res
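# A quick cross-check of the manual group normalization above against
# OneFlow's built-in GroupNorm with affine disabled (a sketch, not part of
# the original code):
import numpy as np
import oneflow as flow

x = flow.randn(2, 6, 8, 8)
gn = flow.nn.GroupNorm(num_groups=3, num_channels=6, affine=False)
reshaped = flow.reshape(x, shape=[2, 3, -1])
mean = flow.mean(reshaped, dim=2, keepdim=True)
var = flow.var(reshaped, dim=2, unbiased=False, keepdim=True)
manual = flow.reshape(
    (reshaped - mean) / flow.sqrt(var + 1e-05), shape=tuple(x.shape)
)
assert np.allclose(manual.numpy(), gn(x).numpy(), rtol=1e-4, atol=1e-4)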
def ae_step(self, data, lambda_kl):
    x = cc(data)
    mu, log_sigma, emb, dec = self.model(x)
    criterion = nn.L1Loss()
    loss_rec = criterion(dec, x)
    # KL divergence from the standard normal prior (log_sigma is the log variance)
    loss_kl = 0.5 * flow.mean(
        flow.exp(log_sigma) + flow.mul(mu, mu) - 1 - log_sigma
    )
    loss = self.config["lambda"]["lambda_rec"] * loss_rec + lambda_kl * loss_kl
    self.opt.zero_grad()
    loss.backward()
    grad_norm = flow.nn.utils.clip_grad_norm_(
        self.model.parameters(),
        max_norm=self.config["optimizer"]["grad_norm"],
    )
    self.opt.step()
    meta = {
        "loss_rec": loss_rec.item(),
        "loss_kl": loss_kl.item(),
        "loss": loss.item(),
        "grad_norm": grad_norm,
    }
    return meta
def forward(self, input, target):
    assert len(input.shape) == 2 or len(input.shape) == 4
    input = flow.negative(input)
    if len(input.shape) == 2:
        res = self.nllloss_1d(input, target)
    elif len(input.shape) == 4:
        b, c, h, w = input.shape[0], input.shape[1], input.shape[2], input.shape[3]
        input = flow.tmp.transpose(input, (0, 2, 3, 1))
        input = flow.tmp.reshape(input, shape=[-1, input.shape[3]])
        target = flow.tmp.flatten(target)
        res = self.nllloss_1d(input, target)
        res = flow.tmp.reshape(res, (b, h, w))
    else:
        raise NotImplementedError
    if self.reduction == "none":
        return res
    elif self.reduction == "sum":
        return flow.sum(res)
    else:
        return flow.mean(res)
def forward(self, logits, label):
    loss = flow._C.sparse_softmax_cross_entropy(logits, label)
    loss = flow.mean(loss)
    return loss
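# A minimal usage sketch for the forward above (the shapes and class count
# are assumptions for illustration): logits are N x C scores and label holds
# N integer class indices.
import oneflow as flow

logits = flow.randn(4, 10)
label = flow.tensor([1, 3, 5, 7], dtype=flow.int32)
loss = flow.mean(flow._C.sparse_softmax_cross_entropy(logits, label))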
def train(self):
    # Training Begins
    for epoch in range(self.start_epoch, self.num_epochs):
        start_time_epoch = time.time()

        # Constants
        cycle_loss_lambda = 10
        identity_loss_lambda = 5

        # Preparing Dataset
        n_samples = len(self.dataset_A)
        dataset = trainingDataset(
            datasetA=self.dataset_A, datasetB=self.dataset_B, n_frames=128
        )
        train_loader = flow.utils.data.DataLoader(
            dataset=dataset,
            batch_size=self.mini_batch_size,
            shuffle=True,
            drop_last=False,
        )
        pbar = tqdm(enumerate(train_loader))
        for i, (real_A, real_B) in enumerate(train_loader):
            num_iterations = (n_samples // self.mini_batch_size) * epoch + i
            if num_iterations > 10000:
                identity_loss_lambda = 0
            if num_iterations > self.start_decay:
                self.adjust_lr_rate(self.generator_optimizer, name="generator")
                self.adjust_lr_rate(self.discriminator_optimizer, name="discriminator")

            real_A = real_A.to(self.device).float()
            real_B = real_B.to(self.device).float()

            # Generator Loss function
            fake_B = self.generator_A2B(real_A)
            cycle_A = self.generator_B2A(fake_B)
            fake_A = self.generator_B2A(real_B)
            cycle_B = self.generator_A2B(fake_A)
            identity_A = self.generator_B2A(real_A)
            identity_B = self.generator_A2B(real_B)
            d_fake_A = self.discriminator_A(fake_A)
            d_fake_B = self.discriminator_B(fake_B)

            # for the second step adversarial loss
            d_fake_cycle_A = self.discriminator_A(cycle_A)
            d_fake_cycle_B = self.discriminator_B(cycle_B)

            # Generator Cycle loss
            cycleLoss = flow.mean(flow.abs(real_A - cycle_A)) + flow.mean(
                flow.abs(real_B - cycle_B)
            )

            # Generator Identity Loss
            identityLoss = flow.mean(flow.abs(real_A - identity_A)) + flow.mean(
                flow.abs(real_B - identity_B)
            )

            # Generator Loss
            generator_loss_A2B = flow.mean((1 - d_fake_B) ** 2)
            generator_loss_B2A = flow.mean((1 - d_fake_A) ** 2)

            # Total Generator Loss
            generator_loss = (
                generator_loss_A2B
                + generator_loss_B2A
                + cycle_loss_lambda * cycleLoss
                + identity_loss_lambda * identityLoss
            )
            self.generator_loss_store.append(generator_loss.item())

            # Backprop for Generator
            self.reset_grad()
            generator_loss.backward()
            self.generator_optimizer.step()

            # Discriminator Feed Forward
            d_real_A = self.discriminator_A(real_A)
            d_real_B = self.discriminator_B(real_B)
            generated_A = self.generator_B2A(real_B)
            d_fake_A = self.discriminator_A(generated_A)

            # for the second step adversarial loss
            cycled_B = self.generator_A2B(generated_A)
            d_cycled_B = self.discriminator_B(cycled_B)
            generated_B = self.generator_A2B(real_A)
            d_fake_B = self.discriminator_B(generated_B)

            # for the second step adversarial loss
            cycled_A = self.generator_B2A(generated_B)
            d_cycled_A = self.discriminator_A(cycled_A)

            # Loss Functions
            d_loss_A_real = flow.mean((1 - d_real_A) ** 2)
            d_loss_A_fake = flow.mean((0 - d_fake_A) ** 2)
            d_loss_A = (d_loss_A_real + d_loss_A_fake) / 2.0
            d_loss_B_real = flow.mean((1 - d_real_B) ** 2)
            d_loss_B_fake = flow.mean((0 - d_fake_B) ** 2)
            d_loss_B = (d_loss_B_real + d_loss_B_fake) / 2.0

            # the second step adversarial loss
            d_loss_A_cycled = flow.mean((0 - d_cycled_A) ** 2)
            d_loss_B_cycled = flow.mean((0 - d_cycled_B) ** 2)
            d_loss_A_2nd = (d_loss_A_real + d_loss_A_cycled) / 2.0
            d_loss_B_2nd = (d_loss_B_real + d_loss_B_cycled) / 2.0

            # Final Loss for discriminator with the second step adversarial loss
            d_loss = (d_loss_A + d_loss_B) / 2.0 + (d_loss_A_2nd + d_loss_B_2nd) / 2.0
            self.discriminator_loss_store.append(d_loss.item())

            # Backprop for Discriminator
            self.reset_grad()
            d_loss.backward()
            self.discriminator_optimizer.step()

            if (i + 1) % 2 == 0:
                pbar.set_description(
                    "Iter:{} Generator Loss:{:.4f} Discriminator Loss:{:.4f} GA2B:{:.4f} GB2A:{:.4f} G_id:{:.4f} G_cyc:{:.4f} D_A:{:.4f} D_B:{:.4f}"
                    .format(
                        num_iterations,
                        generator_loss.item(),
                        d_loss.item(),
                        generator_loss_A2B,
                        generator_loss_B2A,
                        identityLoss,
                        cycleLoss,
                        d_loss_A,
                        d_loss_B,
                    )
                )

        if epoch % 2000 == 0 and epoch != 0:
            end_time = time.time()
            store_to_file = (
                "Epoch: {} Generator Loss: {:.4f} Discriminator Loss: {}, Time: {:.2f}\n\n"
                .format(
                    epoch,
                    generator_loss.item(),
                    d_loss.item(),
                    end_time - start_time_epoch,
                )
            )
            self.store_to_file(store_to_file)
            print(
                "Epoch: {} Generator Loss: {:.4f} Discriminator Loss: {}, Time: {:.2f}\n\n"
                .format(
                    epoch,
                    generator_loss.item(),
                    d_loss.item(),
                    end_time - start_time_epoch,
                )
            )

            # Save the Entire model
            print("Saving model Checkpoint ......")
            store_to_file = "Saving model Checkpoint ......"
            self.store_to_file(store_to_file)
            self.saveModelCheckPoint(epoch, self.modelCheckpoint)
            print("Model Saved!")

        if epoch % 2000 == 0 and epoch != 0:
            # Validation Set
            validation_start_time = time.time()
            self.validation_for_A_dir()
            self.validation_for_B_dir()
            validation_end_time = time.time()
            store_to_file = "Time taken for validation Set: {}".format(
                validation_end_time - validation_start_time
            )
            self.store_to_file(store_to_file)
            print(
                "Time taken for validation Set: {}".format(
                    validation_end_time - validation_start_time
                )
            )
def to_numpy(x, mean=True):
    if mean:
        x = flow.mean(x)
    return x.numpy()
d_loss.backward()
optimizerD.step()
optimizerD.zero_grad()

############################
# (2) Update G network: minimize 1-D(G(z)) + Perception Loss + Image Loss + TV Loss
###########################
fake_img_0 = netG(z)
fake_out_0 = netD(fake_img_0)
g_loss = generator_criterion(fake_out_0, fake_img_0, real_img)
g_loss.backward()
optimizerG.step()
optimizerG.zero_grad()

fake_out = flow.mean(fake_out)
# the D(x) score must come from the discriminator's output on real images,
# computed during the D update above, not from fake_out
real_out = flow.mean(real_out)

# loss for current batch before optimization
running_results["g_loss"] += g_loss.numpy() * batch_size
running_results["d_loss"] += d_loss.numpy() * batch_size
running_results["d_score"] += real_out.numpy() * batch_size
running_results["g_score"] += fake_out.numpy() * batch_size

train_bar.set_description(
    desc="[%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f"
    % (
        epoch,
        NUM_EPOCHS,
        running_results["d_loss"] / running_results["batch_sizes"],
        running_results["g_loss"] / running_results["batch_sizes"],
def train_one_epoch(self, epoch, train_loader):
    self.model.train()
    batch_steps = len(train_loader)
    step_loss = AverageMeter()
    auxiliary_loss = AuxiliaryLossAverageMeter()
    span = 0
    for step, (_, inputs, targets) in enumerate(train_loader):
        if self.ngpu > 0:
            inputs = map_to_cuda(inputs)
            targets = map_to_cuda(targets)
        start = time.time()
        loss, aux_loss = self.model(inputs, targets)
        loss = flow.mean(loss) / self.accum_steps
        loss.backward()
        end = time.time()
        span += end - start
        if self.get_rank() == 0:
            step_loss.update(loss.item() * self.accum_steps, inputs["inputs"].size(0))
            auxiliary_loss.update(aux_loss, self.accum_steps, inputs["inputs"].size(0))
        if self.global_training_step % self.accum_steps == 0:
            if self.local_rank == 0:
                self.mean_loss.update(step_loss.avg)
            grad_norm = flow.nn.utils.clip_grad_norm_(
                self.model.parameters(), self.grad_clip, error_if_nonfinite=False
            )
            if self.grad_noise > 0.0:
                for p in self.model.parameters():
                    if p.requires_grad:
                        noise = flow.tensor(
                            np.random.normal(0, self.grad_noise, p.grad.shape),
                            device=loss.device,
                        )
                        p.grad += noise / self.accum_steps
            if math.isnan(grad_norm.numpy()):
                logging.warning("Grad norm is NAN. DO NOT UPDATE MODEL!")
            else:
                self.scheduler.step()
                self.optimizer.step()
            self.optimizer.zero_grad()
            if (
                self.scheduler.global_step % self.log_interval == 0
                and self.local_rank == 0
            ):
                process = (step + 1) / batch_steps * 100
                print_info = (
                    "-Training-Epoch-%d(%.5f%%), Global Step:%d, lr:%.8f, Loss:%.5f, AvgLoss: %.5f, Run Time:%.3f"
                    % (
                        epoch,
                        process,
                        self.scheduler.global_step,
                        self.scheduler.lr,
                        step_loss.avg,
                        self.mean_loss.mean(),
                        span,
                    )
                )
                print_info += auxiliary_loss.avg_infos
                logger.info(print_info)
                span = 0
                step_loss.reset()
                auxiliary_loss.reset()
        self.global_training_step += 1
        if self.is_debug and step > 30:
            break
    return self.mean_loss.mean()
def _mean(self, dim=[], keepdim=False):
    return flow.mean(self, dim, keepdim)
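# A helper with this shape is typically installed as a method on the Tensor
# class so that `x.mean(...)` dispatches to `flow.mean(x, ...)`. A minimal
# sketch of that registration, assuming plain attribute assignment (the real
# codebase may use its own registration decorator):
import oneflow as flow

flow.Tensor.mean = _mean
x = flow.ones(2, 3)
y = x.mean(dim=1)  # equivalent to flow.mean(x, 1)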
def train(self):
    """Implements the training loop for MaskCycleGAN-VC"""
    for epoch in range(self.start_epoch, self.num_epochs + 1):
        for i, (real_A, mask_A, real_B, mask_B) in enumerate(self.train_dataloader):
            num_iterations = (self.n_samples // self.mini_batch_size) * epoch + i
            if num_iterations > 10000:
                self.identity_loss_lambda = 0
            if num_iterations > self.decay_after:
                self.adjust_lr_rate(self.generator_optimizer, generator=True)
                self.adjust_lr_rate(self.discriminator_optimizer, generator=False)

            real_A = real_A.to(self.device, dtype=flow.float)
            mask_A = mask_A.to(self.device, dtype=flow.float)
            real_B = real_B.to(self.device, dtype=flow.float)
            mask_B = mask_B.to(self.device, dtype=flow.float)

            # Train Generator
            self.generator_A2B.train()
            self.generator_B2A.train()
            self.discriminator_A.eval()
            self.discriminator_B.eval()
            self.discriminator_A2.eval()
            self.discriminator_B2.eval()

            # Generator Feed Forward
            fake_B = self.generator_A2B(real_A, mask_A)
            cycle_A = self.generator_B2A(fake_B, flow.ones_like(fake_B))
            fake_A = self.generator_B2A(real_B, mask_B)
            cycle_B = self.generator_A2B(fake_A, flow.ones_like(fake_A))
            identity_A = self.generator_B2A(real_A, flow.ones_like(real_A))
            identity_B = self.generator_A2B(real_B, flow.ones_like(real_B))
            d_fake_A = self.discriminator_A(fake_A)
            d_fake_B = self.discriminator_B(fake_B)

            # For Two Step Adversarial Loss
            d_fake_cycle_A = self.discriminator_A2(cycle_A)
            d_fake_cycle_B = self.discriminator_B2(cycle_B)

            # Generator Cycle Loss
            cycleLoss = flow.mean(flow.abs(real_A - cycle_A)) + flow.mean(
                flow.abs(real_B - cycle_B)
            )

            # Generator Identity Loss
            identityLoss = flow.mean(flow.abs(real_A - identity_A)) + flow.mean(
                flow.abs(real_B - identity_B)
            )

            # Generator Loss
            g_loss_A2B = flow.mean((1 - d_fake_B) ** 2)
            g_loss_B2A = flow.mean((1 - d_fake_A) ** 2)

            # Generator Two Step Adversarial Loss
            generator_loss_A2B_2nd = flow.mean((1 - d_fake_cycle_B) ** 2)
            generator_loss_B2A_2nd = flow.mean((1 - d_fake_cycle_A) ** 2)

            # Total Generator Loss
            g_loss = (
                g_loss_A2B
                + g_loss_B2A
                + generator_loss_A2B_2nd
                + generator_loss_B2A_2nd
                + self.cycle_loss_lambda * cycleLoss
                + self.identity_loss_lambda * identityLoss
            )

            # Backprop for Generator
            self.reset_grad()
            g_loss.backward()
            self.generator_optimizer.step()

            # Train Discriminator
            self.generator_A2B.eval()
            self.generator_B2A.eval()
            self.discriminator_A.train()
            self.discriminator_B.train()
            self.discriminator_A2.train()
            self.discriminator_B2.train()

            # Discriminator Feed Forward
            d_real_A = self.discriminator_A(real_A)
            d_real_B = self.discriminator_B(real_B)
            d_real_A2 = self.discriminator_A2(real_A)
            d_real_B2 = self.discriminator_B2(real_B)
            generated_A = self.generator_B2A(real_B, mask_B)
            d_fake_A = self.discriminator_A(generated_A)

            # For Two Step Adversarial Loss A->B
            cycled_B = self.generator_A2B(generated_A, flow.ones_like(generated_A))
            d_cycled_B = self.discriminator_B2(cycled_B)
            generated_B = self.generator_A2B(real_A, mask_A)
            d_fake_B = self.discriminator_B(generated_B)

            # For Two Step Adversarial Loss B->A
            cycled_A = self.generator_B2A(generated_B, flow.ones_like(generated_B))
            d_cycled_A = self.discriminator_A2(cycled_A)

            # Loss Functions
            d_loss_A_real = flow.mean((1 - d_real_A) ** 2)
            d_loss_A_fake = flow.mean((0 - d_fake_A) ** 2)
            d_loss_A = (d_loss_A_real + d_loss_A_fake) / 2.0
            d_loss_B_real = flow.mean((1 - d_real_B) ** 2)
            d_loss_B_fake = flow.mean((0 - d_fake_B) ** 2)
            d_loss_B = (d_loss_B_real + d_loss_B_fake) / 2.0

            # Two Step Adversarial Loss
            d_loss_A_cycled = flow.mean((0 - d_cycled_A) ** 2)
            d_loss_B_cycled = flow.mean((0 - d_cycled_B) ** 2)
            d_loss_A2_real = flow.mean((1 - d_real_A2) ** 2)
            d_loss_B2_real = flow.mean((1 - d_real_B2) ** 2)
            d_loss_A_2nd = (d_loss_A2_real + d_loss_A_cycled) / 2.0
            d_loss_B_2nd = (d_loss_B2_real + d_loss_B_cycled) / 2.0

            # Final Loss for discriminator with the Two Step Adversarial Loss
            d_loss = (d_loss_A + d_loss_B) / 2.0 + (d_loss_A_2nd + d_loss_B_2nd) / 2.0

            # Backprop for Discriminator
            self.reset_grad()
            d_loss.backward()
            self.discriminator_optimizer.step()

            if (i + 1) % 2 == 0:
                print(
                    "Iter:{} Generator Loss:{:.4f} Discriminator Loss:{:.4f} GA2B:{:.4f} GB2A:{:.4f} G_id:{:.4f} G_cyc:{:.4f} D_A:{:.4f} D_B:{:.4f}"
                    .format(
                        num_iterations,
                        g_loss.item(),
                        d_loss.item(),
                        g_loss_A2B,
                        g_loss_B2A,
                        identityLoss,
                        cycleLoss,
                        d_loss_A,
                        d_loss_B,
                    )
                )

        # Save each model checkpoint and validation
        if epoch % self.epochs_per_save == 0 and epoch != 0:
            self.saveModelCheckPoint(epoch, PATH="model_checkpoint")
            self.validation_for_A_dir()
            self.validation_for_B_dir()