def __init__(self, hparams):
    super(MixtureGaussianLoss, self).__init__()
    self.quantize_channels = hparams.quantize_channels
    self.log_scale_min = hparams.log_scale_min
    # Elementwise (unreduced) mixture-of-Gaussians negative log-likelihood
    self.mix_gaussian_loss = mix_gaussian_loss(log_scale_min=hparams.log_scale_min, reduce=False)
    # Reduction primitives used when aggregating the per-sample losses
    self.reduce_sum_op = P.ReduceSum()
    self.reduce_mean_op = P.ReduceMean()
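# A minimal reference sketch of the per-sample quantity that mix_gaussian_loss is
# expected to compute, assuming the conventional (B, 3*K, T) parameter layout
# (K mixture logits, K means, K log-scales stacked along the channel axis).
# The layout, the clamping, and the function name are assumptions for illustration,
# not taken from this file.
import math

import torch
import torch.nn.functional as F


def mix_gaussian_nll_sketch(y_hat, y, log_scale_min=-16.0):
    """y_hat: (B, 3*K, T) mixture parameters, y: (B, T, 1) targets in [-1, 1].
    Returns the elementwise negative log-likelihood with shape (B, T, 1)."""
    B, C, T = y_hat.size()
    K = C // 3
    y_hat = y_hat.transpose(1, 2)                      # (B, T, 3*K)
    logit_probs = y_hat[:, :, :K]                      # unnormalized mixture weights
    means = y_hat[:, :, K:2 * K]                       # component means
    log_scales = y_hat[:, :, 2 * K:].clamp(min=log_scale_min)  # component log std-devs
    y = y.expand(-1, -1, K)                            # broadcast target over components
    # log N(y; mean, scale) for each component
    log_probs = -0.5 * math.log(2 * math.pi) - log_scales \
                - 0.5 * ((y - means) * torch.exp(-log_scales)) ** 2
    # weight by softmax(logit_probs) and log-sum-exp over components
    log_probs = log_probs + F.log_softmax(logit_probs, dim=-1)
    return -torch.logsumexp(log_probs, dim=-1, keepdim=True)   # (B, T, 1)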
def test_gaussian_mixture():
    # Sanity-check mix_gaussian_loss / sample_from_mix_gaussian on a real waveform.
    # Requires: numpy as np, librosa, pysptk, torch, and the mixture helpers
    # under test (mix_gaussian_loss, sample_from_mix_gaussian).
    np.random.seed(1234)
    x, sr = librosa.load(pysptk.util.example_audio_file(), sr=None)
    assert sr == 16000

    T = len(x)
    x = x.reshape(1, T, 1)
    y = torch.from_numpy(x).float()
    # 30 channels = 10 mixtures x (logit, mean, log-scale)
    y_hat = torch.rand(1, 30, T).float()
    print(y.shape, y_hat.shape)

    # Reduced (scalar) loss
    loss = mix_gaussian_loss(y_hat, y)
    print(loss)

    # Elementwise loss: one value per target sample
    loss = mix_gaussian_loss(y_hat, y, reduce=False)
    print(loss.size(), y.size())
    assert loss.size() == y.size()

    # Draw a waveform from the predicted mixture
    y = sample_from_mix_gaussian(y_hat)
    print(y.shape)
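# A hedged sketch of how sampling from the predicted mixture could work, assuming
# the same (B, 3*K, T) layout as above: pick a component from the softmax of the
# mixture logits, then draw from that component's Gaussian. This mirrors what
# sample_from_mix_gaussian is expected to do; it is not its actual implementation.
import torch
import torch.nn.functional as F


def sample_from_mix_gaussian_sketch(y_hat, log_scale_min=-16.0):
    """y_hat: (B, 3*K, T) mixture parameters. Returns samples of shape (B, T)."""
    B, C, T = y_hat.size()
    K = C // 3
    y_hat = y_hat.transpose(1, 2)                      # (B, T, 3*K)
    logit_probs = y_hat[:, :, :K]
    means = y_hat[:, :, K:2 * K]
    log_scales = y_hat[:, :, 2 * K:].clamp(min=log_scale_min)
    # Sample one component index per time step from the categorical mixture weights
    component = torch.distributions.Categorical(logits=logit_probs).sample()  # (B, T)
    onehot = F.one_hot(component, K).float()           # (B, T, K)
    mean = (means * onehot).sum(dim=-1)                # (B, T)
    scale = (torch.exp(log_scales) * onehot).sum(dim=-1)
    return mean + scale * torch.randn_like(mean)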
def forward(self, input, target, lengths=None, mask=None, max_len=None):
    if lengths is None and mask is None:
        raise RuntimeError("Should provide either lengths or mask")

    # Build a (B, T, 1) mask from per-utterance lengths if none was given
    if mask is None:
        mask = sequence_mask(lengths, max_len).unsqueeze(-1)

    # (B, T, 1) -> same shape as target
    mask_ = mask.expand_as(target)

    losses = mix_gaussian_loss(
        input, target, log_scale_min=hparams.log_scale_min, reduce=False)
    assert losses.size() == target.size()

    # Average only over the unmasked (non-padded) time steps
    return ((losses * mask_).sum()) / mask_.sum()
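# Brief usage sketch for the masked forward above. The constructor call, the class
# it is attached to, and the shapes are placeholders (the class definition and
# hparams are not shown here); only the forward signature
# (input, target, lengths=..., max_len=...) is taken from the code above.
import torch

criterion = MixtureGaussianLoss(hparams)            # assumes the __init__ shown earlier
y_hat = torch.rand(2, 30, 16000)                    # (B, 3*K, T) network output, K = 10
y = torch.rand(2, 16000, 1) * 2 - 1                 # (B, T, 1) target waveform in [-1, 1]
lengths = torch.LongTensor([16000, 12000])          # true (unpadded) length per utterance
loss = criterion(y_hat, y, lengths=lengths, max_len=16000)  # padded steps excluded via mask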