def forward(self, p_vects, q_vects, p_frames_mask, q_frames_mask):
        '''
        Add the spectral noise to the p/q MFCC vectors, mask the result and
        pass it through the trained model.

        p/q_vects = [num_speakers X num_feats X max_num_mfcc_frames x mfcc_dim]
        p/q_frames_mask = [num_speakers X num_feats X max_num_mfcc_frames x mfcc_dim]
                          -> The associated 0s and 1s mask of p/q_lengths
        n.b. mfcc_dim = 13 usually (using c0 for energy instead of log-energy)
             num_feats = 46*47*0.5 = 1128 usually
             max_num_mfcc_frames = the maximum number of frames associated
             with a particular phone for any speaker -> often set to 4000

        Returns the output of the trained model on the attacked vectors.
        '''
        # The noise is stored as its log, so the exponential keeps it positive
        noise = torch.exp(self.noise_root)

        # Pad to spectral dimension.
        # new_zeros allocates the padding with the dtype and device of
        # p_vects, so the concatenation also works for inputs on a GPU.
        padding = p_vects.new_zeros(
            (p_vects.size(0), p_vects.size(1), p_vects.size(2),
             self.spectral_dim - self.mfcc_dim))
        padded_p_vects = torch.cat((p_vects, padding), 3)
        padded_q_vects = torch.cat((q_vects, padding), 3)

        # Apply inverse dct
        log_spectral_p = dct.idct(padded_p_vects)
        log_spectral_q = dct.idct(padded_q_vects)

        # Apply inverse log
        spectral_p = torch.exp(log_spectral_p)
        spectral_q = torch.exp(log_spectral_q)

        # Restructure noise: insert two axes and tile over feats and frames
        noise_struct = noise.unsqueeze(1).unsqueeze(1).repeat(
            1, p_vects.size(1), p_vects.size(2), 1)

        # Add the adversarial attack noise
        attacked_spectral_p = spectral_p + noise_struct
        attacked_spectral_q = spectral_q + noise_struct

        # Apply the log
        attacked_log_spectral_p = torch.log(attacked_spectral_p)
        attacked_log_spectral_q = torch.log(attacked_spectral_q)

        # Apply the dct
        attacked_padded_p = dct.dct(attacked_log_spectral_p)
        attacked_padded_q = dct.dct(attacked_log_spectral_q)

        # Truncate to mfcc dimension
        p_vects_attacked = torch.narrow(attacked_padded_p, 3, 0, self.mfcc_dim)
        q_vects_attacked = torch.narrow(attacked_padded_q, 3, 0, self.mfcc_dim)

        # Apply mask of zeros/ones, to ensure spectral noise only applied up to p/q lengths
        p_vects_masked = p_vects_attacked * p_frames_mask
        q_vects_masked = q_vects_attacked * q_frames_mask

        # Pass through trained model
        # NOTE: torch.load unpickles the file, so trained_model_path must point
        # to a trusted checkpoint. The model is re-read on every call.
        trained_model = torch.load(self.trained_model_path)
        trained_model.eval()
        y = trained_model(p_vects_masked, q_vects_masked, p_frames_mask,
                          q_frames_mask)

        return y
    def attack_cov(self, means, covs, means_atck, noise):
        '''
        Return attacked covariance matrices; only the diagonal is kept.

        This update is derived from a log normal
        approximation of a shifted log normal distribution

        means      = 3-D tensor of unattacked means (padded along dim 2)
        covs       = unattacked covariances (transformed over last two dims)
        means_atck = attacked means, same last dimension as means
                     (presumably the output of attack_mean -- confirm)
        noise      = noise added in the exponentiated domain
        '''
        # Inverse DCT over the last axis, then over the second-to-last axis
        covs_idct_last = dct.idct(covs)
        covs_idct = torch.transpose(
            dct.idct(torch.transpose(covs_idct_last, -1, -2)), -1, -2)
        variances = torch.diagonal(covs_idct, offset=0, dim1=-2, dim2=-1)

        log_term = dct.idct(means) + (variances * 0.5)
        padding = torch.zeros(means.size(0), means.size(1), self.spectral_dim -
                              self.mfcc_dim).to(self.device)
        padded_log_term = torch.cat((log_term, padding), 2)

        shifted = torch.exp(padded_log_term) + noise
        doubled_log = torch.log(shifted) * 2
        doubled_log_trunc = torch.narrow(doubled_log, 2, 0, self.mfcc_dim)
        attacked_vars = doubled_log_trunc - (2 * dct.idct(means_atck))

        # Back through the DCT over both matrix axes
        attacked_diag = torch.diag_embed(attacked_vars)
        attacked_dct_last = dct.dct(attacked_diag)
        attacked_dct = torch.transpose(
            dct.dct(torch.transpose(attacked_dct_last, -1, -2)), -1, -2)

        # Make sure no negative diagonal values (diagonal is floored at 1.0).
        # Have to neglect off-diagonal terms to ensure covariance matrices
        # are positive definite.
        attacked_covs = torch.diag_embed(
            torch.clamp(
                torch.diagonal(attacked_dct, offset=0, dim1=-2, dim2=-1),
                min=1.0))

        # Size the jitter from the matrices themselves instead of assuming 13
        jitter = 1e-2 * torch.eye(attacked_covs.size(-1)).to(self.device)
        noised = attacked_covs + jitter

        return noised
Exemplo n.º 3
0
    def forward(self, p_means, p_covariances, q_means, q_covariances,
                num_phones_mask):
        '''
        Attack the p/q means with a first order Taylor approximation of the
        spectral noise and pass the result through the trained model.

        p/q_means = [num_speakers X num_feats X mfcc_dim]
        p/q_covariances = [num_speakers X num_feats X mfcc_dim X mfcc_dim]
        num_phones_mask = [num_speakers X num_feats],
        with a 0 corresponding to position that should be -1 (no phones observed)
        and a 1 everywhere else.
        n.b. num_feats = 46*47*0.5 = 1128 usually, where 47 = num_phones

        Returns the output of the trained model; the covariances are passed
        on unchanged.
        '''
        noise = torch.exp(self.noise_root)

        # Need to add spectral noise with first order Taylor approximation
        # Pad to spectral dimension.
        # new_zeros allocates the padding with the dtype and device of
        # p_means, so the concatenation also works for inputs on a GPU.
        padding = p_means.new_zeros(
            (p_means.size(0), p_means.size(1),
             self.spectral_dim - self.mfcc_dim))
        padded_p_means = torch.cat((p_means, padding), 2)
        padded_q_means = torch.cat((q_means, padding), 2)

        # Apply inverse dct
        log_spectral_p = dct.idct(padded_p_means)
        log_spectral_q = dct.idct(padded_q_means)

        # Apply inverse log
        spectral_p = torch.exp(log_spectral_p)
        spectral_q = torch.exp(log_spectral_q)

        # Hadamard division with the spectral noise
        # (log(s + n) ~= log(s) + n / s to first order in n)
        attacked_spectral_p = noise / spectral_p
        attacked_spectral_q = noise / spectral_q

        # Apply the dct
        attacked_padded_p = dct.dct(attacked_spectral_p)
        attacked_padded_q = dct.dct(attacked_spectral_q)

        # Truncate to mfcc dimension
        p_means_attacked_second_term = torch.narrow(attacked_padded_p, 2, 0,
                                                    self.mfcc_dim)
        q_means_attacked_second_term = torch.narrow(attacked_padded_q, 2, 0,
                                                    self.mfcc_dim)

        # Combine Taylor expansion
        p_means_attacked = p_means + p_means_attacked_second_term
        q_means_attacked = q_means + q_means_attacked_second_term

        # Pass through trained model
        # NOTE: torch.load unpickles the file, so trained_model_path must point
        # to a trusted checkpoint. The model is re-read on every call.
        trained_model = torch.load(self.trained_model_path)
        trained_model.eval()
        y = trained_model(p_means_attacked, p_covariances, q_means_attacked,
                          q_covariances, num_phones_mask)

        return y
Exemplo n.º 4
0
    def forward(self, p_means, p_covariances, q_means, q_covariances,
                num_phones_mask):
        '''
        Add the spectral noise to the p/q means and pass the attacked means
        through the trained model.

        p/q_means = [num_speakers X num_feats X mfcc_dim]
        p/q_covariances = [num_speakers X num_feats X mfcc_dim X mfcc_dim]
        num_phones_mask = [num_speakers X num_feats],
        with a 0 corresponding to position that should be -1 (no phones observed)
        and a 1 everywhere else.
        n.b. num_feats = 46*47*0.5 = 1128 usually, where 47 = num_phones

        Returns the output of the trained model; the covariances are passed
        on unchanged.
        '''
        noise = torch.exp(self.noise_root)

        # Need to add spectral noise
        # Pad to spectral dimension.
        # new_zeros allocates the padding with the dtype and device of
        # p_means, so the concatenation also works for inputs on a GPU.
        padding = p_means.new_zeros(
            (p_means.size(0), p_means.size(1),
             self.spectral_dim - self.mfcc_dim))
        padded_p_means = torch.cat((p_means, padding), 2)
        padded_q_means = torch.cat((q_means, padding), 2)

        # Apply inverse dct
        log_spectral_p = dct.idct(padded_p_means)
        log_spectral_q = dct.idct(padded_q_means)

        # Apply inverse log
        spectral_p = torch.exp(log_spectral_p)
        spectral_q = torch.exp(log_spectral_q)

        # Add the adversarial attack noise
        attacked_spectral_p = spectral_p + noise
        attacked_spectral_q = spectral_q + noise

        # Apply the log
        attacked_log_spectral_p = torch.log(attacked_spectral_p)
        attacked_log_spectral_q = torch.log(attacked_spectral_q)

        # Apply the dct
        attacked_padded_p = dct.dct(attacked_log_spectral_p)
        attacked_padded_q = dct.dct(attacked_log_spectral_q)

        # Truncate to mfcc dimension
        p_means_attacked = torch.narrow(attacked_padded_p, 2, 0, self.mfcc_dim)
        q_means_attacked = torch.narrow(attacked_padded_q, 2, 0, self.mfcc_dim)

        # Pass through trained model
        # NOTE: torch.load unpickles the file, so trained_model_path must point
        # to a trusted checkpoint. The model is re-read on every call.
        trained_model = torch.load(self.trained_model_path)
        trained_model.eval()
        y = trained_model(p_means_attacked, p_covariances, q_means_attacked,
                          q_covariances, num_phones_mask)

        return y
    def attack_mean(self, means, noise):
        '''
        Add noise to the means in the spectral domain.

        means = 3-D tensor; its last dimension is zero-padded by
                spectral_dim - mfcc_dim entries before the inverse dct
        noise = added after the exponential, before the log

        Returns the attacked means, truncated to mfcc_dim along dim 2.
        '''
        # Zero padding that lifts the means to the spectral dimension
        zeros = torch.zeros(means.size(0), means.size(1), self.spectral_dim -
                            self.mfcc_dim).to(self.device)

        # mfcc -> log spectral -> spectral
        spectrum = torch.exp(dct.idct(torch.cat((means, zeros), 2)))

        # Add the noise, then spectral -> log spectral -> mfcc
        noisy_padded = dct.dct(torch.log(spectrum + noise))

        # Drop the padded coefficients again
        return torch.narrow(noisy_padded, 2, 0, self.mfcc_dim)
Exemplo n.º 6
0
def spectral_attack(X, attack):
    """Add `attack` to `X` in the spectral domain and return a numpy array.

    X is converted to a float tensor and squeezed; attack is converted to a
    float tensor. The squeezed X is zero-padded to the length of attack,
    taken through idct and exp, shifted by attack, and taken back through
    log and dct. The result is truncated to the squeezed length of X.
    """
    mfcc = torch.from_numpy(X).float().squeeze()
    perturbation = torch.from_numpy(attack).float()
    n_coeffs = mfcc.size(0)

    # Zero-pad up to the length of the attack vector
    zeros = torch.zeros(perturbation.size(0) - n_coeffs)

    # idct then exp, and add the attack there
    spectrum = torch.exp(dct.idct(torch.cat((mfcc, zeros))))
    attacked_spectrum = spectrum + perturbation

    # log then dct, and keep only the original number of coefficients
    attacked_padded = dct.dct(torch.log(attacked_spectrum))
    attacked = torch.narrow(attacked_padded, 0, 0, n_coeffs)

    return attacked.detach().numpy()
Exemplo n.º 7
0
def test_idct():
    """Check that dct.idct undoes dct.dct for norm=None and norm='ortho'."""
    lengths = [5, 2, 32, 111]
    for norm in [None, 'ortho']:
        for N in lengths:
            signal = np.random.normal(size=(1, N))
            coeffs = dct.dct(torch.tensor(signal), norm=norm)
            restored = dct.idct(coeffs, norm=norm).numpy()
            # Largest absolute round-trip error must stay below EPS
            worst = np.abs(signal - restored).max()
            assert worst < EPS, signal
    def attack(self, samples, noise):
        '''
        Perform attack in the spectral space

        samples = 4-D tensor; its last dimension is zero-padded by
                  spectral_dim - mfcc_dim entries before the inverse dct
        noise   = added after the exponential, before the log

        Returns the attacked samples, truncated to mfcc_dim along dim 3.
        '''
        # Zero padding that lifts the samples to the spectral dimension
        zeros = torch.zeros(samples.size(0), samples.size(1),
                            samples.size(2), self.spectral_dim -
                            self.mfcc_dim).to(self.device)

        # mfcc -> log spectral -> spectral
        spectrum = torch.exp(dct.idct(torch.cat((samples, zeros), 3)))

        # Add the noise, then spectral -> log spectral -> mfcc
        noisy_padded = dct.dct(torch.log(spectrum + noise))

        # Drop the padded coefficients again
        return torch.narrow(noisy_padded, 3, 0, self.mfcc_dim)
Exemplo n.º 9
0
    def forward(self, x):
        """Apply a linear layer whose weights come from the idct of self.weight.

        self.weight is right-padded with zeros to self.index.size(0) columns,
        inverse-transformed with norm='ortho', indexed by self.index along
        dim 1 and reshaped to (self.no, self.ni).
        """
        extra_cols = self.index.size(0) - self.weight.size(1)
        padded_coeffs = F.pad(self.weight, (0, extra_cols))
        dense = dct.idct(padded_coeffs, norm='ortho')

        # Pick the entries selected by self.index
        picked = dense[:, self.index.long()]
        weight_matrix = torch.reshape(picked, (self.no, self.ni))

        return F.linear(x, weight_matrix, bias=self.bias)
Exemplo n.º 10
0
def h_func_dct(lateral_slice):
    """Normalise each tube of a 3-D lateral slice in the dct domain.

    For every index i along dim 0, the tube dct_slice[i, :, 0] is
    exponentiated and divided by the sum of its exponentials. The stacked
    result is reshaped to (l, m, n), which only fits when n == 1. It is then
    passed through idct and summed over dim 0.
    """
    l, m, n = lateral_slice.shape
    transformed = dct.dct(lateral_slice)

    def normalise(tube):
        return torch.exp(tube) / torch.sum(torch.exp(tube))

    weights = [normalise(transformed[i, :, 0]) for i in range(l)]
    stacked = torch.stack(weights, dim=0).reshape(l, m, n)
    return torch.sum(dct.idct(stacked), dim=0)
Exemplo n.º 11
0
 def forward(self, x: torch.Tensor) -> torch.Tensor:
     """Mask the dct coefficients of x when self.train is truthy.

     x is flattened to (-1, product of the trailing dims) and transformed
     with dct. Coefficients selected by self.greater_mask are kept; the rest
     are multiplied by self.bernoulli_mask. The result goes through idct and
     is reshaped to x.size(). Otherwise x is returned unchanged.
     """
     # NOTE(review): if this class is an nn.Module, `self.train` is a bound
     # method and always truthy; `self.training` may be intended -- confirm.
     if not self.train:
         return x

     flat = x.view([-1, np.prod(x.size()[1:])])
     coeffs = dct.dct(flat)
     keep = self.greater_mask(coeffs)
     gate = self.bernoulli_mask(flat.shape)

     kept = coeffs * keep
     gated = coeffs * ~keep * gate
     restored = dct.idct(kept + gated)
     return restored.view(x.size())
Exemplo n.º 12
0
    def forward(self, input):
        """
        Multiply the transformed input and the transformed, zero-padded
        filters, then apply the inverse transform.

        :param input: the input map (e.g., an image), a 4-D tensor N, C, H, W
        :return: the cropped (and optionally strided) result with the bias
        added; intended as the result of a 2D convolution
        """
        kernel = self.weight
        # input:  N images, C channels, height H, width W
        _, _, H, W = input.size()
        # kernel: F filters, C channels, height HH, width WW
        _, _, HH, WW = kernel.size()

        # Zero-pad the filters on the right/bottom up to the input's H x W
        kernel = torch_pad(kernel, (0, W - WW, 0, H - HH), 'constant', 0)

        # N, C, H, W -> H, W, N, C
        input_freq = dct(input).permute(2, 3, 0, 1)
        # F, C, H, W -> H, W, C, F
        kernel_freq = dct(kernel).permute(2, 3, 1, 0)

        # Batched matmul per (H, W) position gives H, W, N, F
        product = torch.matmul(input_freq, kernel_freq)
        # H, W, N, F -> N, F, H, W
        result = idct(product.permute(2, 3, 0, 1))

        out_H, out_W = self.out_HW(H, W, HH, WW)
        result = result[..., :out_H, :out_W]

        if self.bias is not None:
            # Two trailing axes let the per-filter bias broadcast over H, W
            result += self.bias.unsqueeze(-1).unsqueeze(-1)

        has_stride = self.stride_H != 1 or self.stride_W != 1
        if has_stride and self.stride_type is StrideType.STANDARD:
            result = result[:, :, ::self.stride_H, ::self.stride_W]
        return result
 def reset_parameters(self):
     """Set self.weight to the transposed transform of an identity matrix.

     The transform is chosen by self.type ('dct1', 'idct1', 'dct' or
     'idct'); for any other value the weight data is left as it is. The
     weight is then excluded from gradient computation.
     """
     identity = torch.eye(self.N)
     # NOTE(review): if this class is an nn.Module, `self.cuda` is a bound
     # method and always truthy -- confirm it is a flag here.
     if self.cuda:
         identity = identity.cuda()

     kind = self.type
     if kind == 'dct1':
         basis = dct.dct1(identity)
     elif kind == 'idct1':
         basis = dct.idct1(identity)
     elif kind == 'dct':
         basis = dct.dct(identity, norm=self.norm)
     elif kind == 'idct':
         basis = dct.idct(identity, norm=self.norm)
     else:
         basis = None

     if basis is not None:
         self.weight.data = basis.data.t()
     self.weight.requires_grad = False  # fixed transform, not learned
Exemplo n.º 14
0
 def forward(self, x):
     """Apply A, dct, D, riffle and idct in sequence, then add the bias.

     x must be 2-D. self.pack / self.unpack wrap each transform; their
     behaviour is defined outside this method.
     """
     _rows, _cols = x.size()  # unpacking fails unless x is 2-D

     out = self.pack(self.A * x)        # first diagonal matrix
     out = self.unpack(dct.dct(out))    # forward DCT
     out = self.pack(self.D * out)      # second diagonal matrix
     out = dct.idct(self.riffle(out))   # riffle, then inverse DCT
     out = self.unpack(out)

     if self.bias is None:
         return out
     return out + self.bias
Exemplo n.º 15
0
 def reset_parameters(self):
     """Initialise the diagonals A and D and the stored transform matrices.

     Runs the parent's reset_parameters first, creates A and D when 'A' is
     not in the instance __dict__, draws both from a normal distribution
     with mean 1 and std 1e-2, stores the dct/idct of an identity matrix and
     deletes self.weight.
     """
     super(LinearACDC, self).reset_parameters()
     size = self.out_features

     # this is probably not a good way to do this
     if 'A' not in self.__dict__:
         self.A = nn.Parameter(torch.Tensor(size, 1))
         self.D = nn.Parameter(torch.Tensor(size, 1))
     for diagonal in (self.A, self.D):
         diagonal.data.normal_(1., 1e-2)

     # DCT matrices are stored for speed
     self.dct = dct.dct(torch.eye(size))
     self.idct = dct.idct(torch.eye(size))

     # remove weight Parameter
     del self.weight
Exemplo n.º 16
0
    def forward(self, x):
        """Apply a 2D convolution whose filters come from the idct of self.weight.

        self.weight is right-padded with zeros to self.index.size(0) columns,
        inverse-transformed with norm='ortho', indexed by self.index along
        dim 1 and reshaped to (no, ni, kernel_size, kernel_size).
        """
        extra_cols = self.index.size(0) - self.weight.size(1)
        padded_coeffs = F.pad(self.weight, (0, extra_cols))
        dense = dct.idct(padded_coeffs, norm='ortho')

        # Pick the entries selected by self.index
        picked = dense[:, self.index.long()]
        kernel_shape = (self.no, self.ni, self.kernel_size, self.kernel_size)
        kernels = torch.reshape(picked, kernel_shape)

        return F.conv2d(x,
                        kernels,
                        bias=self.bias,
                        stride=self.stride,
                        padding=self.padding,
                        groups=self.groups)
Exemplo n.º 17
0
 def reset_parameters(self):
     """Initialise the diagonals A and D and the stored transform matrices.

     Runs the parent's reset_parameters first and requires a square kernel.
     A and D have out_channels * kernel_size[0] rows and are drawn from a
     normal distribution with mean 1 and std 1e-2. self.weight is deleted.

     Raises:
         AssertionError: if the two kernel dimensions differ.
     """
     super(ConvACDC, self).reset_parameters()
     # this is probably not a good way to do this
     # kernel_size is wrapped in a 1-tuple: "%s" % (kh, kw) would raise a
     # TypeError and hide the assertion failure.
     assert self.kernel_size[0] == self.kernel_size[
         1], "%s" % (self.kernel_size, )
     N = self.out_channels * self.kernel_size[0]
     if 'A' not in self.__dict__.keys():
         self.A = nn.Parameter(torch.Tensor(N, 1))
         self.D = nn.Parameter(torch.Tensor(N, 1))
     self.A.data.normal_(1., 1e-2)
     self.D.data.normal_(1., 1e-2)
     # initialise DCT matrices
     self.dct = dct.dct(torch.eye(N))
     self.idct = dct.idct(torch.eye(N))
     # remove weight Parameter
     del self.weight
def spectral_convert(X, num_channels):
    """Convert `X` to the spectral domain and return a numpy array.

    X is converted to a float tensor, squeezed and zero-padded to
    num_channels entries. The result is exp(idct(padded X)).
    """
    mfcc = torch.from_numpy(X).float().squeeze()

    # Zero-pad up to the spectral dimension
    zeros = torch.zeros(num_channels - mfcc.size(0))
    padded = torch.cat((mfcc, zeros))

    # Inverse dct followed by the exponential
    spectrum = torch.exp(dct.idct(padded))

    return spectrum.detach().numpy()
Exemplo n.º 19
0
def isdct_torch(dcts,
                *,
                frame_step,
                frame_length=None,
                window=torch.hamming_window):
    """Compute Inverse Short-Time Discrete Cosine Transform of `dcts`.

    Parameters other than `dcts` are keyword-only.

    Parameters
    ----------
    dcts : DCT matrix/matrices from `sdct_torch`; the last two dimensions
        are `[frame_length, n_frames]`.

    frame_step : Number of samples between adjacent DCT columns (should be the
        same value that was passed to `sdct_torch`).

    frame_length : Only checked, never used for computation.  Must be None
        (default) or equal to the frame length found in `dcts.shape`.

    window : Window to use for DCT.  Either a window tensor (see documentation for `torch.stft`),
        a window tensor constructor, `window(frame_length) -> Tensor`,
        or None to skip the window normalisation.
        Default: hamming window.

    Returns
    -------
    signals : Time-domain signal(s) reconstructed from `dcts`, a `[..., n_samples]` tensor.
        Note that `n_samples` may be different from the original signals' lengths as passed to `sdct_torch`,
        because no padding is applied.
    """
    *_, n_coeffs, n_frames = dcts.shape
    assert frame_length in {None, n_coeffs}

    # idct works on the last axis, so move the frame axis out of the way
    frames = torch_dct.idct(dcts.transpose(-1, -2), norm="ortho")
    signals = torch_overlap_add(frames.transpose(-1, -2),
                                frame_step=frame_step)

    if callable(window):
        window = window(n_coeffs).to(signals)
    if window is None:
        return signals

    # Divide by the overlap-added window to undo its weighting
    window_frames = window[:, None].expand(-1, n_frames)
    window_signal = torch_overlap_add(window_frames, frame_step=frame_step)
    return signals / window_signal
Exemplo n.º 20
0
def test_cuda():
    """Run the dct checks on device 'cuda:0'; does nothing without CUDA.

    dct.dct1 and dct.dct are compared against the fftpack.dct reference
    (type 1 and type 2), then dct.idct is checked to undo dct.dct.
    """
    if not torch.cuda.is_available():
        return

    gpu = torch.device('cuda:0')

    def on_gpu(array):
        return torch.tensor(array, device=gpu)

    # Type-1 transform, single-row inputs
    for N in [2, 5, 32, 111]:
        sample = np.random.normal(size=(1, N))
        expected = fftpack.dct(sample, type=1)
        actual = dct.dct1(on_gpu(sample)).cpu().numpy()
        assert np.abs(expected - actual).max() < EPS, expected

    # Type-1 transform, d-dimensional inputs with every side of length 2
    for d in [2, 3, 4]:
        sample = np.random.normal(size=(2, ) * d)
        expected = fftpack.dct(sample, type=1)
        actual = dct.dct1(on_gpu(sample)).cpu().numpy()
        assert np.abs(expected - actual).max() < EPS, expected

    for norm in [None, 'ortho']:
        # Type-2 transform, single-row inputs
        for N in [2, 3, 5, 32, 111]:
            sample = np.random.normal(size=(1, N))
            expected = fftpack.dct(sample, type=2, norm=norm)
            actual = dct.dct(on_gpu(sample), norm=norm).cpu().numpy()
            assert np.abs(expected - actual).max() < EPS, (norm, N)

        # Type-2 transform, d-dimensional inputs
        for d in [2, 3, 4, 11]:
            sample = np.random.normal(size=(2, ) * d)
            expected = fftpack.dct(sample, type=2, norm=norm)
            actual = dct.dct(on_gpu(sample), norm=norm).cpu().numpy()
            assert np.abs(expected - actual).max() < EPS, (norm, d)

        # Round trip: idct(dct(x)) must reproduce x
        for N in [5, 2, 32, 111]:
            sample = np.random.normal(size=(1, N))
            coeffs = dct.dct(on_gpu(sample), norm=norm)
            restored = dct.idct(coeffs, norm=norm).cpu().numpy()
            assert np.abs(sample - restored).max() < EPS, sample
Exemplo n.º 21
0
def t_product_multiprocess(A, B):
    """Multiply two 3-D tensors slice by slice in the dct domain.

    A and B are transformed with dct along dim 0. One 'spawn' process per
    index of dim 0 runs t_product_slice(dct_A, dct_B, dct_C, i). The
    accumulated dct_C is then inverse-transformed along dim 0.

    Requires A.shape[0] == B.shape[0] and A.shape[2] == B.shape[1].
    """
    # NOTE(review): the workers are expected to fill dct_C in place; whether
    # the parent sees those writes depends on how the tensors are shared
    # with the child processes -- confirm.
    ctx = torch_mp.get_context('spawn')

    assert A.shape[0] == B.shape[0] and A.shape[2] == B.shape[1]

    def along_dim0(transform, tensor):
        # dct/idct are called on the last axis, so swap dims 0 and 2 around them
        swapped = torch.transpose(tensor, 0, 2)
        return torch.transpose(transform(swapped), 0, 2)

    dct_A = along_dim0(dct.dct, A)
    dct_B = along_dim0(dct.dct, B)
    dct_C = torch.zeros(A.shape[0], A.shape[1], B.shape[2])

    workers = []
    for i in range(dct_C.shape[0]):
        worker = ctx.Process(target=t_product_slice,
                             args=(dct_A, dct_B, dct_C, i))
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()

    return along_dim0(dct.idct, dct_C)
Exemplo n.º 22
0
def image_idct(dct_x):
    """Inverts image_dct(), by performing a type-III DCT.

    The input is converted to a tensor, dims 1 and 2 are swapped, and an
    orthonormal idct is applied on the last axis. The swap and idct are then
    repeated, so both axes are transformed and the original order restored.
    """
    coeffs = torch.as_tensor(dct_x)
    half_inverted = torch_dct.idct(coeffs.transpose(1, 2), norm='ortho')
    return torch_dct.idct(half_inverted.transpose(1, 2), norm='ortho')
Exemplo n.º 23
0
    def forward(self, p_vects, q_vects, p_frames_mask, q_frames_mask,
                num_phones_mask):
        '''
        Add the spectral noise to the p/q MFCC vectors, compute their means
        and covariances and pass those through the trained model.

        p/q_vects = [num_speakers X num_feats X max_num_mfcc_frames x mfcc_dim]
        p/q_frames_mask = [num_speakers X num_feats X max_num_mfcc_frames x mfcc_dim]
                          -> The associated 0s and 1s mask of the number of
                             observed frames for the corresponding phone
        num_phones_mask = [num_speakers X num_feats],
        with a 0 corresponding to position that should be -1 (no phones observed)
        and a 1 everywhere else.
        n.b. mfcc_dim = 13 usually (using c0 for energy instead of log-energy)
             num_feats = 46*47*0.5 = 1128 usually
             max_num_mfcc_frames = the maximum number of frames associated
             with a particular phone for any speaker -> often set to 4000

        Returns the output of the trained model.
        '''
        # Apply the attack
        noise = torch.exp(self.noise_root)

        # Need to add spectral noise
        # Pad to spectral dimension
        padding = torch.zeros(p_vects.size(0), p_vects.size(1),
                              p_vects.size(2), self.spectral_dim -
                              self.mfcc_dim).to(self.device)
        padded_p_vects = torch.cat((p_vects, padding), 3)
        padded_q_vects = torch.cat((q_vects, padding), 3)

        # Apply inverse dct
        log_spectral_p = dct.idct(padded_p_vects)
        log_spectral_q = dct.idct(padded_q_vects)

        # Apply inverse log
        spectral_p = torch.exp(log_spectral_p)
        spectral_q = torch.exp(log_spectral_q)

        # Add the adversarial attack noise
        attacked_spectral_p = spectral_p + noise
        attacked_spectral_q = spectral_q + noise

        # Apply the log
        attacked_log_spectral_p = torch.log(attacked_spectral_p)
        attacked_log_spectral_q = torch.log(attacked_spectral_q)

        # Apply the dct
        attacked_padded_p = dct.dct(attacked_log_spectral_p)
        attacked_padded_q = dct.dct(attacked_log_spectral_q)

        # Truncate to mfcc dimension
        p_vects_attacked = torch.narrow(attacked_padded_p, 3, 0, self.mfcc_dim)
        q_vects_attacked = torch.narrow(attacked_padded_q, 3, 0, self.mfcc_dim)

        # Apply mask of zeros/ones, to ensure spectral noise only applied up to p/q lengths
        p_vects_masked = p_vects_attacked * p_frames_mask
        q_vects_masked = q_vects_attacked * q_frames_mask

        # Compute the p/q_means tensor and covariance tensor
        p_means, p_covariances, q_means, q_covariances = self.get_pq_means_covs(
            p_vects_masked, q_vects_masked, p_frames_mask, q_frames_mask,
            num_phones_mask)

        # add small noise to all covariance matrices to ensure they are non-singular.
        # The identity is sized from the covariances instead of assuming 13,
        # and built once for both p and q.
        jitter = 1e-2 * torch.eye(p_covariances.size(-1)).to(self.device)
        p_covariances_noised = p_covariances + jitter
        q_covariances_noised = q_covariances + jitter

        # Pass through trained model
        # NOTE: torch.load unpickles the file, so trained_model_path must point
        # to a trusted checkpoint. The model is re-read on every call.
        trained_model = torch.load(self.trained_model_path)
        trained_model.to(self.device)
        trained_model.eval()
        y = trained_model(p_means, p_covariances_noised, q_means,
                          q_covariances_noised, num_phones_mask)

        return y
Exemplo n.º 24
0
import torch
import torch_dct as dct

# Round-trip example: idct(dct(x)) should give x back.
# float64 is used because a float32 round trip carries rounding error far
# above the 1e-10 tolerance asserted below.
x = torch.randn(200, dtype=torch.float64)
X = dct.dct(x)  # DCT-II done through the last dimension
y = dct.idct(X)  # scaled DCT-III done through the last dimension
assert (torch.abs(x - y)).sum() < 1e-10  # x == y within numerical tolerance