Exemplo n.º 1
0
def test_stft_istft_identity(ctx, window_size, stride, fft_size, window_type,
                             center, pad_mode):
    """Check that ``F.istft(F.stft(x))`` reproduces ``x``.

    The test is skipped on the plain ``cuda`` backend and for parameter
    combinations that ``is_nola_violation`` reports as violating the NOLA
    condition.

    Args:
      ctx: Context to run under; ``ctx.backend[0]`` names the backend.
      window_size: Forwarded to ``F.stft`` / ``F.istft``; also determines the
        input shape via ``create_stft_input_shape``.
      stride: Forwarded to ``F.stft`` / ``F.istft``.
      fft_size: Forwarded to ``F.stft`` / ``F.istft``.
      window_type: Forwarded to ``F.stft`` / ``F.istft``.
      center: Forwarded to ``F.stft`` / ``F.istft``.
      pad_mode: Forwarded to ``F.stft`` only.
    """
    backend = ctx.backend[0].split(":")[0]
    if backend == 'cuda':
        pytest.skip(
            'CUDA Convolution N-D is only supported in CUDNN extension')

    x_shape = create_stft_input_shape(window_size)
    x = np.random.randn(*x_shape)

    # Skip for NOLA condition violation.
    # pytest.skip() raises, so no explicit `return` is needed after it.
    length = x_shape[1]
    if is_nola_violation(window_type, window_size, stride, fft_size, length,
                         center):
        pytest.skip('NOLA condition violation.')

    x = nn.Variable.from_numpy_array(x)
    with nn.context_scope(ctx):
        yr, yi = F.stft(x, window_size, stride, fft_size, window_type, center,
                        pad_mode)
        # The inverse is always built with pad_mode="constant", independent
        # of the pad_mode used for the forward transform.
        z = F.istft(yr,
                    yi,
                    window_size,
                    stride,
                    fft_size,
                    window_type,
                    center,
                    pad_mode="constant")
    z.forward()

    assert np.allclose(x.d, z.d, atol=1e-5, rtol=1e-5)
Exemplo n.º 2
0
def istft_backward(inputs,
                   window_size,
                   stride,
                   fft_size,
                   window_type='hanning',
                   center=True,
                   pad_mode='reflect',
                   as_stft_backward=False):
    """
    Backward of istft, expressed as an ``F.stft`` call on the incoming grad.

    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward
        function. Only ``inputs[0]`` is used here.
      window_size: Forwarded unchanged to ``F.stft``.
      stride: Forwarded unchanged to ``F.stft``.
      fft_size: Forwarded unchanged to ``F.stft``.
      window_type: Forwarded unchanged to ``F.stft``.
      center: Forwarded unchanged to ``F.stft``.
      pad_mode: Forwarded unchanged to ``F.stft``.
      as_stft_backward (bool): Its negation is passed to ``F.stft`` as
        ``as_istft_backward``.

    Return:
      tuple: The two outputs of ``F.stft``, i.e. the gradients wrt the two
      inputs of the corresponding function.
    """
    grad_output = inputs[0]

    grad_real, grad_imag = F.stft(
        grad_output,
        window_size,
        stride,
        fft_size,
        window_type=window_type,
        center=center,
        pad_mode=pad_mode,
        as_istft_backward=not as_stft_backward,
    )
    return grad_real, grad_imag
Exemplo n.º 3
0
def test_istft(ctx, window_size, stride, fft_size, window_type, center):
    """Check that ``F.istft(F.stft(x))`` reproduces ``x`` away from the edges.

    The test is skipped on the plain ``cuda`` backend. The first and last
    ``window_size - stride`` samples are excluded from the comparison.

    Args:
      ctx: Context to run under; ``ctx.backend[0]`` names the backend.
      window_size: Forwarded to ``F.stft`` / ``F.istft``; the input signal has
        ``window_size * 10`` samples.
      stride: Forwarded to ``F.stft`` / ``F.istft``.
      fft_size: Forwarded to ``F.stft`` / ``F.istft``.
      window_type: Forwarded to ``F.stft`` / ``F.istft``.
      center: Forwarded to ``F.stft`` / ``F.istft``.
    """
    backend = ctx.backend[0].split(":")[0]
    if backend == 'cuda':
        pytest.skip('CUDA Convolution N-D is only supported in CUDNN extension')

    # clear all previous STFT conv/deconv kernels
    nn.clear_parameters()

    # Make sure that iSTFT(STFT(x)) = x
    x = np.random.randn(1, window_size * 10)

    nx = nn.Variable.from_numpy_array(x)
    with nn.context_scope(ctx):
        nyr, nyi = F.stft(nx,
                          window_size=window_size,
                          stride=stride,
                          fft_size=fft_size,
                          window_type=window_type,
                          center=center)
        nz = F.istft(nyr, nyi,
                     window_size=window_size,
                     stride=stride,
                     fft_size=fft_size,
                     window_type=window_type,
                     center=center)
    nz.forward()

    invalid = window_size - stride
    # `-invalid or None` keeps the whole signal when invalid == 0. A plain
    # `invalid:-invalid` would turn into `0:0` in that case and the assertion
    # would compare two empty arrays, passing without checking anything.
    valid = slice(invalid, -invalid or None)
    assert np.allclose(nx.d[:, valid],
                       nz.d[:, valid],
                       atol=1e-5, rtol=1e-5)
Exemplo n.º 4
0
def test_stft(window_size, stride, fft_size, window_type):
    """Compare ``F.stft`` (with ``center=False``) against ``scipy.signal.stft``.

    Args:
      window_size: Window size; the random input has ``window_size * 10``
        samples.
      stride: Hop between consecutive frames.
      fft_size: FFT size passed to both implementations.
      window_type: Window name passed unchanged to both implementations.
    """
    # Drop STFT conv/deconv kernels left over from earlier tests.
    nn.clear_parameters()

    signal = np.random.randn(1, window_size * 10)

    signal_var = nn.Variable.from_numpy_array(signal)
    real_var, imag_var = F.stft(signal_var,
                                window_size=window_size,
                                stride=stride,
                                fft_size=fft_size,
                                window_type=window_type,
                                center=False)
    nn.forward_all([real_var, imag_var])

    actual = real_var.d + 1j * imag_var.d

    # SciPy reference: no boundary extension and no padding, to match
    # center=False above.
    overlap = window_size - stride
    _freqs, _times, expected = sig.stft(signal,
                                        window=window_type,
                                        nperseg=window_size,
                                        noverlap=overlap,
                                        nfft=fft_size,
                                        boundary=None,
                                        padded=False)

    # SciPy scales its output differently; bring the nnabla result onto the
    # same scale before comparing.
    actual /= fft_size // 2

    assert np.allclose(actual, expected, atol=1e-5, rtol=1e-5)
Exemplo n.º 5
0
def test_istft(window_size, stride, fft_size, window_type, center):
    """Check that ``F.istft(F.stft(x))`` reproduces ``x`` away from the edges.

    The first and last ``window_size - stride`` samples are excluded from the
    comparison.

    Args:
      window_size: Forwarded to ``F.stft`` / ``F.istft``; the input signal has
        ``window_size * 10`` samples.
      stride: Forwarded to ``F.stft`` / ``F.istft``.
      fft_size: Forwarded to ``F.stft`` / ``F.istft``.
      window_type: Forwarded to ``F.stft`` / ``F.istft``.
      center: Forwarded to ``F.stft`` / ``F.istft``.
    """
    # clear all previous STFT conv/deconv kernels
    nn.clear_parameters()

    # Make sure that iSTFT(STFT(x)) = x
    x = np.random.randn(1, window_size * 10)

    nx = nn.Variable.from_numpy_array(x)
    nyr, nyi = F.stft(nx,
                      window_size=window_size,
                      stride=stride,
                      fft_size=fft_size,
                      window_type=window_type,
                      center=center)
    nz = F.istft(nyr,
                 nyi,
                 window_size=window_size,
                 stride=stride,
                 fft_size=fft_size,
                 window_type=window_type,
                 center=center)
    nz.forward()

    invalid = window_size - stride
    # `-invalid or None` keeps the whole signal when invalid == 0. A plain
    # `invalid:-invalid` would turn into `0:0` in that case and the assertion
    # would compare two empty arrays, passing without checking anything.
    valid = slice(invalid, -invalid or None)
    assert np.allclose(nx.d[:, valid],
                       nz.d[:, valid],
                       atol=1e-5,
                       rtol=1e-5)
Exemplo n.º 6
0
 def compute_mel(self, wave):
     """Return the clipped log of ``self.basis`` applied to the STFT magnitude.

     The STFT parameters (``win_length``, ``hop_length``, ``n_fft``) are read
     from ``self.hparams``. The result is marked ``need_grad=False``.
     """
     params = self.hparams
     real_part, imag_part = F.stft(wave,
                                   window_size=params.win_length,
                                   stride=params.hop_length,
                                   fft_size=params.n_fft)

     # Magnitude: (re^2 + im^2) ** 0.5
     squared_sum = F.add2(F.pow_scalar(real_part, 2),
                          F.pow_scalar(imag_part, 2))
     magnitude = F.pow_scalar(squared_sum, 0.5)

     projected = F.batch_matmul(self.basis, magnitude)

     # Clip from below at 1e-5 before taking the log.
     clipped = F.clip_by_value(projected, 1e-5, np.inf)
     log_mel = F.log(clipped).apply(need_grad=False)
     return log_mel
Exemplo n.º 7
0
    def __init__(self, waveglow, hp):
        """Precompute ``self.bias_spec`` from ``waveglow`` and store ``hp``.

        A constant input of shape ``[1, hp.n_mels, 88]`` is run through
        ``waveglow.infer`` with ``sigma=0``; the STFT magnitude of the result
        is evaluated and its first frame (batch item 0) is kept as a column
        in ``self.bias_spec``.

        Args:
          waveglow: Object providing ``infer(mel, sigma=...)``.
          hp: Hyper-parameters; ``n_mels``, ``win_length``, ``hop_length`` and
            ``n_fft`` are read here.
        """
        constant_mel = F.constant(shape=[1, hp.n_mels, 88])
        generated = waveglow.infer(constant_mel, sigma=0)
        spec_re, spec_im = F.stft(generated,
                                  window_size=hp.win_length,
                                  stride=hp.hop_length,
                                  fft_size=hp.n_fft)
        magnitude = F.pow_scalar(spec_re**2 + spec_im**2, 0.5)
        magnitude.forward(clear_buffer=True)

        # Copy the data out of the variable, take frame 0 of every batch
        # item, then batch item 0 with a trailing singleton axis.
        magnitude_copy = magnitude.d.copy()
        first_frame = magnitude_copy[:, :, 0]
        self.bias_spec = first_frame[0, :, None]
        self.hparams = hp
Exemplo n.º 8
0
def ref_istft(y_r, y_i, window_size, stride, fft_size, window_type, center,
              pad_mode, as_stft_backward):
    """Reference implementation for ``F.istft``.

    Args:
      y_r: Real part of the spectrogram, indexed by batch on axis 0.
      y_i: Imaginary part of the spectrogram, same layout as ``y_r``.
      window_size: Window size.
      stride: Hop between consecutive frames.
      fft_size: FFT size (used only in the ``as_stft_backward`` branch).
      window_type: Window name in nnabla spelling.
      center: Forwarded to ``librosa.istft`` or ``F.istft`` / ``F.stft``.
      pad_mode: Padding mode (used only in the ``as_stft_backward`` branch).
      as_stft_backward: If false, the reference is ``librosa.istft``; if true,
        the reference is the gradient obtained by back-propagating through
        ``F.stft``.

    Returns:
      The reference output: a stacked array of per-item ``librosa.istft``
      results, or the gradient ``x.g``.
    """
    if not as_stft_backward:
        # Use librosa.istft as the forward reference.

        # Translate the nnabla spelling 'hanning' to 'hann' for librosa; this
        # is the same translation applied before calling librosa.stft.
        window_type = 'hann' if window_type == 'hanning' else window_type

        # Convert to librosa.istft input format.
        y = y_r + 1j * y_i

        # Get original signal length.
        length = create_stft_input_shape(window_size)[1]

        # librosa.istft does not support batched input, so invert each batch
        # item separately and stack the results.
        return np.array([
            librosa.istft(y_item,
                          hop_length=stride,
                          win_length=window_size,
                          window=window_type,
                          center=center,
                          length=length)
            for y_item in y
        ])

    # Use F.stft backward as the reference

    y_r = nn.Variable.from_numpy_array(y_r)
    y_i = nn.Variable.from_numpy_array(y_i)

    # Just create stft inputs
    x = F.istft(y_r, y_i, window_size, stride, fft_size, window_type,
                center, pad_mode, True)

    # Execute stft backward: feed y_r / y_i as the output gradients of
    # F.stft and read the resulting gradient on x.
    x.need_grad = True
    x.grad.zero()
    z_r, z_i = F.stft(x, window_size, stride, fft_size, window_type,
                      center, pad_mode)

    z_r.g = y_r.d
    z_i.g = y_i.d
    z = F.sink(z_r, z_i, one_input_grad=False)
    z.forward()
    z.backward()

    return x.g
Exemplo n.º 9
0
def ref_stft(x, window_size, stride, fft_size, window_type, center, pad_mode,
             as_istft_backward):
    """Reference implementation for ``F.stft``.

    Args:
      x: Input signals, indexed by batch on axis 0.
      window_size: Window size.
      stride: Hop between consecutive frames.
      fft_size: FFT size.
      window_type: Window name in nnabla spelling.
      center: Forwarded to ``librosa.stft`` or ``F.stft`` / ``F.istft``.
      pad_mode: Forwarded to ``librosa.stft`` or ``F.stft`` / ``F.istft``.
      as_istft_backward: If false, the reference is ``librosa.stft``; if true,
        the reference is the gradient obtained by back-propagating through
        ``F.istft``.

    Returns:
      A ``(real, imag)`` pair: either the parts of the stacked librosa
      spectrograms, or the gradients ``(y_r.g, y_i.g)``.
    """
    if as_istft_backward:
        # Reference = backward pass of F.istft.
        x_var = nn.Variable.from_numpy_array(x)

        # F.stft is only used here to produce inputs for F.istft.
        spec_r, spec_i = F.stft(x_var, window_size, stride, fft_size,
                                window_type, center, pad_mode)

        # Enable and clear gradients on both istft inputs.
        for spec in (spec_r, spec_i):
            spec.need_grad = True
        for spec in (spec_r, spec_i):
            spec.grad.zero()

        recon = F.istft(spec_r, spec_i, window_size, stride, fft_size,
                        window_type, center, pad_mode)

        # Back-propagate the original signal as the output gradient.
        recon.forward()
        recon.backward(x_var.data)

        return spec_r.g, spec_i.g

    # Reference = librosa.stft, which spells the window 'hann' rather than
    # 'hanning'.
    librosa_window = 'hann' if window_type == 'hanning' else window_type

    # librosa.stft does not support batched input: transform each item
    # separately, then stack into the nnabla stft output format.
    spectra = np.array([
        librosa.stft(signal,
                     n_fft=fft_size,
                     hop_length=stride,
                     win_length=window_size,
                     window=librosa_window,
                     center=center,
                     pad_mode=pad_mode)
        for signal in x
    ])
    return spectra.real, spectra.imag
Exemplo n.º 10
0
def test_stft(ctx, window_size, stride, fft_size, window_type):
    """Compare ``F.stft`` (with ``center=False``) against ``scipy.signal.stft``.

    The test is skipped on the plain ``cuda`` backend.

    Args:
      ctx: Context to build the graph under; ``ctx.backend[0]`` names the
        backend.
      window_size: Window size; the random input has ``window_size * 10``
        samples.
      stride: Hop between consecutive frames.
      fft_size: FFT size passed to both implementations.
      window_type: Window name; ``'rectangular'`` and ``None`` are passed to
        SciPy as ``'boxcar'``.
    """
    if ctx.backend[0].split(":")[0] == 'cuda':
        pytest.skip('CUDA Convolution N-D is only supported in CUDNN extension')

    # Drop STFT conv/deconv kernels left over from earlier tests.
    nn.clear_parameters()

    signal = np.random.randn(1, window_size * 10)
    signal_var = nn.Variable.from_numpy_array(signal)

    with nn.context_scope(ctx):
        real_var, imag_var = F.stft(signal_var,
                                    window_size=window_size,
                                    stride=stride,
                                    fft_size=fft_size,
                                    window_type=window_type,
                                    center=False)
    nn.forward_all([real_var, imag_var])

    actual = real_var.d + 1j * imag_var.d

    # SciPy uses the name 'boxcar' for the window selected here by
    # 'rectangular' or None.
    scipy_window = ('boxcar'
                    if window_type == 'rectangular' or window_type is None
                    else window_type)

    overlap = window_size - stride
    _freqs, _times, expected = sig.stft(signal,
                                        window=scipy_window,
                                        nperseg=window_size,
                                        noverlap=overlap,
                                        nfft=fft_size,
                                        boundary=None,
                                        padded=False)

    # SciPy scales its output differently; bring the nnabla result onto the
    # same scale before comparing.
    actual /= fft_size // 2

    assert np.allclose(actual, expected, atol=1e-5, rtol=1e-5)
Exemplo n.º 11
0
def stft(x, n_fft=4096, n_hop=1024, center=True, patch_length=None):
    '''
    Multichannel STFT with optional truncation along the frame axis.

    Samples and channels are folded into one batch axis for ``F.stft``
    (hanning window of size ``n_fft``, reflect padding) and unfolded again
    afterwards.

    Input: (nb_samples, nb_channels, nb_timesteps)
    Output: (nb_samples, nb_channels, nb_bins, nb_frames),
            (nb_samples, nb_channels, nb_bins, nb_frames)
    where nb_bins is ``n_fft // 2 + 1``. If ``patch_length`` is not None, only
    the first ``patch_length`` frames of each output are returned.
    '''
    batch, channels, _ = x.shape
    folded = F.reshape(x, (batch * channels, -1))

    spec_re, spec_im = F.stft(folded,
                              n_fft,
                              n_hop,
                              n_fft,
                              window_type='hanning',
                              center=center,
                              pad_mode='reflect')

    # Unfold the batch axis back into (samples, channels).
    unfolded_shape = (batch, channels, n_fft // 2 + 1, -1)
    spec_re = F.reshape(spec_re, unfolded_shape)
    spec_im = F.reshape(spec_im, unfolded_shape)

    if patch_length is None:
        return spec_re, spec_im

    # Keep only the leading patch_length frames.
    return spec_re[..., :patch_length], spec_im[..., :patch_length]
Exemplo n.º 12
0
def STFT(x, n_fft=4096, n_hop=1024, center=True):
    """Multichannel STFT

    Samples and channels are folded into one batch axis for ``F.stft``
    (hanning window of size ``n_fft``, reflect padding) and unfolded again
    afterwards.

    Input: (nb_samples, nb_channels, nb_timesteps)
    Output: (nb_samples, nb_channels, nb_bins, nb_frames),
            (nb_samples, nb_channels, nb_bins, nb_frames)
    where nb_bins is ``n_fft // 2 + 1``.
    """
    n_samples, n_channels, _ = x.shape
    merged = F.reshape(x, (n_samples * n_channels, -1))

    re_part, im_part = F.stft(merged,
                              n_fft,
                              n_hop,
                              n_fft,
                              window_type='hanning',
                              center=center,
                              pad_mode='reflect')

    # Split the merged batch axis back into (samples, channels).
    target_shape = (n_samples, n_channels, n_fft // 2 + 1, -1)
    re_part = F.reshape(re_part, target_shape)
    im_part = F.reshape(im_part, target_shape)

    return re_part, im_part