Example #1
import librosa
import numpy as np


def extend_dataset(y, sr):

    # Make a 2x-faster version
    D = librosa.stft(y, n_fft=2048, hop_length=512)

    D_fast = librosa.phase_vocoder(D, 2.0, hop_length=512)
    y_fast = librosa.istft(D_fast, hop_length=512)

    # Concatenate two 2x copies so the result is about the original length
    y_fast = np.append(y_fast, y_fast)

    # Make a 2x-slower version
    D_slow = librosa.phase_vocoder(D, 0.5, hop_length=512)
    y_slow = librosa.istft(D_slow, hop_length=512)

    # Split the 0.5x signal into two halves, each about the original length
    y_slow1, y_slow2 = np.array_split(y_slow, 2)

    ## Frequency scaling
    #y_pitch_up = librosa.effects.pitch_shift(y, sr, n_steps=4)
    #y_pitch_down = librosa.effects.pitch_shift(y, sr, n_steps=-4)

    # Trim all variants to a common length
    samples = min([len(y), len(y_fast), len(y_slow1), len(y_slow2)])
    y = y[:samples]
    y_fast = y_fast[:samples]
    y_slow1 = y_slow1[:samples]
    y_slow2 = y_slow2[:samples]

    return (y, y_fast, y_slow1, y_slow2)
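A hedged usage sketch for the function above (the input file name is hypothetical):

y, sr = librosa.load('sample.wav')
y_orig, y_fast, y_slow1, y_slow2 = extend_dataset(y, sr)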
Example #2
def test_phase_vocoder(y_multi, rate):
    y, sr = y_multi
    D = librosa.stft(y)

    D0 = librosa.phase_vocoder(D[0], rate=rate)
    D1 = librosa.phase_vocoder(D[1], rate=rate)
    D2 = librosa.phase_vocoder(D, rate=rate)

    assert np.allclose(D2[0], D0)
    assert np.allclose(D2[1], D1)
    assert not np.allclose(D2[0], D2[1])
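The same channel-independence property can be checked standalone; a minimal sketch, assuming librosa >= 0.9 (multichannel STFT support, keyword-only rate) and a synthetic stereo tone:

import numpy as np
import librosa

sr = 22050
t = np.arange(sr) / sr
y = np.stack([np.sin(2 * np.pi * 440 * t), np.sin(2 * np.pi * 220 * t)])

D = librosa.stft(y)                         # shape: (channels, freq, frames)
D2 = librosa.phase_vocoder(D, rate=1.5)     # stretch both channels at once
D0 = librosa.phase_vocoder(D[0], rate=1.5)  # stretch one channel alone
assert np.allclose(D2[0], D0)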
Example #3
    def test_phase_vocoder(self, rate, test_pseudo_complex):
        hop_length = 256
        num_freq = 1025
        num_frames = 400
        torch.random.manual_seed(42)

        # Due to the cumulative sum, numerical error when using torch.float32
        # causes the bottom-right values of the stretched spectrogram to not
        # match librosa.
        spec = torch.randn(num_freq,
                           num_frames,
                           device=self.device,
                           dtype=torch.complex128)
        phase_advance = torch.linspace(0,
                                       np.pi * hop_length,
                                       num_freq,
                                       device=self.device,
                                       dtype=torch.float64)[..., None]

        stretched = F.phase_vocoder(
            torch.view_as_real(spec) if test_pseudo_complex else spec,
            rate=rate,
            phase_advance=phase_advance)

        expected_stretched = librosa.phase_vocoder(spec.cpu().numpy(),
                                                   rate=rate,
                                                   hop_length=hop_length)

        self.assertEqual(
            torch.view_as_complex(stretched)
            if test_pseudo_complex else stretched,
            torch.from_numpy(expected_stretched))
Example #4
def stretch_demo(input_file, output_file, speed):
    '''Phase-vocoder time stretch demo function.

    :parameters:
      - input_file : str
          path to input audio
      - output_file : str
          path to save output (wav)
      - speed : float > 0
          speed up by this factor
    '''

    N_FFT = 2048
    HOP_LENGTH = N_FFT // 4

    # 1. Load the wav file, resample
    print('Loading', input_file)

    y, sr = librosa.load(input_file)

    # 2. Generate STFT @ 2048 samples
    print('Computing short-time Fourier transform...')
    D = librosa.stft(y, n_fft=N_FFT, hop_length=HOP_LENGTH)

    print('Playing back at %.0f%% speed' % (speed * 100))
    D_stretch = librosa.phase_vocoder(D, speed, hop_length=HOP_LENGTH)

    y_stretch = librosa.istft(D_stretch, hop_length=HOP_LENGTH)

    print('Saving stretched audio to:', output_file)
    # Note: librosa.output was removed in librosa 0.8; on newer versions,
    # use soundfile.write(output_file, y_stretch, sr) instead.
    librosa.output.write_wav(output_file, y_stretch, sr)
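A hedged invocation of the demo above (paths are hypothetical):

stretch_demo('input.wav', 'stretched.wav', 0.5)   # play back at 50% speed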
Example #5
def test_phase_vocoder(complex_specgrams, rate, hop_length):
    # Due to the cumulative sum, numerical error when using torch.float32
    # causes the bottom-right values of the stretched spectrogram to not
    # match librosa.

    complex_specgrams = complex_specgrams.type(torch.float64)
    phase_advance = torch.linspace(0,
                                   np.pi * hop_length,
                                   complex_specgrams.shape[-3],
                                   dtype=torch.float64)[..., None]

    complex_specgrams_stretch = F.phase_vocoder(complex_specgrams,
                                                rate=rate,
                                                phase_advance=phase_advance)

    # == Test shape
    expected_size = list(complex_specgrams.size())
    expected_size[-2] = int(np.ceil(expected_size[-2] / rate))

    assert complex_specgrams.dim() == complex_specgrams_stretch.dim()
    assert complex_specgrams_stretch.size() == torch.Size(expected_size)

    # == Test values
    index = [0] * (complex_specgrams.dim() - 3) + [slice(None)] * 3
    mono_complex_specgram = complex_specgrams[index].numpy()
    mono_complex_specgram = mono_complex_specgram[..., 0] + \
        mono_complex_specgram[..., 1] * 1j
    expected_complex_stretch = librosa.phase_vocoder(mono_complex_specgram,
                                                     rate=rate,
                                                     hop_length=hop_length)

    complex_stretch = complex_specgrams_stretch[index].numpy()
    complex_stretch = complex_stretch[..., 0] + 1j * complex_stretch[..., 1]

    assert np.allclose(complex_stretch, expected_complex_stretch, atol=1e-5)
Example #6
from librosa import stft, istft, phase_vocoder


def time_shift(wav, tg_lngth):
    """
    Changes the audio length without affecting pitch, using a phase
    vocoder.
    :param wav: The audio to stretch
    :param tg_lngth: The target length of the stretched audio, measured
    in number of samples
    :return: A time-stretched audio signal of approximately tg_lngth samples
    """
    D = stft(wav)
    # phase_vocoder speeds playback up by `rate`, so the output length is
    # roughly len(wav) / rate; rate = len(wav) / tg_lngth hits the target.
    shift_factor = len(wav) / tg_lngth
    D_shifted = phase_vocoder(D, shift_factor)
    return istft(D_shifted)
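A hedged usage sketch (the clip name is hypothetical); istft lengths are frame-quantized, so the result is only approximately tg_lngth samples:

import librosa

y, sr = librosa.load('clip.wav')
y_3s = time_shift(y, 3 * sr)   # stretch/compress to roughly three seconds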
Example #7
from librosa import stft, istft, phase_vocoder, resample


def freq_shift(wav, fq, tg_fq, sample_rate):
    """
    Shifts the given audio from its current frequency to the target frequency
    :param wav: The audio to shift
    :param fq: The frequency of the current audio (in Hz)
    :param tg_fq: The target frequency to shift to (in Hz)
    :param sample_rate: The sampling rate used for the audio when imported
    by librosa
    :return: The audio shifted to the given target frequency
    """
    D = stft(wav)
    shift_factor = 1 + (fq - tg_fq) / fq
    D_shifted = phase_vocoder(D, shift_factor)
    x_shifted = istft(D_shifted)
    # Resample by the same factor to realize the frequency shift
    return resample(x_shifted, sample_rate, int(sample_rate / shift_factor))
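For example (a hedged sketch; the tone file is hypothetical), shifting a 440 Hz recording toward 220 Hz uses shift_factor = 1 + (440 - 220) / 440 = 1.5:

import librosa

y, sr = librosa.load('tone440.wav')
y_shifted = freq_shift(y, 440.0, 220.0, sr)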
Example #8
def timemap_stretch(x, sr, path, hop_length=32, n_fft=4096):
    """
    Stretch audio x so that it aligns with another
    audio clip, according to a warping path

    Parameters
    ----------
    x: ndarray(N)
        An array of audio samples
    sr: int
        Sample rate
    path: ndarray(K, 2)
        Warping path.  Indices of x are in the first column
    hop_length: int
        Hop length to use in the phase vocoder
    n_fft: int
        Number of FFT samples to use in the phase vocoder
    """
    # Break down into regions of constant slope
    xdiff = path[1::, 0] - path[0:-1, 0]
    ydiff = path[1::, 1] - path[0:-1, 1]
    xdiff = xdiff[1::] - xdiff[0:-1]
    ydiff = ydiff[1::] - ydiff[0:-1]
    diff = xdiff + ydiff
    ret = np.array([])
    i1 = 0
    while i1 < len(diff):
        i2 = i1 + 1
        while i2 < len(diff) and diff[i2] == 0:
            i2 += 1
        while i2 < len(diff) and path[i2, 0] - path[i1, 0] < n_fft:
            i2 += 1
        if i2 >= len(diff):
            break
        fac = (path[i2, 1] - path[i1, 1]) / (path[i2, 0] - path[i1, 0])
        if fac > 0:
            fac = 1 / fac
            xi = x[path[i1, 0]:path[i2, 0] + 1]
            D = librosa.stft(xi, n_fft=n_fft, hop_length=hop_length)
            DNew = librosa.phase_vocoder(D, fac, hop_length=hop_length)
            xifast = librosa.istft(DNew, hop_length=hop_length)
            ret = np.concatenate((ret, xifast))
        i1 = i2
    return ret
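A warping path for timemap_stretch can come from DTW; a hedged sketch using chroma features (the file names are hypothetical, and DTW frame indices are flipped into increasing order and converted to sample indices):

import numpy as np
import librosa

y1, sr = librosa.load('performance.wav')
y2, _ = librosa.load('reference.wav')

hop = 512
C1 = librosa.feature.chroma_cqt(y=y1, sr=sr, hop_length=hop)
C2 = librosa.feature.chroma_cqt(y=y2, sr=sr, hop_length=hop)

_, wp = librosa.sequence.dtw(C1, C2)   # path is returned end-to-start
path = np.flipud(wp) * hop             # frames -> samples, start-to-end

y1_aligned = timemap_stretch(y1, sr, path)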
Example #9
def beat_stretch(D, beats_orig, beats_target):

    D_out = np.empty((D.shape[0], beats_target[-1]), dtype=D.dtype)

    # Compute beat deltas

    db_orig = np.diff(beats_orig)
    db_target = np.diff(beats_target)

    t = 0

    for (i, delta) in enumerate(db_target):
        Dslice = D[:, beats_orig[i]:beats_orig[i + 1]]

        Dnew = librosa.phase_vocoder(Dslice, float(db_orig[i]) / delta)

        D_out[:, t:t + delta] = Dnew[:, :delta]

        t = t + delta

    return D_out
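A hedged usage sketch for beat_stretch (assumes the default hop of 512 for both the STFT and the beat tracker, so beat frames index STFT columns directly; the evenly spaced target grid is hypothetical):

import numpy as np
import librosa

y, sr = librosa.load('groove.wav')
D = librosa.stft(y)

_, beats = librosa.beat.beat_track(y=y, sr=sr)   # beat positions in frames
beats_target = np.linspace(0, beats[-1], num=len(beats)).astype(int)

D_out = beat_stretch(D, beats, beats_target)
y_out = librosa.istft(D_out)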
Example #10
def speedDown(y, n_step=0.5):
    y_D = librosa.stft(y)
    y_D_slow = librosa.phase_vocoder(y_D, n_step)
    y_slower = librosa.istft(y_D_slow)
    return y_slower
Example #11
def speedUp(y, n_step=2):
    y_D = librosa.stft(y)
    y_D_fast = librosa.phase_vocoder(y_D, n_step)
    y_faster = librosa.istft(y_D_fast)
    return y_faster
Example #12
    def speed(self, sp):
        tmp = librosa.stft(self.y, n_fft=2048, hop_length=512)
        tmp_speed = librosa.phase_vocoder(tmp, sp, hop_length=512)
        return librosa.istft(tmp_speed, hop_length=512)
Example #13
def vocode(x, y, rate=2.0):
    return librosa.phase_vocoder(x, rate), y
Example #14
def test_phase_vocoder(complex_specgrams, rate, hop_length):

    # Using a decorator here causes parametrize to fail on Python 2
    if not IMPORT_LIBROSA:
        raise unittest.SkipTest('Librosa is not available')

    # Due to the cumulative sum, numerical error when using torch.float32
    # causes the bottom-right values of the stretched spectrogram to not
    # match librosa.

    complex_specgrams = complex_specgrams.type(torch.float64)
    phase_advance = torch.linspace(0,
                                   np.pi * hop_length,
                                   complex_specgrams.shape[-3],
                                   dtype=torch.float64)[..., None]

    complex_specgrams_stretch = F.phase_vocoder(complex_specgrams,
                                                rate=rate,
                                                phase_advance=phase_advance)

    # == Test shape
    expected_size = list(complex_specgrams.size())
    expected_size[-2] = int(np.ceil(expected_size[-2] / rate))

    assert complex_specgrams.dim() == complex_specgrams_stretch.dim()
    assert complex_specgrams_stretch.size() == torch.Size(expected_size)

    # == Test values
    index = [0] * (complex_specgrams.dim() - 3) + [slice(None)] * 3
    mono_complex_specgram = complex_specgrams[index].numpy()
    mono_complex_specgram = mono_complex_specgram[..., 0] + \
        mono_complex_specgram[..., 1] * 1j
    expected_complex_stretch = librosa.phase_vocoder(mono_complex_specgram,
                                                     rate=rate,
                                                     hop_length=hop_length)

    complex_stretch = complex_specgrams_stretch[index].numpy()
    complex_stretch = complex_stretch[..., 0] + 1j * complex_stretch[..., 1]

    assert np.allclose(complex_stretch, expected_complex_stretch, atol=1e-5)

    def test_torchscript_create_fb_matrix(self):

        n_stft = 100
        f_min = 0.0
        f_max = 20.0
        n_mels = 10
        sample_rate = 16000

        _test_torchscript_functional(F.create_fb_matrix, n_stft, f_min, f_max,
                                     n_mels, sample_rate)

    def test_torchscript_amplitude_to_DB(self):

        spec = torch.rand((6, 201))
        multiplier = 10.0
        amin = 1e-10
        db_multiplier = 0.0
        top_db = 80.0

        _test_torchscript_functional(F.amplitude_to_DB, spec, multiplier, amin,
                                     db_multiplier, top_db)

    def test_torchscript_create_dct(self):

        n_mfcc = 40
        n_mels = 128
        norm = "ortho"

        _test_torchscript_functional(F.create_dct, n_mfcc, n_mels, norm)

    def test_torchscript_mu_law_encoding(self):

        tensor = torch.rand((1, 10))
        qc = 256

        _test_torchscript_functional(F.mu_law_encoding, tensor, qc)

    def test_torchscript_mu_law_decoding(self):

        tensor = torch.rand((1, 10))
        qc = 256

        _test_torchscript_functional(F.mu_law_decoding, tensor, qc)

    def test_torchscript_complex_norm(self):

        complex_tensor = torch.randn(1, 2, 1025, 400, 2)
        power = 2

        _test_torchscript_functional(F.complex_norm, complex_tensor, power)

    def test_mask_along_axis(self):

        specgram = torch.randn(2, 1025, 400)
        mask_param = 100
        mask_value = 30.
        axis = 2

        _test_torchscript_functional(F.mask_along_axis, specgram, mask_param,
                                     mask_value, axis)

    def test_mask_along_axis_iid(self):

        specgrams = torch.randn(4, 2, 1025, 400)
        mask_param = 100
        mask_value = 30.
        axis = 2

        _test_torchscript_functional(F.mask_along_axis_iid, specgrams,
                                     mask_param, mask_value, axis)

    def test_torchscript_gain(self):
        tensor = torch.rand((1, 1000))
        gainDB = 2.0

        _test_torchscript_functional(F.gain, tensor, gainDB)

    def test_torchscript_dither(self):
        tensor = torch.rand((1, 1000))

        _test_torchscript_functional(F.dither, tensor)
        _test_torchscript_functional(F.dither, tensor, "RPDF")
        _test_torchscript_functional(F.dither, tensor, "GPDF")
Example #15
import warnings

import numpy
import librosa
from scipy.io import wavfile
from pydub import AudioSegment
from pydub.playback import play

warnings.filterwarnings('ignore')

path = r'/Users/peterzuker/Desktop/Audio Modification/10047/model_input/spells/1/exemplars/1499777912068.wav'

# Reload the audio to use librosa's expected format
lr_speech_data, lr_speech_rate = librosa.load(path)

stretched = librosa.effects.time_stretch(lr_speech_data, 1.47)

y, sr = librosa.load(path)
D = librosa.stft(y, n_fft=2048, hop_length=512)
D_slow = librosa.phase_vocoder(D, 1. / 3, hop_length=512)
y_slow = librosa.istft(D_slow, hop_length=512)

wavfile.write('test.wav', sr, y_slow)

rate, data = wavfile.read(path)
sound = AudioSegment.from_file(path, format="wav")

play(sound)


def remove_silence(audio, threshold):
    # Identify all samples with an absolute value greater than the threshold
    greater_index = numpy.greater(numpy.absolute(audio), threshold)
    # Filter to only include the identified samples
    above_threshold_data = audio[greater_index]
    return above_threshold_data