def test_resample_all():
    """
    Tests that a properly structured directory of labelled audio files is
    successfully resampled at the desired rate, saved as a different file
    type, and written to the desired new or old location.

    All generated directories and the manifest are removed in a ``finally``
    clause, so a failing assertion does not leave stale artifacts behind in
    ``ROOT_DIR``.

    :return:
    """
    aud1, sr = librosa.load(librosa.ex('trumpet'))
    aud2, _ = librosa.load(librosa.ex('nutcracker'))

    # setup mock mnist style dataset file structure
    rand_loc1 = ''.join(random.choices(string.ascii_letters, k=6))
    rand_loc2 = ''.join(random.choices(string.ascii_letters, k=6))
    rand_loc1 = os.path.join(ROOT_DIR, rand_loc1)
    rand_loc2 = os.path.join(ROOT_DIR, rand_loc2)
    sub_dir1 = os.path.join(rand_loc1, 'l1')
    sub_dir2 = os.path.join(rand_loc1, 'l2')
    save1 = os.path.join(sub_dir1, 'test1.m4a')
    save2 = os.path.join(sub_dir2, 'test2.m4a')
    manifest = os.path.join(ROOT_DIR, 'manifest.txt')

    try:
        # write '.m4a' data to mock file structure
        # (the payload is WAV-encoded; only the file extension is '.m4a')
        os.makedirs(sub_dir1, exist_ok=True)
        os.makedirs(sub_dir2, exist_ok=True)
        with SoundFile(save1, 'w', sr, channels=1, format='WAV') as f1:
            f1.write(aud1)
        with SoundFile(save2, 'w', sr, channels=1, format='WAV') as f2:
            f2.write(aud2)

        # verify new files of type '.wav' save in same/old directory
        resample_all(rand_loc1, rand_loc1, sr)
        assert os.path.isfile(os.path.join(
            sub_dir1, 'rs_test1.wav')), "File not saved to old directory"
        assert os.path.isfile(os.path.join(
            sub_dir2, 'rs_test2.wav')), "File not saved to old directory"

        # verify new files of '.wav' are saved in new/different directory
        # NOTE(review): sub_dir1/sub_dir2 already start with ROOT_DIR. If
        # ROOT_DIR is absolute, os.path.join drops rand_loc2 and these two
        # assertions re-check the old directory -- confirm the intended
        # output layout (e.g. rand_loc2/l1/rs_test1.wav).
        resample_all(rand_loc1, rand_loc2, sr)
        assert os.path.isfile(
            os.path.join(rand_loc2, sub_dir1,
                         'rs_test1.wav')), "File not saved to new directory"
        assert os.path.isfile(
            os.path.join(rand_loc2, sub_dir2,
                         'rs_test2.wav')), "File not saved to new directory"
        assert os.path.isfile(
            manifest), "Manifest of resampled files not generated"

        # The unconditional rmtree(rand_loc2) used to fail the test when the
        # new directory was never created; keep that check explicit now that
        # cleanup tolerates missing paths.
        assert os.path.isdir(rand_loc2), "New directory was not created"
    finally:
        # Delete all generated test directories and files.
        shutil.rmtree(rand_loc1, ignore_errors=True)
        shutil.rmtree(rand_loc2, ignore_errors=True)
        if os.path.isfile(manifest):
            os.remove(manifest)
def rootdir():
    """
    Write a three-row catalogue of librosa example recordings to
    ``test.csv`` (in the current working directory) and return the directory
    that contains the first recording.

    :return: Directory name of the 'trumpet' example file.
    """
    example_paths = [li.ex(key) for key in ('trumpet', 'nutcracker', 'vibeace')]
    blanks = [None] * len(example_paths)

    catalogue = pd.DataFrame({
        'Title': ['a', 'b', 'c'],
        'URL': list(blanks),
        'Filename': example_paths,
        'Date': list(blanks),
        'Speakers': [1, 2, 3],
    })
    catalogue.to_csv('test.csv', index=False)

    first_path = example_paths[0]
    return os.path.dirname(first_path)
def test_clip_audio():
    """
    Test to ensure audio file is clipped to correct length and written to
    disk successfully.

    The output directory is removed in a ``finally`` clause so that a failed
    assertion does not leave it behind in ``ROOT_DIR``.

    :return:
    """
    filename = librosa.ex('nutcracker')
    audio, sr = librosa.load(filename)

    # Target durations in seconds: one shorter and one longer than the clip.
    length = len(audio) / sr
    clip = length // 2
    extend = int(length * 2)

    aud1, sr1 = clip_audio(audio, clip, sr)
    aud2, sr2 = clip_audio(audio, extend, sr)
    assert len(
        aud1
    ) / sr1 == clip, "Number of sample points does not meet meet expected length for clipped audio"
    assert len(
        aud2
    ) / sr2 == extend, "Number of sample points does not meet meet expected length for extended audio"

    rand_loc = ''.join(random.choices(string.ascii_letters, k=6))
    save_to = os.path.join(ROOT_DIR, rand_loc, 'test.wav')
    try:
        clip_audio(audio, extend, sr, save_to=save_to)
        assert os.path.exists(save_to), "Save to path was unsuccessful"
        assert os.path.isfile(save_to), "File did not save successfully"
    finally:
        # ignore_errors: the directory may not exist if the save itself failed.
        shutil.rmtree(os.path.dirname(save_to), ignore_errors=True)
def get_spectrogram(wav):
    """
    Return the magnitude spectrogram of librosa's bundled 'trumpet' example.

    :param wav: Currently unused.
    :return: ``np.abs`` of the short-time Fourier transform of the example.
    """
    # NOTE(review): `wav` is ignored and the 'trumpet' example is always
    # loaded. Presumably the caller's audio was meant to be analysed -- confirm
    # whether `wav` is a path or a waveform before wiring it in.
    y, _ = librosa.load(librosa.ex('trumpet'))

    # The Griffin-Lim and plain ISTFT reconstructions that used to be computed
    # here were never used or returned, so they have been dropped.
    return np.abs(librosa.stft(y))
def fft_example(*, use_audio=False):
    """
    Plot three small FFT demonstrations and show the figures.

    1. Real and imaginary parts of the FFT of a sine.
    2. Real and imaginary parts of the inverse FFT of a band of random
       unit-modulus coefficients.
    3. A signal next to its reconstructions from the full spectrum, from the
       magnitude only, and from the phase only.

    :param use_audio: When False (default) the third demo uses a synthetic
        sum of sines plus Gaussian noise. When True it uses librosa's
        'trumpet' example, which requires librosa to be installed.
    """
    # 1. Spectrum of a sine.
    t = np.arange(256)
    freq = np.fft.fftfreq(t.shape[-1])
    S = np.fft.fft(np.sin(t))

    fig, ax = plt.subplots(nrows=2, ncols=1, sharey=True)
    ax[0].plot(freq, S.real)
    ax[0].set(title='Real', xlabel='Frequency')
    ax[1].plot(freq, S.imag)
    ax[1].set(title='Imaginary', xlabel='Frequency')
    plt.tight_layout()

    #--------------------
    # 2. Time-domain signal synthesised from random-phase coefficients.
    t = np.arange(400)
    n = np.zeros((400,), dtype=complex)
    n[40:60] = np.exp(1j * np.random.uniform(0, 2 * np.pi, (20,)))
    s = np.fft.ifft(n)

    fig, ax = plt.subplots(nrows=2, ncols=1, sharey=True)
    ax[0].plot(t, s.real)
    ax[0].set(title='Real', xlabel='Time')
    ax[1].plot(t, s.imag)
    ax[1].set(title='Imaginary', xlabel='Time')

    #--------------------
    # 3. Reconstruction from magnitude only vs. phase only.
    if use_audio:
        import librosa

        y, sr = librosa.load(librosa.ex('trumpet'))
        t = np.arange(len(y)) / sr
    else:
        t = np.arange(1024)
        y = 12 * np.sin(t) + 20 * np.sin(10 * t) + 7 * np.sin(25 * t) + np.random.randn(t.shape[-1])

    S = np.fft.fft(y)
    y_hat = np.fft.ifft(S)

    S_mag = np.abs(S)
    y_mag_hat = np.fft.ifft(S_mag)

    S_phase = np.exp(1.0j * np.angle(S))
    y_phase_hat = np.fft.ifft(S_phase)

    # ifft returns complex arrays. Plot the real part explicitly: it is what
    # matplotlib would draw anyway, but without the complex-cast warning.
    # Titles are raw strings because '\h' is not a valid escape sequence.
    fig, ax = plt.subplots(nrows=2, ncols=2, sharey=True)
    ax[0, 0].plot(t, y)
    ax[0, 0].set(title=r'$y$')
    ax[0, 1].plot(t, y_hat.real)
    ax[0, 1].set(title=r'$\hat{y}$')
    ax[1, 0].plot(t, y_mag_hat.real)
    ax[1, 0].set(title=r'$\hat{y}_{mag}$')
    ax[1, 1].plot(t, y_phase_hat.real)
    ax[1, 1].set(title=r'$\hat{y}_{phase}$')
    plt.tight_layout()

    plt.show()
def test_resample():
    """
    Test to ensure audio file is resampled and written to disk successfully.

    The output directory and the manifest are removed in a ``finally``
    clause so that a failed assertion does not leave them in ``ROOT_DIR``.

    :return:
    """
    filename = librosa.ex('nutcracker')
    rand_loc = ''.join(random.choices(string.ascii_letters, k=6))
    save_to = os.path.join(ROOT_DIR, rand_loc, 'test.wav')
    manifest = os.path.join(ROOT_DIR, 'manifest.txt')

    try:
        resample(filename, save_to, sr=22050)
        assert os.path.exists(
            save_to), "Expected path of saved file does not exist"
        assert os.path.isfile(save_to), "File did not save successfully"
        assert os.path.exists(
            manifest), "Resampling log not successfully generated"
    finally:
        # ignore_errors / existence check: either artifact may be missing if
        # resample() failed part-way.
        shutil.rmtree(os.path.dirname(save_to), ignore_errors=True)
        if os.path.isfile(manifest):
            os.remove(manifest)
def melspectrogram_test():
    """
    Compute a mel spectrogram of librosa's 'trumpet' example and display it
    on a decibel scale.
    """
    waveform, rate = librosa.load(librosa.ex('trumpet'))

    # Flip to False to feed a precomputed power spectrogram instead.
    from_waveform = True
    if not from_waveform:
        # If a spectrogram input S is provided, then it is mapped directly onto the mel basis by mel_f.dot(S).
        power_spec = np.abs(librosa.stft(waveform))**2
        mel = librosa.feature.melspectrogram(S=power_spec, sr=rate)
    else:
        # If a time-series input y, sr is provided, then its magnitude spectrogram S is first computed, and then mapped onto the mel scale by mel_f.dot(S**power).
        mel = librosa.feature.melspectrogram(y=waveform, sr=rate)

        # Passing through arguments to the Mel filters.
        #mel = librosa.feature.melspectrogram(y=waveform, sr=rate, n_mels=128, fmax=8000)
        #mel = librosa.feature.melspectrogram(y=waveform, sr=rate, n_fft=512, n_mels=128, fmax=8000)
        #mel = librosa.feature.melspectrogram(y=waveform, sr=rate, n_fft=512, n_mels=128, fmax=8000, htk=True)

    #--------------------
    # Decibels relative to the spectrogram's maximum.
    mel_db = librosa.power_to_db(mel, ref=np.max)

    plt.figure(figsize=(10, 4))
    librosa.display.specshow(
        mel_db,
        x_axis='time',
        y_axis='mel',
        sr=rate,
        fmax=8000,
    )
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel-frequency Spectrogram')
    plt.tight_layout()
    plt.show()
# Code source: Brian McFee # License: ISC ################## # Standard imports import numpy as np import matplotlib.pyplot as plt from IPython.display import Audio import librosa import librosa.display ############################################# # Load an example with vocals. y, sr = librosa.load(librosa.ex('fishin'), duration=120) # And compute the spectrogram magnitude and phase S_full, phase = librosa.magphase(librosa.stft(y)) # Play back a 5-second excerpt with vocals Audio(data=y[10 * sr:15 * sr], rate=sr) ####################################### # Plot a 5-second slice of the spectrum idx = slice(*librosa.time_to_frames([10, 15], sr=sr)) fig, ax = plt.subplots() img = librosa.display.specshow(librosa.amplitude_to_db(S_full[:, idx], ref=np.max), y_axis='log', x_axis='time',
# sphinx_gallery_thumbnail_number = 15 # %% # All of librosa's plotting functions rely on matplotlib. # To demonstrate everything we can do, it will help to # import matplotlib's pyplot API here. import numpy as np import matplotlib.pyplot as plt import librosa import librosa.display # %% # First, we'll load in a demo track y, sr = librosa.load(librosa.ex('trumpet')) # %% # The first thing we might want to do is display an ordinary # (linear) spectrogram. # We'll do this by first computing the short-time Fourier # transform, and then mapping the magnitudes to a decibel # scale. # D = librosa.stft(y) # STFT of y S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max) # %% # If you're familiar with matplotlib already, you may know
y_axis='log', ax=ax[0]) ax[0].set(title='STFT (escala log)') ax[0].set(xlabel=None) # No segundo subplot exibe o espectograma da escala mel img2 = librosa.display.specshow(M_db, x_axis='time', y_axis='mel', ax=ax[1]) ax[1].set(title='Melspectograma') ax[1].set(xlabel=None) # No terceiro subplot exibe a formula de onda img3 = librosa.display.waveplot(y, sr=sr, ax=ax[2]) ax[2].set(title='Waveform') ax[2].set(xlabel=None) ax[2].set(ylabel='Hz') fig.colorbar(img1, ax=ax[0], format="%+2.f dB") fig.colorbar(img2, ax=ax[1], format="%+2.f dB") fig.colorbar(img3, ax=ax[2], format="%+2.f dB") plt.show() if __name__ == '__main__': # Precisa passar um caminho válido de uma música ou um arquivo de exemplo, tipo # print_spectograms("songs/Guns N' Roses - Welcome To The Jungle.webm") print_spectograms(librosa.ex("choice"))
<http://www.terasoft.com.tw/conf/ismir2014/proceedings/T110_127_Paper.pdf>`_. """ import numpy as np import matplotlib.pyplot as plt from IPython.display import Audio import librosa import librosa.display ######################## # Load an example clip with harmonics and percussives y, sr = librosa.load(librosa.ex('fishin'), duration=5, offset=10) Audio(data=y, rate=sr) ############################################### # Compute the short-time Fourier transform of y D = librosa.stft(y) ##################################################### # Decompose D into harmonic and percussive components # # :math:`D = D_\text{harmonic} + D_\text{percussive}` D_harmonic, D_percussive = librosa.decompose.hpss(D) #################################################################### # We can plot the two components along with the original spectrogram
def stft_test():
    """
    Walk through librosa's STFT / inverse-STFT API on bundled examples.

    Prints shapes and dtypes of the STFT of 'nutcracker', checks that
    ``librosa.magphase`` agrees with ``np.abs`` / ``np.angle``, then
    reconstructs 'trumpet' from the full STFT, from the magnitude only and
    from the phase only, printing the maximum reconstruction error and
    plotting the four waveforms.
    """
    filepath = librosa.ex('nutcracker')
    #filepath = librosa.ex('trumpet')

    #y, sr = librosa.load(filepath)
    y, sr = librosa.load(filepath, sr=None, mono=True)
    #y, sr = librosa.load(filepath, sr=22050, mono=True, offset=0.0, duration=None, dtype=np.float32, res_type='kaiser_best')

    print('Audio time-series: shape = {}, dtype = {}.'.format(y.shape, y.dtype))  # 'nutcracker': (2643264,), 'trumpet': (117601,)
    print('Sampling rate = {}.'.format(sr))

    #--------------------
    # The STFT represents a signal in the time-frequency domain by computing discrete Fourier transforms (DFT) over short overlapping windows.

    #D = librosa.stft(y)
    D = librosa.stft(y, n_fft=2048, hop_length=None, win_length=None, window='hann', center=True, dtype=None, pad_mode='constant')
    # The shape of D = (1 + floor(n_fft / 2), ceil(len(y) / hop_length)).
    #   hop_length = win_length // 4 = n_fft // 4 (default).
    #   n_fft   D
    #           'nutcracker'    'trumpet'
    #   256     (129, 41302)    (129, 1838)
    #   512     (257, 20651)    (257, 919)
    #   1024    (513, 10326)    (513, 460)
    #   2048    (1025, 5163)    (1025, 230)
    #   4096    (2049, 2582)    (2049, 115)
    #   8192    (4097, 1291)    (4097, 58)

    print('STFT: shape = {}, dtype = {}.'.format(D.shape, D.dtype))

    # Separate a complex-valued spectrogram D into its magnitude (S) and phase (P) components.
    D_mag, D_phase = librosa.magphase(D, power=1)  # mag = np.abs(D)**power, phase = np.exp(1.0j * np.angle(D)).
    D_phase_angle = np.angle(D_phase)  # The phase angle. [rad].

    magnitude = np.abs(D)
    #magnitude = np.abs(D)**2
    phase_angle = np.angle(D)

    print('STFT magitude #1: shape = {}, dtype = {}.'.format(D_mag.shape, D_mag.dtype))
    print('STFT phase #1: shape = {}, dtype = {}.'.format(D_phase.shape, D_phase.dtype))  # np.complex64.
    print('STFT phase angle #1: shape = {}, dtype = {}.'.format(D_phase_angle.shape, D_phase_angle.dtype))
    print('STFT magitude #2: shape = {}, dtype = {}.'.format(magnitude.shape, magnitude.dtype))
    print('STFT phase angle #2: shape = {}, dtype = {}.'.format(phase_angle.shape, phase_angle.dtype))

    assert D_mag.shape == D_phase.shape
    assert magnitude.shape == phase_angle.shape
    assert D_mag.shape == magnitude.shape
    assert np.allclose(D_mag, magnitude)
    #assert np.allclose(D_phase, phase_angle)  # NOTE [info] >> pi and -pi are the same in angle.

    #--------------------
    # Inverse STFT.

    y, sr = librosa.load(librosa.ex('trumpet'))

    D = librosa.stft(y)
    y_hat = librosa.istft(D)

    print('The shape of y = {}.'.format(y.shape))
    print('The shape of y_hat = {}.'.format(y_hat.shape))
    print(y)
    print(y_hat)

    # Exactly preserving length of the input signal requires explicit padding.
    # Otherwise, a partial frame at the end of y will not be represented.
    n = len(y)
    n_fft = 2048
    y_pad = librosa.util.fix_length(y, size=n + n_fft // 2)

    D = librosa.stft(y_pad, n_fft=n_fft)
    y_hat = librosa.istft(D, length=n)
    print('Max error = {}.'.format(np.max(np.abs(y - y_hat))))  # NOTE [caution] >> y, not y_pad.

    # Reconstruct from magnitude alone and from phase alone for comparison.
    D_mag, D_phase = librosa.magphase(D)
    y_mag_hat = librosa.istft(D_mag, length=n)
    print('Max error = {}.'.format(np.max(np.abs(y - y_mag_hat))))
    y_phase_hat = librosa.istft(D_phase, length=n)
    print('Max error = {}.'.format(np.max(np.abs(y - y_phase_hat))))

    # Titles are raw strings: '\h' is not a valid escape sequence and
    # triggers a warning in a normal string literal.
    fig, ax = plt.subplots(nrows=2, ncols=2, sharey=True)
    librosa.display.waveshow(y, sr=sr, ax=ax[0, 0])
    ax[0, 0].set(title=r'$y$')
    librosa.display.waveshow(y_hat, sr=sr, ax=ax[0, 1])
    ax[0, 1].set(title=r'$\hat{y}$')
    librosa.display.waveshow(y_mag_hat, sr=sr, ax=ax[1, 0])
    ax[1, 0].set(title=r'$\hat{y}_{mag}$')
    librosa.display.waveshow(y_phase_hat, sr=sr, ax=ax[1, 1])
    ax[1, 1].set(title=r'$\hat{y}_{phase}$')
    plt.tight_layout()

    plt.show()
def test_clip_all():
    """
    Tests that a properly structured directory of labelled audio files is
    successfully clipped or extended to a uniform length according to method
    inputs.

    All generated directories and the manifest are removed in a ``finally``
    clause, so a failing assertion does not leave stale artifacts (in
    particular a manifest that later runs could read) in ``ROOT_DIR``.

    :return:
    """
    aud1, sr = librosa.load(librosa.ex('trumpet'))
    aud2, _ = librosa.load(librosa.ex('nutcracker'))

    # setup mock mnist style dataset file structure
    rand_loc1 = ''.join(random.choices(string.ascii_letters, k=6))
    rand_loc2 = ''.join(random.choices(string.ascii_letters, k=6))
    rand_loc3 = ''.join(random.choices(string.ascii_letters, k=6))
    rand_loc1 = os.path.join(ROOT_DIR, rand_loc1)
    rand_loc2 = os.path.join(ROOT_DIR, rand_loc2)
    rand_loc3 = os.path.join(ROOT_DIR, rand_loc3)
    sub_dir1 = os.path.join(rand_loc1, 'l1')
    sub_dir2 = os.path.join(rand_loc1, 'l2')
    save1 = os.path.join(sub_dir1, 'test1.wav')
    save2 = os.path.join(sub_dir2, 'test2.wav')
    manifest = os.path.join(ROOT_DIR, 'manifest.txt')

    try:
        os.makedirs(sub_dir1, exist_ok=True)
        os.makedirs(sub_dir2, exist_ok=True)
        os.makedirs(rand_loc3, exist_ok=True)

        # write audio data to mock file structure
        with SoundFile(save1, 'w', sr, channels=1, format='WAV') as f1:
            f1.write(aud1)
        with SoundFile(save2, 'w', sr, channels=1, format='WAV') as f2:
            f2.write(aud2)

        # Clip everything to half the duration (in seconds) of the longer file.
        length = len(aud2) / sr
        clip_to = length // 2

        # verify clipped audio is saved in same/old directory
        # NOTE(review): test1.wav/test2.wav already exist at these paths
        # before clip_all runs, so these assertions cannot fail on their own.
        clip_all(rand_loc1, rand_loc1, clip_to, sr)
        assert os.path.isfile(os.path.join(
            sub_dir1, 'test1.wav')), "File not saved to old directory"
        assert os.path.isfile(os.path.join(
            sub_dir2, 'test2.wav')), "File not saved to old directory"

        # verify new files are saved in new/different directory
        # NOTE(review): sub_dir1/sub_dir2 already start with ROOT_DIR. If
        # ROOT_DIR is absolute, os.path.join drops rand_loc2 and these
        # assertions re-check the old directory -- confirm the intended
        # output layout (e.g. rand_loc2/l1/test1.wav).
        clip_all(rand_loc1, rand_loc2, clip_to, sr)
        assert os.path.isfile(os.path.join(
            rand_loc2, sub_dir1, 'test1.wav')), "File not saved to new directory"
        assert os.path.isfile(os.path.join(
            rand_loc2, sub_dir2, 'test2.wav')), "File not saved to new directory"

        # verify that log/manifest of previously clipped files is written to disk
        clip_all(rand_loc1, rand_loc2, clip_to, sr, log=manifest)
        assert os.path.isfile(
            manifest), "Manifest of resampled files not generated"

        # Verify that previously clipped files are skipped:
        clip_all(rand_loc1, rand_loc3, clip_to, sr, restart=True, log=manifest)
        assert not os.listdir(
            rand_loc3
        ), "Files erroneously re-written despite being in the manifest/log"

        # The unconditional rmtree(rand_loc2) used to fail the test when the
        # new directory was never created; keep that check explicit now that
        # cleanup tolerates missing paths.
        assert os.path.isdir(rand_loc2), "New directory was not created"
    finally:
        # Delete all generated test directories and files.
        shutil.rmtree(rand_loc1, ignore_errors=True)
        shutil.rmtree(rand_loc2, ignore_errors=True)
        shutil.rmtree(rand_loc3, ignore_errors=True)
        if os.path.isfile(manifest):
            os.remove(manifest)
def AUDIOFILE():
    """Return the value of ``librosa.ex('brahms')``, the bundled 'brahms' example."""
    example = librosa.ex('brahms')
    return example
# Code source: Brian McFee # License: ISC ################## # Standard imports from __future__ import print_function import numpy as np import matplotlib.pyplot as plt import librosa import librosa.display ############################################# # Load an example signal y, sr = librosa.load(librosa.ex('trumpet')) # And compute the spectrogram magnitude and phase S_full, phase = librosa.magphase(librosa.stft(y)) ################### # Plot the spectrum plt.figure(figsize=(12, 4)) librosa.display.specshow(librosa.amplitude_to_db(S_full, ref=np.max), y_axis='log', x_axis='time', sr=sr) plt.colorbar() plt.tight_layout() ###########################################################
""" ################################################## # We'll need numpy and matplotlib for this example import numpy as np import matplotlib.pyplot as plt import soundfile as sf import librosa as librosa import librosa.display as display ###################################################################### # First, we'll start with an audio file that we want to stream # We'll use an example track at 44.1 KHz filename = librosa.ex('brahms', hq=True) ##################################################################### # Next, we'll set up the block reader to work on short segments of # audio at a time. # We'll generate 16 frames at a time, each frame having 4096 samples # and 50% overlap. # n_fft = 4096 hop_length = n_fft // 2 # fill_value pads out the last frame with zeros so that we have a # full frame at the end of the signal, even if the signal doesn't # divide evenly into full frames.
def SIGNAL():
    """
    Load librosa's 'brahms' example without resampling (``sr=None``).

    :return: ``(samples, sampling_rate)`` as returned by ``librosa.load``.
    """
    example_path = librosa.ex('brahms')
    samples, rate = librosa.load(example_path, sr=None)
    return samples, rate
# Code source: Brian McFee # License: ISC ################################################## # We'll need numpy and matplotlib for this example import numpy as np import matplotlib.pyplot as plt import librosa import librosa.display ###################################################### # The method works fine for longer signals, but the # results are harder to visualize. y, sr = librosa.load(librosa.ex('trumpet', hq=True), sr=44100) #################################################### # These parameters are taken directly from the paper n_fft = 1024 hop_length = int(librosa.time_to_samples(1. / 200, sr=sr)) lag = 2 n_mels = 138 fmin = 27.5 fmax = 16000. max_size = 3 ######################################################## # The paper uses a log-frequency representation, but for # simplicity, we'll use a Mel spectrogram instead. S = librosa.feature.melspectrogram(y,
:param rsr: The rate to which the original signal will be downsampled. """ cutoff = rsr / 2 sos = sig.butter(10, cutoff, fs=sr, btype='lowpass', analog=False, output='sos') return sig.sosfilt(sos, signal) @staticmethod def _log_bin(arr, n_bins): """ Helper method. Divide spectrogram frequency bins logarithmically :param arr: The array to divide. :param n_bins: The number of bins to divide the array into. """ bands = np.array([10 * 2**i for i in range(n_bins - 1)]) idxs = np.arange(len(arr)) split_arr = np.split(arr, np.searchsorted(idxs, bands)) return split_arr if __name__ == '__main__': path = librosa.ex('trumpet') a, s = librosa.load(path) fp = Fingerprint(a, s) fp.show()
All the stuff needed to preprocess audio data for NN ''' import librosa, librosa.display import matplotlib.pyplot as plt import os import numpy as np data_folder = os.path.join(os.getcwd(), '../GuitarNotes/') print(data_folder) file='choice' #sound src filepaths: ## ../pitch/sms-tools/sounds/ ## ./GuitarNotes/ #waveform signal, sr = librosa.load(librosa.ex(file), sr=22050) #sr * duration(T) --> 22050 * 25 # librosa.display.waveplot(signal, sr=sr) # plt.xlabel("Time") # plt.ylabel("Amplitude") # plt.show() #fft --> spectrum ''' FFT: - Moves the signal from time to frequency domain - No time information - Static snapshot of amplitude and frequency for the entire duration ''' fft = np.fft.fft(signal) magnitude = np.abs(fft) #gives us the magnitude of frequency (and converts from complex plane)
Created on Mon Feb 1 14:40:38 2021 @author: federicovisi """ from audio_features import audio_features from audio_features import normalize import librosa import librosa.display import matplotlib.pyplot as plt #%% Function call # get the path of one of the librosa audio examples path = librosa.ex('nutcracker') # call the audio_features function # y = audio; sr = sample rate; df = pandas dataframe containing a time vectore and 26 audio features y, sr, df = audio_features(path) #%% Plot waveform and 4 audio features librosa.display.waveplot(y, sr=sr, alpha=0.4) plt.plot(df['time'].values, normalize(df['spectral_centroid']), color='r') plt.plot(df['time'].values, normalize(df['rolloff']), color='g') plt.plot(df['time'].values, normalize(df['rms']), color='m') plt.plot(df['time'].values, normalize(df['contrast']), color='y') #%% Save audio features as csv file df.to_csv('audio_features.csv', index=False)
def feature_extraction_example():
    """
    Demonstrate librosa feature extraction.

    Part 1 builds beat-synchronous MFCC(+delta) and chroma features for the
    'nutcracker' example (the result is computed but not returned).
    Part 2 plots the RMS energy of the 'trumpet' example above its
    spectrogram and shows the figure.
    """
    # Load the example clip.
    audio, rate = librosa.load(librosa.ex('nutcracker'))

    # Set the hop length: at 22050 Hz, 512 samples ~= 23ms.
    hop = 512

    # Separate harmonics and percussives into two waveforms.
    harmonic, percussive = librosa.effects.hpss(audio)

    # Beat track on the percussive signal.
    tempo, beats = librosa.beat.beat_track(y=percussive, sr=rate)

    # Compute MFCC features from the raw signal, then their first-order
    # differences (delta features).
    mfcc = librosa.feature.mfcc(y=audio, sr=rate, hop_length=hop, n_mfcc=13)
    mfcc_delta = librosa.feature.delta(mfcc)

    # Stack and synchronize between beat events.
    # This time, we'll use the mean value (default) instead of median.
    stacked_mfcc = np.vstack([mfcc, mfcc_delta])
    beat_mfcc_delta = librosa.util.sync(stacked_mfcc, beats)

    # Compute chroma features from the harmonic signal.
    chroma = librosa.feature.chroma_cqt(y=harmonic, sr=rate)

    # Aggregate chroma features between beat events.
    # We'll use the median value of each feature between beat frames.
    beat_chroma = librosa.util.sync(chroma, beats, aggregate=np.median)

    # Finally, stack all beat-synchronous features together.
    beat_features = np.vstack([beat_chroma, beat_mfcc_delta])

    #--------------------
    # Spectral features.
    #librosa.feature.chroma_stft(y=None, sr=22050, S=None, norm=inf, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', tuning=None, n_chroma=12)
    #librosa.feature.chroma_cqt(y=None, sr=22050, C=None, hop_length=512, fmin=None, norm=inf, threshold=0.0, tuning=None, n_chroma=12, n_octaves=7, window=None, bins_per_octave=36, cqt_mode='full')
    #librosa.feature.chroma_cens(y=None, sr=22050, C=None, hop_length=512, fmin=None, tuning=None, n_chroma=12, n_octaves=7, bins_per_octave=36, cqt_mode='full', window=None, norm=2, win_len_smooth=41, smoothing_window='hann')
    #librosa.feature.melspectrogram(y=None, sr=22050, S=None, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', power=2.0)
    #librosa.feature.mfcc(y=None, sr=22050, S=None, n_mfcc=20, dct_type=2, norm='ortho', lifter=0)
    #librosa.feature.spectral_centroid(y=None, sr=22050, S=None, n_fft=2048, hop_length=512, freq=None, win_length=None, window='hann', center=True, pad_mode='constant')
    #librosa.feature.spectral_bandwidth(y=None, sr=22050, S=None, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', freq=None, centroid=None, norm=True, p=2)
    #librosa.feature.spectral_contrast(y=None, sr=22050, S=None, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', freq=None, fmin=200.0, n_bands=6, quantile=0.02, linear=False)
    #librosa.feature.spectral_flatness(y=None, S=None, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', amin=1e-10, power=2.0)
    #librosa.feature.spectral_rolloff(y=None, sr=22050, S=None, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', freq=None, roll_percent=0.85)
    #librosa.feature.rms(y=None, S=None, frame_length=2048, hop_length=512, center=True, pad_mode='constant')
    #librosa.feature.poly_features(y=None, sr=22050, S=None, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', order=1, freq=None)
    #librosa.feature.tonnetz(y=None, sr=22050, chroma=None)
    #librosa.feature.zero_crossing_rate(y, frame_length=2048, hop_length=512, center=True)

    # Rhythm features.
    #librosa.feature.tempogram(y=None, sr=22050, onset_envelope=None, hop_length=512, win_length=384, center=True, window='hann', norm=inf)
    #librosa.feature.fourier_tempogram(y=None, sr=22050, onset_envelope=None, hop_length=512, win_length=384, center=True, window='hann')

    # Feature manipulation.
    #librosa.feature.delta(data, width=9, order=1, axis=- 1, mode='interp')
    #librosa.feature.stack_memory(data, n_steps=2, delay=1)

    # Feature inversion.
    #librosa.feature.inverse.mel_to_stft(M, sr=22050, n_fft=2048, power=2.0)
    #librosa.feature.inverse.mel_to_audio(M, sr=22050, n_fft=2048, hop_length=None, win_length=None, window='hann', center=True, pad_mode='constant', power=2.0, n_iter=32, length=None, dtype=np.float32)
    #librosa.feature.inverse.mfcc_to_mel(mfcc, n_mels=128, dct_type=2, norm='ortho', ref=1.0, lifter=0)
    #librosa.feature.inverse.mfcc_to_audio(mfcc, n_mels=128, dct_type=2, norm='ortho', ref=1.0, lifter=0)

    # RMS demo.
    # REF [site] >> https://librosa.org/doc/main/generated/librosa.feature.rms.html
    trumpet, trumpet_rate = librosa.load(librosa.ex('trumpet'))

    #magnitude, phase = librosa.magphase(librosa.stft(trumpet))
    # Use a STFT window of constant ones and no frame centering to get consistent results with the RMS computed from the audio samples y.
    stft = librosa.stft(trumpet, window=np.ones, center=False)
    magnitude, phase = librosa.magphase(stft)

    #energy = librosa.feature.rms(y=trumpet)
    energy = librosa.feature.rms(S=magnitude)

    fig, axes = plt.subplots(nrows=2, sharex=True)
    rms_ax, spec_ax = axes[0], axes[1]

    frame_times = librosa.times_like(energy)
    rms_ax.semilogy(frame_times, energy[0], label='RMS Energy')
    rms_ax.set(xticks=[])
    rms_ax.legend()
    rms_ax.label_outer()

    magnitude_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(magnitude_db, y_axis='log', x_axis='time', ax=spec_ax)
    spec_ax.set(title='log Power spectrogram')

    plt.show()
once, or when streaming data from a recording device. """ ################################################## # We'll need numpy and matplotlib for this example import numpy as np import matplotlib.pyplot as plt import soundfile as sf import librosa as librosa import librosa.display as display ###################################################################### # First, we'll start with an audio file that we want to stream filename = librosa.ex('humpback') ##################################################################### # Next, we'll set up the block reader to work on short segments of # audio at a time. # We'll generate 64 frames at a time, each frame having 2048 samples # and 75% overlap. # n_fft = 2048 hop_length = 512 # fill_value pads out the last frame with zeros so that we have a # full frame at the end of the signal, even if the signal doesn't # divide evenly into full frames.
and its margin-based extension due to `Dreidger, Mueller and Disch, 2014 <http://www.terasoft.com.tw/conf/ismir2014/proceedings/T110_127_Paper.pdf>`_. """ from __future__ import print_function import numpy as np import matplotlib.pyplot as plt import librosa import librosa.display ######################## # Load an example clip with harmonics and percussives y, sr = librosa.load(librosa.ex('choice')) ############################################### # Compute the short-time Fourier transform of y D = librosa.stft(y) ##################################################### # Decompose D into harmonic and percussive components # # :math:`D = D_\text{harmonic} + D_\text{percussive}` D_harmonic, D_percussive = librosa.decompose.hpss(D) #################################################################### # We can plot the two components along with the original spectrogram # Pre-compute a global reference power from the input spectrum
# Code source: Brian McFee # License: ISC # sphinx_gallery_thumbnail_number = 5 import numpy as np import scipy import matplotlib.pyplot as plt import librosa import librosa.display ####################################################################### # We'll use a track that has harmonic, melodic, and percussive elements # Karissa Hobbs - Let's Go Fishin' y, sr = librosa.load(librosa.ex('fishin')) ####################################### # First, let's plot the original chroma chroma_orig = librosa.feature.chroma_cqt(y=y, sr=sr) # For display purposes, let's zoom in on a 15-second chunk from the middle of the song idx = tuple([slice(None), slice(*list(librosa.time_to_frames([45, 60])))]) # And for comparison, we'll show the CQT matrix as well. C = np.abs(librosa.cqt(y=y, sr=sr, bins_per_octave=12 * 3, n_bins=7 * 12 * 3)) fig, ax = plt.subplots(nrows=2, sharex=True) img1 = librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max)[idx], y_axis='cqt_note', x_axis='time',
def SIGNAL():
    """
    Load librosa's 'trumpet' example without resampling (``sr=None``).

    :return: ``(samples, sampling_rate)`` as returned by ``librosa.load``.
    """
    example_path = librosa.ex('trumpet')
    samples, rate = librosa.load(example_path, sr=None)
    return samples, rate
def viterbi_decoding_example():
    """
    Silence / non-silence detection on librosa's 'trumpet' example.

    Plots the spectrogram, the RMS curve, a logistic probability of
    non-silence derived from it, a frame-wise threshold detector, and
    finally the Viterbi-smoothed state sequence, then shows the figures.
    """
    # Problem of silence/non-silence detection.
    audio, rate = librosa.load(librosa.ex('trumpet'))

    # Compute the spectrogram magnitude and phase.
    magnitude, phase = librosa.magphase(librosa.stft(audio))
    # The same dB-scaled spectrogram is drawn in three figures below.
    magnitude_db = librosa.amplitude_to_db(magnitude, ref=np.max)

    # Plot the spectrum.
    fig, ax = plt.subplots()
    mesh = librosa.display.specshow(magnitude_db, y_axis='log', x_axis='time', sr=rate, ax=ax)
    fig.colorbar(mesh, ax=ax)

    # There are periods of silence and non-silence throughout this recording.

    # Plot the root-mean-square (RMS) curve.
    energy = librosa.feature.rms(y=audio)[0]
    frame_times = librosa.frames_to_time(np.arange(len(energy)))

    fig, ax = plt.subplots()
    ax.plot(frame_times, energy)
    ax.axhline(0.02, color='r', alpha=0.5)
    ax.set(xlabel='Time', ylabel='RMS')

    # We'll normalize the RMS by its standard deviation to expand the range of the probability vector.
    scaled = (energy - 0.02) / np.std(energy)
    prob_voiced = np.exp(scaled) / (1 + np.exp(scaled))

    fig, ax = plt.subplots()
    ax.plot(frame_times, prob_voiced, label='P[V=1|x]')
    ax.axhline(0.5, color='r', alpha=0.5, label='Descision threshold')
    ax.set(xlabel='Time')
    ax.legend()

    # A simple silence detector would classify each frame independently of its neighbors.
    #plt.figure(figsize=(12, 6))
    fig, axes = plt.subplots(nrows=2, sharex=True)
    spec_ax, state_ax = axes[0], axes[1]
    librosa.display.specshow(magnitude_db, y_axis='log', x_axis='time', sr=rate, ax=spec_ax)
    spec_ax.label_outer()
    state_ax.step(frame_times, prob_voiced >= 0.5, label='Non-silent')
    state_ax.set(ylim=[0, 1.05])
    state_ax.legend()

    # We can do better using the Viterbi algorithm.
    # We'll assume that a silent frame is equally likely to be followed by silence or non-silence, but that non-silence is slightly more likely to be followed by non-silence.
    # This is accomplished by building a self-loop transition matrix, where transition[i, j] is the probability of moving from state i to state j in the next frame.
    loop_matrix = librosa.sequence.transition_loop(2, [0.5, 0.6])
    print(loop_matrix)

    # Our p variable only indicates the probability of non-silence, so we need to also compute the probability of silence as its complement.
    state_probs = np.vstack([1 - prob_voiced, prob_voiced])
    print(state_probs)

    # We'll use viterbi_discriminative here, since the inputs are state likelihoods conditional on data (in our case, data is rms).
    decoded = librosa.sequence.viterbi_discriminative(state_probs, loop_matrix)

    #sphinx_gallery_thumbnail_number = 5
    fig, axes = plt.subplots(nrows=2, sharex=True)
    spec_ax, state_ax = axes[0], axes[1]
    librosa.display.specshow(magnitude_db, y_axis='log', x_axis='time', sr=rate, ax=spec_ax)
    spec_ax.label_outer()
    state_ax.step(frame_times, prob_voiced >= 0.5, label='Frame-wise')
    state_ax.step(frame_times, decoded, linestyle='--', color='orange', label='Viterbi')
    state_ax.set(ylim=[0, 1.05])
    state_ax.legend()

    plt.show()
def AUDIOFILE():
    """Return the value of ``librosa.ex('trumpet')``, the bundled 'trumpet' example."""
    example = librosa.ex('trumpet')
    return example
################################################## # We'll need numpy and matplotlib for this example from __future__ import print_function import numpy as np import matplotlib.pyplot as plt import librosa import librosa.display ###################################################### # We'll load in a five-second clip of a track that has # noticeable vocal vibrato. # The method works fine for longer signals, but the # results are harder to visualize. y, sr = librosa.load(librosa.ex('fishin', hq=True), sr=44100, duration=5, offset=35) #################################################### # These parameters are taken directly from the paper n_fft = 1024 hop_length = int(librosa.time_to_samples(1. / 200, sr=sr)) lag = 2 n_mels = 138 fmin = 27.5 fmax = 16000. max_size = 3 ########################################################
import librosa
import numpy
import soundfile


def apply_fadeout(audio, sr, duration=3.0):
    """
    Apply a linear fade-out to the end of ``audio``, in place.

    The last ``duration`` seconds are multiplied by a ramp from 1.0 down to
    0.0. If the requested fade is longer than the signal, the ramp is
    shortened to span the whole signal instead of failing.

    :param audio: Array whose first axis indexes samples; modified in place.
        Any further axes are scaled by the same ramp.
    :param sr: Sampling rate, in samples per second.
    :param duration: Length of the fade in seconds.
    :raises ValueError: If ``duration * sr`` truncates to a negative number
        of samples.
    """
    # convert to audio indices (samples)
    length = int(duration * sr)
    if length < 0:
        raise ValueError(
            "Fade length must be non-negative, got {} samples".format(length))

    end = audio.shape[0]
    # Clamp so that `start` never goes negative: a negative start would wrap
    # around and select fewer samples than the fade curve has points.
    length = min(length, end)
    start = end - length

    # compute fade out curve
    # linear fade
    fade_curve = numpy.linspace(1.0, 0.0, length)
    # Add singleton trailing axes so the ramp broadcasts along axis 0 for
    # inputs with more than one dimension (no-op for 1-D audio).
    fade_curve = fade_curve.reshape((length,) + (1,) * (audio.ndim - 1))

    # apply the curve
    audio[start:end] = audio[start:end] * fade_curve


path = librosa.ex('brahms')
orig, sr = librosa.load(path, duration=5.0)

# Work on a copy so the unfaded signal can be written out for comparison.
out = orig.copy()
apply_fadeout(out, sr, duration=2.0)

soundfile.write('original.wav', orig, samplerate=sr)
soundfile.write('faded.wav', out, samplerate=sr)