def beat_tracking_example():
    """Estimate tempo and beat positions for librosa's bundled 'nutcracker' clip.

    Prints the estimated tempo and returns the beat timestamps so that the
    result of the frame-to-time conversion is not thrown away.

    Returns:
        The value produced by ``librosa.frames_to_time`` for the detected
        beat frames (beat event times).
    """
    import numpy as np  # Local import: only needed to normalise `tempo` below.

    # Get the file path to an included audio example.
    filename = librosa.example('nutcracker')

    # Load the audio as a waveform 'y' and store the sampling rate as 'sr'.
    y, sr = librosa.load(filename)

    # Run the default beat tracker.
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)

    # Depending on the librosa version, `tempo` is either a plain float or an
    # array; '{:.2f}' cannot format an array, so reduce it to one float first.
    tempo_bpm = float(np.ravel(tempo)[0])
    print('Estimated tempo: {:.2f} beats per minute'.format(tempo_bpm))

    # Convert the frame indices of beat events into timestamps.
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)
    return beat_times
def tensorflow_example():
    """Apply the TensorFlow SpecAugment implementation to a mel spectrogram.

    Loads the 'nutcracker' example at its native sampling rate, computes a
    256-band mel spectrogram, augments it with the package defaults, prints
    shape/dtype before and after, and visualises both spectrograms.
    """
    import SpecAugment.spec_augment_tensorflow

    augmenter = SpecAugment.spec_augment_tensorflow

    # Other inputs to try: librosa.example('trumpet') or a local file such
    # as './stereo.ogg'.
    audio_path = librosa.example('nutcracker')
    waveform, rate = librosa.load(audio_path, sr=None, mono=True)

    mel = librosa.feature.melspectrogram(
        y=waveform,
        sr=rate,
        n_mels=256,
        hop_length=128,
        fmax=8000,
    )
    print('Mel spectrogram: shape = {}, dtype = {}.'.format(mel.shape, mel.dtype))

    augmented = augmenter.spec_augment(mel_spectrogram=mel)
    print('Mel spectrogram (augmented): shape = {}, dtype = {}.'.format(augmented.shape, augmented.dtype))

    # Show the input and the augmented result, in that order.
    for spectrogram, title in (
        (mel, 'Before augmentation'),
        (augmented, 'After augmentation'),
    ):
        augmenter.visualization_spectrogram(spectrogram, title)
def __init__(self, scale=0.25, sample_rate=22050, examples=None):
    """Store the configuration and prepare the example waveforms.

    Args:
        scale: Stored unchanged on ``self.scale``.
        sample_rate: Sampling rate every loaded example is brought to.
        examples: When ``None``, a default set of librosa example tracks is
            loaded and stored as torch tensors. Otherwise the given value is
            stored as-is (presumably already-loaded waveforms; confirm
            against callers).
    """
    self.scale = scale
    self.sample_rate = sample_rate

    if examples is None:
        example_names = [
            'brahms',
            'choice',
            'fishin',
            'nutcracker',
            'trumpet',
            'vibeace',
        ]
        self.examples = []
        for example_name in example_names:
            # A separate name keeps the `sample_rate` argument from being
            # overwritten by the rate reported for the loaded file.
            waveform, loaded_rate = librosa.load(librosa.example(example_name))
            if loaded_rate != self.sample_rate:
                # The rates are passed by keyword: current librosa releases
                # reject them as positional arguments.
                waveform = librosa.resample(
                    waveform,
                    orig_sr=loaded_rate,
                    target_sr=self.sample_rate,
                )
            self.examples.append(torch.from_numpy(waveform))
    else:
        self.examples = examples
def pytorch_example():
    """Apply the PyTorch SpecAugment implementation to a mel spectrogram.

    Loads the 'nutcracker' example at its native sampling rate, computes a
    256-band mel spectrogram, wraps it in a tensor with a leading axis of
    size one, augments it, prints shape/dtype before and after, and
    visualises both spectrograms.
    """
    import torch
    import SpecAugment.spec_augment_pytorch

    augmenter = SpecAugment.spec_augment_pytorch

    # Other inputs to try: librosa.example('trumpet') or a local file such
    # as './stereo.ogg'.
    audio_path = librosa.example('nutcracker')
    waveform, rate = librosa.load(audio_path, sr=None, mono=True)

    mel = librosa.feature.melspectrogram(
        y=waveform,
        sr=rate,
        n_mels=256,
        hop_length=128,
        fmax=8000,
    )
    # Prepend an axis of size 1 (presumably the batch axis spec_augment
    # expects; confirm against the SpecAugment package).
    mel = torch.unsqueeze(torch.tensor(mel), axis=0)
    print('Mel spectrogram: shape = {}, dtype = {}.'.format(mel.shape, mel.dtype))

    augmented = augmenter.spec_augment(
        mel_spectrogram=mel,
        time_warping_para=50,
        frequency_masking_para=50,
        time_masking_para=1000,
        frequency_mask_num=2,
        time_mask_num=2,
    )
    print('Mel spectrogram (augmented): shape = {}, dtype = {}.'.format(augmented.shape, augmented.dtype))

    # Show the input and the augmented result, in that order.
    for spectrogram, title in (
        (mel, 'Before augmentation'),
        (augmented, 'After augmentation'),
    ):
        augmenter.visualization_spectrogram(spectrogram, title)
def test_example_fail():
    """Request a librosa example under a key that is not a track name.

    NOTE(review): presumably this call is expected to raise and the
    expectation is declared by a marker outside this block; confirm.
    """
    unknown_key = "no such track"
    librosa.example(unknown_key)
def test_example(key, hq):
    """Check that ``librosa.example(key, hq=hq)`` yields a path that exists."""
    example_path = librosa.example(key, hq=hq)
    assert os.path.exists(example_path)
y_ = librosa.resample(y, orig_sr=44100, target_sr=22050)
plt.plot(t_, y_)
S_ = librosa.stft(y_)
S_ = np.abs(S_)
# also plot the spectrogram of the signal
librosa.display.specshow(S_)
# your code here

# Q: What was different this time? Why is this method better?
# A: the signal was properly processed, so the high frequency components
#    of the signal did not cause aliasing
#############################################################################
# NOTE(review): `mt` is no longer needed in this section; the import is kept
# in case code further down still refers to it.
import math as mt

y_brahms, sr_brahms = librosa.load(librosa.example('brahms'))
dur_brahms = y_brahms.shape[0] / sr_brahms

# One timestamp per sample at the original rate: sample i sits at i / sr.
t_brahms = np.arange(y_brahms.shape[0]) / sr_brahms
plt.plot(t_brahms, y_brahms)

# Resample to half the rate. The resampled signal needs its own time axis
# built from the new rate; reusing the first half of `t_brahms` would squeeze
# the whole recording into half of its real duration on the plot.
sr_brahms_down = sr_brahms / 2
y_brahms_down = librosa.resample(y_brahms, orig_sr=sr_brahms, target_sr=sr_brahms_down)
t_brahms_down = np.arange(y_brahms_down.shape[0]) / sr_brahms_down
plt.plot(t_brahms_down, y_brahms_down)
def fp():
    """Build a ``Fingerprint`` from librosa's bundled 'nutcracker' recording."""
    samples, sample_rate = librosa.load(librosa.example('nutcracker'))
    return Fingerprint(samples, sample_rate)
def getPoints(self):
    """Return two (x, y) points that share one y-coordinate.

    The points lie at ``self.x`` and ``self.x + self.width``; their common
    y-coordinate is ``self.y + self.max_height - self.height``.
    """
    edge_y = self.y + self.max_height - self.height
    start = (self.x, edge_y)
    end = (self.x + self.width, edge_y)
    return [start, end]


def clamp(self, min_value, max_value, value):
    """Limit ``value`` to the range ``min_value`` .. ``max_value``."""
    if value < min_value:
        return min_value
    return max_value if value > max_value else value


if __name__ == "__main__":
    # Sample file from librosa.
    # To use your own audio file instead: filename = "Song.wav"
    filename = librosa.example('nutcracker')

    # timeSeries: 1-dimensional numpy.ndarray of floating-point values
    # sampleRate: number of samples recorded per second
    timeSeries, sampleRate = librosa.load(filename)

    # Matrix of frequencies and time.
    # hop_length: number of audio samples between adjacent frames
    # n_fft: number of samples in each frame
    stft = np.abs(
        librosa.stft(timeSeries, hop_length=512, n_fft=2048 * 8)
    )

    # Convert amplitude to decibels.
    D = librosa.amplitude_to_db(stft, ref=np.max)
import librosa
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.path as path
import matplotlib.animation as animation

y, sr = librosa.load(librosa.example('brahms'))

# Magnitude spectrogram with a window one tenth of the signal length.
# n_fft is passed by keyword: current librosa releases reject it as a
# positional argument.
S = np.abs(librosa.stft(y, n_fft=len(y) // 10))

# NOTE(review): librosa.stft is documented to return (frequency bins, frames),
# so these two names look swapped. They are left unchanged because code
# further down may rely on them; confirm.
frames, bins = S.shape

# Initial bar heights (all 50) and the left/right edges of each bar.
n = np.zeros(bins) + 50
bins = np.arange(bins + 1)
left = np.array(bins[:-1])
right = np.array(bins[1:])
bottom = np.zeros(len(left))
top = bottom + n
nrects = len(left)

# Each rectangle takes five path vertices: one MOVETO, three LINETO and one
# CLOSEPOLY.
nverts = nrects * (1 + 3 + 1)
verts = np.zeros((nverts, 2))
codes = np.ones(nverts, int) * path.Path.LINETO
codes[0::5] = path.Path.MOVETO
codes[4::5] = path.Path.CLOSEPOLY
verts[0::5, 0] = left
verts[0::5, 1] = bottom
verts[1::5, 0] = left
verts[1::5, 1] = top
verts[2::5, 0] = right
def data_augmentation_example():
    """Plot four waveform augmentations of the 'nutcracker' clip.

    Draws the original signal and then, one figure each, the results of
    noise injection, time shifting, pitch shifting and time stretching, and
    finally calls ``plt.show()``.
    """
    y, sr = librosa.load(librosa.example('nutcracker'))
    # Other bundled clips to try: 'trumpet', 'brahms', or 'vibeace' with
    # hq=True.

    def show_waveform(signal, title):
        """Draw `signal` in a new 10x4 figure with the given title."""
        plt.figure(figsize=(10, 4))
        # A time axis is waveshow's default. The explicit `x_axis='time'`
        # keyword is omitted because newer librosa releases deprecate it.
        librosa.display.waveshow(signal, sr=sr)
        plt.title(title)
        plt.tight_layout()

    def inject_noise(y, noise_factor):
        """Add Gaussian noise scaled by `noise_factor`, keeping y's dtype."""
        noise = np.random.randn(len(y))
        augmented = y + noise_factor * noise
        # Cast back to the same data type as the input.
        return augmented.astype(y.dtype)

    def shift_time(y, sr, shift_max, shift_direction):
        """Roll `y` by a random number of samples and silence the wrapped part.

        The shift is drawn from [0, sr * shift_max). It is negated when
        `shift_direction` is 'right', and negated at random when it is
        'both'.

        NOTE(review): a negative shift makes np.roll move samples toward the
        start of the array, so 'right' may not match the visual direction;
        confirm the intended naming.
        """
        shift = np.random.randint(int(sr * shift_max))
        if shift_direction == 'right':
            shift = -shift
        elif shift_direction == 'both':
            if np.random.randint(0, 2) == 1:
                shift = -shift

        augmented = np.roll(y, shift)
        # Silence the samples that wrapped around. A zero shift wraps nothing,
        # so it must be skipped: `augmented[0:] = 0` would mute the whole
        # signal.
        if shift > 0:
            augmented[:shift] = 0
        elif shift < 0:
            augmented[shift:] = 0
        return augmented

    show_waveform(y, 'Original')

    #--------------------
    # Inject noise.
    noise_factor = 0.02
    show_waveform(inject_noise(y, noise_factor), 'Noise Injection')

    #--------------------
    # Shift time.
    shift_max = 10
    shift_direction = 'right'
    show_waveform(shift_time(y, sr, shift_max, shift_direction), 'Time Shift')

    #--------------------
    # Change pitch.
    pitch_factor = 0.2
    show_waveform(
        librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_factor),
        'Pitch Shift',
    )

    #--------------------
    # Change speed: stretch the time series by a fixed rate.
    # If rate < 1 the signal is slowed down; if rate > 1 (e.g. 1.2) it is
    # sped up.
    stretch_factor = 0.8
    show_waveform(
        librosa.effects.time_stretch(y, rate=stretch_factor),
        'Time Stretch',
    )

    plt.show()