Пример #1
0
def test_resample_all():
    """
    Check that `resample_all` handles a label-per-subdirectory tree of audio files.

    Two mock clips are written below a randomly named source directory,
    `resample_all` is run once in place and once into a second randomly named
    directory, and the test asserts that the 'rs_'-prefixed '.wav' outputs and
    the 'manifest.txt' log exist afterwards. Everything generated is removed at
    the end of a successful run.
    :return:
    """
    trumpet, sr = librosa.load(librosa.ex('trumpet'))
    nutcracker, _ = librosa.load(librosa.ex('nutcracker'))

    # Mock MNIST-style layout: <root>/<label>/<file>.
    src_name = ''.join(random.choices(string.ascii_letters, k=6))
    dst_name = ''.join(random.choices(string.ascii_letters, k=6))
    rand_loc1 = os.path.join(ROOT_DIR, src_name)
    rand_loc2 = os.path.join(ROOT_DIR, dst_name)
    label_dirs = [os.path.join(rand_loc1, label) for label in ('l1', 'l2')]
    inputs = [
        os.path.join(label_dirs[0], 'test1.m4a'),
        os.path.join(label_dirs[1], 'test2.m4a'),
    ]
    outputs = ['rs_test1.wav', 'rs_test2.wav']
    manifest = os.path.join(ROOT_DIR, 'manifest.txt')

    for label_dir in label_dirs:
        os.makedirs(label_dir, exist_ok=True)

    # The inputs are named '.m4a' but are written with format='WAV'.
    for path, samples in zip(inputs, (trumpet, nutcracker)):
        with SoundFile(path, 'w', sr, channels=1, format='WAV') as handle:
            handle.write(samples)

    # Resampling in place: outputs appear next to the inputs.
    resample_all(rand_loc1, rand_loc1, sr)
    for label_dir, out_name in zip(label_dirs, outputs):
        assert os.path.isfile(os.path.join(
            label_dir, out_name)), "File not saved to old directory"

    # Resampling into a different destination directory.
    # NOTE(review): label_dir already contains ROOT_DIR; if ROOT_DIR is an
    # absolute path, os.path.join discards rand_loc2 here and these checks look
    # at the in-place outputs again - confirm the intended destination layout.
    resample_all(rand_loc1, rand_loc2, sr)
    for label_dir, out_name in zip(label_dirs, outputs):
        assert os.path.isfile(
            os.path.join(rand_loc2, label_dir,
                         out_name)), "File not saved to new directory"
    assert os.path.isfile(
        manifest), "Manifest of resampled files not generated"

    # Remove every directory and file the test generated.
    shutil.rmtree(rand_loc1)
    shutil.rmtree(rand_loc2)
    os.remove(manifest)
Пример #2
0
def rootdir():
    """
    Write 'test.csv' describing three librosa example clips and return their directory.

    The CSV is written to the current working directory with the columns
    Title, URL, Filename, Date and Speakers.

    :return: Directory name of the first example file ('trumpet').
    """
    example_paths = [li.ex(key) for key in ('trumpet', 'nutcracker', 'vibeace')]
    row_count = len(example_paths)

    table = pd.DataFrame({
        'Title': ['a', 'b', 'c'],
        'URL': [None] * row_count,
        'Filename': example_paths,
        'Date': [None] * row_count,
        'Speakers': [1, 2, 3],
    })
    table.to_csv('test.csv', index=False)

    return os.path.dirname(example_paths[0])
Пример #3
0
def test_clip_audio():
    """
    Check that `clip_audio` returns audio of the requested duration, both when the
    target is shorter and when it is longer than the source, and that it can
    write its result to disk.
    :return:
    """
    audio, sr = librosa.load(librosa.ex('nutcracker'))
    duration = len(audio) / sr

    clip = duration // 2        # target shorter than the source
    extend = int(duration * 2)  # target longer than the source

    clipped, clipped_sr = clip_audio(audio, clip, sr)
    extended, extended_sr = clip_audio(audio, extend, sr)

    clipped_seconds = len(clipped) / clipped_sr
    assert clipped_seconds == clip, "Number of sample points does not meet meet expected length for clipped audio"
    extended_seconds = len(extended) / extended_sr
    assert extended_seconds == extend, "Number of sample points does not meet meet expected length for extended audio"

    # Write into a randomly named directory below ROOT_DIR.
    dir_name = ''.join(random.choices(string.ascii_letters, k=6))
    save_to = os.path.join(ROOT_DIR, dir_name, 'test.wav')

    clip_audio(audio, extend, sr, save_to=save_to)
    assert os.path.exists(save_to), "Save to path was unsuccessful"
    assert os.path.isfile(save_to), "File did not save successfully"

    # Remove the directory that holds the written file.
    shutil.rmtree(os.path.dirname(save_to))
Пример #4
0
def get_spectrogram(wav):
    """
    Return the STFT magnitude spectrogram of librosa's bundled 'trumpet' example.

    The previous body also ran a Griffin-Lim inversion and an inverse STFT whose
    results were never used; that dead work has been removed.

    NOTE(review): `wav` is accepted but never read - the bundled example is
    always loaded. Confirm whether callers expect `wav` to be analysed instead.

    :param wav: Currently unused.
    :return: Absolute value of the complex STFT of the example signal.
    """
    y, _ = librosa.load(librosa.ex('trumpet'))
    # Magnitude spectrogram: drop the phase of the complex STFT.
    return np.abs(librosa.stft(y))
Пример #5
0
def fft_example():
	"""
	Plot three small NumPy FFT demonstrations and show the figures.

	1. Real and imaginary parts of the FFT of sin(t) over 256 samples.
	2. Real and imaginary parts of the inverse FFT of a spectrum that is zero
	   except for 20 bins with unit magnitude and random phase.
	3. A noisy sum of sinusoids next to its reconstructions from the full
	   spectrum, from the magnitude only, and from the phase only.

	The LaTeX titles are raw strings so that the backslash in '\\hat' is not
	parsed as an (invalid) string escape sequence.
	"""
	t = np.arange(256)
	freq = np.fft.fftfreq(t.shape[-1])

	S = np.fft.fft(np.sin(t))

	fig, ax = plt.subplots(nrows=2, ncols=1, sharey=True)
	ax[0].plot(freq, S.real)
	ax[0].set(title='Real', xlabel='Frequency')
	ax[1].plot(freq, S.imag)
	ax[1].set(title='Imaginary', xlabel='Frequency')
	plt.tight_layout()

	#--------------------
	# Inverse FFT of a band-limited spectrum with random phases.
	t = np.arange(400)
	n = np.zeros((400,), dtype=complex)
	n[40:60] = np.exp(1j * np.random.uniform(0, 2 * np.pi, (20,)))

	s = np.fft.ifft(n)

	fig, ax = plt.subplots(nrows=2, ncols=1, sharey=True)
	ax[0].plot(t, s.real)
	ax[0].set(title='Real', xlabel='Time')
	ax[1].plot(t, s.imag)
	ax[1].set(title='Imaginary', xlabel='Time')

	#--------------------
	# Reconstruction from the full spectrum vs. magnitude-only vs. phase-only.
	if True:
		t = np.arange(1024)
		y = 12 * np.sin(t) + 20 * np.sin(10 * t) + 7 * np.sin(25 * t) + np.random.randn(t.shape[-1])
	else:
		# Alternative input (disabled): a real recording instead of synthetic data.
		import librosa
		y, sr = librosa.load(librosa.ex('trumpet'))
		t = np.arange(len(y)) / sr

	S = np.fft.fft(y)
	y_hat = np.fft.ifft(S)

	S_mag = np.abs(S)
	y_mag_hat = np.fft.ifft(S_mag)
	S_phase = np.exp(1.0j * np.angle(S))
	y_phase_hat = np.fft.ifft(S_phase)

	fig, ax = plt.subplots(nrows=2, ncols=2, sharey=True)
	ax[0, 0].plot(t, y)
	ax[0, 0].set(title='$y$')
	ax[0, 1].plot(t, y_hat)
	ax[0, 1].set(title=r'$\hat{y}$')
	ax[1, 0].plot(t, y_mag_hat)
	ax[1, 0].set(title=r'$\hat{y}_{mag}$')
	ax[1, 1].plot(t, y_phase_hat)
	ax[1, 1].set(title=r'$\hat{y}_{phase}$')
	plt.tight_layout()

	plt.show()
Пример #6
0
def test_resample():
    """
    Check that `resample` writes the resampled file to the requested path and
    leaves a 'manifest.txt' log in ROOT_DIR.
    :return:
    """
    source = librosa.ex('nutcracker')

    # Write into a randomly named directory below ROOT_DIR.
    dir_name = ''.join(random.choices(string.ascii_letters, k=6))
    save_to = os.path.join(ROOT_DIR, dir_name, 'test.wav')
    manifest = os.path.join(ROOT_DIR, 'manifest.txt')

    resample(source, save_to, sr=22050)

    assert os.path.exists(
        save_to), "Expected path of saved file does not exist"
    assert os.path.isfile(save_to), "File did not save successfully"
    assert os.path.exists(
        manifest), "Resampling log not successfully generated"

    # Remove the output directory and the log.
    shutil.rmtree(os.path.dirname(save_to))
    os.remove(manifest)
Пример #7
0
def melspectrogram_test():
	"""Compute the mel spectrogram of librosa's 'trumpet' example and plot it in dB."""
	samples, rate = librosa.load(librosa.ex('trumpet'))

	from_waveform = True
	if not from_waveform:
		# If a spectrogram input S is provided, then it is mapped directly onto the mel basis by mel_f.dot(S).
		power_spec = np.abs(librosa.stft(samples))**2
		mel_spec = librosa.feature.melspectrogram(S=power_spec, sr=rate)
	else:
		# If a time-series input y, sr is provided, then its magnitude spectrogram S is first computed, and then mapped onto the mel scale by mel_f.dot(S**power).
		mel_spec = librosa.feature.melspectrogram(y=samples, sr=rate)

		# Variants that pass extra arguments through to the Mel filters:
		#   librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
		#   librosa.feature.melspectrogram(y=y, sr=sr, n_fft=512, n_mels=128, fmax=8000)
		#   librosa.feature.melspectrogram(y=y, sr=sr, n_fft=512, n_mels=128, fmax=8000, htk=True)

	#--------------------
	# Plot on a dB scale relative to the maximum of the spectrogram.
	plt.figure(figsize=(10, 4))
	mel_db = librosa.power_to_db(mel_spec, ref=np.max)
	librosa.display.specshow(mel_db, x_axis='time', y_axis='mel', sr=rate, fmax=8000)
	plt.colorbar(format='%+2.0f dB')
	plt.title('Mel-frequency Spectrogram')
	plt.tight_layout()
	plt.show()
Пример #8
0
# Code source: Brian McFee
# License: ISC

##################
# Standard imports
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Audio

import librosa

import librosa.display

#############################################
# Load the first 120 seconds of the 'fishin' example, a track with vocals.
y, sr = librosa.load(librosa.ex('fishin'), duration=120)

# Compute the STFT and split it into magnitude (S_full) and phase
S_full, phase = librosa.magphase(librosa.stft(y))

# Play back a 5-second excerpt with vocals (seconds 10 to 15 of the signal)
Audio(data=y[10 * sr:15 * sr], rate=sr)

#######################################
# Plot a 5-second slice of the spectrum
# idx selects the STFT frames that correspond to seconds 10 to 15.
idx = slice(*librosa.time_to_frames([10, 15], sr=sr))
fig, ax = plt.subplots()
img = librosa.display.specshow(librosa.amplitude_to_db(S_full[:, idx],
                                                       ref=np.max),
                               y_axis='log',
                               x_axis='time',
Пример #9
0
# sphinx_gallery_thumbnail_number = 15

# %%
# All of librosa's plotting functions rely on matplotlib.
# To demonstrate everything we can do, it will help to
# import matplotlib's pyplot API here.
import numpy as np
import matplotlib.pyplot as plt

import librosa
import librosa.display

# %%
# First, we'll load in a demo track (librosa's bundled 'trumpet' example)

y, sr = librosa.load(librosa.ex('trumpet'))


# %%
# The first thing we might want to do is display an ordinary
# (linear) spectrogram.
# We'll do this by first computing the short-time Fourier
# transform, and then mapping the magnitudes to a decibel
# scale, measured relative to the largest magnitude (ref=np.max).
#

D = librosa.stft(y)  # STFT of y
S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)

# %%
# If you're familiar with matplotlib already, you may know
Пример #10
0
                                    y_axis='log',
                                    ax=ax[0])
    ax[0].set(title='STFT (escala log)')
    ax[0].set(xlabel=None)

    # No segundo subplot exibe o espectograma da escala mel
    img2 = librosa.display.specshow(M_db,
                                    x_axis='time',
                                    y_axis='mel',
                                    ax=ax[1])
    ax[1].set(title='Melspectograma')
    ax[1].set(xlabel=None)

    # No terceiro subplot exibe a formula de onda
    img3 = librosa.display.waveplot(y, sr=sr, ax=ax[2])
    ax[2].set(title='Waveform')
    ax[2].set(xlabel=None)
    ax[2].set(ylabel='Hz')

    fig.colorbar(img1, ax=ax[0], format="%+2.f dB")
    fig.colorbar(img2, ax=ax[1], format="%+2.f dB")
    fig.colorbar(img3, ax=ax[2], format="%+2.f dB")

    plt.show()


if __name__ == '__main__':
    # A valid path to a song, or one of the librosa example files, must be passed, e.g.
    # print_spectograms("songs/Guns N' Roses - Welcome To The Jungle.webm")
    print_spectograms(librosa.ex("choice"))
Пример #11
0
<http://www.terasoft.com.tw/conf/ismir2014/proceedings/T110_127_Paper.pdf>`_.


"""

import numpy as np
import matplotlib.pyplot as plt

from IPython.display import Audio

import librosa
import librosa.display

########################
# Load an example clip with harmonics and percussives:
# 5 seconds of the 'fishin' example, starting 10 seconds in.
y, sr = librosa.load(librosa.ex('fishin'), duration=5, offset=10)

# Audio player for the loaded excerpt
Audio(data=y, rate=sr)

###############################################
# Compute the short-time Fourier transform of y
D = librosa.stft(y)

#####################################################
# Decompose D into harmonic and percussive components
#
# :math:`D = D_\text{harmonic} + D_\text{percussive}`
D_harmonic, D_percussive = librosa.decompose.hpss(D)

####################################################################
# We can plot the two components along with the original spectrogram
Пример #12
0
def stft_test():
	"""
	Walk through librosa's STFT and inverse STFT on bundled example clips.

	First part: load 'nutcracker' at its native sampling rate, compute the STFT,
	split it into magnitude and phase in two equivalent ways and assert that the
	shapes and magnitudes agree.

	Second part: load 'trumpet', invert its STFT, then repeat with explicit
	padding so the reconstruction has exactly the input length, print the
	maximum reconstruction errors (full, magnitude-only, phase-only) and plot
	the four waveforms.

	The LaTeX titles are raw strings so that the backslash in '\\hat' is not
	parsed as an (invalid) string escape sequence.
	"""
	filepath = librosa.ex('nutcracker')
	#filepath = librosa.ex('trumpet')

	#y, sr = librosa.load(filepath)
	# sr=None keeps the file's native sampling rate.
	y, sr = librosa.load(filepath, sr=None, mono=True)
	#y, sr = librosa.load(filepath, sr=22050, mono=True, offset=0.0, duration=None, dtype=np.float32, res_type='kaiser_best')

	print('Audio time-series: shape = {}, dtype = {}.'.format(y.shape, y.dtype))  # 'nutcracker': (2643264,), 'trumpet': (117601,)
	print('Sampling rate = {}.'.format(sr))

	#--------------------
	# The STFT represents a signal in the time-frequency domain by computing discrete Fourier transforms (DFT) over short overlapping windows.
	#D = librosa.stft(y)
	D = librosa.stft(y, n_fft=2048, hop_length=None, win_length=None, window='hann', center=True, dtype=None, pad_mode='constant')

	# The shape of D = (1 + floor(n_fft / 2), ceil(len(y) / hop_length)).
	#	hop_length = win_length // 4 = n_fft // 4 (default).

	# n_fft		D
	# 			'nutcracker'	'trumpet'
	# 256		(129, 41302)	(129, 1838)
	# 512		(257, 20651)	(257, 919)
	# 1024		(513, 10326)	(513, 460)
	# 2048		(1025, 5163)	(1025, 230)
	# 4096		(2049, 2582)	(2049, 115)
	# 8192		(4097, 1291)	(4097, 58)

	print('STFT: shape = {}, dtype = {}.'.format(D.shape, D.dtype))

	# Separate a complex-valued spectrogram D into its magnitude (S) and phase (P) components.
	D_mag, D_phase = librosa.magphase(D, power=1)  # mag = np.abs(D)**power, phase = np.exp(1.0j * np.angle(D)).
	D_phase_angle = np.angle(D_phase)  # The phase angle. [rad].

	# The same decomposition computed directly with NumPy.
	magnitude = np.abs(D)
	#magnitude = np.abs(D)**2
	phase_angle = np.angle(D)

	print('STFT magitude #1:    shape = {}, dtype = {}.'.format(D_mag.shape, D_mag.dtype))
	print('STFT phase #1:       shape = {}, dtype = {}.'.format(D_phase.shape, D_phase.dtype))  # np.complex64.
	print('STFT phase angle #1: shape = {}, dtype = {}.'.format(D_phase_angle.shape, D_phase_angle.dtype))
	print('STFT magitude #2:    shape = {}, dtype = {}.'.format(magnitude.shape, magnitude.dtype))
	print('STFT phase angle #2: shape = {}, dtype = {}.'.format(phase_angle.shape, phase_angle.dtype))

	assert D_mag.shape == D_phase.shape
	assert magnitude.shape == phase_angle.shape
	assert D_mag.shape == magnitude.shape

	assert np.allclose(D_mag, magnitude)
	#assert np.allclose(D_phase, phase_angle)  # NOTE [info] >> pi and -pi are the same in angle.

	#--------------------
	# Inverse STFT.

	y, sr = librosa.load(librosa.ex('trumpet'))

	D = librosa.stft(y)
	y_hat = librosa.istft(D)

	print('The shape of y     = {}.'.format(y.shape))
	print('The shape of y_hat = {}.'.format(y_hat.shape))
	print(y)
	print(y_hat)

	# Exactly preserving length of the input signal requires explicit padding.
	# Otherwise, a partial frame at the end of y will not be represented.
	n = len(y)
	n_fft = 2048
	y_pad = librosa.util.fix_length(y, size=n + n_fft // 2)

	D = librosa.stft(y_pad, n_fft=n_fft)

	y_hat = librosa.istft(D, length=n)
	print('Max error = {}.'.format(np.max(np.abs(y - y_hat))))  # NOTE [caution] >> y, not y_pad.

	# Reconstruct from the magnitude only, then from the phase only.
	D_mag, D_phase = librosa.magphase(D)
	y_mag_hat = librosa.istft(D_mag, length=n)
	print('Max error = {}.'.format(np.max(np.abs(y - y_mag_hat))))

	y_phase_hat = librosa.istft(D_phase, length=n)
	print('Max error = {}.'.format(np.max(np.abs(y - y_phase_hat))))

	fig, ax = plt.subplots(nrows=2, ncols=2, sharey=True)
	librosa.display.waveshow(y, sr=sr, ax=ax[0, 0])
	ax[0, 0].set(title='$y$')
	librosa.display.waveshow(y_hat, sr=sr, ax=ax[0, 1])
	ax[0, 1].set(title=r'$\hat{y}$')
	librosa.display.waveshow(y_mag_hat, sr=sr, ax=ax[1, 0])
	ax[1, 0].set(title=r'$\hat{y}_{mag}$')
	librosa.display.waveshow(y_phase_hat, sr=sr, ax=ax[1, 1])
	ax[1, 1].set(title=r'$\hat{y}_{phase}$')
	plt.tight_layout()
	plt.show()
Пример #13
0
def test_clip_all():
    """
    Check that `clip_all` processes a label-per-subdirectory tree of audio files.

    The test covers clipping in place, clipping into a second directory, writing
    a manifest/log when `log` is given, and skipping files already listed in that
    log when `restart=True`. Everything generated is removed at the end of a
    successful run.
    :return:
    """
    trumpet, sr = librosa.load(librosa.ex('trumpet'))
    nutcracker, _ = librosa.load(librosa.ex('nutcracker'))

    # Mock MNIST-style layout: <root>/<label>/<file>.
    src_name = ''.join(random.choices(string.ascii_letters, k=6))
    dst_name = ''.join(random.choices(string.ascii_letters, k=6))
    skip_name = ''.join(random.choices(string.ascii_letters, k=6))
    rand_loc1 = os.path.join(ROOT_DIR, src_name)
    rand_loc2 = os.path.join(ROOT_DIR, dst_name)
    rand_loc3 = os.path.join(ROOT_DIR, skip_name)
    label_dirs = [os.path.join(rand_loc1, label) for label in ('l1', 'l2')]
    file_names = ['test1.wav', 'test2.wav']
    manifest = os.path.join(ROOT_DIR, 'manifest.txt')

    for directory in (*label_dirs, rand_loc3):
        os.makedirs(directory, exist_ok=True)

    # Write the two clips into their label directories.
    for label_dir, file_name, samples in zip(label_dirs, file_names,
                                             (trumpet, nutcracker)):
        with SoundFile(os.path.join(label_dir, file_name),
                       'w',
                       sr,
                       channels=1,
                       format='WAV') as handle:
            handle.write(samples)

    # Target duration: half the length of the second clip, floored.
    clip_to = (len(nutcracker) / sr) // 2

    # Clipping in place: files stay in the source tree.
    clip_all(rand_loc1, rand_loc1, clip_to, sr)
    for label_dir, file_name in zip(label_dirs, file_names):
        assert os.path.isfile(os.path.join(
            label_dir, file_name)), "File not saved to old directory"

    # Clipping into a different destination directory.
    # NOTE(review): label_dir already contains ROOT_DIR; if ROOT_DIR is an
    # absolute path, os.path.join discards rand_loc2 here and these checks look
    # at the source tree again - confirm the intended destination layout.
    clip_all(rand_loc1, rand_loc2, clip_to, sr)
    for label_dir, file_name in zip(label_dirs, file_names):
        assert os.path.isfile(os.path.join(
            rand_loc2, label_dir, file_name)), "File not saved to new directory"

    # A log/manifest of clipped files is written when `log` is given.
    clip_all(rand_loc1, rand_loc2, clip_to, sr, log=manifest)
    assert os.path.isfile(
        manifest), "Manifest of resampled files not generated"

    # With restart=True, files already listed in the log must be skipped.
    clip_all(rand_loc1, rand_loc3, clip_to, sr, restart=True, log=manifest)
    assert not os.listdir(
        rand_loc3
    ), "Files erroneously re-written despite being in the manifest/log"

    # Remove every directory and file the test generated.
    for directory in (rand_loc1, rand_loc2, rand_loc3):
        shutil.rmtree(directory)
    os.remove(manifest)
Пример #14
0
def AUDIOFILE():
    """Return the value of ``librosa.ex('brahms')`` for the bundled 'brahms' example."""
    example_path = librosa.ex('brahms')
    return example_path
Пример #15
0
# Code source: Brian McFee
# License: ISC

##################
# Standard imports
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import librosa

import librosa.display

#############################################
# Load an example signal (librosa's bundled 'trumpet' clip)
y, sr = librosa.load(librosa.ex('trumpet'))


# Compute the STFT and split it into magnitude (S_full) and phase
S_full, phase = librosa.magphase(librosa.stft(y))


###################
# Plot the spectrum on a dB scale relative to its maximum,
# with a logarithmic frequency axis
plt.figure(figsize=(12, 4))
librosa.display.specshow(librosa.amplitude_to_db(S_full, ref=np.max),
                         y_axis='log', x_axis='time', sr=sr)
plt.colorbar()
plt.tight_layout()

###########################################################
Пример #16
0
"""

##################################################
# We'll need numpy and matplotlib for this example
import numpy as np
import matplotlib.pyplot as plt

import soundfile as sf

import librosa as librosa
import librosa.display as display

######################################################################
# First, we'll start with an audio file that we want to stream
# We'll use an example track at 44.1 KHz (the hq=True variant of 'brahms')
filename = librosa.ex('brahms', hq=True)

#####################################################################
# Next, we'll set up the block reader to work on short segments of
# audio at a time.

# We'll generate 16 frames at a time, each frame having 4096 samples
# and 50% overlap (the hop is half of the frame length).
#

n_fft = 4096
hop_length = n_fft // 2

# fill_value pads out the last frame with zeros so that we have a
# full frame at the end of the signal, even if the signal doesn't
# divide evenly into full frames.
Пример #17
0
def SIGNAL():
    """
    Load librosa's bundled 'brahms' example without resampling (``sr=None``).

    :return: Tuple ``(y, sr)`` as unpacked from ``librosa.load``.
    """
    example_path = librosa.ex('brahms')
    samples, native_rate = librosa.load(example_path, sr=None)
    return samples, native_rate
Пример #18
0
# Code source: Brian McFee
# License: ISC

##################################################
# We'll need numpy and matplotlib for this example
import numpy as np
import matplotlib.pyplot as plt

import librosa
import librosa.display

######################################################
# Load the high-quality 'trumpet' example, resampled to 44.1 kHz.
# The method works fine for longer signals, but the
# results are harder to visualize.
y, sr = librosa.load(librosa.ex('trumpet', hq=True), sr=44100)

####################################################
# These parameters are taken directly from the paper
n_fft = 1024
# Hop of 1/200 second, converted to a whole number of samples
hop_length = int(librosa.time_to_samples(1. / 200, sr=sr))
lag = 2
n_mels = 138
fmin = 27.5
fmax = 16000.
max_size = 3

########################################################
# The paper uses a log-frequency representation, but for
# simplicity, we'll use a Mel spectrogram instead.
S = librosa.feature.melspectrogram(y,
Пример #19
0
        :param rsr: The rate to which the original signal will be downsampled.
        """
        cutoff = rsr / 2
        sos = sig.butter(10,
                         cutoff,
                         fs=sr,
                         btype='lowpass',
                         analog=False,
                         output='sos')
        return sig.sosfilt(sos, signal)

    @staticmethod
    def _log_bin(arr, n_bins):
        """
        Helper method. Divide spectrogram frequency bins logarithmically

        :param arr: The array to divide.
        :param n_bins: The number of bins to divide the array into.
        """
        bands = np.array([10 * 2**i for i in range(n_bins - 1)])
        idxs = np.arange(len(arr))
        split_arr = np.split(arr, np.searchsorted(idxs, bands))
        return split_arr


if __name__ == '__main__':
    # Demo: build a Fingerprint from librosa's bundled 'trumpet' example and
    # call its show() method.
    path = librosa.ex('trumpet')
    a, s = librosa.load(path)  # a: audio samples, s: sampling rate
    fp = Fingerprint(a, s)
    fp.show()
Пример #20
0
All the stuff needed to preprocess audio data for NN
'''
import librosa, librosa.display
import matplotlib.pyplot as plt 
import os
import numpy as np

# Folder '../GuitarNotes/' relative to the current working directory
data_folder = os.path.join(os.getcwd(), '../GuitarNotes/')
print(data_folder)
# Key of the librosa example clip that is loaded below
file='choice'
# Sound source file paths:
## ../pitch/sms-tools/sounds/
## ./GuitarNotes/

# Waveform: load the example clip at 22050 Hz
signal, sr = librosa.load(librosa.ex(file), sr=22050) # number of samples = sr * duration (T) --> 22050 * 25
# librosa.display.waveplot(signal, sr=sr)
# plt.xlabel("Time")
# plt.ylabel("Amplitude")
# plt.show()

# FFT --> spectrum
'''
FFT:
- Moves the signal from time to frequency domain
- No time information
- Static snapshot of amplitude and frequency for the entire duration
'''
fft = np.fft.fft(signal) 

magnitude = np.abs(fft) # magnitude of each frequency component (absolute value of the complex FFT output)
Created on Mon Feb  1 14:40:38 2021

@author: federicovisi
"""
from audio_features import audio_features
from audio_features import normalize

import librosa
import librosa.display
import matplotlib.pyplot as plt

#%% Function call

# get the path of one of the librosa audio examples
path = librosa.ex('nutcracker')

# call the audio_features function
# y = audio; sr = sample rate; df = pandas dataframe containing a time vector and 26 audio features
y, sr, df = audio_features(path)

#%% Plot waveform and 4 audio features

# Waveform in the background, with four normalized features drawn over it:
# spectral centroid (red), rolloff (green), RMS (magenta), contrast (yellow)
librosa.display.waveplot(y, sr=sr, alpha=0.4)
plt.plot(df['time'].values, normalize(df['spectral_centroid']), color='r')
plt.plot(df['time'].values, normalize(df['rolloff']), color='g')
plt.plot(df['time'].values, normalize(df['rms']), color='m')
plt.plot(df['time'].values, normalize(df['contrast']), color='y')

#%% Save audio features as csv file
df.to_csv('audio_features.csv', index=False)
Пример #22
0
def feature_extraction_example():
	"""
	Demonstrate librosa feature extraction on bundled example clips.

	First part ('nutcracker'): harmonic/percussive separation, beat tracking on
	the percussive part, MFCCs with their deltas, chroma from the harmonic part,
	and aggregation of those features between beat events.

	Second part ('trumpet'): RMS energy computed from a magnitude spectrogram,
	plotted above the spectrogram itself. The figure is shown before returning.
	"""
	# Load the example clip.
	signal, rate = librosa.load(librosa.ex('nutcracker'))

	# Hop length used for the MFCCs: at 22050 Hz, 512 samples ~= 23ms.
	hop = 512

	# Split the signal into harmonic and percussive waveforms.
	harmonic_part, percussive_part = librosa.effects.hpss(signal)

	# Track beats on the percussive waveform.
	tempo, beats = librosa.beat.beat_track(y=percussive_part, sr=rate)

	# MFCCs of the raw signal and their first-order differences (deltas).
	mfcc = librosa.feature.mfcc(y=signal, sr=rate, hop_length=hop, n_mfcc=13)
	mfcc_delta = librosa.feature.delta(mfcc)

	# Stack both and aggregate between beat events (mean, the default).
	stacked_mfcc = np.vstack([mfcc, mfcc_delta])
	beat_mfcc_delta = librosa.util.sync(stacked_mfcc, beats)

	# Chroma of the harmonic waveform, aggregated between beats with the median.
	chroma = librosa.feature.chroma_cqt(y=harmonic_part, sr=rate)
	beat_chroma = librosa.util.sync(chroma, beats, aggregate=np.median)

	# All beat-synchronous features stacked together.
	beat_features = np.vstack([beat_chroma, beat_mfcc_delta])

	#--------------------
	# Signature reference (not executed).
	# Spectral features.
	#librosa.feature.chroma_stft(y=None, sr=22050, S=None, norm=inf, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', tuning=None, n_chroma=12)
	#librosa.feature.chroma_cqt(y=None, sr=22050, C=None, hop_length=512, fmin=None, norm=inf, threshold=0.0, tuning=None, n_chroma=12, n_octaves=7, window=None, bins_per_octave=36, cqt_mode='full')
	#librosa.feature.chroma_cens(y=None, sr=22050, C=None, hop_length=512, fmin=None, tuning=None, n_chroma=12, n_octaves=7, bins_per_octave=36, cqt_mode='full', window=None, norm=2, win_len_smooth=41, smoothing_window='hann')

	#librosa.feature.melspectrogram(y=None, sr=22050, S=None, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', power=2.0)
	#librosa.feature.mfcc(y=None, sr=22050, S=None, n_mfcc=20, dct_type=2, norm='ortho', lifter=0)

	#librosa.feature.spectral_centroid(y=None, sr=22050, S=None, n_fft=2048, hop_length=512, freq=None, win_length=None, window='hann', center=True, pad_mode='constant')
	#librosa.feature.spectral_bandwidth(y=None, sr=22050, S=None, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', freq=None, centroid=None, norm=True, p=2)
	#librosa.feature.spectral_contrast(y=None, sr=22050, S=None, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', freq=None, fmin=200.0, n_bands=6, quantile=0.02, linear=False)
	#librosa.feature.spectral_flatness(y=None, S=None, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', amin=1e-10, power=2.0)
	#librosa.feature.spectral_rolloff(y=None, sr=22050, S=None, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', freq=None, roll_percent=0.85)

	#librosa.feature.rms(y=None, S=None, frame_length=2048, hop_length=512, center=True, pad_mode='constant')
	#librosa.feature.poly_features(y=None, sr=22050, S=None, n_fft=2048, hop_length=512, win_length=None, window='hann', center=True, pad_mode='constant', order=1, freq=None)
	#librosa.feature.tonnetz(y=None, sr=22050, chroma=None)
	#librosa.feature.zero_crossing_rate(y, frame_length=2048, hop_length=512, center=True)

	# Rhythm features.
	#librosa.feature.tempogram(y=None, sr=22050, onset_envelope=None, hop_length=512, win_length=384, center=True, window='hann', norm=inf)
	#librosa.feature.fourier_tempogram(y=None, sr=22050, onset_envelope=None, hop_length=512, win_length=384, center=True, window='hann')

	# Feature manipulation.
	#librosa.feature.delta(data, width=9, order=1, axis=- 1, mode='interp')
	#librosa.feature.stack_memory(data, n_steps=2, delay=1)

	# Feature inversion.
	#librosa.feature.inverse.mel_to_stft(M, sr=22050, n_fft=2048, power=2.0)
	#librosa.feature.inverse.mel_to_audio(M, sr=22050, n_fft=2048, hop_length=None, win_length=None, window='hann', center=True, pad_mode='constant', power=2.0, n_iter=32, length=None, dtype=np.float32)
	#librosa.feature.inverse.mfcc_to_mel(mfcc, n_mels=128, dct_type=2, norm='ortho', ref=1.0, lifter=0)
	#librosa.feature.inverse.mfcc_to_audio(mfcc, n_mels=128, dct_type=2, norm='ortho', ref=1.0, lifter=0)

	#--------------------
	# RMS energy example.
	# REF [site] >> https://librosa.org/doc/main/generated/librosa.feature.rms.html
	trumpet, trumpet_rate = librosa.load(librosa.ex('trumpet'))

	# Use a STFT window of constant ones and no frame centering to get consistent results with the RMS computed from the audio samples y.
	# Default-window alternative: librosa.magphase(librosa.stft(y))
	spectrum = librosa.stft(trumpet, window=np.ones, center=False)
	magnitude, phase = librosa.magphase(spectrum)

	# RMS from the spectrogram; librosa.feature.rms(y=y) would use the samples instead.
	rms = librosa.feature.rms(S=magnitude)

	fig, axes = plt.subplots(nrows=2, sharex=True)
	frame_times = librosa.times_like(rms)
	top, bottom = axes[0], axes[1]
	top.semilogy(frame_times, rms[0], label='RMS Energy')
	top.set(xticks=[])
	top.legend()
	top.label_outer()
	librosa.display.specshow(librosa.amplitude_to_db(magnitude, ref=np.max), y_axis='log', x_axis='time', ax=bottom)
	bottom.set(title='log Power spectrogram')

	plt.show()
Пример #23
0
once, or when streaming data from a recording device.
"""

##################################################
# We'll need numpy and matplotlib for this example
import numpy as np
import matplotlib.pyplot as plt

import soundfile as sf

import librosa as librosa
import librosa.display as display

######################################################################
# First, we'll start with an audio file that we want to stream
# (librosa's bundled 'humpback' example)
filename = librosa.ex('humpback')

#####################################################################
# Next, we'll set up the block reader to work on short segments of
# audio at a time.

# We'll generate 64 frames at a time, each frame having 2048 samples
# and 75% overlap (a hop of 512 samples is a quarter of the frame length).
#

n_fft = 2048
hop_length = 512

# fill_value pads out the last frame with zeros so that we have a
# full frame at the end of the signal, even if the signal doesn't
# divide evenly into full frames.
Пример #24
0
and its margin-based extension due to `Dreidger, Mueller and Disch, 2014
<http://www.terasoft.com.tw/conf/ismir2014/proceedings/T110_127_Paper.pdf>`_.


"""

from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt

import librosa
import librosa.display

########################
# Load an example clip with harmonics and percussives
# (librosa's bundled 'choice' example)
y, sr = librosa.load(librosa.ex('choice'))

###############################################
# Compute the short-time Fourier transform of y
D = librosa.stft(y)

#####################################################
# Decompose D into harmonic and percussive components
#
# :math:`D = D_\text{harmonic} + D_\text{percussive}`
D_harmonic, D_percussive = librosa.decompose.hpss(D)

####################################################################
# We can plot the two components along with the original spectrogram

# Pre-compute a global reference power from the input spectrum
Пример #25
0
# Code source: Brian McFee
# License: ISC
# sphinx_gallery_thumbnail_number = 5

import numpy as np
import scipy
import matplotlib.pyplot as plt

import librosa
import librosa.display

#######################################################################
# We'll use a track that has harmonic, melodic, and percussive elements
#  Karissa Hobbs - Let's Go Fishin'
y, sr = librosa.load(librosa.ex('fishin'))

#######################################
# First, let's compute the original chroma (from the constant-Q transform)
chroma_orig = librosa.feature.chroma_cqt(y=y, sr=sr)

# For display purposes, let's zoom in on a 15-second chunk from the middle of the song
# idx keeps every row and only the frames between seconds 45 and 60.
idx = tuple([slice(None), slice(*list(librosa.time_to_frames([45, 60])))])

# And for comparison, we'll show the CQT matrix as well.
# 7 octaves with 36 bins per octave; only the magnitude is kept.
C = np.abs(librosa.cqt(y=y, sr=sr, bins_per_octave=12 * 3, n_bins=7 * 12 * 3))

fig, ax = plt.subplots(nrows=2, sharex=True)
img1 = librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max)[idx],
                                y_axis='cqt_note',
                                x_axis='time',
Пример #26
0
def SIGNAL():
    """
    Load librosa's bundled 'trumpet' example without resampling (``sr=None``).

    :return: Tuple ``(y, sr)`` as unpacked from ``librosa.load``.
    """
    example_path = librosa.ex('trumpet')
    samples, native_rate = librosa.load(example_path, sr=None)
    return samples, native_rate
Пример #27
0
def viterbi_decoding_example():
	"""
	Silence / non-silence detection on librosa's 'trumpet' example.

	Frame-wise RMS energy is turned into a probability of non-silence, which is
	thresholded per frame and, for comparison, decoded with
	``librosa.sequence.viterbi_discriminative`` using a two-state self-loop
	transition matrix. Five figures are created and shown at the end.
	"""
	signal, rate = librosa.load(librosa.ex('trumpet'))

	# Magnitude and phase of the STFT.
	magnitude, phase = librosa.magphase(librosa.stft(signal))

	def draw_spectrogram(axis):
		# dB-scaled magnitude spectrogram with a log-frequency axis, drawn on `axis`.
		return librosa.display.specshow(librosa.amplitude_to_db(magnitude, ref=np.max), y_axis='log', x_axis='time', sr=rate, ax=axis)

	# Figure 1: the spectrum.
	fig, ax = plt.subplots()
	image = draw_spectrogram(ax)
	fig.colorbar(image, ax=ax)

	# There are periods of silence and non-silence throughout this recording.
	# Figure 2: the root-mean-square (RMS) curve with the 0.02 level marked.
	rms = librosa.feature.rms(y=signal)[0]
	frame_times = librosa.frames_to_time(np.arange(len(rms)))

	fig, ax = plt.subplots()
	ax.plot(frame_times, rms)
	ax.axhline(0.02, color='r', alpha=0.5)
	ax.set(xlabel='Time', ylabel='RMS')

	# Shift the RMS by 0.02 and scale by its standard deviation to expand the
	# range of the probability vector, then squash with a logistic function.
	scaled = (rms - 0.02) / np.std(rms)
	p = np.exp(scaled) / (1 + np.exp(scaled))

	# Figure 3: the probability of non-silence and the decision threshold.
	fig, ax = plt.subplots()
	ax.plot(frame_times, p, label='P[V=1|x]')
	ax.axhline(0.5, color='r', alpha=0.5, label='Descision threshold')
	ax.set(xlabel='Time')
	ax.legend()

	# Figure 4: a simple detector that classifies each frame independently of its neighbors.
	#plt.figure(figsize=(12, 6))
	fig, ax = plt.subplots(nrows=2, sharex=True)
	draw_spectrogram(ax[0])
	ax[0].label_outer()
	ax[1].step(frame_times, p>=0.5, label='Non-silent')
	ax[1].set(ylim=[0, 1.05])
	ax[1].legend()

	# We can do better using the Viterbi algorithm.
	# Assume that a silent frame is equally likely to be followed by silence or non-silence, but that non-silence is slightly more likely to be followed by non-silence.
	# This is expressed as a self-loop transition matrix, where transition[i, j] is the probability of moving from state i to state j in the next frame.
	transition = librosa.sequence.transition_loop(2, [0.5, 0.6])
	print(transition)

	# p only gives the probability of non-silence, so the probability of silence is added as its complement.
	state_probs = np.vstack([1 - p, p])
	print(state_probs)

	# viterbi_discriminative is used because the inputs are state likelihoods conditional on data (here, the data is rms).
	states = librosa.sequence.viterbi_discriminative(state_probs, transition)

	# Figure 5: frame-wise decisions against the Viterbi path.
	#sphinx_gallery_thumbnail_number = 5
	fig, ax = plt.subplots(nrows=2, sharex=True)
	draw_spectrogram(ax[0])
	ax[0].label_outer()
	ax[1].step(frame_times, p>=0.5, label='Frame-wise')
	ax[1].step(frame_times, states, linestyle='--', color='orange', label='Viterbi')
	ax[1].set(ylim=[0, 1.05])
	ax[1].legend()

	plt.show()
Пример #28
0
def AUDIOFILE():
    """Return the value of ``librosa.ex('trumpet')`` for the bundled 'trumpet' example."""
    example_path = librosa.ex('trumpet')
    return example_path
Пример #29
0
##################################################
# We'll need numpy and matplotlib for this example
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt

import librosa
import librosa.display

######################################################
# We'll load in a five-second clip of a track that has
# noticeable vocal vibrato: the high-quality 'fishin' example,
# starting 35 seconds in and resampled to 44.1 kHz.
# The method works fine for longer signals, but the
# results are harder to visualize.
y, sr = librosa.load(librosa.ex('fishin', hq=True),
                     sr=44100,
                     duration=5,
                     offset=35)

####################################################
# These parameters are taken directly from the paper
n_fft = 1024
# Hop of 1/200 second, converted to a whole number of samples
hop_length = int(librosa.time_to_samples(1. / 200, sr=sr))
lag = 2
n_mels = 138
fmin = 27.5
fmax = 16000.
max_size = 3

########################################################
Пример #30
0
import librosa
import numpy
import soundfile


def apply_fadeout(audio, sr, duration=3.0):
    """Apply a linear fade-out to the end of *audio*, in place.

    The last ``int(duration * sr)`` samples are multiplied by a ramp that goes
    from 1.0 down to 0.0. A fade longer than the signal is shortened so that it
    spans the whole signal instead of failing with a shape mismatch.

    Parameters
    ----------
    audio : numpy.ndarray
        Samples with time along axis 0: mono ``(n,)`` or multi-channel
        ``(n, channels)``. Modified in place.
    sr : int or float
        Sampling rate in samples per second.
    duration : float
        Length of the fade in seconds.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``duration * sr`` is negative.
    """
    # convert to audio indices (samples)
    length = int(duration * sr)
    if length < 0:
        raise ValueError(
            f"fade length must be non-negative, got {length} samples")
    end = audio.shape[0]
    # never fade more samples than the signal has
    length = min(length, end)
    start = end - length

    # compute fade out curve
    # linear fade
    fade_curve = numpy.linspace(1.0, 0.0, length)
    # trailing singleton axes let the ramp broadcast over any channel axes
    fade_curve = fade_curve.reshape((length,) + (1,) * (audio.ndim - 1))

    # apply the curve
    audio[start:end] = audio[start:end] * fade_curve


# Load the first 5 seconds of the 'brahms' example and fade out a copy of it
# over its last 2 seconds; the copy keeps `orig` untouched because
# apply_fadeout modifies its argument in place.
path = librosa.ex('brahms')
orig, sr = librosa.load(path, duration=5.0)
out = orig.copy()
apply_fadeout(out, sr, duration=2.0)

# Write both versions to the current working directory for comparison.
soundfile.write('original.wav', orig, samplerate=sr)
soundfile.write('faded.wav', out, samplerate=sr)