def __test_ifgram(center, pad_mode):
    """Check that ``pad_mode`` only affects ``librosa.ifgram`` when centering.

    The global signal ``y`` is analysed twice: once with the reference
    ``'reflect'`` padding and once with ``pad_mode``.  The two results must
    agree unless frames are centered and a non-reflect padding was requested,
    in which case the first output is required to differ.
    """
    ref_first, ref_second = librosa.ifgram(y, center=center,
                                           pad_mode='reflect')
    alt_first, alt_second = librosa.ifgram(y, center=center,
                                           pad_mode=pad_mode)

    # The padding mode must never change the output shape.
    assert ref_first.shape == alt_first.shape

    if center and pad_mode != 'reflect':
        # A different padding of a centered signal has to be visible.
        assert not np.allclose(ref_first, alt_first)
        return

    # Either no padding is applied (center is falsy) or both calls used
    # 'reflect': every output must be reproduced.
    assert np.allclose(ref_first, alt_first)
    assert np.allclose(ref_second, alt_second)
def extractMusicFeatures(file_name, file_details):
    """Extract audio features from a file, print them and store a summary.

    The file is loaded with ``librosa.load``; chroma, MFCC, RMS energy,
    spectral contrast / centroid / bandwidth / roll-off and a tempo estimate
    are computed, printed, and the per-feature means are passed to
    ``addAudioFeatures`` for the ``"workout"`` database.

    :param file_name: path of the audio file handed to ``librosa.load``.
    :param file_details: label printed for the file and stored as ``name``.
    :return: None.
    """
    y, sr = librosa.load(file_name)

    chroma = librosa.feature.chroma_stft(y=y, sr=sr)

    # Centroid computed on instantaneous (per-bin) frequencies rather than
    # on fixed FFT bin centres.
    if_gram, D = librosa.ifgram(y)
    mag_SC = librosa.feature.spectral_centroid(S=np.abs(D), freq=if_gram)

    # Magnitude spectrogram used by the contrast feature.
    S = np.abs(librosa.stft(y))
    contrast = librosa.feature.spectral_contrast(S=S, sr=sr)

    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    rmse = librosa.feature.rmse(y=y)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)

    onset_env = librosa.onset.onset_strength(y, sr=sr)
    tempo = librosa.beat.estimate_tempo(onset_env, sr=sr)

    print("File: ", file_details)
    print("Chroma", chroma.mean(1))
    print("MSG_SC", mag_SC.mean())
    print("MFCC", mfcc.mean())
    print("Spectral Contrast mean", contrast.mean())
    print("Spectral roll off mean", spectral_rolloff.mean())
    print("Spectral centroid mean", spectral_centroid.mean())
    print("Spectral bandwidth mean", spectral_bandwidth.mean())
    print("Tempo", tempo)
    print("RSME", rmse)

    Create_table("workout")
    addAudioFeatures(name=file_details,
                     database="workout",
                     scem=spectral_centroid.mean(),
                     scom=contrast.mean(),
                     mfcc=mfcc.mean(),
                     rmse=rmse.mean(),
                     sbwm=spectral_bandwidth.mean(),
                     srom=spectral_rolloff.mean(),
                     tempo=tempo)
def __test(ref, clip):
    """Validate the instantaneous frequencies returned by ``librosa.ifgram``.

    ``ref`` is forwarded as ``ref_power``.  When ``clip`` is truthy the
    frequencies must lie in ``[0, sr / 2]``; in every case they must be
    finite.  ``y`` and ``sr`` come from the enclosing scope.
    """
    inst_freq, _ = librosa.ifgram(y, sr=sr, ref_power=ref, clip=clip)

    if clip:
        assert np.all(0 <= inst_freq) and np.all(inst_freq <= 0.5 * sr)

    assert np.all(np.isfinite(inst_freq))
def __test(ref_power, clip):
    """Validate the instantaneous frequencies returned by ``librosa.ifgram``.

    When ``clip`` is truthy the frequencies must lie in ``[0, sr / 2]``;
    regardless of ``clip`` they must all be finite.  ``y`` and ``sr`` come
    from the enclosing scope.
    """
    result = librosa.ifgram(y, sr=sr, ref_power=ref_power, clip=clip)
    inst_freq, _ = result

    if clip:
        assert np.all(0 <= inst_freq) and np.all(inst_freq <= 0.5 * sr)

    assert np.all(np.isfinite(inst_freq))
def getfrequeciesdistribute2(filepath, num=100):
    """Return the most common non-zero instantaneous frequencies of a file.

    The audio is loaded, passed through ``clean2``, and its instantaneous
    frequencies are truncated to integers and snapped down to even values.
    The ``num`` most frequent values are taken and zero is then discarded,
    so fewer than ``num`` entries may be returned.

    :param filepath: path handed to ``librosa.load``.
    :param num: how many of the most common values to consider.
    :return: ``numpy`` array of frequencies, most common first.
    """
    signal, rate = librosa.load(filepath)
    signal = clean2(signal, rate)

    inst_freq, _ = librosa.ifgram(signal, sr=rate)
    # Integer truncation followed by rounding down to a multiple of two.
    even_bins = inst_freq.astype(int) // 2 * 2

    ranked = Counter(even_bins.flatten().tolist()).most_common(num)
    return np.array([value for value, _count in ranked if value != 0])
def change_stretch(wav, sr=_sr, rate=1):
    """
    Stretch (lengthen) the audio by a whole multiple.
    :param rate: 1~10, int; 1 keeps the original sound
    :param wav: input waveform passed to ``librosa.ifgram``
    :param sr: sample rate passed to ``librosa.ifgram``
    :return: waveform rebuilt by ``librosa.istft``
    """
    # Only the STFT matrix is needed; the frequencies are discarded.
    _, spectrum = librosa.ifgram(wav, sr=sr)
    stretched = spread(spectrum, rate)
    return librosa.istft(stretched)
def change_male(wav, sr=_sr, rate=0):
    """
    Change the voice to a male voice.
    :param rate: 0~1025, int; 0, 1 and 1025 keep the original sound
    :param wav: input waveform passed to ``librosa.ifgram``
    :param sr: sample rate passed to ``librosa.ifgram``
    :return: waveform rebuilt by ``librosa.istft``
    """
    # Only the STFT matrix is needed; the frequencies are discarded.
    _, spectrum = librosa.ifgram(wav, sr=sr)
    pooled = pool_step(spectrum, rate)
    return librosa.istft(pooled)
def change_attention(wav, sr=_sr, rate=0):
    """
    Emphasise the treble or the bass range.
    :param rate: -100~100, int; 0 keeps the original sound
    :param wav: input waveform passed to ``librosa.ifgram``
    :param sr: sample rate passed to ``librosa.ifgram``
    :return: waveform rebuilt by ``librosa.istft``
    """
    # Only the STFT matrix is needed; the frequencies are discarded.
    _, spectrum = librosa.ifgram(wav, sr=sr)
    shifted = roll(spectrum, rate)
    return librosa.istft(shifted)
def change_reback(wav, sr=_sr, rate=1):
    """
    Echo effect.
    :param rate: 1~10, int; 1 keeps the original sound
    :param wav: input waveform passed to ``librosa.ifgram``
    :param sr: sample rate passed to ``librosa.ifgram``
    :return: waveform rebuilt by ``librosa.istft``
    """
    # Only the STFT matrix is needed; the frequencies are discarded.
    _, spectrum = librosa.ifgram(wav, sr=sr)
    pooled = pool(spectrum, size=(1, rate))
    echoed = repeat(pooled, rate)
    return librosa.istft(echoed)
def change_vague(wav, sr=_sr, rate=1):
    """
    Blur effect.
    :param rate: 1~10, int; 1 keeps the original sound
    :param wav: input waveform passed to ``librosa.ifgram``
    :param sr: sample rate passed to ``librosa.ifgram``
    :return: waveform rebuilt by ``librosa.istft``
    """
    window = (1, rate)
    # Only the STFT matrix is needed; the frequencies are discarded.
    _, spectrum = librosa.ifgram(wav, sr=sr)
    blurred = spread(pool(spectrum, window), window)
    return librosa.istft(blurred)
def getfrequeciesdistribute(filepath, num=100):
    """Return count-weighted dominant frequencies of an audio file.

    The audio is loaded, passed through ``clean2``, and its instantaneous
    frequencies are truncated to integers and snapped down to even values.
    Among the ``num`` most common values, zero is discarded; each remaining
    frequency is multiplied by its count and divided by the total count of
    the retained values.

    :param filepath: path handed to ``librosa.load``.
    :param num: how many of the most common values to consider.
    :return: ``numpy`` float array, ordered from most to least common; empty
        when no non-zero frequency is retained.
    """
    y, sr = librosa.load(filepath)
    y = clean2(y, sr)

    frequencies, _ = librosa.ifgram(y, sr=sr)
    # Integer truncation followed by rounding down to a multiple of two.
    frequencies = frequencies.astype(int) // 2 * 2

    counts = Counter(frequencies.flatten().tolist())
    data = [(freq, count) for freq, count in counts.most_common(num)
            if freq != 0]

    # Folding an empty sequence without an initial value raises TypeError;
    # an all-zero (e.g. silent) input therefore yields an empty result.
    if not data:
        return np.array([])

    total = sum(count for _, count in data)
    weighted = np.array([freq * count for freq, count in data])
    return weighted / total
def __test(infile):
    """Compare ``librosa.ifgram`` frequencies with a stored reference.

    ``infile`` is read with ``load``; it supplies the wav file name, the
    analysis parameters and the expected frequencies ``F``.
    """
    fixture = load(infile)

    def _int_param(key):
        # Parameters are read from element [0, 0] of the entry, cast to int.
        return fixture[key][0, 0].astype(int)

    # The sample rate returned by the loader is ignored; the fixture's is used.
    signal, _ = librosa.load(fixture['wavfile'][0], sr=None, mono=True)

    # Compute the IFgram
    inst_freq, _ = librosa.ifgram(signal,
                                  n_fft=_int_param('nfft'),
                                  hop_length=_int_param('hop_length'),
                                  win_length=_int_param('hann_w'),
                                  sr=_int_param('sr'))

    # The STFT matrix is deliberately not compared with fixture['D']:
    # it fails to match here because of fftshift().
    assert np.allclose(inst_freq, fixture['F'], atol=1e-3)
def __test(infile):
    """Compare un-centered ``librosa.ifgram`` frequencies with a reference.

    ``infile`` is read with ``load``; it supplies the wav file name, the
    analysis parameters and the expected frequencies ``F``.  The transform
    is computed with ``center=False``.
    """
    fixture = load(infile)

    def _int_param(key):
        # Parameters are read from element [0, 0] of the entry, cast to int.
        return fixture[key][0, 0].astype(int)

    # The sample rate returned by the loader is ignored; the fixture's is used.
    signal, _ = librosa.load(fixture['wavfile'][0], sr=None, mono=True)

    # Compute the IFgram
    inst_freq, _ = librosa.ifgram(signal,
                                  n_fft=_int_param('nfft'),
                                  hop_length=_int_param('hop_length'),
                                  win_length=_int_param('hann_w'),
                                  sr=_int_param('sr'),
                                  center=False)

    # The STFT matrix is deliberately not compared with fixture['D']:
    # it fails to match here because of fftshift().
    assert np.allclose(inst_freq, fixture['F'], atol=1e-3)
def get_spec(wav, spec_type='mel'):
    """Return the requested spectral representation of ``wav``.

    :param wav: waveform forwarded to the selected transform.
    :param spec_type: ``'mel'`` (``melspectrogram``), ``'stft'``
        (``spectrogram``) or ``'if'`` (first output of ``librosa.ifgram``
        using the sizes configured in ``hp``).
    :return: the output of the selected transform.
    :raises ValueError: if ``spec_type`` is none of the above.
    """
    if spec_type == 'mel':
        return melspectrogram(wav)

    if spec_type == 'if':
        inst_freq = librosa.ifgram(wav,
                                   sr=hp.sample_rate,
                                   n_fft=hp.fft_size,
                                   hop_length=hp.hop_size)[0]
        return inst_freq

    if spec_type == 'stft':
        return spectrogram(wav)

    raise ValueError(f"Unknown spec_type: `{spec_type}`")
def spectralCent(song):
    """Plot the spectral centroid of the first 60 seconds of a song.

    The centroid is drawn on a logarithmic y axis in the upper half of a
    new two-row matplotlib figure, labelled with ``song``.

    :param song: file name without extension; the ``.mp3`` file is read from
        the hard-coded music directory.
    :return: None.
    """
    # Backslashes are escaped explicitly: an unescaped "\U" inside a normal
    # string literal is parsed as a unicode escape and rejected by Python 3.
    path = "C:\\Users\\Katherine\\Music\\" + song + ".mp3"
    y, sr = librosa.load(path, duration=60)

    # Only the waveform-based centroid is plotted, so the spectrogram- and
    # ifgram-based variants (whose results were never used) are not computed.
    cent = librosa.feature.spectral_centroid(y=y, sr=sr)

    plt.figure()
    plt.subplot(2, 1, 1)
    plt.semilogy(cent.T, label=song)
    plt.ylabel('Hz')
    plt.xticks([])
    plt.xlim([0, cent.shape[-1]])
    plt.legend()
def change_pitchspeed(wav, sr=_sr, rate=1):
    """
    Change pitch and speaking rate together.
    :param rate: 0~10, float; 1 keeps the original sound
    :param wav: input waveform passed to ``librosa.ifgram``
    :param sr: sample rate passed to ``librosa.ifgram``
    :return: waveform rebuilt by ``librosa.istft``
    """
    # Only the STFT matrix is needed; the frequencies are discarded.
    _, spectrum = librosa.ifgram(wav, sr=sr)

    rows = spectrum.shape[0]
    target_rows = int(rows * rate)

    if target_rows > rows:
        # More rows requested than available: resize to the larger shape.
        resized = rewardshape(spectrum, (target_rows, spectrum.shape[1]))
    else:
        # Same or fewer rows: drop the surplus.
        resized = drop(spectrum, rows - target_rows, mode="r")

    return librosa.istft(resized)
def __test(n_fft, hop_length, win_length, center, norm, dtype):
    """Check that the STFT returned by ``librosa.ifgram`` matches ``stft``.

    Both transforms are computed on the global ``y`` with identical framing
    parameters and compared with ``np.allclose``.
    """
    framing = dict(n_fft=n_fft,
                   hop_length=hop_length,
                   win_length=win_length,
                   center=center,
                   dtype=dtype)

    D_stft = librosa.stft(y, **framing)
    D_ifgram = librosa.ifgram(y, sr, norm=norm, **framing)[1]

    if norm:
        # STFT doesn't do window normalization; the relative scale is
        # ignored by normalizing both matrices along axis 0.
        D_stft, D_ifgram = [librosa.util.normalize(matrix, axis=0)
                            for matrix in (D_stft, D_ifgram)]

    assert np.allclose(D_stft, D_ifgram)
def Sinusoid_extraction(data, sr):
    """Equal-loudness filter a signal and compute its STFT / IF-gram.

    :param data: audio samples.
    :param sr: sample rate of ``data``; forwarded to ``librosa.ifgram`` so
        that the instantaneous frequencies are scaled for this rate instead
        of the library default.
    :return: tuple ``(data_lfiltered, data_stft, data_if)``: the filtered
        signal, the STFT matrix and the instantaneous frequencies.
    """
    # Equal Loudness Filtering
    # Following parameters from
    # "http://replaygain.hydrogenaud.io/proposal/equal_loudness.html"
    # NOTE(review): the coefficients are fixed constants; confirm they are
    # appropriate for every `sr` this function is called with.
    Ay = [
        1.00000000000000, -3.47845948550071, 6.36317777566148,
        -8.54751527471874, 9.47693607801280, -8.81498681370155,
        6.85401540936998, -4.39470996079559, 2.19611684890774,
        -0.75104302451432, 0.13149317958808
    ]
    By = [
        0.05418656406430, -0.02911007808948, -0.00848709379851,
        -0.00851165645469, -0.00834990904936, 0.02245293253339,
        -0.02596338512915, 0.01624864962975, -0.00240879051584,
        0.00674613682247, -0.00187763777362
    ]
    Ab = [1.00000000000000, -1.96977855582618, 0.97022847566350]
    Bb = [0.98500175787242, -1.97000351574484, 0.98500175787242]

    # Two cascaded IIR stages: (By, Ay) first, then (Bb, Ab).
    data_lfiltered = signal.lfilter(b=Bb, a=Ab,
                                    x=signal.lfilter(b=By, a=Ay, x=data))

    # Frequency/Amplitude Correction; Short-Time Fourier Transform (STFT).
    # NOTE(review): the transform is taken on the raw `data`, not on
    # `data_lfiltered` -- confirm that this is intended.
    data_if, data_stft = librosa.ifgram(y=data, sr=sr, hop_length=128,
                                        win_length=2048, n_fft=8192)

    return data_lfiltered, data_stft, data_if
def get_audio_features(file_name):
    """Classify an uploaded audio file from its mean spectral features.

    The file ``UPLOAD_FOLDER/file_name`` is loaded with ``librosa.load``;
    MFCC, RMS energy, spectral contrast / centroid / bandwidth / roll-off
    means and a tempo estimate are passed to ``classifier.classify_audio``.

    :param file_name: name of the file inside ``UPLOAD_FOLDER``.
    :return: JSON string of the classifier's result.
    """
    # NOTE(review): file_name is concatenated into the path unchanged; if it
    # can come from a client, confirm it is sanitised by the caller.
    file_path = UPLOAD_FOLDER + "/" + file_name
    y, sr = librosa.load(file_path)

    # Magnitude spectrogram used by the contrast feature.
    S = np.abs(librosa.stft(y))
    contrast = librosa.feature.spectral_contrast(S=S, sr=sr)

    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    rmse = librosa.feature.rmse(y=y)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)

    onset_env = librosa.onset.onset_strength(y, sr=sr)
    tempo = librosa.beat.estimate_tempo(onset_env, sr=sr)

    activity = classifier.classify_audio(
        scem=spectral_centroid.mean(),
        scom=contrast.mean(),
        mfcc=mfcc.mean(),
        rmse=rmse.mean(),
        sbwm=spectral_bandwidth.mean(),
        srom=spectral_rolloff.mean(),
        tempo=tempo)
    return json.dumps(activity)
os.makedirs(output_dir, access_rights, exist_ok=True) except OSError as exc: if exc.errno != errno.EEXIST: raise print("Creation of the directory %s failed" % output_dir) pass else: print("Successfully created the directory %s" % output_dir) for file in range(0, len(audio_files), 1): y, sr = librosa.load(audio_files[file]) spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr) S, phase = librosa.magphase(librosa.stft(y=y)) if_gram, D = librosa.ifgram(y) plt.figure() plt.subplot(2, 1, 1) plt.semilogy(spec_bw.T, label='Spectral bandwidth') plt.ylabel('Hz') plt.xticks([]) plt.xlim([0, spec_bw.shape[-1]]) plt.legend() plt.subplot(2, 1, 2) librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max), y_axis='log', x_axis='time') plt.title('log Power spectrogram') plt.tight_layout()
ones = np.array([1000] * 100000) #%% ones_spec = librosa.stft(ones) #%% np.all(spectrogram(ones)[2] == 0) #%% spectrogram(ones) #%% reconstr = librosa.istft(ones_spec) #%% igram = librosa.ifgram(ones_spec) #%% plot individual channels over same time period plt.figure() plt.plot(giraffe.subm_raw.loc[start_lf:end_lf]) plt.title("Submeter channels") #%% initial view of data # plot total power plt.figure() plt.plot(instant_power[0:2000]) plt.title("Aggregate instant power") #%% load in super meta data (latest one correct acc.Lionel) import json with open(
plt.title('Sevince - Erkin Koray') plt.tight_layout() cent = librosa.feature.spectral_centroid(y=y1, sr=22050) cent # array([[ 4382.894, 626.588, ..., 5037.07 , 5413.398]]) # From spectrogram input: S, phase = librosa.magphase(librosa.stft(y=y1)) librosa.feature.spectral_centroid(S=S) # array([[ 4382.894, 626.588, ..., 5037.07 , 5413.398]]) # Using variable bin center frequencies: if_gram, D = librosa.ifgram(y1) librosa.feature.spectral_centroid(S=np.abs(D), freq=if_gram) # array([[ 4420.719, 625.769, ..., 5011.86 , 5221.492]]) # Plot the result import matplotlib.pyplot as plt plt.figure() plt.subplot(2, 1, 1) plt.semilogy(cent.T, label='Spectral centroid') plt.ylabel('Hz') plt.xticks([]) plt.xlim([0, cent.shape[-1]]) plt.legend() plt.subplot(2, 1, 2) librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max),
def clean2(y, sr):
    """Suppress weak time-frequency components of a signal.

    Every STFT coefficient whose magnitude is below 1/30 of the largest
    magnitude is set to zero, and the signal is rebuilt with
    ``librosa.istft``.

    :param y: input waveform passed to ``librosa.ifgram``.
    :param sr: sample rate passed to ``librosa.ifgram``.
    :return: the reconstructed waveform.
    """
    # Only the STFT matrix is needed; the frequencies are discarded.
    _, spectrum = librosa.ifgram(y, sr=sr)

    magnitude = abs(spectrum)
    threshold = magnitude.max() / 30
    spectrum[magnitude < threshold] = 0

    return librosa.istft(spectrum)
def feature(y, sr=22050):
    """Build a fixed-order feature vector for an audio signal.

    The returned array contains, in this order: average energy per second of
    the L2-normalized signal, mean zero-crossing rate, mean RMS, mean
    spectral roll-off, mean spectral centroid and mean spectral bandwidth
    (the last two computed on instantaneous frequencies), the per-row means
    of the CQT chroma and of the CENS chroma, and the estimated tempo.

    When the module-level ``DEBUG`` flag is truthy, intermediate values or
    shapes are printed.

    :param y: audio samples.
    :param sr: sample rate of ``y``.
    :return: ``numpy`` array with the features listed above.
    """

    def _debug(template, value):
        # Diagnostic output, emitted only when DEBUG is enabled.
        if DEBUG:
            print(template.format(value))

    # At 22050 Hz, 512 samples ~= 23 ms.
    hop = 512

    y_norm = librosa.util.normalize(y, norm=2)

    # Average energy per second of signal.
    seconds = float(len(y)) / float(sr)
    avg_energy = float(np.sum(y_norm**2)) / seconds
    _debug('avg_energy: {}', avg_energy)

    # Zero-crossing rate.
    z = librosa.feature.zero_crossing_rate(y_norm)
    z_mean = np.mean(z)
    _debug('zero crossing rate: {}', z.shape)

    feat = [avg_energy, z_mean]

    # STFT together with per-bin instantaneous frequencies.
    if_gram, D = librosa.ifgram(y=y_norm, sr=sr, n_fft=2048, hop_length=hop)
    S, _phase = librosa.magphase(D)

    # RMS.
    rms = librosa.feature.rmse(S=S)
    _debug('rms shape: {}', rms.shape)
    feat.append(np.mean(rms))

    # Roll-off.
    rolloff = librosa.feature.spectral_rolloff(S=S, sr=sr)
    _debug('roll-off shape: {}', rolloff.shape)
    feat.append(np.mean(rolloff))

    # Centroid, using the instantaneous frequencies as bin frequencies.
    cent = librosa.feature.spectral_centroid(S=np.abs(D), freq=if_gram)
    _debug('spectrum centroid shape: {}', cent.shape)
    feat.append(np.mean(cent))

    # Spectral bandwidth, same frequency grid as the centroid.
    spec_bw = librosa.feature.spectral_bandwidth(S=np.abs(D), freq=if_gram)
    _debug('spectral_bandwidth shape: {}', spec_bw.shape)
    feat.append(np.mean(spec_bw))

    # Chroma CQT.
    chroma_cq = librosa.feature.chroma_cqt(y=y_norm, sr=sr, n_chroma=12)
    _debug('chroma cqt shape: {}', chroma_cq.shape)
    feat.extend(np.mean(chroma_cq, axis=1))

    # Chroma CENS.
    chroma_cens = librosa.feature.chroma_cens(y=y_norm, sr=sr, n_chroma=12)
    _debug('chroma cens shape: {}', chroma_cens.shape)
    feat.extend(np.mean(chroma_cens, axis=1))

    # Global tempo estimated from the onset-strength envelope.
    oenv = librosa.onset.onset_strength(y=y_norm, sr=sr, hop_length=hop)
    tempo = librosa.beat.tempo(onset_envelope=oenv, sr=sr,
                               hop_length=hop)[0]
    _debug("tempo: {}", tempo)
    feat.append(tempo)

    return np.array(feat)
import math
import librosa
import numpy as np
import matplotlib.pyplot as plt
import librosa.display
from music_path import music_path

# Script: load the file returned by music_path(), compute its onset-strength
# envelope and instantaneous frequencies, then apply mu-law compression and
# zero-insertion upsampling.

filename = music_path()
# sr=None is passed to the loader instead of a fixed target sample rate.
y, sr = librosa.load(filename, sr=None)
o_envs = librosa.onset.onset_strength(y, sr=sr)
# Time stamp of every onset-envelope frame.
times = librosa.frames_to_time(np.arange(len(o_envs)), sr=sr)

# Instantaneous frequency
frequencies, D = librosa.ifgram(y, sr=sr)

# mu-law compression of xn
# NOTE(review): `xn` is never assigned in this script, so the expression
# below raises NameError as written -- confirm which signal was intended
# (possibly `o_envs`).
mu = 100
yn = np.log(1 + mu * xn) / np.log(1 + mu)

# Interpolation: `yn` is placed at the even indices of `cn`; the odd
# indices stay 0.
cn = [0] * (2 * len(yn) - 1)
for i in range(len(yn)):
    cn[2 * i] = yn[i]

# Low-pass filtering (not implemented yet)
# Half-wave rectification (not implemented yet)