def plot_spec(signal, sr=16000, win_length=None, hop_length=None, return_spec=False):
    """Draw a dB-scaled STFT spectrogram of ``signal`` on the current axes.

    Args:
        signal: Audio samples handed to ``librosa.core.stft``.
        sr: Sampling rate in Hz; drives the default window/hop sizes and the
            plot axes.
        win_length: STFT window length in samples. Defaults to 25 ms at ``sr``.
        hop_length: STFT hop in samples. Defaults to 10 ms at ``sr``.
        return_spec: When true, return the dB spectrogram that was plotted.

    Returns:
        The dB spectrogram if ``return_spec`` is true, otherwise ``None``.
    """
    if win_length is None:
        win_length = int(sr * 0.025)
    if hop_length is None:
        hop_length = int(sr * 0.010)

    Sxx = librosa.core.stft(
        signal,
        win_length=win_length,
        hop_length=hop_length,
        n_fft=4096,
    )
    # ref=np.max puts the loudest bin at 0 dB.
    spec = librosa.amplitude_to_db(np.abs(Sxx), ref=np.max)

    # The hop must be forwarded: without it the time axis is laid out for the
    # display default (512 samples) rather than the hop used for the STFT.
    # NOTE(review): assumes `specshow` is librosa.display.specshow.
    specshow(
        spec,
        sr=sr,
        hop_length=hop_length,
        x_axis='time',
        y_axis='hz',
        cmap='gray_r',
    )
    plt.colorbar(format='%+2.0f dB')

    if return_spec:
        return spec
def get_spectrograms(sound_file):
    '''Returns normalized log(melspectrogram) and log(magnitude) from `sound_file`.

    Args:
      sound_file: A string. The full path of a sound file.

    Returns:
      mel: A 2d float32 array of shape (T, n_mels) <- Transposed
      done: A 1d int32 array of ones with one entry per frame (length T)
      mag: A 2d float32 array of shape (T, 1+n_fft/2) <- Transposed
    '''
    # Loading sound file
    y, sr = librosa.load(sound_file, sr=hp.sr)

    # Trimming leading/trailing silence
    y, _ = librosa.effects.trim(y)

    # Preemphasis: y[n] - a * y[n-1], first sample kept as is
    y = np.append(y[0], y[1:] - hp.preemphasis * y[:-1])

    # stft
    linear = librosa.stft(y=y,
                          n_fft=hp.n_fft,
                          hop_length=hp.hop_length,
                          win_length=hp.win_length)

    # magnitude spectrogram
    mag = np.abs(linear)  # (1+n_fft//2, T)

    # mel spectrogram
    # Keyword arguments: librosa >= 0.10 rejects positional sr/n_fft/n_mels,
    # and older releases accept the keyword form as well.
    mel_basis = librosa.filters.mel(sr=hp.sr, n_fft=hp.n_fft, n_mels=hp.n_mels)  # (n_mels, 1+n_fft//2)
    mel = np.dot(mel_basis, mag)  # (n_mels, t)

    # Sequence length: one "1" per frame
    done = np.ones_like(mel[0, :]).astype(np.int32)

    # to decibel
    mel = librosa.amplitude_to_db(mel)
    mag = librosa.amplitude_to_db(mag)

    # normalize into [0, 1]
    mel = np.clip((mel - hp.ref_db + hp.max_db) / hp.max_db, 0, 1)
    mag = np.clip((mag - hp.ref_db + hp.max_db) / hp.max_db, 0, 1)

    # Transpose
    mel = mel.T.astype(np.float32)  # (T, n_mels)
    mag = mag.T.astype(np.float32)  # (T, 1+n_fft//2)

    return mel, done, mag
def process(self, y, sample_rate):
    """Return a dB-scaled mel spectrogram of ``y``.

    Args:
        y: Audio samples passed to ``librosa.feature.melspectrogram``.
        sample_rate: Sampling rate of ``y`` in Hz.

    Returns:
        The mel power spectrogram (``power=2.0``) after
        ``librosa.amplitude_to_db`` with ``ref=1.0``, ``amin=1e-5`` and
        ``top_db=80.0``.
    """
    # `y` is passed by keyword: librosa >= 0.10 made it keyword-only, and
    # older releases accept the keyword form too.
    X = librosa.feature.melspectrogram(
        y=y,
        sr=sample_rate,
        n_mels=self.n_mels,
        n_fft=self.n_fft_,
        hop_length=self.hop_length_,
        power=2.0)

    # NOTE(review): X is a *power* spectrogram but is converted with
    # amplitude_to_db, which yields twice the dB of power_to_db. Left as is
    # because downstream consumers may depend on this scaling - confirm.
    return librosa.amplitude_to_db(X, ref=1.0, amin=1e-5, top_db=80.0)
def test_sharex_waveplot_ms():
    """Waveform and spectrogram on a shared x axis, spectrogram in 'ms'."""
    # Correct time range ~= 4.6 s or 4600ms
    # Due to shared x_axis, both plots are plotted in 'ms'.
    plt.figure(figsize=(8, 8))

    wave_axes = plt.subplot(2, 1, 1)
    librosa.display.waveplot(y, sr)

    plt.subplot(2, 1, 2, sharex=wave_axes)
    spec_db = librosa.amplitude_to_db(S_abs, ref=np.max)
    librosa.display.specshow(spec_db, x_axis='ms')
def plot_log_power_specgram(sound_names, raw_sounds):
    """Plot a log-power spectrogram for each sound, stacked vertically.

    Args:
        sound_names: Iterable of display names, one per sound.
        raw_sounds: Iterable of audio sample arrays, parallel to
            ``sound_names``.

    The subplot grid has 10 rows, so at most 10 sounds can be drawn.
    """
    for i, (n, f) in enumerate(zip(sound_names, raw_sounds), start=1):
        plt.subplot(10, 1, i)
        # |STFT|**2 is a power spectrogram, so it must go through
        # power_to_db; amplitude_to_db would square it again (doubling dB).
        power = np.abs(librosa.stft(f)) ** 2
        D = librosa.power_to_db(power, ref=np.max)
        librosa.display.specshow(D, x_axis='time', y_axis='log')
        plt.title(n.title())
    plt.suptitle("Figure 3: Log power spectrogram", x=0.5, y=0.915, fontsize=18)
    plt.show()
def test_amplitude_to_db():
    """amplitude_to_db(x) must equal the power conversion of x**2."""
    srand()

    NOISE_FLOOR = 1e-6

    # Make some noise
    x = np.abs(np.random.randn(1000)) + NOISE_FLOOR

    db1 = librosa.amplitude_to_db(x, top_db=None)
    # power_to_db is the replacement for the removed librosa.logamplitude.
    db2 = librosa.power_to_db(x**2, top_db=None)

    assert np.allclose(db1, db2)
def test_db_to_amplitude():
    """db_to_amplitude must invert amplitude_to_db on strictly positive input."""
    srand()

    NOISE_FLOOR = 1e-6

    # Strictly positive noise so the round trip is well defined
    original = np.abs(np.random.randn(1000)) + NOISE_FLOOR

    recovered = librosa.db_to_amplitude(
        librosa.amplitude_to_db(original, top_db=None)
    )

    assert np.allclose(original, recovered)
def dynamic_spectrogram(data, filename, block_nb=0, ref=np.max, display=False):
    """Compute the dB spectrogram of a block of samples and show or save it.

    The STFT of ``data`` is converted to a dB-scaled magnitude spectrogram
    and drawn with ``librosa.display.specshow``. The figure is then either
    shown interactively or saved as a PNG in the configured directory.

    Args:
        data (array): 1D array of audio data.
        filename (str): Path of the source audio file; its base name without
            extension is used to build the output file name.
        block_nb (int): Block index forwarded to ``utils.create_filename``.
        ref: Reference for the dB conversion, forwarded to
            ``librosa.amplitude_to_db``.
        display (bool): Boolean to plot or save the current spectrogram.

    Returns:
        None

    Todo:
        - remove the padding/margin around the plot
        - Add a path and a name where to save the plots
    """
    # amplitude_to_db expects magnitudes. Passing the complex STFT makes
    # librosa warn and discard the phase implicitly, so take |.| explicitly.
    data_mag = np.abs(librosa.stft(data))
    data_freq_db = librosa.amplitude_to_db(data_mag, ref=ref)
    librosa.display.specshow(data_freq_db)

    if display:
        plt.ylabel('Frequency [Hz]')
        plt.xlabel('Time [samples]')
        plt.show()
    else:
        spec_path = utils.read_config('path', 'spectrograms')
        fname = os.path.splitext(os.path.basename(filename))
        fig_path = utils.create_filename(
            spec_path, 'png', fname[0], 'dynamic', block_nb)
        plt.savefig(fig_path)
# Finish the CSV-style header with the target column and split it into names.
header += "label"
header_list = header.split(",")

# Empty dataframe: one NaN-filled row per entry of `onlyfiles`.
features_set = pd.DataFrame(np.nan, index=range(len(onlyfiles)), columns=header_list)

# Extract the features and fill the dataframe.
# NOTE(review): `id` shadows the builtin; presumably the row counter used by
# code beyond this excerpt - confirm.
id = 0
for genre in clases:
    for file in os.listdir(audio_path + genre):
        song = audio_path + genre + "/" + file
        # Load at most the first 30 s, mixed down to mono.
        y, sr = librosa.load(song, mono=True, duration=30)
        stft = librosa.stft(y)
        stft_db = librosa.amplitude_to_db(abs(stft))
        # NOTE(review): recomputes the same STFT as `stft` above.
        spectogram = np.abs(librosa.stft(y))
        tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_cqt = librosa.feature.chroma_cqt(y=y, sr=sr)
        chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
        melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
        rms = librosa.feature.rms(S=spectogram)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        spectral_flatness = librosa.feature.spectral_flatness(y=y)
        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        poly_features = librosa.feature.poly_features(y=y, sr=sr)
        tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
        zero_crossing_rate = librosa.feature.zero_crossing_rate(y=y)
def _sout_lines(*lines):
    """Emit each of ``lines`` through ``sout`` in order."""
    for line in lines:
        sout(line)


def _db_to_frame_levels(mel_db, samples_per_frame, scale_factor):
    """Turn a (bins, samples) dB array into per-animation-frame int levels.

    Each sample is shifted by +80, scaled and truncated to int; consecutive
    groups of ``samples_per_frame`` samples are then averaged (truncated to
    int). Trailing samples that do not fill a whole frame are dropped.

    Returns:
        An int array of shape (frames, bins).
    """
    # (samples, bins); assumes the dB values lie in [-80, 0] - TODO confirm
    levels = ((np.asarray(mel_db).T + 80) * scale_factor).astype(int)
    n_bins = levels.shape[1]
    n_frames = levels.shape[0] // samples_per_frame
    grouped = levels[:n_frames * samples_per_frame].reshape(
        n_frames, samples_per_frame, n_bins)
    return (grouped.sum(axis=1) / samples_per_frame).astype(int)


def processMusic(socketID, melBins, core):
    """Analyse an uploaded song and write the animation data files for it.

    Reads ``./Website/uploads/<socketID>.mp3`` and writes, under
    ``./Website/downloads/``:

    * ``<socketID>.bts`` - animation frame index of every detected beat,
    * ``<socketID>.vol`` - one overall level per animation frame,
    * ``<socketID>.mel`` - ``melBins`` levels per animation frame,
    * ``<socketID>.brs`` - scene setup + rendering core + scene tail scripts
      concatenated.

    Progress is reported through ``sout``.

    Args:
        socketID: Identifier used to build the input and output file names.
        melBins: Number of mel bins for the ``.mel`` output (converted with
            ``int``).
        core: Name of the rendering-core script (without ``.txt``).

    NOTE(review): ``socketID`` and ``core`` are interpolated into file paths
    without validation; if they come from a client they should be sanitised
    by the caller - confirm.
    """
    mp3Path = "./Website/uploads/" + socketID + ".mp3"

    # shared setup #
    melBins = int(melBins)
    frameRate = 24
    sampleRate = 1000 * frameRate
    samplesPerFrame = 2
    sampleHop = int((sampleRate/frameRate)/samplesPerFrame)
    scaleFactor = 100/80

    _sout_lines("", "", "Loading song into pyBlender.....")

    # load music into librosa #
    y, sr = librosa.load(mp3Path, sr=sampleRate)

    _sout_lines("", "**Loading Finished**", "")
    time.sleep(1)

    # BEATS PROCESSING #
    _sout_lines("", "", "Tempo/Beats Processing Engine", "--------------", "")

    tempo, beats = librosa.beat.beat_track(y=y, sr=sampleRate)
    beatArray = librosa.frames_to_time(beats, sr=sr)

    sout("Beats: " + str(len(beats)))
    sout("Tempo: " + str(int(tempo)))

    # Truncate each beat time to centiseconds, then convert to a frame index.
    frameArray = [int((float(int(t * 100)) / 100) * 24) for t in beatArray]
    np.savetxt("./Website/downloads/" + socketID + ".bts", frameArray)

    _sout_lines("", "**Frame Array Construction Finished**", "")
    time.sleep(2)

    # VOLUME PROCESSING #
    _sout_lines("", "", "Volume Processing Engine", "--------------", "")

    # NOTE(review): melspectrogram returns power by default, yet it is
    # converted with amplitude_to_db. Kept because the +80 scaling was tuned
    # against this output - confirm before switching to power_to_db.
    S = librosa.feature.melspectrogram(y=y, sr=sampleRate, n_mels=1, fmax=8000, hop_length=sampleHop)
    librosaMel = librosa.amplitude_to_db(S, ref=np.max)

    samples = len(librosaMel[0])
    sout("Samples: " + str(samples))
    frames = samples/samplesPerFrame
    sout("Anim. Frames: " + str(frames))
    sout("Samples/Frame: " + str(samplesPerFrame))
    sout("Seconds: " + str(frames/frameRate))

    # The original loop accumulated into `tempValue` but read `tmpValue`, so
    # only the last sample of each frame survived (and was then halved). The
    # helper averages all samples of a frame, as the code evidently intended.
    frameArray = _db_to_frame_levels(librosaMel, samplesPerFrame, scaleFactor)
    np.savetxt("./Website/downloads/" + socketID + ".vol", frameArray)

    _sout_lines("", "**Frame Array Construction Finished**", "")
    time.sleep(2)

    # MEL BINS PROCESSING #
    _sout_lines("", "", "Mel Spectrogram Processing Engine", "--------------")

    Q = librosa.feature.melspectrogram(y=y, sr=sampleRate, n_mels=melBins, fmax=8000, hop_length=sampleHop)
    librosaMel = librosa.amplitude_to_db(Q, ref=np.max)

    samples = len(librosaMel[0])
    sout("Samples: " + str(samples))
    frames = samples/samplesPerFrame
    sout("Anim. Frames: " + str(frames))
    sout("Samples/Frame: " + str(samplesPerFrame))
    sout("Seconds: " + str(frames/frameRate))
    sout("Mel Bins: " + str(melBins))

    frameArray = _db_to_frame_levels(librosaMel, samplesPerFrame, scaleFactor)
    np.savetxt("./Website/downloads/" + socketID + ".mel", frameArray)

    _sout_lines("", "**Frame Array Construction Finished**", "")
    time.sleep(2)

    # CORE PROCESSING #
    _sout_lines("", "", "Scene\\Render Core Fusion Engine", "--------------", "")

    scenePath = "./PyBlender/Scripts/SceneSetup.txt"
    corePath = "./PyBlender/Scripts/RenderingCores/" + core + ".txt"
    tailPath = "./PyBlender/Scripts/SceneTail.txt"  # TODO - Fix this missing file from project
    scriptPath = "./Website/downloads/" + socketID + ".brs"

    sout("Scene will be rendered with core:")
    sout(corePath)

    filenames = [scenePath, corePath, tailPath]
    with open(scriptPath, 'w') as outputFile:
        for file in filenames:
            with open(file) as inputFile:
                # NOTE(review): this replaces occurrences of socketID with the
                # "%SID%" placeholder; the opposite direction looks more
                # plausible for a template - confirm against the scripts.
                outputFile.write(inputFile.read().replace(socketID, "%SID%"))

    _sout_lines("", "**Render Script Construction Finished**", "")
    time.sleep(2)
energy.append(np.mean(e)) ent = 0.0 m = np.mean(e) for j in range(0,len(e[0])): q = np.absolute(e[0][j] - m) ent = ent + (q * np.log10(q)) entropy_of_energy.append(ent) f_list_1 = [] f_list_1.append(zero_crossings) f_list_1.append(energy) f_list_1.append(entropy_of_energy) f_np_1 = np.array(f_list_1) f_np_1 = np.transpose(f_np_1)[:-1] kmeans = KMeans(n_clusters=2, random_state=0).fit(f_np_1) result=kmeans.predict(f_np_1) D = li.amplitude_to_db(np.abs(li.stft(y)), ref=np.max) plt.subplot(3,1,1) plt.title("Audio Analog Signal") plt.plot(y[1950:2000]) plt.subplot(3,1,2) plt.title("Spectogram") librosa.display.specshow(D, y_axis='linear') plt.colorbar(format='%+2.0f dB') plt.subplot(3,1,3) plt.title("Audio Digital Signal") plt.plot(result, marker='d', color='blue', drawstyle='steps') plt.show() stream.stop_stream() stream.close() audio.terminate()
def __test(ref):
    """Round-trip ``xp`` through dB and back using reference ``ref``."""
    as_db = librosa.amplitude_to_db(xp, ref=ref, top_db=None)
    round_trip = librosa.db_to_amplitude(as_db, ref=ref)

    assert np.allclose(xp, round_trip)
#######################################
# First, let's plot the original chroma
chroma_orig = librosa.feature.chroma_cqt(y=y, sr=sr)

# For display purposes, let's zoom in on a 15-second chunk from the middle of the song.
# The index must be a tuple: NumPy no longer accepts a list of slices for
# multidimensional indexing.
idx = tuple([slice(None), slice(*list(librosa.time_to_frames([45, 60])))])

# And for comparison, we'll show the CQT matrix as well.
C = np.abs(librosa.cqt(y=y, sr=sr, bins_per_octave=12*3, n_bins=7*12*3))

plt.figure(figsize=(12, 4))
plt.subplot(2, 1, 1)
librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max)[idx],
                         y_axis='cqt_note', bins_per_octave=12*3)
plt.colorbar()
plt.subplot(2, 1, 2)
librosa.display.specshow(chroma_orig[idx], y_axis='chroma')
plt.colorbar()
plt.ylabel('Original')
plt.tight_layout()

###########################################################
# We can correct for minor tuning deviations by using 3 CQT
# bins per semi-tone, instead of one
chroma_os = librosa.feature.chroma_cqt(y=y, sr=sr, bins_per_octave=12*3)
def display_sample_info(file_path, label=''):
    """Generate various representations a given audio file.
    E.g. Mel, MFCC and power spectrogram's.

    Three figures are produced: waveform views with file information, a grid
    of librosa spectrograms (STFT, MFCC, CQT, mel), and a mel/MFCC pair
    computed with python_speech_features that is also saved as a PDF.

    Args:
        file_path (str): Path to the audio file.
        label (str): Optional label to display for the given audio file.

    Returns:
        Nothing.

    Raises:
        ValueError: If ``file_path`` is not an existing file.
    """
    if not os.path.isfile(file_path):
        raise ValueError('{} does not exist.'.format(file_path))

    # sr=None keeps the file's native sampling rate; audio is mixed to mono.
    y, sr = librosa.load(file_path, sr=None, mono=True)

    # At 16000 Hz, 512 samples ~= 32ms. At 16000 Hz, 200 samples = 12ms. 16 samples = 1ms @ 16kHz.
    hop_length = 200    # Number of samples between successive frames e.g. columns if a spectrogram.
    f_max = sr / 2.     # Maximum frequency (Nyquist rate).
    f_min = 64.         # Minimum frequency.
    n_fft = 1024        # Number of samples in a frame.
    n_mels = 80         # Number of Mel bins to generate.
    n_mfcc = 13         # Number of Mel cepstral coefficients to extract.
    win_length = 333    # Window length.
    # The CQT keeps librosa's default hop; it is named so that the plot
    # below is drawn with the same value the transform used.
    cqt_hop_length = 512

    # Create info string.
    num_samples = y.shape[0]
    duration = librosa.get_duration(y=y, sr=sr)
    info_str_format = 'Label: {}\nPath: {}\nDuration={:.3f}s with {:,d} Samples\n' \
                      'Sampling Rate={:,d} Hz\nMin, Max=[{:.2f}, {:.2f}]'
    info_str = info_str_format.format(label, file_path, duration, num_samples, sr,
                                      np.min(y), np.max(y))
    print(info_str)
    # Escape some LaTeX special characters
    info_str_tex = info_str.replace('_', '\\_')

    plt.figure(figsize=(10, 7))
    plt.subplot(3, 1, 1)
    display.waveplot(y, sr=sr)
    plt.title('Monophonic')

    # Plot waveforms.
    y_harm, y_perc = librosa.effects.hpss(y)
    plt.subplot(3, 1, 2)
    display.waveplot(y_harm, sr=sr, alpha=0.33)
    display.waveplot(y_perc, sr=sr, color='r', alpha=0.40)
    plt.title('Harmonic and Percussive')

    # Add file information.
    plt.subplot(3, 1, 3)
    plt.axis('off')
    plt.text(0.0, 1.0, info_str_tex, color='black', verticalalignment='top')
    plt.tight_layout()

    # Calculating MEL spectrogram and MFCC.
    db_pow = np.abs(
        librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length))**2

    s_mel = librosa.feature.melspectrogram(S=db_pow, sr=sr, hop_length=hop_length,
                                           fmax=f_max, fmin=f_min, n_mels=n_mels)

    s_mel = librosa.power_to_db(s_mel, ref=np.max)
    s_mfcc = librosa.feature.mfcc(S=s_mel, sr=sr, n_mfcc=n_mfcc)

    # STFT (Short-time Fourier Transform)
    # https://librosa.github.io/librosa/generated/librosa.core.stft.html
    plt.figure(figsize=(12, 10))
    # Use the same hop as the plots below; previously the STFT ran with the
    # default hop (512) while specshow assumed 200, mis-scaling the time axis.
    db = librosa.amplitude_to_db(
        librosa.magphase(librosa.stft(y, hop_length=hop_length))[0], ref=np.max)
    plt.subplot(3, 2, 1)
    display.specshow(db, sr=sr, x_axis='time', y_axis='linear', hop_length=hop_length)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Linear-frequency power spectrogram')

    plt.subplot(3, 2, 2)
    display.specshow(db, sr=sr, x_axis='time', y_axis='log', hop_length=hop_length)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Log-frequency power spectrogram')

    plt.subplot(3, 2, 3)
    display.specshow(s_mfcc, sr=sr, x_axis='time', y_axis='linear', hop_length=hop_length)
    plt.colorbar(format='%+2.0f dB')
    plt.title('MFCC spectrogram')

    # CQT (Constant-Q Transform)
    # https://librosa.github.io/librosa/generated/librosa.core.cqt.html
    cqt = librosa.amplitude_to_db(
        librosa.magphase(librosa.cqt(y, sr=sr, hop_length=cqt_hop_length))[0], ref=np.max)

    plt.subplot(3, 2, 4)
    # Drawn with the CQT's own hop, not the STFT hop, so the time axis is right.
    display.specshow(cqt, sr=sr, x_axis='time', y_axis='cqt_hz', hop_length=cqt_hop_length)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Constant-Q power spectrogram (Hz)')

    plt.subplot(3, 2, 5)
    display.specshow(db, sr=sr, x_axis='time', y_axis='log', hop_length=hop_length)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Log power spectrogram')

    plt.subplot(3, 2, 6)
    # sr/fmin/fmax must match the mel computation, otherwise the axes are
    # labelled for the display defaults instead of this signal.
    display.specshow(s_mel, sr=sr, x_axis='time', y_axis='mel', hop_length=hop_length,
                     fmin=f_min, fmax=f_max)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel spectrogram')

    # TODO Import project used features (python_speech_features).
    (__sr, __y) = wavfile.read(file_path)

    num_features = 26
    win_len = WIN_LENGTH
    win_step = WIN_STEP
    __mel = psf.logfbank(signal=__y, samplerate=__sr, winlen=win_len,
                         winstep=win_step, nfilt=num_features, nfft=n_fft,
                         lowfreq=f_min, highfreq=f_max, preemph=0.97)
    __mfcc = psf.mfcc(signal=__y, samplerate=__sr, winlen=win_len, winstep=win_step,
                      numcep=num_features // 2, nfilt=num_features, nfft=n_fft,
                      lowfreq=f_min, highfreq=f_max,
                      preemph=0.97, ceplifter=22, appendEnergy=False)
    __mfcc = __mfcc.astype(np.float32)
    __mel = __mel.astype(np.float32)
    # python_speech_features returns (frames, features); specshow wants the
    # feature axis first.
    __mfcc = np.swapaxes(__mfcc, 0, 1)
    __mel = np.swapaxes(__mel, 0, 1)

    plt.figure(figsize=(5.2, 1.6))
    display.waveplot(y, sr=sr)

    fig = plt.figure(figsize=(10, 4))
    plt.subplot(2, 1, 2)
    display.specshow(__mfcc, sr=__sr, x_axis='time', y_axis='mel', hop_length=win_step * __sr)
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.xlim(xmin=0)
    plt.ylim(0, 8000)
    plt.colorbar(format='%+2.0f')
    plt.title('MFCC', visible=False)

    plt.subplot(2, 1, 1)
    display.specshow(__mel, sr=__sr, x_axis='time', y_axis='mel', hop_length=win_step * __sr)
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.xlim(xmin=0)
    plt.ylim(0, 8000)
    plt.colorbar(format='%+2.0f', label='Power (dB)')
    plt.title('Mel Spectrogram', visible=False)

    plt.tight_layout()
    fig.savefig('/tmp/mel-mfcc-plot-we-did-it.pdf', bbox_inches='tight')

    plt.show()
def main():
    """Plot a mix, its instrumental and their spectral difference, then
    write the inverted mix and inverted difference as WAV files.

    Reads ``chAngE.wav`` and ``chAngE_inst.wav`` from the working directory,
    saves the three-panel figure as ``diff_Power_spectrogram.jpg`` and writes
    ``inv.wav`` and ``diff.wav``.
    """
    main_name = "chAngE.wav"
    inst_name = "chAngE_inst.wav"

    plt.figure(figsize=(10, 15))

    main_wav, sr = librosa.load(main_name)
    print("file_name:{}, sr:{}".format(main_name, sr))
    print(main_wav.shape)

    inst_wav, sr = librosa.load(inst_name)
    print("file_name:{}, sr:{}".format(inst_name, sr))
    print(inst_wav.shape)

    # Despite the names these are STFT magnitudes, not powers.
    main_power_spec = np.abs(librosa.stft(main_wav))
    print("power_spec.shape:", main_power_spec.shape)
    inst_power_spec = np.abs(librosa.stft(inst_wav))
    print("power_spec.shape:", inst_power_spec.shape)

    plt.subplot(3, 1, 1)  # (row, colum, num)
    librosa.display.specshow(librosa.amplitude_to_db(main_power_spec, ref=np.max),
                             y_axis='log', x_axis='time')
    wav_title = "main_Power_spectrogram"
    plt.title(wav_title)
    plt.colorbar(format='%+2.0f dB')

    plt.subplot(3, 1, 2)  # (row, colum, num)
    librosa.display.specshow(librosa.amplitude_to_db(inst_power_spec, ref=np.max),
                             y_axis='log', x_axis='time')
    wav_title = "inst_Power_spectrogram"
    plt.title(wav_title)
    plt.colorbar(format='%+2.0f dB')

    # Subtract over the frames both recordings have in common.
    diff_len = min(main_power_spec.shape[1], inst_power_spec.shape[1])
    diff_power_spec = main_power_spec[:, :diff_len] - inst_power_spec[:, :diff_len]
    print("diff_power_spec.shape:", diff_power_spec.shape)

    plt.subplot(3, 1, 3)  # (row, colum, num)
    librosa.display.specshow(librosa.amplitude_to_db(diff_power_spec, ref=np.max),
                             y_axis='log', x_axis='time')
    wav_title = "diff_Power_spectrogram"
    plt.title(wav_title)
    plt.colorbar(format='%+2.0f dB')

    plt.tight_layout()
    plt.savefig(wav_title + ".jpg")
    plt.clf()

    # NOTE(review): both inversions run on magnitudes only (no phase), so the
    # reconstructed audio is zero-phase; reusing the mix's phase would
    # presumably sound better - confirm intent.
    inv_wav = librosa.core.istft(main_power_spec)
    diff_wav = librosa.core.istft(diff_power_spec)

    # NOTE(review): librosa.output was removed in librosa 0.8; this call needs
    # an older librosa or a switch to another writer.
    librosa.output.write_wav('inv.wav', inv_wav, sr)
    librosa.output.write_wav('diff.wav', diff_wav, sr)
""" Created on Fri Mar 8 08:41:08 2019 @author: MR toad """ import librosa import os import numpy as np import librosa.display import matplotlib.pyplot as plt #from PIL import Image file_path = 'E:/speech/' mfcc_path = 'E:/mfcc/' pic_path = 'E:/pic' file_name_list = os.listdir(file_path) for file_name in file_name_list: y, sr = librosa.load(file_path + file_name) mfcc_feature = librosa.feature.mfcc(y=y, sr=sr) np.save(mfcc_path + file_name.split('.')[0] + ".npy", mfcc_feature) plt.figure(figsize=(12, 8)) D = librosa.amplitude_to_db(librosa.stft(y), ref=np.max) plt.subplot(4, 2, 1) librosa.display.specshow(D, y_axis='linear') plt.colorbar(format='%+2.0f dB') plt.title('Linear-frequency power spectrogram') plt.savefig(file_name.split('.')[0] + ".png", dpi=300)
img = np.zeros(chromagram.shape, dtype=np.float32) w, h = chromagram.shape for x in range(h): # img.item(x, c_max[x], 0) img.itemset((c_max[x], x), 1) return img #y, sr = load_and_trim('F:/项目/花城音乐项目/样式数据/ALL/旋律/1.31MP3/旋律1.100分.wav') y, sr = load_and_trim(filename) # silence_threshold = 0.2 # need_vocal_separation = check_need_vocal_separation(y, silence_threshold) # if need_vocal_separation: # y, sr = get_foreground(y, sr) # 分离前景音 CQT = librosa.amplitude_to_db(librosa.cqt(y, sr=16000), ref=np.max) w, h = CQT.shape CQT[50:w, :] = -100 CQT[0:20, :] = -100 # 标准节拍时间点 type_index = get_onsets_index_by_filename(filename) total_frames_number = get_total_frames_number(filename) # base_frames = onsets_base_frames_rhythm(type_index,total_frames_number) base_frames = onsets_base_frames(codes[type_index], total_frames_number) base_onsets = librosa.frames_to_time(base_frames, sr=sr) first_frame = base_frames[1] - base_frames[0] rms = librosa.feature.rmse(y=y)[0] rms = [x / np.std(rms) for x in rms] min_waterline = find_min_waterline(rms, 8)
# perform stft stft = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length) # calculate abs values on complex numbers to get magnitude spectrogram = np.abs(stft) # display spectrogram plt.figure(figsize=FIG_SIZE) librosa.display.specshow(spectrogram, sr=sample_rate, hop_length=hop_length) plt.xlabel("Time") plt.ylabel("Frequency") plt.colorbar() plt.title("Spectrogram") # apply logarithm to cast amplitude to Decibels log_spectrogram = librosa.amplitude_to_db(spectrogram) plt.figure(figsize=FIG_SIZE) librosa.display.specshow(log_spectrogram, sr=sample_rate, hop_length=hop_length) plt.xlabel("Time") plt.ylabel("Frequency") plt.colorbar(format="%+2.0f dB") plt.title("Spectrogram (dB)") # MFCCs # extract 13 MFCCs MFCCs = librosa.feature.mfcc(signal, sample_rate, n_fft=n_fft, hop_length=hop_length,
def extract_features(dataset='train', n_fft=512, hop_length=128, n_mels=40, dct_type=3):
    """Extract a fixed set of librosa features for every file of a dataset.

    Reads file names from ``data_path + dataset + '_list.txt'`` (one per
    line), computes the features for each file and saves each feature as a
    ``.npy`` file below
    ``./rms_tonnetz<n_fft>/<hop_length>/<n_mels>/<dct_type>/``, using the
    matching entry of ``feature_paths`` as a path prefix.

    Args:
        dataset: Name of the list file prefix (e.g. ``'train'``).
        n_fft: FFT size for the mel spectrogram, STFT and zero-crossing frames.
        hop_length: Hop in samples for the mel spectrogram, STFT and ZCR.
        n_mels: Number of mel bands.
        dct_type: DCT type passed to ``librosa.feature.mfcc``.
    """
    save_root = ('./rms_tonnetz' + str(n_fft) + '/' + str(hop_length) + '/'
                 + str(n_mels) + '/' + str(dct_type) + '/')

    # `with` guarantees the list file is closed even if a file fails to load.
    with open(data_path + dataset + '_list.txt', 'r') as f:
        for i, file_name in enumerate(f, start=1):
            # progress check
            if not (i % 10):
                print(i)

            # load audio file
            file_name = file_name.rstrip('\n')
            file_path = data_path + file_name
            y, sr = librosa.load(file_path, sr=SAMPLING_RATE)

            # mel-scaled spectrogram (`y` by keyword: required by librosa >= 0.10)
            mel_S = librosa.feature.melspectrogram(y=y, sr=SAMPLING_RATE, n_fft=n_fft,
                                                   hop_length=hop_length, n_mels=n_mels,
                                                   fmin=0.0, fmax=8000)

            # log compression
            log_mel_S = librosa.power_to_db(mel_S)

            # mfcc (DCT)
            mfcc = librosa.feature.mfcc(S=log_mel_S, dct_type=dct_type, n_mfcc=MFCC_DIM)
            mfcc = mfcc.astype(np.float32)  # to save the memory (64 to 32 bits)

            # constant-q transform
            C = np.abs(librosa.core.cqt(y, sr=sr))
            log_cqt = librosa.amplitude_to_db(C, ref=np.max)
            mfcc_cqt = librosa.feature.mfcc(S=log_cqt, dct_type=dct_type, n_mfcc=MFCC_DIM)
            mfcc_cqt = mfcc_cqt.astype(np.float32)

            # Zero crossing rate
            zcr = librosa.feature.zero_crossing_rate(y, frame_length=n_fft,
                                                     hop_length=hop_length)

            # STFT
            S, phase = librosa.magphase(
                librosa.core.stft(y, n_fft=n_fft, hop_length=hop_length, win_length=n_fft))

            # Spectral centroid
            s_centroid = librosa.feature.spectral_centroid(S=S)
            # Spectral rolloff
            s_rolloff = librosa.feature.spectral_rolloff(S=S, sr=sr, roll_percent=0.95)
            # Spectral flatness
            s_flatness = librosa.feature.spectral_flatness(S=S)
            # Spectral bandwidth
            s_bandwidth = librosa.feature.spectral_bandwidth(S=S)
            # Spectral contrast
            s_contrast = librosa.feature.spectral_contrast(S=S)

            # Chromagram
            crm_stft = librosa.feature.chroma_stft(S=S, sr=sr)
            crm_cqt = librosa.feature.chroma_cqt(C=C, sr=sr)
            crm_cens = librosa.feature.chroma_cens(C=C, sr=sr)

            rms = librosa.feature.rms(S=S)
            tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr)

            features = [
                mfcc, mfcc_cqt, zcr, s_centroid, s_rolloff, s_flatness, s_bandwidth,
                s_contrast, crm_stft, crm_cqt, crm_cens, rms, tonnetz
            ]

            # save every feature as its own .npy file
            file_name = file_name.replace('.wav', '.npy')
            for idx, feature in enumerate(features):
                save_file = save_root + feature_paths[idx] + file_name
                if not os.path.exists(os.path.dirname(save_file)):
                    os.makedirs(os.path.dirname(save_file))
                np.save(save_file, feature)
# harmony, perceptr harmony, perceptr = librosa.effects.hpss(y) harmony_mean = np.mean(harmony) harmony_var = np.var(harmony) perceptr_mean = np.mean(perceptr) perceptr_var = np.var(perceptr) # print(harmony_mean,harmony_var,perceptr_mean,perceptr_var) # tempo tempo, _ = librosa.beat.beat_track(y, sr=sr) # print(tempo) # MFCCs S = librosa.feature.melspectrogram(y, sr=sr) S_DB = librosa.amplitude_to_db(S, ref=np.max) D = np.abs(librosa.stft(y, n_fft=2048, win_length=2048, hop_length=512)) mfcc = librosa.feature.mfcc(y, sr=sr, S=librosa.power_to_db(D), n_mfcc=20) print(np.mean(mfcc[0])) # -11.3112 mfcc1_mean = np.mean(mfcc[0]) mfcc1_var = np.var(mfcc[0]) mfcc2_mean = np.mean(mfcc[1]) mfcc2_var = np.var(mfcc[1]) mfcc3_mean = np.mean(mfcc[2]) mfcc3_var = np.var(mfcc[2]) mfcc4_mean = np.mean(mfcc[3]) mfcc4_var = np.var(mfcc[3]) mfcc5_mean = np.mean(mfcc[4]) mfcc5_var = np.var(mfcc[4]) mfcc6_mean = np.mean(mfcc[5])
device="default", channels=1, samplerate=samplingrate, callback=audio_callback ) sound_name = input("name of sound (only use lowercase chars and underscore): ") with stream: print("listening...") while True: if len(v) >= recording_size: # trim v size to be exactly recording_size v = v[-recording_size:] y = np.asarray(v, dtype=np.float32) mel_spec = librosa.feature.melspectrogram(y=y, sr=samplingrate) mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max) # normalize data to make each value on a scale between -1 and 1 normalized = librosa.util.normalize(mel_spec_db) # between 0 and 255 normalized = [[(v + 1) * 255 / 2 for v in row] for row in normalized] # show preview # plt.imshow(normalized) # plt.show() import os import binascii fname = binascii.hexlify(os.urandom(8)) f = f"./datasets/sounds-{int(recording_len*100)}ms/{sound_name}/{fname.decode('utf-8')}.jpg" res = cv2.imwrite(f, np.array(normalized)) print(f"saved {f}")
# Render 227x227-pixel log-frequency spectrogram images for every speaker.
my_dpi = 120
for index, row in speakers_filtered.iterrows():
    dir_ = root + '/' + row['SUBSET'] + '/' + str(row['ID']) + '/'
    print('working on df row {}, spaker {}'.format(index, row['CODE']))
    if not os.path.exists(dir_):
        print('dir {} not exists, skipping'.format(dir_))
        continue

    # All FLAC files anywhere below the speaker directory.
    files_iter = Path(dir_).glob('**/*.flac')
    files_ = [str(f) for f in files_iter]

    for f in files_:
        ay, sr = librosa.load(f)
        duration = ay.shape[0] / sr
        start = 0
        # 5-second windows advancing by 4 seconds (1 s overlap); a trailing
        # remainder shorter than a full window is not rendered.
        while start + 5 < duration:
            slice_ = ay[start * sr:(start + 5) * sr]
            start = start + 5 - 1
            x = librosa.stft(slice_)
            xdb = librosa.amplitude_to_db(abs(x))
            # figsize * dpi = 227 pixels per side.
            plt.figure(figsize=(227 / my_dpi, 227 / my_dpi), dpi=my_dpi)
            plt.axis('off')
            librosa.display.specshow(xdb, sr=sr, x_axis='time', y_axis='log')
            # Random file name inside the per-speaker output directory.
            plt.savefig(root + '/train-gram/' + str(row['CODE']) + '/' + uuid.uuid4().hex + '.png', dpi=my_dpi)
            plt.close()
    print('work done on index {}, speaker {}'.format(index, row['CODE']))
S = S.cuda() net = NMFD(S.shape, T, n_components=R).cuda() #net = NMF(S.shape, n_components=R, max_iter=max_iter, verbose=True, beta_loss=2).cuda() niter, V = net.fit_transform(S, verbose=True, beta_loss=1.5, max_iter=max_iter, alpha=0.5, l1_ratio=0.2) net.sort() W = net.W H = net.H plt.subplot(3, 1, 1) display.specshow(librosa.amplitude_to_db(W.detach().cpu().numpy().mean(2), ref=np.max), y_axis='log', sr=sr) plt.title('Template ') plt.subplot(3, 1, 2) display.specshow(H.detach().cpu().numpy(), x_axis='time', hop_length=1024, sr=sr) plt.colorbar() plt.title('Activations') plt.subplot(3, 1, 3) display.specshow(librosa.amplitude_to_db(V.detach().cpu().numpy(), ref=np.max), y_axis='log', x_axis='time',
def compute_cqt(self):
    """Return the dB-scaled constant-Q magnitude of ``self.audio_sample``.

    The CQT magnitude is raised to ``self.mag_exp`` before the dB
    conversion, which is referenced to the maximum value.
    """
    transform = librosa.cqt(
        self.audio_sample,
        sr=self.sr,
        hop_length=self.hop_length,
        fmin=None,
        n_bins=self.n_bins,
        res_type='fft',
    )
    magnitude, _phase = librosa.magphase(transform)
    return librosa.amplitude_to_db(magnitude ** self.mag_exp, ref=np.max)
# Left panel: PESQ against the number of neurons.
plt.subplot(1, 2, 1)
plt.plot(x1, y1, label="PESQ ")
plt.xlabel('Number of neurons')
plt.ylabel('PESQ')

# Right panel: STOI against the number of neurons.
plt.subplot(1, 2, 2)
plt.plot(x1, y2, label="STOI")
plt.xlabel('Number of neurons')
plt.ylabel('STOI')

# dB magnitude spectrograms (peak at 0 dB) of the noisy, clean and enhanced
# versions of utterance 462, all loaded at 16 kHz.
# D1: noisy input
y, sr = librosa.load(
    "D:/UB_MS/Thesis/Seprating speech signals/DataSet/one_noise_dataset/noisy_test/noisy462.wav",
    sr=16000)
plt.figure(figsize=(12, 8))
D1 = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
D1 = librosa.amplitude_to_db(D1, ref=np.max)

# D2: clean reference
y, sr = librosa.load(
    "D:/UB_MS/Thesis/Seprating speech signals/DataSet/set/test_clean/clean462.wav",
    sr=16000)
plt.figure(figsize=(12, 8))
D2 = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
D2 = librosa.amplitude_to_db(D2, ref=np.max)

# D3: enhanced output
y, sr = librosa.load(
    "D:/UB_MS/Thesis/Seprating speech signals/DataSet/Results/one_noise_data/seq2seq/256/enhanced_462.wav",
    sr=16000)
plt.figure(figsize=(12, 8))
D3 = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
D3 = librosa.amplitude_to_db(D3, ref=np.max)
f1 = f1_score(y_test, y_pred)

print("accuracy : \t", accuracy)
print("recall : \t", recall)
print("precision : \t", precision)
print("f1 : \t", f1)

# Prediction data: classify every WAV file in the prediction directory.
pred_pathAudio = 'C:/nmb/nmb_data/pred_voice/'
files = librosa.util.find_files(pred_pathAudio, ext=['wav'])
files = np.asarray(files)
for file in files:
    y, sr = librosa.load(file, sr=22050)
    # NOTE(review): positional `y` requires librosa < 0.10.
    pred_mels = librosa.feature.melspectrogram(y, sr=sr, n_fft=512, hop_length=128, n_mels=128)
    pred_mels = librosa.amplitude_to_db(pred_mels, ref=np.max)
    # Flatten the spectrogram into a single feature row for the model.
    pred_mels = pred_mels.reshape(1, pred_mels.shape[0] * pred_mels.shape[1])
    # print(pred_mels.shape) # (1, 110336)
    # NOTE: this rebinds y_pred, which held the test-set predictions above.
    y_pred = model.predict(pred_mels)
    # print(y_pred)
    if y_pred == 0:  # label 0
        print(file, '여자입니다.')
    else:  # label 1
        print(file, '남자입니다.')

# Elapsed wall-clock time since `start_now`.
# NOTE(review): `time` would shadow the `time` module if it is imported.
end_now = datetime.datetime.now()
time = end_now - start_now
print("time >> ", time)  # time >
def calcu(data, sr, n_fft):
    """Return the mean dB magnitude per frequency bin of one signal excerpt.

    Args:
        data: Integer audio samples; divided by 32767 to scale them.
        sr: Sampling rate; the excerpt analysed is ``data[sr//4 : sr//4*2]``.
        n_fft: FFT size for the STFT.

    Returns:
        1D array with one mean dB value per STFT frequency bin.
    """
    # np.float was removed in NumPy 1.24; np.float64 is the same type.
    data = data.astype(np.float64) / 32767
    data = librosa.stft(data[sr // 4:sr // 4 * 2], n_fft=n_fft)
    # Average over time (axis 1) to get one value per frequency bin.
    data = np.mean(librosa.amplitude_to_db(np.abs(data)), axis=1)
    return data
sig, sr = librosa.load(audio_data, sr = 44100)

# 3_Extracting Data From Audio =====================================================================
# 3-1.Onset Envelope | 3-2.Beats | 3-3.Onsets -----------------------------------------------------
# The signal is passed as `y=`: librosa >= 0.10 made it keyword-only.
onset_frames = librosa.onset.onset_detect(y = sig, sr = sr)
onsets = librosa.frames_to_time(onset_frames, sr = sr)
onset_env = librosa.onset.onset_strength(y = sig, sr = sr, aggregate = np.median)
# NOTE: this tempo estimate is immediately replaced by the one from beat_track.
tempo = librosa.beat.tempo(onset_envelope = onset_env, sr = sr)
tempo, beats = librosa.beat.beat_track(onset_envelope = onset_env, sr = sr, units = 'time')

# 3-4.Frequency & Magnitude -----------------------------------------------------------------------
fft = np.fft.fft(sig)
magnitude = np.abs(fft)
magnitude_dB = librosa.amplitude_to_db(magnitude)
frequency = np.linspace(0, sr, len(magnitude_dB))
# Keep only the first half of the spectrum. Integer division is required:
# a float slice index raises TypeError on Python 3.
half = len(magnitude_dB) // 2
left_magnitude_dB = magnitude_dB[:half]  # certain magnitude(dB)
left_frequency = frequency[:half]        # certain frequency

# 4_Preprocessing Data for Visualization ==========================================================
# 4-1.Onset Envelope(proportional to audio length) ------------------------------------------------
E = len(onset_env)
x1 = np.random.rand(E) * E
y1 = np.random.rand(E) * E
n1 = 50
radii_1 = np.random.rand(E) * E / n1
colors_1 = ["#%02x%02x%02x" % (int(r), int(g), 180) for r, g in zip(x1, y1)]  # 255,100,37
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

# Load the track and express its STFT magnitude in decibels.
y, sr = librosa.load("sample/UltraCat_-_01_-_Orbiting_the_Earth.mp3")
Y = librosa.stft(y)
Ydb = librosa.amplitude_to_db(np.abs(Y))

# Draw the spectrogram on a small figure and write it next to the audio file.
img_path = "sample/UltraCat_-_01_-_Orbiting_the_Earth.png"
plt.figure(figsize=(6, 2))
librosa.display.specshow(Ydb, sr=sr, cmap='magma')
plt.savefig(img_path)
plt.close("all")
hop_length = 80
n_sample = 200

# Flat pools of every spectrogram cell, in three scales.
amps = []
log_amps = []
dbs = []

# Only the first `n_sample` WAV files found under `src_path` are analysed.
for filepath in glob.glob(f'{src_path}/*.wav')[:n_sample]:
    wav = read(filepath, sr, mono=True)
    spec = librosa.stft(
        wav,
        n_fft=n_fft,
        win_length=win_length,
        hop_length=hop_length,
    )  # (n_fft/2+1, t)
    amp = np.abs(spec)
    log_amp = np.log(amp)
    db = librosa.amplitude_to_db(amp)

    amps.extend(amp.flatten())
    log_amps.extend(log_amp.flatten())
    dbs.extend(db.flatten())

amps = np.array(amps)
log_amps = np.array(log_amps)
dbs = np.array(dbs)

# Summary statistics of the linear amplitudes.
mean = np.mean(amps)
std = np.std(amps)
max = np.max(amps)
min = np.min(amps)
# mean = np.mean(dbs)
# std = np.std(dbs)
def spectrogram_librosa(y, fs, hparams):
    """Return the normalized dB magnitude spectrogram of `y`.

    The signal goes through `preemphasis` and `_stft`, the magnitude is
    converted to dB, `hparams.ref_level_db` is subtracted, and the result is
    passed through `_normalize`.
    """
    emphasized = preemphasis(y, hparams)
    magnitude = np.abs(_stft(emphasized, fs, hparams))
    level_db = librosa.amplitude_to_db(magnitude) - hparams.ref_level_db
    return _normalize(level_db, hparams)
Note.append(data[i - 1]) count = 1 N = data[i] return Note, Count def Normlize(data, num): if type(data) != list or type(num) != list: return False Data = [] Num = [] for i in range(len(data)): num[i] = num[i] // 10 if num[i] >= 1: Data.append(data[i]) Num.append(num[i]) return Data, Num if __name__ == '__main__': y, rate = loadFile('1.wav', 44100) Time = librosa.get_duration(y, sr=rate) fft = librosa.stft(y, n_fft=1024 * 2) D = librosa.amplitude_to_db(abs(fft), ref=np.max) D = D + 80 data = music2note(D, rate / 2) data, num = getNoteAndNum(data) data, num = Normlize(data, num)
# Decompose D into harmonic and percussive components
#
# :math:`D = D_\text{harmonic} + D_\text{percussive}`

D_harmonic, D_percussive = librosa.decompose.hpss(D)


####################################################################
# We can plot the two components along with the original spectrogram

# Pre-compute a global reference power from the input spectrum
rp = np.max(np.abs(D))

plt.figure(figsize=(12, 8))

# One row per spectrogram; only the bottom row carries a time axis.
panels = [
    (D, 'Full spectrogram', {}),
    (D_harmonic, 'Harmonic spectrogram', {}),
    (D_percussive, 'Percussive spectrogram', {'x_axis': 'time'}),
]
for row, (component, title, axis_kwargs) in enumerate(panels, start=1):
    plt.subplot(3, 1, row)
    librosa.display.specshow(
        librosa.amplitude_to_db(np.abs(component), ref=rp),
        y_axis='log',
        **axis_kwargs
    )
    plt.colorbar()
    plt.title(title)

plt.tight_layout()
import sklearn.cluster

import librosa
import librosa.display

#############################
# First, we'll load in a song
y, sr = librosa.load('audio/Karissa_Hobbs_-_09_-_Lets_Go_Fishin.mp3')


##############################################
# Next, we'll compute and plot a log-power CQT
BINS_PER_OCTAVE = 12 * 3
N_OCTAVES = 7
# The CQT is complex-valued. Taking the magnitude explicitly avoids the
# "phase information will be discarded" warning from amplitude_to_db.
C = librosa.amplitude_to_db(np.abs(librosa.cqt(y=y, sr=sr,
                                               bins_per_octave=BINS_PER_OCTAVE,
                                               n_bins=N_OCTAVES * BINS_PER_OCTAVE)),
                            ref=np.max)

plt.figure(figsize=(12, 4))
librosa.display.specshow(C, y_axis='cqt_hz', sr=sr,
                         bins_per_octave=BINS_PER_OCTAVE,
                         x_axis='time')
plt.tight_layout()


##########################################################
# To reduce dimensionality, we'll beat-synchronize the CQT
tempo, beats = librosa.beat.beat_track(y=y, sr=sr, trim=False)
Csync = librosa.util.sync(C, beats, aggregate=np.median)

# For plotting purposes, we'll need the timing of the beats
def audio_preprocessing():
    """Save an MFCC image for every file found under ``audio_data/``.

    Each file is loaded at 16 kHz, 40 MFCCs are computed with ``n_fft=2048``
    and ``hop_length=512``, and the plot is written to ``mfcc/<index>.png``,
    where ``index`` counts the processed files starting at 0.

    Raises:
        OSError: If the output folder cannot be created.
    """
    import glob
    import os

    import librosa
    import librosa.display
    import matplotlib.pyplot as plt

    FIG_SIZE = (15, 10)
    FOLDER = "mfcc/"
    hop_length = 512  # samples between successive frames
    n_fft = 2048  # samples per analysis frame

    try:
        os.makedirs(FOLDER, exist_ok=True)
    except OSError:
        print("Failed to create directory!!!!!")
        raise

    folder_list = glob.glob("audio_data")
    print(folder_list)
    print(len(folder_list))

    index = 0
    for folder in folder_list:
        for path in glob.glob(folder + "/*"):
            sig, sr = librosa.load(path, sr=16000)

            # Extract 40 MFCCs. Audio and sample rate are passed by keyword
            # because librosa >= 0.10 rejects them positionally.
            MFCCs = librosa.feature.mfcc(y=sig, sr=sr, n_fft=n_fft,
                                         hop_length=hop_length, n_mfcc=40)

            # Display the MFCCs.
            fig = plt.figure(figsize=FIG_SIZE)
            librosa.display.specshow(MFCCs, sr=sr, hop_length=hop_length)
            plt.xlabel("Time")
            plt.ylabel("MFCC coefficients")
            plt.colorbar()
            plt.title("MFCCs")

            # Save the image, then release the figure so that open figures
            # do not pile up over a large folder.
            fig.savefig(FOLDER + str(index) + '.png')
            plt.close(fig)
            index += 1

    print("complete!")
def transform(y, sr, hop_length):
    """Convert raw audio into a mel spectrogram rescaled as ``(dB + 80) / 80``."""
    mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length)
    # NOTE(review): melspectrogram yields a power spectrogram by default, so
    # power_to_db may have been intended here -- confirm before changing, since
    # anything trained on these features depends on the current scaling.
    mel_db = librosa.amplitude_to_db(mel, ref=np.max)
    return (mel_db + 80) / 80
mse = mse + RMSE(X[i], Y[i]) return mse / lenx esperado_dir = '../avaliacao-subjetiva/Esperado/' exp_dir = '../avaliacao-subjetiva/Preditos/' exp_list = list(os.listdir(exp_dir)) esperado_list = list(os.listdir(esperado_dir)) esp_mag = [] esp_db = [] for i in esperado_list: if i[-4:] == '.wav': file_id = i[:-4] _, _, mag = load_spectrograms(os.path.join(esperado_dir, i)) db = librosa.amplitude_to_db(mag, ref=np.max) display.specshow(db, y_axis='log', x_axis='time') save_img_dir = os.path.join(esperado_dir, i.replace('.wav', '.png')) esp_mag.append([file_id, mag]) esp_db.append([file_id, db]) plt.title('Espectrograma STFT') plt.colorbar(format='%+2.0f dB') plt.tight_layout() plt.savefig(save_img_dir) plt.cla() # Clear axis plt.clf() results_list = []
import librosa.display

#############################################
# Load an example with vocals.
y, sr = librosa.load(
    'audio/Cheese_N_Pot-C_-_16_-_The_Raps_Well_Clean_Album_Version.mp3',
    duration=120,
)

# And compute the spectrogram magnitude and phase
S_full, phase = librosa.magphase(librosa.stft(y))


#######################################
# Plot a 5-second slice of the spectrum
idx = slice(*librosa.time_to_frames([30, 35], sr=sr))
plt.figure(figsize=(12, 4))
librosa.display.specshow(
    librosa.amplitude_to_db(S_full[:, idx], ref=np.max),
    y_axis='log',
    x_axis='time',
    sr=sr,
)
plt.colorbar()
plt.tight_layout()

###########################################################
# The wiggly lines above are due to the vocal component.
# Our goal is to separate them from the accompanying
# instrumentation.
#
# We'll compare frames using cosine similarity, and aggregate similar frames
# by taking their (per-frequency) median value.
#
# To avoid being biased by local continuity, we constrain similar frames to be
# separated by at least 2 seconds.
def analyse_audio(audio_file, midi_file):
    """Plot an audio file's waveform, STFT and CQT next to its MIDI piano roll.

    Reads these names from the enclosing module: `sr`, `bins_per_octave`,
    `n_bins`, `MIDInotes` and `plot_piano_roll`.

    Args:
        audio_file: Path of the audio file, loaded at sample rate `sr`.
        midi_file: Path of the MIDI file; only its first instrument is used
            for the piano-roll matrix and the note histogram.
    """
    x, _ = librosa.load(audio_file, sr=sr)
    print("Music file length=%s, sampling_rate=%s" % (x.shape[0], sr))

    plt.figure(figsize=(14, 5))
    plt.title('Music Sample Waveplot')
    librosa.display.waveplot(x, sr=sr)

    x_stft_spectrum = lb.stft(x, n_fft=1024, hop_length=512, center=True,
                              dtype=np.complex64)
    # Keep the linear magnitude here. The dB conversion happens exactly once,
    # at plot time; converting twice would feed dB values in as amplitudes.
    x_stft = np.abs(x_stft_spectrum)

    plt.figure(figsize=(14, 5))
    librosa.display.specshow(lb.amplitude_to_db(x_stft, ref=np.max), sr=sr,
                             fmin=lb.note_to_hz('A0'), x_axis='time',
                             y_axis='linear', cmap='coolwarm')
    plt.title('Power spectrogram')
    plt.colorbar(format='%+2.0f dB')
    plt.tight_layout()

    plt.figure(figsize=(14, 5))
    x_cqt = np.abs(
        librosa.cqt(x, sr=sr, bins_per_octave=bins_per_octave, n_bins=n_bins,
                    fmin=lb.note_to_hz('A0')))
    librosa.display.specshow(librosa.amplitude_to_db(x_cqt, ref=np.max),
                             sr=sr, x_axis='time', y_axis='cqt_note',
                             cmap='coolwarm')
    print("CQT Matrix shape", x_cqt.shape)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Constant-Q power spectrum')
    plt.tight_layout()

    n_frames = x_cqt.shape[1]
    midi_data = pretty_midi.PrettyMIDI(midi_file)
    plt.figure(figsize=(12, 4))
    plot_piano_roll(midi_data, 24, 84)
    print('There are {} time signature changes'.format(
        len(midi_data.time_signature_changes)))
    print('There are {} instruments'.format(len(midi_data.instruments)))
    print('Instrument 1 has {} notes'.format(
        len(midi_data.instruments[0].notes)))

    # Sample the piano roll at the CQT frame rate: n_frames columns spread
    # over len(x) / sr seconds of audio.
    pianoRoll = midi_data.instruments[0].get_piano_roll(
        fs=n_frames * float(sr) / len(x))
    midi_mat = (pianoRoll[MIDInotes[0]:MIDInotes[1] + 1, :n_frames] > 0)
    print("MIDI Matrix shape", midi_mat.shape)

    plt.figure()
    librosa.display.specshow(midi_mat, sr=sr, bins_per_octave=12,
                             fmin=lb.note_to_hz('A0'), x_axis='time',
                             y_axis='cqt_note')

    # Number of active frames per pitch, normalized into a distribution.
    n_pitch_frame = np.sum(midi_mat, axis=1)
    print(n_pitch_frame)
    # `np.float` was removed in NumPy 1.24; float64 is the same type.
    plt.bar(range(MIDInotes[0], MIDInotes[1] + 1),
            n_pitch_frame / np.sum(n_pitch_frame).astype(np.float64))
    plt.xticks(range(MIDInotes[0], MIDInotes[1] + 1, 12),
               lb.midi_to_note(range(MIDInotes[0], MIDInotes[1] + 1, 12)))
    plt.xlabel('Midi note')
    plt.ylabel('Note probability')
def main():
    """Plot a recording, its spectrogram, and its STFT / inverse-STFT resynthesis.

    Reads ``recording_b4lec_ex1.wav`` from the folder containing this script,
    draws three stacked panels (original signal, dB spectrogram, re-synthesized
    signal), saves the figure as ``ex1_result.png`` in the same folder and
    shows it.
    """
    # Resolve the script folder from an absolute path. A bare relative
    # `__file__` has an empty dirname, which would otherwise point at "/".
    base_dir = os.path.dirname(os.path.abspath(__file__))
    audio_path = os.path.join(base_dir, "recording_b4lec_ex1.wav")
    wav, sr = librosa.load(audio_path, mono=True)

    fig, ax = plt.subplots(nrows=3, ncols=1, sharex=True)
    plt.subplots_adjust(hspace=0.6)

    # draw original signal
    librosa.display.waveplot(wav, sr=sr, color="g", ax=ax[0])
    ax[0].set(title="Original signal", xlabel=None, ylabel="Magnitude")

    # parameter: `hop` is the hop size as a fraction of the window length
    hop = 0.5
    win_length = 1024
    hop_length = int(win_length * hop)

    # STFT
    amp = stft(wav, hop=hop, win_length=win_length)

    # convert an amplitude spectrogram to dB-scaled spectrogram
    db = librosa.amplitude_to_db(np.abs(amp))

    # draw spectrogram (log scale)
    img = librosa.display.specshow(
        db,
        sr=sr,
        hop_length=hop_length,
        x_axis="time",
        y_axis="log",
        ax=ax[1],
        cmap="plasma",
    )
    ax[1].set(title="Spectrogram", xlabel=None, ylabel="Frequency [Hz]")
    ax[1].set_yticks([0, 128, 512, 2048, 8192])
    fig.colorbar(img, aspect=10, pad=0.01, extend="both", ax=ax[1],
                 format="%+2.f dB")

    # inverse-STFT
    inv_wav = istft(amp, hop=hop, win_length=win_length)

    # draw re-synthesized signal
    librosa.display.waveplot(inv_wav, sr=sr, color="g", ax=ax[2])
    ax[2].set(title="Re-synthesized signal", xlabel="Time [s]",
              ylabel="Magnitude")

    # graph adjustment: give the waveform panels the size of the spectrogram
    # panel, whose axes were resized by the colorbar
    ax_pos_0 = ax[0].get_position()
    ax_pos_1 = ax[1].get_position()
    ax_pos_2 = ax[2].get_position()
    ax[0].set_position(
        [ax_pos_0.x0, ax_pos_0.y0, ax_pos_1.width, ax_pos_1.height])
    ax[2].set_position(
        [ax_pos_2.x0, ax_pos_2.y0, ax_pos_1.width, ax_pos_1.height])
    fig.align_labels()

    # save and show figure of result
    plt.savefig(os.path.join(base_dir, "ex1_result.png"))
    plt.show()
# Visualize an STFT power spectrum
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

# NOTE(review): `librosa.util.example_audio_file` may not exist in newer
# librosa releases -- confirm against the installed version.
y, sr = librosa.load(librosa.util.example_audio_file())
plt.figure(figsize=(12, 8))

# The STFT is complex-valued; amplitude_to_db expects magnitudes.
D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
plt.subplot(4, 2, 1)
librosa.display.specshow(D, y_axis='linear')
plt.colorbar(format='%+2.0f dB')
plt.title('Linear-frequency power spectrogram')

# Or on a logarithmic scale
plt.subplot(4, 2, 2)
librosa.display.specshow(D, y_axis='log')
plt.colorbar(format='%+2.0f dB')
plt.title('Log-frequency power spectrogram')

# Or use a CQT scale
CQT = librosa.amplitude_to_db(np.abs(librosa.cqt(y, sr=sr)), ref=np.max)
plt.subplot(4, 2, 3)
librosa.display.specshow(CQT, y_axis='cqt_note')
plt.colorbar(format='%+2.0f dB')
plt.title('Constant-Q power spectrogram (note)')

plt.subplot(4, 2, 4)
librosa.display.specshow(CQT, y_axis='cqt_hz')
plt.colorbar(format='%+2.0f dB')
amountOfSegments = times.size

# Write one segment start per line. The loop stops one short of the end, so
# the last entry of `times` is not written. The context manager closes the
# file even if a write fails.
with open(fileJustName + '_times.txt', 'w') as file:
    for cont in range(amountOfSegments - 1):
        posFrameInit = times[cont]
        file.write(str(posFrameInit) + "\n")

# Plot
timess = librosa.frames_to_time(np.arange(len(onset_env)), sr=sr, hop_length=512)
plt.figure()

# Bottom panel: log-frequency spectrogram. The STFT is complex-valued, so
# take its magnitude before converting to dB.
ax = plt.subplot(2, 1, 2)
D = librosa.stft(y)
librosa.display.specshow(librosa.amplitude_to_db(np.abs(D), ref=np.max),
                         y_axis='log', x_axis='time')

# Top panel: onset strength with the selected peaks, sharing the time axis.
plt.subplot(2, 1, 1, sharex=ax)
plt.plot(timess, onset_env, alpha=0.8, label='Onset strength')
plt.vlines(timess[peaks], 0, onset_env.max(), color='r', alpha=0.8,
           label='Selected peaks')
plt.legend(frameon=True, framealpha=0.8)
plt.axis('tight')
plt.tight_layout()
plt.show()
t=np.linspace(0,N/fe,N);
s = 0.2*np.cos(2*np.pi*200*t) + 2*np.cos(2*np.pi*400*t);
tf=np.linspace(0,fe/N,N);
plt.subplot(1,2,1);
plt.plot(t[:200],s[:200]);
plt.title('280Hz et 500Hz,fe=8000Hz')
plt.subplot(1,2,2);
plt.plot(np.abs(np.fft.fft(s)));
plt.title('280Hz et 500Hz,fe=8000Hz')
"""
# NOTE(review): everything above the closing quotes is the tail of a
# triple-quoted string opened outside this excerpt, i.e. disabled code.

# Load the recording and show its waveform.
#x, fe = librosa.load('ressources/mesange-tete-noire.wav')
x, fe = librosa.load('ressources/PIANO.wav')
plt.figure(figsize=(14, 5))
librosa.display.waveplot(x, sr=fe)
plt.title('')
plt.show()

# Halve the sample rate; the time axis and the playback below both use the
# halved value.
fe /= 2
n = len(x)
t = np.linspace(0, n / fe, n, endpoint=False)

# Synthetic 440 Hz cosine with the same number of samples as the recording.
s = 0.75 * np.cos(2 * np.pi * 440 * t)
plt.plot(t, x)
# Drawn on the same axes as the waveform above.
plt.plot(np.abs(np.fft.fft(s)))

# This first Sdb is overwritten two lines below.
Sdb = librosa.amplitude_to_db(abs(s))
S = np.abs(librosa.stft(s))
Sdb = librosa.amplitude_to_db(abs(S))
#librosa.display.specshow(Sdb, sr=fe, x_axis='time', y_axis='hz')
#librosa.display.specshow(Sdb, sr=fe, x_axis='time', y_axis='hz')

# Play the recording at the halved rate and block until playback ends.
sd.play(x, fe)
status = sd.wait()
import librosa.display

#############################################
# Load an example signal
y, sr = librosa.load('audio/sir_duke_slow.mp3')

# And compute the spectrogram magnitude and phase
S_full, phase = librosa.magphase(librosa.stft(y))


###################
# Plot the spectrum
plt.figure(figsize=(12, 4))
librosa.display.specshow(
    librosa.amplitude_to_db(S_full, ref=np.max),
    y_axis='log',
    x_axis='time',
    sr=sr,
)
plt.colorbar()
plt.tight_layout()

###########################################################
# As you can see, there are periods of silence and
# non-silence throughout this recording.
#
# As a first step, we can plot the root-mean-square (RMS) curve
rms = librosa.feature.rms(y=y)[0]
times = librosa.frames_to_time(np.arange(len(rms)))
plt.figure(figsize=(12, 4))
def main(args):
    """Plot the signal, mel filter bank, spectrogram and MFCC features of a recording.

    Args:
        args: Object with a ``fname`` attribute naming an audio file inside
            the ``data`` folder next to this script (e.g. "aiueo.wav").

    Every figure is saved into the ``result`` folder next to this script
    (created if missing) and then shown.
    """
    # folder containing this script
    path = os.path.dirname(os.path.abspath(__file__))
    result_dir = os.path.join(path, "result")
    # savefig does not create missing folders
    os.makedirs(result_dir, exist_ok=True)

    # load audio file
    fname = os.path.join(path, "data", args.fname)
    wav, sr = librosa.load(fname, mono=True)

    # plot signal
    plt.figure()
    ax = plt.subplot(111)
    librosa.display.waveplot(wav, sr=sr, color="g", ax=ax)
    ax.set(title="Original signal", xlabel="Time [s]", ylabel="Magnitude")
    plt.savefig(os.path.join(result_dir, "signal.png"), transparent=True)
    plt.show()

    # parameter: `hop` is the hop size as a fraction of the window length
    hop = 0.5
    win_length = 1024
    hop_length = int(win_length * hop)

    # make mel filter bank
    n_channels = 20  # the number of mel filter bank channels
    df = sr / win_length  # frequency resolution (Hz width per frequency index 1)
    filterbank, _ = melFilterBank(sr, win_length, n_channels)

    # plot mel filter bank
    bin_freqs = np.arange(0, win_length / 2) * df
    for c in range(n_channels):
        plt.plot(bin_freqs, filterbank[c])
    plt.title("Mel filter bank")
    plt.xlabel("Frequency [Hz]")
    plt.savefig(os.path.join(result_dir, "MelFilterBank.png"), transparent=True)
    plt.show()

    # spectrogram (ex1)
    fig, ax = plt.subplots(nrows=1, ncols=1)
    amp = utils.stft(wav, hop=hop, win_length=win_length)
    db = librosa.amplitude_to_db(np.abs(amp))
    img = librosa.display.specshow(
        db,
        sr=sr,
        hop_length=hop_length,
        x_axis="time",
        y_axis="linear",
        ax=ax,
        cmap="rainbow",
    )
    ax.set(title="Spectrogram", xlabel=None, ylabel="Frequency [Hz]")
    fig.colorbar(img, aspect=10, pad=0.01, ax=ax, format="%+2.f dB")
    plt.savefig(os.path.join(result_dir, "spectrogram.png"), transparent=True)
    plt.show()

    fig, ax = plt.subplots(nrows=4, ncols=1, sharex=True, figsize=(10, 6))
    plt.subplots_adjust(hspace=0.6)

    # calculate mel spectrogram and mfcc
    mel_spec, mfcc = calc_mfcc(wav, hop, win_length, filterbank)

    # mel spectrogram
    # True division keeps the fractional part of the duration, so the time
    # axis covers the whole recording instead of being floored to seconds.
    wav_time = wav.shape[0] / sr
    f_nyq = sr // 2
    extent = [0, wav_time, 0, f_nyq]
    img = ax[0].imshow(
        librosa.amplitude_to_db(mel_spec),
        aspect="auto",
        extent=extent,
        cmap="rainbow",
    )
    ax[0].set(
        title="Mel spectrogram",
        xlabel=None,
        ylabel="Mel frequency [mel]",
        ylim=[0, 8000],
        yticks=range(0, 10000, 2000),
    )
    fig.colorbar(img, aspect=10, pad=0.01, ax=ax[0], format="%+2.f dB")

    # mfcc, d-mfcc and dd-mfcc share the same layout; only the data, the
    # labels and the bottom x-label differ
    n_mfcc = 12
    extent = [0, wav_time, 0, n_mfcc]
    d_mfcc = delta_mfcc(mfcc, k=2)
    dd_mfcc = delta_mfcc(d_mfcc, k=2)
    panels = [
        (ax[1], mfcc, "MFCC sequence", "MFCC", None),
        (ax[2], d_mfcc, "ΔMFCC sequence", "ΔMFCC", None),
        (ax[3], dd_mfcc, "ΔΔMFCC sequence", "ΔΔMFCC", "Time [s]"),
    ]
    for panel_ax, coeffs, title, ylabel, xlabel in panels:
        img = panel_ax.imshow(
            np.flipud(coeffs[:n_mfcc]),
            aspect="auto",
            extent=extent,
            cmap="rainbow",
        )
        panel_ax.set(
            title=title,
            xlabel=xlabel,
            ylabel=ylabel,
            yticks=range(0, 13, 4),
        )
        fig.colorbar(img, aspect=10, pad=0.01, ax=panel_ax, format="%+2.f dB")

    plt.savefig(os.path.join(result_dir, "mfcc_result.png"), transparent=True)
    plt.show()