def load_spec(audio_path, mode=0):
    """
    Load an audio file and return a 2D time-frequency representation of it
    together with the trimmed waveform and its sample rate.

    The file is read at its native sample rate and down-mixed to mono, then
    leading/trailing silence (more than 60 dB below the reference) is
    trimmed before any feature is computed.

    :param audio_path: path of the audio file to load
    :param mode: 1 -> mel spectrogram, 2 -> MFCC (40 coefficients),
        any other value (default 0) -> reassigned spectrogram magnitudes
    :return: tuple ``(s, wave, sr)`` where ``s`` is the selected
        representation and ``wave`` the trimmed audio, both as
        ``torch.FloatTensor`` with an extra leading dimension of size 1,
        and ``sr`` is the sample rate reported by ``librosa.load``
    """
    samples, sr = librosa.load(audio_path, sr=None, mono=True)

    # Strip silent edges; ``top_db`` is the threshold (in decibels below the
    # reference) under which audio is considered silence.
    trimmed, _ = librosa.effects.trim(samples, top_db=60)

    # Waveform as a tensor with a leading dimension added.
    wave = torch.FloatTensor(trimmed).unsqueeze(0)

    # Pick the requested representation of the trimmed signal.
    if mode == 1:
        features = librosa.feature.melspectrogram(y=trimmed, sr=sr, hop_length=512)
    elif mode == 2:
        features = librosa.feature.mfcc(y=trimmed, sr=sr, n_mfcc=40)
    else:
        # Only the magnitudes are kept; reassigned frequencies/times are dropped.
        _, _, features = librosa.reassigned_spectrogram(y=trimmed, sr=sr, hop_length=512)

    s = torch.FloatTensor(features).unsqueeze(0)
    return s, wave, sr
def get_spectrogram(files):
    """
    Compute dB-scaled reassigned spectrograms for a list of recordings.

    For every entry in ``files`` the substring ``'.wav'`` is replaced by
    ``'.npy'`` and the resulting path is loaded with ``np.load``; the loaded
    array is used as the signal, analysed at ``GLOBAL_SAMPLING_RATE``.

    :param files: iterable of file names (typically ``*.wav`` paths that
        have a ``*.npy`` counterpart on disk)
    :return: list with one dB-scaled magnitude array per input file, in the
        same order as ``files`` (empty list for empty input)
    """
    spectrograms = []
    for filename in files:
        samples = np.load(filename.replace('.wav', '.npy'))
        # ``sr`` must be passed by keyword: librosa >= 0.10 makes every
        # argument after ``y`` keyword-only, so the positional form raises
        # TypeError there. The keyword form works on older versions too.
        _, _, mags = librosa.reassigned_spectrogram(y=samples, sr=GLOBAL_SAMPLING_RATE)
        # NOTE(review): ``mags`` are STFT magnitudes, yet ``power_to_db`` is
        # applied; kept as-is to preserve existing output scaling -- confirm
        # whether ``amplitude_to_db`` was intended.
        spectrograms.append(librosa.power_to_db(mags, ref=np.max))
    return spectrograms
def reassigned_spectrogram(y, fs, hparams):
    """
    Pre-emphasised, dB-scaled and normalised reassigned spectrogram.

    :param y: input signal, passed through ``preemphasis`` before analysis
    :param fs: sample rate, used both for librosa and to convert the
        millisecond hop/window lengths in ``hparams`` to sample counts
    :param hparams: object providing ``n_fft``, ``hop_length_ms``,
        ``win_length_ms`` and ``ref_level_db`` (it is also forwarded to
        ``preemphasis`` and ``_normalize``)
    :return: the output of ``_normalize`` applied to the dB-scaled spectrogram
    """
    emphasized = preemphasis(y, hparams)
    hop_samples = int(hparams.hop_length_ms / 1000 * fs)
    win_samples = int(hparams.win_length_ms / 1000 * fs)

    freqs, times, mags = librosa.reassigned_spectrogram(
        y=emphasized,
        sr=fs,
        n_fft=hparams.n_fft,
        hop_length=hop_samples,
        win_length=win_samples,
        center=False,
    )

    # Zero out bins whose reassigned frequency or time is not strictly
    # positive (comparisons with NaN are False, so those bins are zeroed too).
    keep = (freqs > 0) * (times > 0)
    S = librosa.amplitude_to_db(keep * mags, ref=hparams.ref_level_db)
    return _normalize(S, hparams)
def stft_reassign_from_sig(
        sig_wf: np.ndarray,
        frequency_sample_rate_hz: float,
        band_order_Nth: float
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Reassigned STFT of a signal, with segment and hop sizes derived from
    constant-Q band scales.

    Librosa STFT is a complex FFT grid, not power. Reassigned frequencies are
    not the same as the standard mesh frequencies, so both sets of
    coordinates are returned.

    :param sig_wf: array with input signal
    :param frequency_sample_rate_hz: sample rate of frequency in Hz
    :param band_order_Nth: Nth order of constant Q bands
    :return: six numpy ndarrays: scaled STFT magnitudes, the result of
        ``utils.log2epsilon`` on those magnitudes (STFT_bits), standard mesh
        times in s, standard mesh frequencies in Hz, reassigned times in s,
        reassigned frequencies in Hz
    """
    duration_s = len(sig_wf) / frequency_sample_rate_hz
    _, lowest_frequency_hz = scales.from_duration(band_order_Nth, duration_s)

    # Only the band edges are needed from the band description.
    _, _, _, _, band_start_hz, band_end_hz = scales.frequency_bands_g2f1(
        scale_order_input=band_order_Nth,
        frequency_low_input=lowest_frequency_hz,
        frequency_sample_rate_input=frequency_sample_rate_hz,
    )

    # The spectral resolution is the key parameter: use the geometric mean
    # of the narrowest and the widest band to set the segment duration.
    band_widths_hz = band_end_hz - band_start_hz
    resolution_geo_hz = np.sqrt(np.min(band_widths_hz) * np.max(band_widths_hz))
    segment_duration_s = 1 / resolution_geo_hz
    points_per_segment = int(frequency_sample_rate_hz * segment_duration_s)

    # Hop size is taken from the CQT band computation.
    points_hop, _, _, _, _ = scales.cqt_frequency_bands_g2f1(
        band_order_Nth, lowest_frequency_hz, frequency_sample_rate_hz, is_power_2=False
    )

    print('Reassigned STFT Duration, NFFT, HOP:', len(sig_wf), points_per_segment, points_hop)

    scaling = 2 * np.sqrt(np.pi) / points_per_segment

    # Reassigned frequencies require a 'best fit' solution.
    rsg_frequency_hz, rsg_time_s, magnitudes = librosa.reassigned_spectrogram(
        sig_wf,
        sr=frequency_sample_rate_hz,
        n_fft=points_per_segment,
        hop_length=points_hop,
        win_length=None,
        window='hann',
        center=False,
        pad_mode='reflect',
    )

    # Must be scaled to match scipy psd (done in place on the librosa output).
    magnitudes *= scaling
    bits = utils.log2epsilon(magnitudes)

    # Standard mesh times and frequencies for plotting - nice to have both.
    mesh_time_s = librosa.times_like(
        magnitudes, sr=frequency_sample_rate_hz, hop_length=points_hop
    )
    mesh_frequency_hz = librosa.core.fft_frequencies(
        sr=frequency_sample_rate_hz, n_fft=points_per_segment
    )

    return magnitudes, bits, mesh_time_s, mesh_frequency_hz, rsg_time_s, rsg_frequency_hz
def tfr_multi(y_multi):
    """
    Compute a reassigned spectrogram for one ``(signal, sample_rate)`` pair.

    :param y_multi: two-element sequence ``(y, sr)`` holding the signal and
        its sample rate
    :return: whatever ``librosa.reassigned_spectrogram`` returns for the
        signal (reassigned frequencies, reassigned times, magnitudes), with
        ``fill_nan=True``
    """
    y, sr = y_multi
    # Forward the sample rate that came with the signal. Previously it was
    # unpacked but ignored, so frequencies/times were always computed for
    # librosa's default rate of 22050 Hz regardless of the actual input.
    return librosa.reassigned_spectrogram(y, sr=sr, fill_nan=True)