示例#1
0
def load_spec(audio_path, mode=0):
    """Load an audio file and return a 2D representation of it as tensors.

    The file is read as mono with ``sr=None`` (librosa then keeps the
    sampling rate it finds in the file), and silent leading/trailing edges
    are trimmed before any feature is computed.

    Args:
        audio_path: Path of the audio file handed to ``librosa.load``.
        mode: Which representation to compute from the trimmed signal:
            ``1`` for a mel spectrogram, ``2`` for MFCCs (40 coefficients),
            anything else (default ``0``) for the magnitudes of librosa's
            reassigned spectrogram.

    Returns:
        A tuple ``(spec, wave, sr)``: the selected representation and the
        trimmed waveform, each as a ``torch.FloatTensor`` with one extra
        leading dimension, plus the sampling rate returned by
        ``librosa.load`` (a plain number, not a tensor).
    """
    samples, rate = librosa.load(audio_path, sr=None, mono=True)

    # Drop silent edges. `top_db` is the threshold, in dB below the
    # reference, under which audio is treated as silence.
    trimmed, _ = librosa.effects.trim(samples, top_db=60)

    # Tensor copy of the trimmed audio, returned alongside the features.
    waveform = torch.FloatTensor(trimmed).unsqueeze(0)

    if mode == 1:
        features = librosa.feature.melspectrogram(y=trimmed,
                                                  sr=rate,
                                                  hop_length=512)
    elif mode == 2:
        features = librosa.feature.mfcc(y=trimmed, sr=rate, n_mfcc=40)
    else:
        # Only the magnitudes are kept; the reassigned frequency and time
        # coordinates are discarded.
        _, _, features = librosa.reassigned_spectrogram(y=trimmed,
                                                        sr=rate,
                                                        hop_length=512)

    spec = torch.FloatTensor(features).unsqueeze(0)
    return spec, waveform, rate
def get_spectrogram(files):
    """Compute a dB-scaled reassigned spectrogram for every entry in `files`.

    For each name, ``'.wav'`` is replaced by ``'.npy'`` and the resulting
    path is loaded with ``np.load``; the loaded array is treated as the
    signal, sampled at ``GLOBAL_SAMPLING_RATE``.

    Args:
        files: Iterable of file names (strings).

    Returns:
        A list with one dB-scaled magnitude array per input file, in the
        same order as `files`. Each array is referenced to its own maximum
        (``ref=np.max``).
    """
    Spec = []
    for filename in files:
        y = np.load(filename.replace('.wav', '.npy'))
        # `sr` must be passed by keyword: it is keyword-only in recent
        # librosa releases, so a positional `sr` raises TypeError there.
        _, _, mags = librosa.reassigned_spectrogram(y, sr=GLOBAL_SAMPLING_RATE)
        # NOTE(review): librosa documents `mags` as STFT magnitudes, while
        # power_to_db expects power; amplitude_to_db may be what is intended.
        # Left unchanged to keep the existing output scale -- confirm.
        Spec.append(librosa.power_to_db(mags, ref=np.max))
    return Spec
示例#3
0
def reassigned_spectrogram(y, fs, hparams):
    """Return a normalised, dB-scaled reassigned spectrogram of `y`.

    The signal is passed through ``preemphasis`` first, analysed with
    ``librosa.reassigned_spectrogram`` (frames not centred), masked so that
    only bins with positive reassigned frequency and time survive, converted
    to dB and finally passed through ``_normalize``.

    Args:
        y: Input signal, handed to ``preemphasis(y, hparams)``.
        fs: Sampling rate; also used to convert the hop and window lengths
            from milliseconds to samples.
        hparams: Object providing ``n_fft``, ``hop_length_ms``,
            ``win_length_ms`` and ``ref_level_db`` (and whatever
            ``preemphasis`` / ``_normalize`` read from it).

    Returns:
        Whatever ``_normalize`` returns for the dB-scaled spectrogram.
    """
    emphasised = preemphasis(y, hparams)
    fft_size = hparams.n_fft
    # Hop and window sizes are configured in milliseconds; convert to samples.
    hop_samples = int(hparams.hop_length_ms / 1000 * fs)
    win_samples = int(hparams.win_length_ms / 1000 * fs)

    freqs, times, mags = librosa.reassigned_spectrogram(
        y=emphasised,
        sr=fs,
        n_fft=fft_size,
        hop_length=hop_samples,
        win_length=win_samples,
        center=False,
    )

    # Zero out every bin whose reassigned frequency or time is not strictly
    # positive (comparisons against NaN are False, so those are zeroed too).
    keep = (freqs > 0) * (times > 0)
    masked = keep * mags

    # NOTE(review): `ref_level_db` is used here as the amplitude reference of
    # amplitude_to_db, not as a dB offset -- confirm this is intended.
    S = librosa.amplitude_to_db(masked, ref=hparams.ref_level_db)

    return _normalize(S, hparams)
示例#4
0
def stft_reassign_from_sig(
    sig_wf: np.ndarray, frequency_sample_rate_hz: float, band_order_Nth: float
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray,
           np.ndarray]:
    """
    Reassigned STFT of a signal, with the window and hop sizes derived from
    constant-Q band parameters.

    Both the regular STFT mesh (times/frequencies) and the reassigned
    coordinates are returned; the reassigned frequencies are not the same as
    the standard mesh frequencies.

    :param sig_wf: array with input signal
    :param frequency_sample_rate_hz: sample rate of frequency in Hz
    :param band_order_Nth: Nth order of constant Q bands
    :return: six numpy ndarrays, in this order: scaled STFT magnitude,
        STFT_bits (``utils.log2epsilon`` of the scaled magnitude),
        time_stft_s, frequency_stft_hz, time_stft_rsg_s, frequency_stft_rsg_hz
    """

    # Lowest frequency is derived from the signal duration by the scales helper.
    duration_s = len(sig_wf) / frequency_sample_rate_hz
    _, lowest_hz = scales.from_duration(band_order_Nth, duration_s)

    # Only the band edges (last two outputs) are needed here.
    _, _, _, _, band_start_hz, band_end_hz = \
        scales.frequency_bands_g2f1(scale_order_input=band_order_Nth,
                                    frequency_low_input=lowest_hz,
                                    frequency_sample_rate_input=frequency_sample_rate_hz)

    # The spectral resolution is the key parameter: use the geometric mean of
    # the narrowest and widest band widths to set the window length.
    band_width_hz = band_end_hz - band_start_hz
    resolution_geo_hz = np.sqrt(np.min(band_width_hz) * np.max(band_width_hz))
    window_duration_s = 1 / resolution_geo_hz
    n_fft_points = int(frequency_sample_rate_hz * window_duration_s)

    # Hop size is taken from the CQT band helper (first output only).
    hop_points, _, _, _, _ = \
        scales.cqt_frequency_bands_g2f1(band_order_Nth,
                                        lowest_hz,
                                        frequency_sample_rate_hz,
                                        is_power_2=False)

    print('Reassigned STFT Duration, NFFT, HOP:', len(sig_wf),
          n_fft_points, hop_points)

    # Reassigned frequencies require a 'best fit' solution.
    frequency_stft_rsg_hz, time_stft_rsg_s, stft_mag = \
        librosa.reassigned_spectrogram(sig_wf, sr=frequency_sample_rate_hz,
                                       n_fft=n_fft_points,
                                       hop_length=hop_points, win_length=None,
                                       window='hann', center=False, pad_mode='reflect')

    # Scale factor intended (per the original author) to match scipy's psd;
    # applied in place to the magnitude array.
    magnitude_scale = 2 * np.sqrt(np.pi) / n_fft_points
    stft_mag *= magnitude_scale
    stft_bits = utils.log2epsilon(stft_mag)

    # Standard mesh times and frequencies for plotting - nice to have both.
    time_stft_s = librosa.times_like(stft_mag,
                                     sr=frequency_sample_rate_hz,
                                     hop_length=hop_points)
    frequency_stft_hz = librosa.core.fft_frequencies(
        sr=frequency_sample_rate_hz, n_fft=n_fft_points)

    return (stft_mag, stft_bits, time_stft_s, frequency_stft_hz,
            time_stft_rsg_s, frequency_stft_rsg_hz)
示例#5
0
def tfr_multi(y_multi):
    """Compute a reassigned spectrogram from a ``(signal, sample_rate)`` pair.

    Args:
        y_multi: Two-element sequence ``(y, sr)`` holding the signal and its
            sampling rate.

    Returns:
        The ``(freqs, times, mags)`` tuple returned by
        ``librosa.reassigned_spectrogram``, called with ``fill_nan=True`` so
        librosa fills in reassigned coordinates that would otherwise be NaN.
    """
    y, sr = y_multi
    # Forward the signal's own sampling rate; without it librosa falls back
    # to its default rate and the returned frequency/time coordinates are
    # wrong for any signal sampled at a different rate.
    return librosa.reassigned_spectrogram(y, sr=sr, fill_nan=True)