Пример #1
0
    def _do_spec(self):
        """Recompute the spectrogram of ``self._signal`` and hand it to ``set_data``.

        A missing or empty signal produces a 1x1 placeholder array instead.
        Otherwise the hop size is chosen so that the signal is covered by at
        most 1000 frames, the STFT magnitude is taken, and the result is
        converted with ``20 * log10`` when ``self._color_scale`` is ``'log'``.
        """
        samples = self._signal
        if samples is None or len(samples) == 0:
            # Nothing to analyse: publish a single-cell placeholder.
            self.set_data(np.array([[0.5]]))
            return

        n_samples = len(samples)
        # Never ask for more frames than there are samples.
        frame_count = min(1000, n_samples)
        hop = int(n_samples / frame_count)

        # Only the Gaussian window is built here; any other setting is
        # forwarded to stft as None.
        window = None
        if self._window == 'gaussian':
            window = partial(gaussian, std=0.45 * (self._win_len) / 2)

        magnitude = np.abs(
            stft(samples,
                 self._n_fft,
                 hop,
                 center=True,
                 win_length=self._win_len,
                 window=window))
        if self._color_scale == 'log':
            magnitude = 20 * np.log10(magnitude)

        self.set_data(magnitude)
Пример #2
0
def generate_spectrogram(signal, sr, color_scale='log'):
    """Compute a Gaussian-windowed magnitude spectrogram of ``signal``.

    Parameters
    ----------
    signal : sequence or numpy.ndarray
        Samples to analyse. Must contain at least one sample.
    sr : int
        Sample rate of ``signal``.
    color_scale : str
        ``'log'`` converts the magnitudes with ``20 * log10``; any other
        value leaves them linear.

    Returns
    -------
    data
        Spectrogram magnitudes as returned by ``stft`` (optionally in log
        scale).
    time_step : float
        Hop size divided by ``sr``.
    freq_step : float
        ``sr`` divided by the FFT size.

    Raises
    ------
    ValueError
        If ``signal`` is empty (previously this surfaced as an opaque
        ``ZeroDivisionError`` while computing the hop size).
    """
    n_samples = len(signal)
    if n_samples == 0:
        raise ValueError('cannot compute a spectrogram of an empty signal')

    # 5 ms analysis window; the FFT size grows to fit it when needed.
    window_length = 0.005
    win_len = int(window_length * sr)
    n_fft = max(256, win_len)

    # Aim for at most 500 frames, but never more frames than samples.
    num_steps = min(500, n_samples)
    step_samp = int(n_samples / num_steps)
    time_step = step_samp / sr
    freq_step = sr / n_fft

    window = partial(gaussian, std=0.45 * win_len / 2)
    data = stft(signal,
                n_fft,
                step_samp,
                center=True,
                win_length=win_len,
                window=window)
    data = np.abs(data)
    if color_scale == 'log':
        data = 20 * np.log10(data)
    return data, time_step, freq_step
def audio_to_data(signal):
    """Convert an audio signal into a transposed dB-scaled spectrogram.

    When ``config.silence_thr_db`` is set, the signal is first passed
    through ``trim``. The STFT magnitude is then converted with
    ``amplitude_to_db`` and transposed before being returned. Progress
    statistics are printed along the way.
    """
    silence_thr = config.silence_thr_db
    if silence_thr is not None:
        signal, _ = trim(signal,
                         silence_thr,
                         frame_length=config.fft_bins,
                         hop_length=config.fft_hop_len)

    magnitude = abs(
        stft(signal, config.fft_bins, config.fft_hop_len,
             config.fft_window_len))

    # Work on a copy so the raw magnitude spectrogram stays untouched.
    features = deepcopy(magnitude)
    print('\tmax min initially:', max(features), min(features))

    features = amplitude_to_db(features)
    print('\tmax min in db:', max(features), min(features))

    features = features.T
    print('\tfinal vector shape:', features.shape)

    return features
 def process_signal(self, signal):
     """Return MFCCs of ``signal`` with their first- and second-order deltas.

     The Hann-windowed STFT magnitude is mapped through ``melspectrogram``
     and then ``mfcc``; ``delta`` is applied to the coefficients with its
     default order and with ``order=2``.
     """
     magnitude = np.abs(
         stft(signal,
              n_fft=self.window_size,
              hop_length=self.window_stride,
              window='hann'))
     # NOTE(review): the mel input is a magnitude (not power) spectrogram and
     # the mfcc input is not converted to dB first -- confirm this is intended.
     mel_spec = melspectrogram(sr=self.sample_rate, S=magnitude)
     coeffs = mfcc(sr=self.sample_rate, n_mfcc=self.num_mfccs, S=mel_spec)
     first_order = delta(coeffs)
     second_order = delta(coeffs, order=2)
     return coeffs, first_order, second_order
Пример #5
0
def generate_spectrogram(signal, sr, log_color_scale=True):
    """
    Generate a spectrogram

    Parameters
    ----------
    signal : numpy.array
        Signal to generate spectrogram from; must contain at least one sample
    sr : int
        Sample rate of the signal
    log_color_scale : bool
        Flag to make the color scale logarithmic

    Returns
    -------
    numpy.array
        Spectrogram data
    float
        Time step between frames
    float
        Frequency step between bins

    Raises
    ------
    ValueError
        If the signal is empty (previously this surfaced as an opaque
        ZeroDivisionError while computing the hop size)
    """
    n_samples = len(signal)
    if n_samples == 0:
        raise ValueError('cannot compute a spectrogram of an empty signal')

    # 5 ms analysis window; the FFT size grows to fit it when needed.
    window_length = 0.005
    win_len = int(window_length * sr)
    n_fft = max(256, win_len)

    # Aim for at most 500 frames, but never more frames than samples.
    num_steps = min(500, n_samples)
    step_samp = int(n_samples / num_steps)
    time_step = step_samp / sr
    freq_step = sr / n_fft

    window = partial(gaussian, std=0.45 * win_len / 2)
    data = stft(signal,
                n_fft,
                step_samp,
                center=True,
                win_length=win_len,
                window=window)
    data = np.abs(data)
    if log_color_scale:
        data = 20 * np.log10(data)
    return data, time_step, freq_step
Пример #6
0
def __cqt_response(y, n_fft, hop_length, fft_basis, mode):
    '''Project the STFT of `y` onto `fft_basis`.

    The STFT is computed with a `'ones'` window, the given `n_fft` and
    `hop_length`, and `mode` as its padding mode; the result is
    `fft_basis.dot(stft_matrix)`.
    '''
    stft_matrix = stft(y,
                       n_fft=n_fft,
                       hop_length=hop_length,
                       window='ones',
                       pad_mode=mode)
    return fft_basis.dot(stft_matrix)
Пример #7
0
def melspectrogram(y=None,
                   sr=16000,
                   n_fft=400,
                   hop_length=160,
                   power=2.0,
                   **kwargs):
    """Compute a mel-scaled spectrogram of a time series.

    The magnitude STFT of `y` (computed with `center=False`) is raised to
    `power` and then projected onto a mel filter bank built by
    `filters.mel(sr, n_fft, **kwargs)`.

    Parameters
    ----------
    y : np.ndarray
        audio time-series; it is handed straight to `stft`, so the `None`
        default is only a placeholder

    sr : number > 0 [scalar]
        sampling rate of `y`

    n_fft : int > 0 [scalar]
        length of the FFT window

    hop_length : int > 0 [scalar]
        number of samples between successive frames

    power : float > 0 [scalar]
        exponent applied to the magnitude spectrogram
        (the default of 2 gives a power spectrum)

    kwargs : additional keyword arguments
        forwarded unchanged to `filters.mel`

    Returns
    -------
    np.ndarray
        `np.dot(mel_filterbank, spectrogram)`
    """
    magnitude = np.abs(
        stft(y, n_fft=n_fft, hop_length=hop_length, center=False))
    spectrogram = magnitude**power

    mel_filterbank = filters.mel(sr, n_fft, **kwargs)

    return np.dot(mel_filterbank, spectrogram)
Пример #8
0
def librosa_compute_spec(y=None,
                         sr=1600,
                         S=None,
                         n_fft=2048,
                         hop_length=512,
                         power=1):
    """Return a spectrogram together with the FFT size that matches it.

    When `S` is given it is returned unchanged and `n_fft` is inferred from
    its first dimension as `2 * (S.shape[0] - 1)`. Otherwise the magnitude
    STFT of `y` is computed and raised to `power`, and the supplied `n_fft`
    is returned. `sr` is accepted but not used.
    """
    if S is None:
        # No spectrogram supplied: take the STFT magnitude (abs) first and
        # only then raise it to `power`.
        magnitude = np.abs(stft(y, n_fft=n_fft, hop_length=hop_length))
        return magnitude**power, n_fft

    # A spectrogram was supplied: derive the FFT size from its row count.
    inferred_n_fft = 2 * (S.shape[0] - 1)
    return S, inferred_n_fft
Пример #9
0
def log_energy(y, n_fft=400, hop_length=160):
    """Return ``10 * log10`` of the summed power spectrum of `y`.

    The power spectrum is the squared magnitude of the STFT computed with
    `center=False`.
    """
    magnitude = np.abs(
        stft(y, n_fft=n_fft, hop_length=hop_length, center=False))
    power_spectrum = magnitude**2
    # NOTE(review): assuming `sum` is the builtin, this adds the rows of the
    # spectrogram together, giving one energy value per column.
    energy = sum(power_spectrum)
    return 10 * np.log10(energy)  # in dB
Пример #10
0
def pseudo_cqt(y,
               sr=22050,
               hop_length=512,
               fmin=None,
               n_bins=84,
               bins_per_octave=12,
               tuning=0.0,
               filter_scale=1,
               norm=1,
               sparsity=0.01,
               window='hann',
               scale=True,
               pad_mode='reflect'):
    '''Compute the pseudo constant-Q transform of an audio signal.

    A single FFT size, chosen by `__cqt_filter_fft`, is used for every
    bin: the magnitude STFT of `y` is projected onto the magnitude of the
    CQT filter basis.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series
    sr : number > 0 [scalar]
        sampling rate of `y`
    hop_length : int > 0 [scalar]
        number of samples between successive CQT columns
    fmin : float > 0 [scalar]
        minimum frequency; `None` selects `note_to_hz('C1')`
    n_bins : int > 0 [scalar]
        number of frequency bins, starting at `fmin`
    bins_per_octave : int > 0 [scalar]
        number of bins per octave
    tuning : None or float
        tuning offset in fractions of a bin; `None` estimates it from the
        signal with `estimate_tuning`
    filter_scale : float > 0
        filter scale factor, forwarded to the basis construction
    norm : number
        normalisation setting forwarded to `__cqt_filter_fft`
    sparsity : float in [0, 1)
        sparsification setting forwarded to `__cqt_filter_fft`
    window : str, tuple, number, or function
        window specification for the basis filters
    scale : bool
        if true, the result is divided by `sqrt(n_fft)`; otherwise each bin
        is multiplied by `sqrt(filter_length / n_fft)`
    pad_mode : string
        padding mode handed to `stft`

    Returns
    -------
    CQT : np.ndarray [shape=(n_bins, t)]
        pseudo constant-Q energy for each frequency at each time

    Notes
    -----
    Any errors come from the helpers called here; this function performs
    no validation of its own.
    '''
    if fmin is None:
        # C1 by default
        fmin = note_to_hz('C1')
    if tuning is None:
        tuning = estimate_tuning(y=y, sr=sr)

    basis, n_fft, _ = __cqt_filter_fft(sr,
                                       fmin,
                                       n_bins,
                                       bins_per_octave,
                                       tuning,
                                       filter_scale,
                                       norm,
                                       sparsity,
                                       hop_length=hop_length,
                                       window=window)
    basis_magnitude = np.abs(basis)

    # Magnitude STFT (stft's default window) projected onto the basis.
    stft_magnitude = np.abs(
        stft(y, n_fft=n_fft, hop_length=hop_length, pad_mode=pad_mode))
    C = basis_magnitude.dot(stft_magnitude)

    if scale:
        C /= np.sqrt(n_fft)
        return C

    # Unscaled output: weight every bin by its own filter length instead.
    filter_lengths = filters.constant_q_lengths(
        sr,
        fmin,
        n_bins=n_bins,
        bins_per_octave=bins_per_octave,
        tuning=tuning,
        window=window,
        filter_scale=filter_scale)
    C *= np.sqrt(filter_lengths[:, np.newaxis] / n_fft)
    return C
Пример #11
0
def main():
    """Convert every ``.wav`` file under ``config.data_path`` and pickle the result.

    1. When ``config.frequencies_to_pick`` is empty, the time-averaged
       magnitude spectrum of every file is accumulated, and each frequency
       whose accumulated strength reaches
       ``max_strength / config.frequency_strength_thr`` is appended to
       ``config.frequencies_to_pick`` (the list is printed for copy-paste).
    2. Each file is converted with ``audio_to_data``, re-synthesised with
       ``data_to_audio`` and written out as a ``.wav`` file.
    3. The list of ``[data, meta]`` pairs is saved with ``pickle_save`` to
       ``config.data_path + '.pk'``.
    """
    files = glob(
        config.data_path +
        '/*.wav')  # + glob('data/*.mp3') # try ffmpeg -i input.mp3 output.wav

    if not config.frequencies_to_pick:

        # gather initial info from all files

        frequency_strengths = zeros(len(config.frequencies_of_bins))

        for file in files:
            signal = load(file, config.sample_rate)[0]
            spec = abs(
                stft(signal, config.fft_bins, config.fft_hop_len,
                     config.fft_window_len))

            # Mean magnitude over time for each frequency bin.
            frequency_strengths += spec.sum(1) / spec.shape[1]

        max_strength = max(frequency_strengths)
        strength_thr = max_strength / config.frequency_strength_thr

        band_low_hz = 999_999
        band_high_hz = -1

        for frequency, strength in zip(config.frequencies_of_bins,
                                       frequency_strengths):
            if strength >= strength_thr:
                config.frequencies_to_pick.append(frequency)
                if frequency < band_low_hz:
                    band_low_hz = frequency
                # Fixed: the upper bound used to be written into
                # band_low_hz, so band_high_hz never moved from -1.
                if frequency > band_high_hz:
                    band_high_hz = frequency

        # NOTE(review): this print was corrupted in the source; it is
        # rebuilt from the f-string text that survived.
        print(
            f'with bandpass, timestep size: {len(config.frequencies_of_bins)} -> {len(config.frequencies_to_pick)}'
        )
        print(
            f'copy paste this line into frequencies_to_pick @ config: \n{config.frequencies_to_pick}'
        )

    # proceed to separately processing each file

    converted = []

    for file_id, file in enumerate(files):

        print(f'reading: {file}')
        # NOTE(review): 0 marks this file's own position and 1 every other
        # one -- confirm this inverted encoding is intended.
        song_id = [0 if i == file_id else 1 for i in range(len(files))]

        # analysis
        signal, sample_rate = load(file, config.sample_rate)
        data, meta = audio_to_data(signal, song_id)
        converted.append([data, meta])

        # synthesis
        signal_recons = data_to_audio(data, meta)
        out_name = f'{file.split("/")[-1]}_{file_id}.wav'
        write(out_name, config.sample_rate, signal_recons)
        # The written file is loaded back; the result is not used further.
        signal_recons, sample_rate = load(out_name, config.sample_rate)

    pickle_save(converted, config.data_path + '.pk')
    print('saved data.')
Пример #12
0
def audio_to_data(signal, song_id):
    """Convert an audio signal into a scaled, band-limited spectrogram.

    Parameters
    ----------
    signal
        Audio samples; trimmed with ``trim`` first when
        ``config.silence_thr_db`` is truthy.
    song_id
        Identifier stored as the first element of ``meta``.

    Returns
    -------
    vector
        The processed spectrogram, transposed.
    meta : list
        ``[song_id]`` followed by ``[mean, std, scale]`` for z-score scaling
        or ``[spec_min, spec_max]`` for min/max scaling; nothing is added
        for log scaling or when no scaling is configured.
    """
    meta = [song_id]

    if config.silence_thr_db:
        signal, _ = trim(signal,
                         config.silence_thr_db,
                         frame_length=config.fft_bins,
                         hop_length=config.fft_hop_len)

    spec = abs(
        stft(signal, config.fft_bins, config.fft_hop_len,
             config.fft_window_len))

    # rows-frequencies cols-times

    spec_mod = deepcopy(spec)
    print('\tmax min initially:', max(spec_mod), min(spec_mod))

    # Keep only the rows of the frequencies selected in the config.
    spec_mod = stack([
        spec_mod[config.frequencies_of_bins.index(i), :]
        for i in config.frequencies_to_pick
    ], 0)
    print('\tmax min after bandpass:', max(spec_mod), min(spec_mod))

    # NOTE(review): the source was corrupted between the two print messages
    # around this point. The dB conversion is reconstructed from the
    # surviving 'max min in db' message -- confirm against the original.
    spec_mod = amplitude_to_db(spec_mod)
    print('\tmax min in db:', max(spec_mod), min(spec_mod))

    if config.zscore_scale:

        mean = spec_mod.mean()
        std = spec_mod.std()
        spec_mod -= mean
        spec_mod /= std

        print('\tmax min after std:', max(spec_mod), min(spec_mod))

        # Normalise by the largest absolute value after standardising.
        scale = max([abs(max(spec_mod)), abs(min(spec_mod))])
        spec_mod /= scale

        meta.extend([mean, std, scale])

    elif config.minmax_scale:

        spec_min = min(spec_mod)
        spec_max = max(spec_mod)
        spec_mod -= spec_min
        spec_mod /= spec_max - spec_min

        print('\tmax min after min/max:', max(spec_mod), min(spec_mod))

        meta.extend([spec_min, spec_max])

    elif config.log_scale:

        # The small offset avoids log(0).
        spec_mod = log(spec_mod + 1e-10)

        print('\tmax min after log:', max(spec_mod), min(spec_mod))

    vector = spec_mod
    vector = vector.T  # now first index time, second index frequency

    print('\tfinal vector shape:', vector.shape)

    return vector, meta