Example #1
    def __test_cqt(pad_mode):
        D1 = librosa.cqt(y, pad_mode='reflect')
        D2 = librosa.cqt(y, pad_mode=pad_mode)

        assert D1.shape == D2.shape

        if pad_mode != 'reflect':
            assert not np.allclose(D1, D2)
        else:
            assert np.allclose(D1, D2)
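
A minimal standalone sketch of what this test exercises, assuming a synthetic test tone in place of the original y fixture (the numpy and librosa imports are added here for completeness):

import numpy as np
import librosa

sr = 22050
y = np.sin(2 * np.pi * 440.0 * np.arange(sr) / sr)  # 1 s test tone standing in for the fixture

# Same call, two different padding strategies for the signal edges
C_reflect = librosa.cqt(y, sr=sr, pad_mode='reflect')
C_constant = librosa.cqt(y, sr=sr, pad_mode='constant')

# Frame count is unchanged; only the frames near the edges differ
assert C_reflect.shape == C_constant.shape
print(np.allclose(C_reflect, C_constant))  # typically False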
Example #2
def do_cqt(src, track_id):
    SRC_cqt_L = librosa.logamplitude(librosa.cqt(src[0, :], sr=CQT_CONST["sr"],
                                                 hop_length=CQT_CONST["hop_len"],
                                                 bins_per_octave=CQT_CONST["bins_per_octave"],
                                                 n_bins=CQT_CONST["n_bins"])**2, ref_power=1.0)
    SRC_cqt_R = librosa.logamplitude(librosa.cqt(src[1, :], sr=CQT_CONST["sr"],
                                                 hop_length=CQT_CONST["hop_len"],
                                                 bins_per_octave=CQT_CONST["bins_per_octave"],
                                                 n_bins=CQT_CONST["n_bins"])**2, ref_power=1.0)
    np.save(PATH_CQT + str(track_id) + '.npy', np.dstack((SRC_cqt_L, SRC_cqt_R)))
    print("Done: %s" % str(track_id))
Example #3
    def compute_features(self):
        """Actual implementation of the features.

        Returns
        -------
        cqt: np.array(N, F)
            The features, each row representing a feature vector for a given
            time frame/beat.
        """
        linear_cqt = (
            np.abs(
                librosa.cqt(
                    self._audio,
                    sr=self.sr,
                    hop_length=self.hop_length,
                    n_bins=self.n_bins,
                    norm=self.norm,
                    filter_scale=self.filter_scale,
                    real=False,
                )
            )
            ** 2
        )
        cqt = librosa.logamplitude(linear_cqt, ref_power=self.ref_power).T
        return cqt
Example #4
def compute_cqt(filename):
    a, sr = librosa.load(filename, sr=SR)
    spectrum = librosa.stft(a)
    harm_spec, _ = librosa.decompose.hpss(spectrum)
    harm = librosa.istft(harm_spec)
    cqt = np.abs(librosa.cqt(harm, sr=sr, hop_length=HOP, real=False))
    return cqt
Example #5
    def __test(hop_length, fmin, n_bins, bins_per_octave, tuning, resolution, norm, sparsity):

        C2 = librosa.hybrid_cqt(
            y,
            sr=sr,
            hop_length=hop_length,
            fmin=fmin,
            n_bins=n_bins,
            bins_per_octave=bins_per_octave,
            tuning=tuning,
            resolution=resolution,
            norm=norm,
            sparsity=sparsity,
        )

        C1 = librosa.cqt(
            y,
            sr=sr,
            hop_length=hop_length,
            fmin=fmin,
            n_bins=n_bins,
            bins_per_octave=bins_per_octave,
            tuning=tuning,
            resolution=resolution,
            norm=norm,
            sparsity=sparsity,
        )

        eq_(C1.shape, C2.shape)

        # Check for numerical comparability
        assert np.mean(np.abs(C1 - C2)) < 1e-3
Example #6
def makeSpectragrams(filename):
    f, sr = librosa.load(filename)
    print("first")
    melSpectra = librosa.feature.melspectrogram(f)
    cqtSpectra = librosa.cqt(f)
    stftSpectra = librosa.stft(f)
    print("stuff")
    librosa.display.specshow(melSpectra)
    #    plt.specgram(melSpectra)
    imageName = filename + "MelSpectragram.png"
    title = "Mel Spectrogram \nof " + filename[26:]
    plt.title(title)
    plt.ion()
    # plt.savefig(imageName)
    plt.show()

    librosa.display.specshow(cqtSpectra)
    title = "Constant Q Spectrogram \nof " + filename[26:]
    plt.title(title)
    # plt.spectrogram(cqtSpectra)
    plt.show()

    librosa.display.specshow(stftSpectra)
    title = "STFT Spectrogram \nof " + filename[26:]
    plt.title(title)
    # plt.spectrogram(cqtSpectra)
    plt.show()

    return True
Example #7
    def __test(sr, scale, hop_length, over_sample, y):

        bins_per_octave = over_sample * 12
        n_bins = 7 * bins_per_octave

        C = librosa.cqt(y, sr=sr, n_bins=n_bins,
                        bins_per_octave=bins_per_octave,
                        scale=scale,
                        hop_length=hop_length)

        yinv = librosa.icqt(C, sr=sr,
                            scale=scale,
                            hop_length=hop_length,
                            bins_per_octave=bins_per_octave)

        # Only test on the middle section
        yinv = librosa.util.fix_length(yinv, len(y))
        y = y[sr//2:-sr//2]
        yinv = yinv[sr//2:-sr//2]

        residual = np.abs(y - yinv)
        # We'll tolerate 11% RMSE
        # error is lower on more recent numpy/scipy builds

        resnorm = np.sqrt(np.mean(residual**2))
        assert resnorm <= 1.1e-1, resnorm
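
Outside the test harness, the forward/inverse pair used above looks roughly like this; a sketch assuming a librosa version that provides librosa.icqt, with a synthetic signal in place of the fixture:

import numpy as np
import librosa

sr = 22050
y = np.sin(2 * np.pi * 220.0 * np.arange(2 * sr) / sr)  # 2 s test tone

C = librosa.cqt(y, sr=sr, hop_length=512,
                bins_per_octave=24, n_bins=7 * 24, scale=True)
y_inv = librosa.icqt(C, sr=sr, hop_length=512,
                     bins_per_octave=24, scale=True)
y_inv = librosa.util.fix_length(y_inv, size=len(y))

# Reconstruction is approximate; the test above tolerates ~11% RMSE
print(np.sqrt(np.mean((y - y_inv) ** 2)))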
Example #8
def extract_cqt(audio_data):
    '''
    CQT routine with default parameters filled in, and some post-processing.

    Parameters
    ----------
    audio_data : np.ndarray
        Audio data to compute CQT of

    Returns
    -------
    cqt : np.ndarray
        CQT of the supplied audio data.
    frame_times : np.ndarray
        Times, in seconds, of each frame in the CQT
    '''
    # Compute CQT
    cqt = librosa.cqt(audio_data, sr=FS, fmin=librosa.midi_to_hz(NOTE_START),
                      n_bins=N_NOTES, hop_length=HOP_LENGTH, tuning=0.)
    # Compute the time of each frame
    times = librosa.frames_to_time(
        np.arange(cqt.shape[1]), sr=FS, hop_length=HOP_LENGTH)
    # Use float32 for the cqt to save space/memory
    cqt = cqt.astype(np.float32)
    return cqt, times
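
A usage sketch for the routine above. The module-level constants (FS, NOTE_START, N_NOTES, HOP_LENGTH) live elsewhere in the original project; the values and file path below are placeholders:

# Placeholder values mirroring the project's module-level configuration
FS = 22050
NOTE_START = 36
N_NOTES = 48
HOP_LENGTH = 1024

audio_data, _ = librosa.load('some_track.wav', sr=FS)  # placeholder path
cqt, frame_times = extract_cqt(audio_data)
print(cqt.shape, frame_times[:5])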
Example #9
def audio_to_cqt_and_onset_strength(audio, fs=22050, hop=512):
    '''
    Feature extraction for audio data.
    Gets a power CQT of harmonic component and onset strength signal of percussive.
    
    Input:
        audio - audio data array
        fs - sampling rate of the audio, default 22050
        hop - hop length for cqt, default 512, onset strength hop will be 1/4 of this
    Output:
        audio_gram - CQT of audio data
        audio_onset_strength - onset strength signal
    '''
    # Use harmonic part for gram, percussive part for onsets
    H, P = librosa.decompose.hpss(librosa.stft(audio))
    audio_harmonic = librosa.istft(H)
    audio_percussive = librosa.istft(P)
    # Compute log-frequency spectrogram of original audio
    audio_gram = np.abs(librosa.cqt(y=audio_harmonic,
                                    sr=fs,
                                    hop_length=hop,
                                    fmin=librosa.midi_to_hz(36),
                                    n_bins=60))**2
    # Compute onset strength of the percussive part at 4x the hop rate
    audio_onset_strength = librosa.onset.onset_strength(audio_percussive, hop_length=hop // 4, sr=fs)
    return audio_gram, audio_onset_strength
Example #10
def midi_to_cqt(midi, sf2_path=None, fs=22050, hop=512):
    '''
    Feature extraction routine for midi data, converts to a drum-free, percussion-suppressed CQT.
    
    Input:
        midi - pretty_midi.PrettyMIDI object
        sf2_path - path to .sf2 file to pass to pretty_midi.fluidsynth
        fs - sampling rate to synthesize audio at, default 22050
        hop - hop length for cqt, default 512
    Output:
        midi_gram - Simulated CQT of the midi data
    '''
    # Synthesize the MIDI using the supplied sf2 path
    midi_audio = midi.fluidsynth(fs=fs, sf2_path=sf2_path)
    # Use the harmonic part of the signal
    H, P = librosa.decompose.hpss(librosa.stft(midi_audio))
    midi_audio_harmonic = librosa.istft(H)
    # Compute log frequency spectrogram of audio synthesized from MIDI
    midi_gram = np.abs(librosa.cqt(y=midi_audio_harmonic,
                                   sr=fs,
                                   hop_length=hop,
                                   fmin=librosa.midi_to_hz(36),
                                   n_bins=60,
                                   tuning=0.0))**2
    return midi_gram
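
A hedged usage sketch for the two extractors above, assuming pretty_midi is installed and a MIDI file plus SoundFont are available (both paths are placeholders):

import pretty_midi

midi = pretty_midi.PrettyMIDI('example.mid')             # placeholder MIDI file
midi_gram = midi_to_cqt(midi, sf2_path='soundfont.sf2')  # placeholder SoundFont

audio = midi.fluidsynth(fs=22050, sf2_path='soundfont.sf2')
audio_gram, onset_strength = audio_to_cqt_and_onset_strength(audio, fs=22050)
print(midi_gram.shape, audio_gram.shape, onset_strength.shape)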
Example #11
def ExtractCQSpectraSparcityFeatures(f, epsilon=defaultEpsilon):

    cqtspectra = librosa.cqt(f)

    ##### Sparsity - fraction of entries that are zero (or within \epsilon)
    ##### We can report sparsity of the full spectrum or of the max over time (i.e. whether a band is ever sparse).
    
    cqtEpsilonSparcityMatrix = (cqtspectra > epsilon)
    #cqtSparcity = cqtspectra.size - np.count_nonzero(cqtspectra)
    cqtEpsilonSparcity = float((cqtEpsilonSparcityMatrix.size - np.count_nonzero(cqtEpsilonSparcityMatrix)))/cqtEpsilonSparcityMatrix.size
    
    #print "size", cqtspectra.size
    #print "cqtspectra epsilon sparcity for epsilon = ", epsilon, " is ", cqtEpsilonSparcity
    
    cqtSpectraMax = np.amax(cqtspectra, axis=1)
    cqtSpectraBandSparcityMatrix = (cqtSpectraMax > epsilon)
    cqtBandSparcity = float(len(cqtSpectraMax) - np.count_nonzero(cqtSpectraBandSparcityMatrix))/len(cqtSpectraMax)
    
    #print "Epsilon Band sparcity for cqt spectra: ", cqtBandSparcity
    
    
    cqtave = np.mean(cqtspectra, axis=1)
    cqtAveSpectraBandSparcityMatrix = (cqtave > epsilon)
    cqtBandSparcityTimeAve = float(len(cqtave) - np.count_nonzero(cqtAveSpectraBandSparcityMatrix))/len(cqtave)
    
    
    #print "Epsilon Band sparcity based on ave cqt spectra: ", cqtBandSparcityTimeAve
    
    return cqtEpsilonSparcity, cqtBandSparcity, cqtBandSparcityTimeAve
Example #12
File: main.py Project: beckgom/msaf
def features(filename):
    # print '\t[1/5] loading audio'
    y, sr = librosa.load(filename, sr=SR)

    # print '\t[2/5] Separating harmonic and percussive signals'
    y_perc, y_harm = hp_sep(y)

    # print '\t[3/5] detecting beats'
    bpm, beats = get_beats(y=y_perc, sr=sr, hop_length=HOP_LENGTH)

    # print '\t[4/5] generating CQT'
    M1 = np.abs(
        librosa.cqt(y=y_harm, sr=sr, hop_length=HOP_LENGTH, bins_per_octave=12, fmin=librosa.midi_to_hz(24), n_bins=72)
    )

    M1 = librosa.logamplitude(M1 ** 2.0, ref_power=np.max)

    # print '\t[5/5] generating MFCC'
    S = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=HOP_LENGTH, n_mels=N_MELS)
    M2 = librosa.feature.mfcc(S=librosa.logamplitude(S), n_mfcc=N_MFCC)

    n = min(M1.shape[1], M2.shape[1])

    beats = beats[beats < n]

    beats = np.unique(np.concatenate([[0], beats]))

    times = librosa.frames_to_time(beats, sr=sr, hop_length=HOP_LENGTH)

    times = np.concatenate([times, [float(len(y)) / sr]])
    M1 = librosa.feature.sync(M1, beats, aggregate=np.median)
    M2 = librosa.feature.sync(M2, beats, aggregate=np.mean)
    return (M1, M2), times
Example #13
def logcqt_onsets(x, fs, pre_max=0, post_max=1, pre_avg=0,
                  post_avg=1, delta=0.05, wait=50):
    """
    Parameters
    ----------
    x : np.ndarray
        Audio signal

    fs : scalar
        Samplerate of the audio signal.

    pre_max, post_max, pre_avg, post_avg, delta, wait
        See `librosa.util.peak_pick` for details.

    Returns
    -------
    onsets : np.ndarray, ndim=1
        Times in seconds for splitting.
    """
    hop_length = 1024
    x_noise = x + np.random.normal(scale=10.**-3, size=x.shape)
    cqt = librosa.cqt(x_noise.flatten(),
                      sr=fs, hop_length=hop_length, fmin=27.5,
                      n_bins=24*8, bins_per_octave=24, tuning=0,
                      sparsity=0, real=False, norm=1)
    cqt = np.abs(cqt)
    lcqt = np.log1p(5000*cqt)

    c_n = utils.canny(51, 3.5, 1)
    onset_strength = sig.lfilter(c_n, np.ones(1), lcqt, axis=1).mean(axis=0)

    peak_idx = librosa.onset.onset_detect(
        onset_envelope=onset_strength, delta=delta, wait=wait)
    return librosa.frames_to_time(peak_idx, hop_length=hop_length)
Example #14
def process_one_file(midi_filename, skip=True):
    '''
    Load in midi data, compute features, and write out file

    :parameters:
        - midi_filename : str
            Full path to midi file
        - skip : bool
            Whether to skip creating the file when the npz already exists
    '''
    # npz files go in the 'npz' dir instead of 'mid'
    output_filename = mid_to_npz_path(midi_filename)
    # Skip files already created
    if skip and os.path.exists(output_filename):
        return
    try:
        m = pretty_midi.PrettyMIDI(midi_filename)
        midi_audio = alignment_utils.fast_fluidsynth(m, MIDI_FS)
        midi_gram = librosa.cqt(
            midi_audio, sr=MIDI_FS, hop_length=MIDI_HOP,
            fmin=librosa.midi_to_hz(NOTE_START), n_bins=N_NOTES)
        midi_beats, midi_tempo = alignment_utils.midi_beat_track(m)
        midi_sync_gram = alignment_utils.post_process_cqt(
            midi_gram, librosa.time_to_frames(
                midi_beats, sr=MIDI_FS, hop_length=MIDI_HOP))
        np.savez_compressed(
            output_filename, sync_gram=midi_sync_gram,
            beats=midi_beats, bpm=midi_tempo)
    except Exception as e:
        print "Error processing {}: {}".format(midi_filename, e)
Example #15
    def __test(hop_length, fmin, n_bins, bins_per_octave,
               tuning, resolution, norm, sparsity):

        C2 = librosa.hybrid_cqt(y, sr=sr,
                                hop_length=hop_length,
                                fmin=fmin, n_bins=n_bins,
                                bins_per_octave=bins_per_octave,
                                tuning=tuning, resolution=resolution,
                                norm=norm,
                                sparsity=sparsity)

        C1 = librosa.cqt(y, sr=sr,
                         hop_length=hop_length,
                         fmin=fmin, n_bins=n_bins,
                         bins_per_octave=bins_per_octave,
                         tuning=tuning, resolution=resolution,
                         norm=norm,
                         sparsity=sparsity)

        eq_(C1.shape, C2.shape)

        # Check for numerical comparability
        idx1 = (C1 > 1e-4 * C1.max())
        idx2 = (C2 > 1e-4 * C2.max())

        perc = 0.99

        thresh = 1e-3

        idx = idx1 | idx2

        assert np.percentile(np.abs(C1[idx] - C2[idx]),
                             perc) < thresh * max(C1.max(), C2.max())
Example #16
def get_cqt(y, PARAMETERS):
    '''Constant-Q transform, energy-only'''
    CQT = np.abs(librosa.cqt(y, 
                      sr=PARAMETERS['load']['sr'],
                      hop_length=PARAMETERS['stft']['hop_length'], 
                      **PARAMETERS['cqt']))
    
    return CQT
Example #17
def CQT(filename, fmin=None, n_bins=84, hop_length=512):
    data, fs = librosa.load(filename)
    cqt = librosa.cqt(data, sr=fs, fmin=fmin, n_bins=n_bins, hop_length=hop_length)
    delta1 = librosa.feature.delta(cqt[24:,:],order=1)
    delta2 = librosa.feature.delta(cqt[24:,:],order=2)
    energy = librosa.feature.rmse(y=data)
    features = np.vstack((cqt,delta1,delta2,energy))
    return features.T
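
For reference, the returned matrix stacks the 84 CQT bins, two delta orders over bins 24 and up (60 rows each), and one RMS energy row along the feature axis; a usage sketch with a placeholder file:

features = CQT('piano_phrase.wav', fmin=librosa.note_to_hz('C2'),
               n_bins=84, hop_length=512)
print(features.shape)  # (n_frames, 84 + 60 + 60 + 1)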
Example #18
def get_spec():

    __EXAMPLE_FILE = 'data/test1_22050.wav'

    y, sr = librosa.load(__EXAMPLE_FILE)

    C = librosa.cqt(y, sr=sr)
    return librosa.stft(y), C, sr
Example #19
def compute_features(audio, y_harmonic):
    """Computes the HPCP and MFCC features.

    Parameters
    ----------
    audio: np.array(N)
        Audio samples of the given input.
    y_harmonic: np.array(N)
        Harmonic part of the audio signal, in samples.

    Returns
    -------
    mfcc: np.array(N, msaf.Anal.mfcc_coeff)
        Mel-frequency Cepstral Coefficients.
    hpcp: np.array(N, 12)
        Pitch Class Profiles.
    tonnetz: np.array(N, 6)
        Tonal Centroid features.
    cqt: np.array(N, msaf.Anal.cqt_bins)
        Constant-Q log-scale features.
    tempogram: np.array(N, 192)
        Tempogram features.
    """
    logging.info("Computing Spectrogram...")
    S = librosa.feature.melspectrogram(audio,
                                       sr=msaf.Anal.sample_rate,
                                       n_fft=msaf.Anal.frame_size,
                                       hop_length=msaf.Anal.hop_size,
                                       n_mels=msaf.Anal.n_mels)

    logging.info("Computing Constant-Q...")
    cqt = librosa.logamplitude(np.abs(
        librosa.cqt(audio,
                    sr=msaf.Anal.sample_rate,
                    hop_length=msaf.Anal.hop_size,
                    n_bins=msaf.Anal.cqt_bins,
                    real=False)) ** 2,
        ref_power=np.max).T

    logging.info("Computing MFCCs...")
    log_S = librosa.logamplitude(S, ref_power=np.max)
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=msaf.Anal.mfcc_coeff).T

    logging.info("Computing HPCPs...")
    hpcp = librosa.feature.chroma_cqt(y=y_harmonic,
                                      sr=msaf.Anal.sample_rate,
                                      hop_length=msaf.Anal.hop_size,
                                      n_octaves=msaf.Anal.n_octaves,
                                      fmin=msaf.Anal.f_min).T

    logging.info("Computing Tonnetz...")
    tonnetz = utils.chroma_to_tonnetz(hpcp)
    logging.info("Computing Tempogram...")
    tempogram = librosa.feature.tempogram(audio,
                                      sr=msaf.Anal.sample_rate,
                                      hop_length=msaf.Anal.hop_size,
                                      win_length=192).T
    return mfcc, hpcp, tonnetz, cqt, tempogram
Example #20
File: pre.py Project: EQ4/crema
    def extract(self, infile):
        '''Extract Constant-Q spectra from an input file'''

        y, sr = librosa.load(infile, sr=self.sr)

        return librosa.cqt(y, sr=sr, hop_length=self.hop_length,
                           n_bins=12 * self.n_octaves * self.over_sample,
                           bins_per_octave=12 * self.over_sample,
                           fmin=self.fmin).T.astype(self.dtype)
Example #21
def compute_features(audio_file, intervals, level):
    """Computes the subseg-sync cqt features from the given audio file, if
    they are not previously computed. Saves the results in the feat_dir folder.

    Parameters
    ----------
    audio_file : str
        Path to the audio file.
    intervals : np.array
        Intervals containing the estimated boundaries.
    level : str
        Level in the hierarchy.

    Returns
    -------
    cqgram : np.array
        Subseg-sync constant-Q power spectrogram.
    intframes : np.array
        The frame indices.
    """
    # Check if features have already been computed
    if level == "small_scale":
        features_file = os.path.join(features_dir, os.path.basename(audio_file).split('.')[0] +
                                    "_small_scale.mp3.pk")
    else:
        features_file = os.path.join(features_dir, os.path.basename(audio_file) +
                                    ".pk")
    if os.path.isfile(features_file):
        return read_features(features_file)

    y, sr = librosa.load(audio_file, sr=11025)

    # Default hopsize is 512
    hopsize = 512
    cqgram = librosa.logamplitude(librosa.cqt(y, sr=sr, hop_length=hopsize)**2, ref_power=np.max)

    # Track beats
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    tempo, beats = librosa.beat.beat_track(y=y_percussive, sr=sr,
                                           hop_length=hopsize)

    # Synchronize
    cqgram = librosa.feature.sync(cqgram, beats, aggregate=np.median)

    intframes = None
    if intervals is not None:
        # convert intervals to frames
        intframes = librosa.time_to_frames(intervals, sr=sr, hop_length=hopsize)

        # Match intervals to subseg points
        intframes = librosa.util.match_events(intframes, beats)

    # Save the features
    save_features(cqgram, intframes, beats, features_file)

    return cqgram, intframes
Example #22
    def __test(sr, hop_length, y):

        C = np.abs(librosa.cqt(y=y, sr=sr, hop_length=hop_length))

        response = np.mean(C**2, axis=1)

        continuity = np.abs(np.diff(response))

        # Test that integrated energy is approximately constant
        assert np.max(continuity) < 5e-4, continuity
Example #23
def CQT(filename, fmin=None, n_bins=84, hop_length=512,nfreqs=None):
    f = Sndfile(filename, 'r')
    data = f.read_frames(f.nframes)
    cqt = librosa.cqt(data, sr=f.samplerate, fmin=fmin, n_bins=n_bins, hop_length=hop_length)
    if nfreqs is not None:
        cqt = cqt[:nfreqs,:]
    delta1 = librosa.feature.delta(cqt,order=1)
    delta2 = librosa.feature.delta(cqt,order=2)
    energy = librosa.feature.rmse(y=data)
    features = np.vstack((cqt,delta1,delta2,energy))
    return features.T
Example #24
File: segmenter.py Project: bmcfee/olda
    def chroma(y):
        # Build the wrapper
        CQT      = np.abs(librosa.cqt(y,    sr=SR, 
                                            resolution=NOTE_RES,
                                            hop_length=HOP_LENGTH,
                                            fmin=NOTE_MIN,
                                            n_bins=NOTE_NUM))

        C_to_Chr = librosa.filters.cq_to_chroma(CQT.shape[0], n_chroma=N_CHROMA) 

        return librosa.logamplitude(librosa.util.normalize(C_to_Chr.dot(CQT)))
Example #25
def logcqt(x, fs, hop_length=1024):
    """
    """
    x_noise = x + np.random.normal(scale=10.**-3, size=x.shape)
    cqt = librosa.cqt(x_noise.flatten(),
                      sr=fs, hop_length=hop_length, fmin=27.5,
                      n_bins=24 * 8, bins_per_octave=24, tuning=0,
                      sparsity=0, real=False, norm=1)
    cqt = np.abs(cqt)
    lcqt = np.log1p(5000 * cqt)
    return lcqt
Example #26
def do_cqt(src, clip_id, seg_idx):
	'''see do_mfcc'''
	if check_if_done('%s%d_%d.npy'%(PATH_CQT,clip_id,seg_idx)):
		return
	np.save('%s%d_%d.npy'%(PATH_CQT,clip_id,seg_idx) ,
				 librosa.logamplitude(librosa.cqt(y=src, 
												sr=SR, 
												hop_length=HOP_LEN, 
												bins_per_octave=BINS_PER_OCTAVE, 
												n_bins=N_CQT_BINS)**2, 
										ref_power=1.0))
	return
Example #27
    def __test(real):
        warnings.resetwarnings()
        warnings.simplefilter('always')
        with warnings.catch_warnings(record=True) as out:
            C = librosa.cqt(y=y, sr=sr, real=real)
            assert len(out) > 0
            assert out[0].category is DeprecationWarning

            if real:
                assert np.isrealobj(C)
            else:
                assert np.iscomplexobj(C)
Example #28
File: cqt.py Project: keunwoochoi/pumpp
    def transform_audio(self, y):

        cqt, phase = librosa.magphase(librosa.cqt(y=y,
                                                  sr=self.sr,
                                                  hop_length=self.hop_length,
                                                  fmin=self.fmin,
                                                  n_bins=self.n_octaves *
                                                         self.over_sample * 12,
                                                  bins_per_octave=self.over_sample * 12,
                                                  real=False))

        return {'mag': cqt.T.astype(np.float32),
                'phase': np.angle(phase).T.astype(np.float32)}
Example #29
    def __test(sr, hop_length, y):

        C = np.abs(librosa.cqt(y=y, sr=sr, hop_length=hop_length, real=False))

        max_response = np.max(C, axis=1)

        ref_response = np.max(max_response)
        continuity = np.abs(np.diff(max_response))

        # Test that continuity is never violated by more than 15% point-wise energy
        assert np.max(continuity) < 1.5e-1 * ref_response, np.max(continuity) / ref_response

        # Test that peak-energy deviation is bounded
        assert np.std(max_response) < 0.5 * ref_response, np.std(max_response) / ref_response
Example #30
def CQT_stacked(filename, fmin=None, n_bins=84, hop_length=512,nfreqs=None):
    f = Sndfile(filename, 'r')
    data = f.read_frames(f.nframes)
    cqt = librosa.cqt(data, sr=f.samplerate, fmin=fmin, n_bins=n_bins, hop_length=hop_length)
    if nfreqs is not None:
        cqt = cqt[:nfreqs,:]
    delta1 = librosa.feature.delta(cqt,order=1)
    delta2 = librosa.feature.delta(cqt,order=2)
    d,L    = cqt.shape
    cqt = cqt.T.reshape(1,L,d)
    delta1 = delta1.T.reshape(1,L,d)
    delta2 = delta2.T.reshape(1,L,d)
    features = np.vstack((cqt,delta1,delta2))
    return features
Example #31
def compute_features(audio, y_harmonic):
	"""Computes the HPCP and MFCC features.

	Parameters
	----------
	audio: np.array(N)
		Audio samples of the given input.
	y_harmonic: np.array(N)
		Harmonic part of the audio signal, in samples.

	Returns
	-------
	mfcc: np.array(N, msaf.Anal.mfcc_coeff)
		Mel-frequency Cepstral Coefficients.
	hpcp: np.array(N, 12)
		Pitch Class Profiles.
	tonnetz: np.array(N, 6)
		Tonal Centroid features.
	cqt: np.array(N, msaf.Anal.cqt_bins)
		Constant-Q log-scale features.
	gmt: np.array(N, msaf.Anal.mfcc_coeff+6)
		Gammatone features
	"""
	logging.info("Computing Spectrogram...")
	S = librosa.feature.melspectrogram(audio,
									   sr=msaf.Anal.sample_rate,
									   n_fft=msaf.Anal.frame_size,
									   hop_length=msaf.Anal.hop_size,
									   n_mels=msaf.Anal.n_mels)

	logging.info("Computing Constant-Q...")
	cqt = librosa.logamplitude(librosa.cqt(audio, sr=msaf.Anal.sample_rate,
										   hop_length=msaf.Anal.hop_size,
										   n_bins=msaf.Anal.cqt_bins)**2,
							   ref_power=np.max).T

	# cqt = librosa.logamplitude(np.abs(librosa.cqt(audio, sr=msaf.Anal.sample_rate,
	# 								   hop_length=msaf.Anal.hop_size,
	# 								   n_bins=msaf.Anal.cqt_bins, real=False))**2,
	# 					   			ref_power=np.max).T

	logging.info("Computing MFCCs...")
	log_S = librosa.logamplitude(S, ref_power=np.max)
	mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=msaf.Anal.mfcc_coeff).T

	logging.info("Computing HPCPs...")
	# hpcp = librosa.feature.chroma_cqt(y=y_harmonic,
	# 								  sr=msaf.Anal.sample_rate,
	# 								  hop_length=msaf.Anal.hop_size,
	# 								  n_octaves=msaf.Anal.n_octaves,
	# 								  fmin=msaf.Anal.f_min).T
	
	hpcp = librosa.feature.chroma_cqt(y=y_harmonic,
									  sr=msaf.Anal.sample_rate,
									  hop_length=msaf.Anal.hop_size,
									  n_octaves=msaf.Anal.n_octaves,
									  n_chroma=12,										
									  fmin=msaf.Anal.f_min).T

	#plt.imshow(hpcp.T, interpolation="nearest", aspect="auto"); plt.show()
	logging.info("Computing Tonnetz...")
	tonnetz = utils.chroma_to_tonnetz(hpcp)
	
	'''Mi: Extracting Gammatone features'''
	logging.info("Computing gammatone features...")
	gcc = librosa.feature.gammatone_cepstral_coeffecients(audio, sr=msaf.Anal.sample_rate, nfft=msaf.Anal.frame_size*2,\
	 														hop_length=msaf.Anal.hop_size, nfilters=64, f_min=50,\
	 														f_max=msaf.Anal.sample_rate/2, nCoeff=msaf.Anal.mfcc_coeff, log=False).T
	
	gc = librosa.feature.gammatone_contrast(audio, sr=msaf.Anal.sample_rate, nfft=msaf.Anal.frame_size*2, hop_length=msaf.Anal.hop_size,\
	 						nfilters=64, f_min=50, f_max=msaf.Anal.sample_rate/2, n_bands=6, quantile=0.02, log=False).T
	
	gmt = np.hstack((gcc, gc))
	# logging.info("%s" %(hpcp.shape,))
	# logging.info("%s" %(gmt.shape,))
	return mfcc, hpcp, tonnetz, cqt, gmt
Example #32
def Signal_Process(audio_samples,
                   first_axis_is_batch=False,
                   sr=22050,
                   method='stft'):
    """
    :param audio_samples: sampled raw audio input (tf.Tensor)
    :param first_axis_is_batch: first axis means batch, default = False
    :param sr: sampling rate
    :param method: signal process methods
    :return: signal_processed output [feature_size, sequence_length]
    """

    # TODO: define your signal process method with various functions and hyper parameters
    if method == 'your_own_way':
        stfts = tf.signal.stft(audio_samples,
                               frame_length=2048,
                               frame_step=512,
                               fft_length=2048,
                               pad_end=True)
        spectrograms = tf.abs(stfts)
        num_spectrogram_bins = stfts.shape[-1]
        lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 2048.0, 80
        linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
            num_mel_bins, num_spectrogram_bins, sr, lower_edge_hertz,
            upper_edge_hertz)
        mel_spectrograms = tf.tensordot(spectrograms,
                                        linear_to_mel_weight_matrix, 1)
        mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
            linear_to_mel_weight_matrix.shape[-1:]))
        log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)
        if first_axis_is_batch:
            return tf.transpose(log_mel_spectrograms, perm=[0, 2, 1])
        else:
            return tf.transpose(log_mel_spectrograms, perm=[1, 0])

    elif method == 'raw_audio':
        return audio_samples

    elif method == 'tf_stft':
        stfts = tf.signal.stft(audio_samples,
                               frame_length=2048,
                               frame_step=512,
                               fft_length=2048,
                               pad_end=True)
        stfts = tf.abs(stfts)
        if first_axis_is_batch:
            return tf.transpose(stfts, perm=[0, 2, 1])
        else:
            return tf.transpose(stfts, perm=[1, 0])

    elif method == 'tf_mel_spectrogram':
        stfts = tf.signal.stft(audio_samples,
                               frame_length=2048,
                               frame_step=512,
                               fft_length=2048,
                               pad_end=True)
        spectrograms = tf.abs(stfts)
        num_spectrogram_bins = stfts.shape[-1]
        lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 7600.0, 80
        linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
            num_mel_bins, num_spectrogram_bins, sr, lower_edge_hertz,
            upper_edge_hertz)
        mel_spectrograms = tf.tensordot(spectrograms,
                                        linear_to_mel_weight_matrix, 1)
        if first_axis_is_batch:
            return tf.transpose(mel_spectrograms, perm=[0, 2, 1])
        else:
            return tf.transpose(mel_spectrograms, perm=[1, 0])

    elif method == 'tf_log_mel_spectrogram':
        stfts = tf.signal.stft(audio_samples,
                               frame_length=2048,
                               frame_step=512,
                               fft_length=2048,
                               pad_end=True)
        spectrograms = tf.abs(stfts)
        num_spectrogram_bins = stfts.shape[-1]
        lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 7600.0, 80
        linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
            num_mel_bins, num_spectrogram_bins, sr, lower_edge_hertz,
            upper_edge_hertz)
        mel_spectrograms = tf.tensordot(spectrograms,
                                        linear_to_mel_weight_matrix, 1)
        mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
            linear_to_mel_weight_matrix.shape[-1:]))
        log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)
        if first_axis_is_batch:
            return tf.transpose(log_mel_spectrograms, perm=[0, 2, 1])
        else:
            return tf.transpose(log_mel_spectrograms, perm=[1, 0])

    elif method == 'tf_mfcc':
        stfts = tf.signal.stft(audio_samples,
                               frame_length=2048,
                               frame_step=512,
                               fft_length=2048,
                               pad_end=True)
        spectrograms = tf.abs(stfts)
        num_spectrogram_bins = stfts.shape[-1]
        lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 7600.0, 80
        linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
            num_mel_bins, num_spectrogram_bins, sr, lower_edge_hertz,
            upper_edge_hertz)
        mel_spectrograms = tf.tensordot(spectrograms,
                                        linear_to_mel_weight_matrix, 1)
        mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
            linear_to_mel_weight_matrix.shape[-1:]))
        log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)
        mfccs = tf.signal.mfccs_from_log_mel_spectrograms(
            log_mel_spectrograms)[..., :20]
        if first_axis_is_batch:
            return tf.transpose(mfccs, perm=[0, 2, 1])
        else:
            return tf.transpose(mfccs, perm=[1, 0])

    elif method == 'stft':
        audio_samples = audio_samples.numpy()
        if first_axis_is_batch:
            f = list()
            for i in range(len(audio_samples)):
                f.append(
                    np.abs(
                        librosa.stft(audio_samples[i],
                                     n_fft=2048,
                                     hop_length=512)))
        else:
            f = np.abs(librosa.stft(audio_samples, n_fft=2048, hop_length=512))
        return tf.convert_to_tensor(f, dtype=tf.float32)

    elif method == 'cqt':
        audio_samples = audio_samples.numpy()
        if first_axis_is_batch:
            f = list()
            for i in range(len(audio_samples)):
                f.append(
                    np.abs(
                        librosa.cqt(audio_samples[i],
                                    sr=float(sr),
                                    hop_length=512,
                                    bins_per_octave=12,
                                    n_bins=7 * 12)))
        else:
            f = np.abs(
                librosa.cqt(audio_samples,
                            sr=float(sr),
                            hop_length=512,
                            bins_per_octave=12,
                            n_bins=7 * 12))
        return tf.convert_to_tensor(f, dtype=tf.float32)

    elif method == 'chroma_cqt':
        audio_samples = audio_samples.numpy()
        if first_axis_is_batch:
            f = list()
            for i in range(len(audio_samples)):
                cqt = np.abs(
                    librosa.cqt(audio_samples[i],
                                sr=float(sr),
                                hop_length=512,
                                bins_per_octave=12,
                                n_bins=7 * 12,
                                tuning=None))
                f.append(
                    librosa.feature.chroma_cqt(C=cqt, n_chroma=12,
                                               n_octaves=7))
        else:
            cqt = np.abs(
                librosa.cqt(audio_samples,
                            sr=float(sr),
                            hop_length=512,
                            bins_per_octave=12,
                            n_bins=7 * 12,
                            tuning=None))
            f = librosa.feature.chroma_cqt(C=cqt, n_chroma=12, n_octaves=7)
        return tf.convert_to_tensor(f, dtype=tf.float32)

    elif method == 'chroma_cens':
        audio_samples = audio_samples.numpy()
        if first_axis_is_batch:
            f = list()
            for i in range(len(audio_samples)):
                cqt = np.abs(
                    librosa.cqt(audio_samples[i],
                                sr=float(sr),
                                hop_length=512,
                                bins_per_octave=12,
                                n_bins=7 * 12,
                                tuning=None))
                f.append(
                    librosa.feature.chroma_cens(C=cqt,
                                                n_chroma=12,
                                                n_octaves=7))
        else:
            cqt = np.abs(
                librosa.cqt(audio_samples,
                            sr=float(sr),
                            hop_length=512,
                            bins_per_octave=12,
                            n_bins=7 * 12,
                            tuning=None))
            f = librosa.feature.chroma_cens(C=cqt, n_chroma=12, n_octaves=7)
        return tf.convert_to_tensor(f, dtype=tf.float32)

    elif method == 'chroma_stft':
        audio_samples = audio_samples.numpy()
        if first_axis_is_batch:
            f = list()
            for i in range(len(audio_samples)):
                stft = np.abs(
                    librosa.stft(audio_samples[i], n_fft=2048, hop_length=512))
                f.append(librosa.feature.chroma_stft(S=stft**2, n_chroma=12))
        else:
            stft = np.abs(
                librosa.stft(audio_samples, n_fft=2048, hop_length=512))
            f = librosa.feature.chroma_stft(S=stft**2, n_chroma=12)
        return tf.convert_to_tensor(f, dtype=tf.float32)

    elif method == 'rms':
        audio_samples = audio_samples.numpy()
        if first_axis_is_batch:
            f = list()
            for i in range(len(audio_samples)):
                stft = np.abs(
                    librosa.stft(audio_samples[i], n_fft=2048, hop_length=512))
                f.append(librosa.feature.rms(S=stft))
        else:
            stft = np.abs(
                librosa.stft(audio_samples, n_fft=2048, hop_length=512))
            f = librosa.feature.rms(S=stft)
        return tf.convert_to_tensor(f, dtype=tf.float32)

    elif method == 'mel_spectrogram':
        audio_samples = audio_samples.numpy()
        if first_axis_is_batch:
            f = list()
            for i in range(len(audio_samples)):
                stft = np.abs(
                    librosa.stft(audio_samples[i], n_fft=2048, hop_length=512))
                f.append(librosa.feature.melspectrogram(S=stft**2, sr=sr))
        else:
            stft = np.abs(
                librosa.stft(audio_samples, n_fft=2048, hop_length=512))
            f = librosa.feature.melspectrogram(S=stft**2, sr=sr)
        return tf.convert_to_tensor(f, dtype=tf.float32)

    elif method == 'mfcc':
        audio_samples = audio_samples.numpy()
        if first_axis_is_batch:
            f = list()
            for i in range(len(audio_samples)):
                stft = np.abs(
                    librosa.stft(audio_samples[i], n_fft=2048, hop_length=512))
                mel_spectrogram = librosa.feature.melspectrogram(S=stft**2,
                                                                 sr=sr)
                f.append(
                    librosa.feature.mfcc(
                        S=librosa.power_to_db(mel_spectrogram), n_mfcc=20))
        else:
            stft = np.abs(
                librosa.stft(audio_samples, n_fft=2048, hop_length=512))
            mel_spectrogram = librosa.feature.melspectrogram(S=stft**2, sr=sr)
            f = librosa.feature.mfcc(S=librosa.power_to_db(mel_spectrogram),
                                     n_mfcc=20)
        return tf.convert_to_tensor(f, dtype=tf.float32)

    else:
        raise NotImplementedError
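
A usage sketch for Signal_Process above. A synthetic tf.Tensor stands in for real audio, and eager execution is assumed since .numpy() is called on the input for the librosa-backed methods:

import numpy as np
import tensorflow as tf

audio = tf.constant(np.random.randn(22050).astype(np.float32))  # 1 s of noise
feat = Signal_Process(audio, first_axis_is_batch=False, sr=22050, method='cqt')
print(feat.shape)  # (n_bins, n_frames), here (84, ~44)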
Example #33
                                  hop_length=frame_hop)

    # hanning window to smooth the spectrum out
    han_win = signal.hanning(frame_length)

    # let's extract
    CQT_frames = []
    for frame in range(y_frames.shape[1]):
        if not use_han:
            sig = y_frames[:, frame]
        else:
            sig = han_win * y_frames[:, frame]

        CQTf = np.abs(
            librosa.cqt(sig,
                        sr=sr,
                        n_bins=n_bins,
                        bins_per_octave=bins_per_octave,
                        fmin=fmin,
                        hop_length=cqt_hop,
                        real=False))
        CQT_frames.append(CQTf[:, 1:-1])
    # concatenate everything together
    CQT = np.hstack(CQT_frames)

    # Take the log amplitude
    CQTlog = librosa.logamplitude(CQT**2, ref_power=np.max)

    # save the extracted CQT
    inface.upload_raw_array(stim_out.format(cqt_range, stimuli[i]), CQTlog)
Example #34
def draw_cqt(audio, samplerate):
    s = 3
    C = librosa.cqt(audio, sr=samplerate, n_bins=60 * s, bins_per_octave=12 * s, hop_length=16)
    librosa.display.specshow(librosa.amplitude_to_db(np.abs(C), ref=np.max),
                             sr=samplerate, x_axis='time', y_axis='cqt_note')
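
Usage sketch for the plotting helper above (the audio path is a placeholder; matplotlib.pyplot is assumed to be imported as plt, as the use of librosa.display implies):

import librosa
import matplotlib.pyplot as plt

audio, samplerate = librosa.load('clip.wav', sr=None)  # placeholder path
draw_cqt(audio, samplerate)
plt.show()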
Example #35
def laplacian_segmentation(y, sr, k=5):
    """This function uses the Laplacian Segmentation method described in McFee and Ellis, 2014, and adapted from example code in the librosa documentation. It returns the segment boundaries (in frame number and time and segment ID's of isolated music file segments."""
    BINS_PER_OCTAVE = 12 * 3
    N_OCTAVES = 7
    C = librosa.amplitude_to_db(np.abs(
        librosa.cqt(y=y,
                    sr=sr,
                    bins_per_octave=BINS_PER_OCTAVE,
                    n_bins=N_OCTAVES * BINS_PER_OCTAVE)),
                                ref=np.max)
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr, trim=False)
    Csync = librosa.util.sync(C, beats, aggregate=np.median)

    # For plotting purposes, we'll need the timing of the beats
    # we fix_frames to include non-beat frames 0 and C.shape[1] (final frame)
    beat_times = librosa.frames_to_time(librosa.util.fix_frames(
        beats, x_min=0, x_max=C.shape[1]),
                                        sr=sr)

    R = librosa.segment.recurrence_matrix(Csync,
                                          width=3,
                                          mode='affinity',
                                          sym=True)
    # Enhance diagonals with a median filter (Equation 2)
    df = librosa.segment.timelag_filter(scipy.ndimage.median_filter)
    Rf = df(R, size=(1, 7))
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    Msync = librosa.util.sync(mfcc, beats)
    path_distance = np.sum(np.diff(Msync, axis=1)**2, axis=0)
    sigma = np.median(path_distance)
    path_sim = np.exp(-path_distance / sigma)
    R_path = np.diag(path_sim, k=1) + np.diag(path_sim, k=-1)
    deg_path = np.sum(R_path, axis=1)
    deg_rec = np.sum(Rf, axis=1)
    mu = deg_path.dot(deg_path + deg_rec) / np.sum((deg_path + deg_rec)**2)
    A = mu * Rf + (1 - mu) * R_path
    L = scipy.sparse.csgraph.laplacian(A, normed=True)
    # and its spectral decomposition
    evals, evecs = scipy.linalg.eigh(L)
    # We can clean this up further with a median filter.
    # This can help smooth over small discontinuities
    evecs = scipy.ndimage.median_filter(evecs, size=(9, 1))
    # cumulative normalization is needed for symmetric normalize laplacian eigenvectors
    Cnorm = np.cumsum(evecs**2, axis=1)**0.5
    # If we want k clusters, use the first k normalized eigenvectors.
    # Fun exercise: see how the segmentation changes as you vary k
    k = k
    X = evecs[:, :k] / Cnorm[:, k - 1:k]
    KM = sklearn.cluster.KMeans(n_clusters=k)
    seg_ids = KM.fit_predict(X)
    bound_beats = 1 + np.flatnonzero(seg_ids[:-1] != seg_ids[1:])
    # Count beat 0 as a boundary
    bound_beats = librosa.util.fix_frames(bound_beats, x_min=0)
    # Compute the segment label for each boundary
    bound_segs = list(seg_ids[bound_beats])
    # Convert beat indices to frames
    bound_frames = beats[bound_beats]
    # Make sure we cover to the end of the track
    bound_frames = librosa.util.fix_frames(bound_frames,
                                           x_min=None,
                                           x_max=C.shape[1] - 1)
    bound_times = librosa.frames_to_time(bound_frames)
    bound_times = [(x / 60) * tempo for x in bound_times]
    beat_numbers = list(range(len(bound_frames)))
    bound_beats = np.append(bound_beats, list(range(len(beats)))[-1])
    segments = list(
        zip(zip(bound_times, bound_times[1:]),
            zip(bound_beats, bound_beats[1:]), bound_segs))

    return segments, beat_times, tempo
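
A short usage sketch for the segmentation routine above (the audio path is a placeholder; scipy and sklearn are assumed to be imported as in the function body):

import librosa

y, sr = librosa.load('song.mp3')  # placeholder path
segments, beat_times, tempo = laplacian_segmentation(y, sr, k=5)
for (t_start, t_end), (b_start, b_end), seg_id in segments:
    print("segment %d spans beats %d-%d" % (seg_id, b_start, b_end))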
Example #36
def getFusedSimilarity(filename,
                       sr,
                       hop_length,
                       win_fac,
                       wins_per_block,
                       K,
                       reg_diag,
                       reg_neighbs,
                       niters,
                       do_animation,
                       plot_result,
                       do_crema=True):
    """
    Load in filename, compute features, average/stack delay, and do similarity
    network fusion (SNF) on all feature types
    Parameters
    ----------
    filename: string
        Path to music file
    sr: int
        Sample rate at which to sample file
    hop_length: int
        Hop size between frames in chroma and mfcc
    win_fac: int
        Number of frames to average (i.e. factor by which to downsample)
        If negative, then do beat tracking, and subdivide by |win_fac| times within each beat
    wins_per_block: int
        Number of aggregated windows per sliding window block
    K: int
        Number of nearest neighbors in SNF.  If -1, then autotuned to sqrt(N)
        for an NxN similarity matrix
    reg_diag: float 
        Regularization for self-similarity promotion
    reg_neighbs: float
        Regularization for direct neighbor similarity promotion
    niters: int
        Number of iterations in SNF
    do_animation: boolean
        Whether to plot and save images of the evolution of SNF
    plot_result: boolean
        Whether to plot the result of the fusion
    do_crema: boolean
        Whether to include precomputed crema in the fusion
    Returns
    -------
    {'Ws': A dictionary of weighted adjacency matrices for the individual features
                    and the fused adjacency matrix,
            'times': Time in seconds of each row in the similarity matrices,
            'K': The number of nearest neighbors actually used}
    """
    ## Step 1: Load audio
    print("Loading %s..." % filename)
    if MANUAL_AUDIO_LOAD:
        subprocess.call([
            FFMPEG_BINARY, "-i", filename, "-ar",
            "%i" % sr, "-ac", "1",
            "%s.wav" % filename
        ])
        sr, y = sio.wavfile.read("%s.wav" % filename)
        y = y / 2.0**15
        os.remove("%s.wav" % filename)
    else:
        y, sr = librosa.load(filename, sr=sr)

    ## Step 2: Figure out intervals to which to sync features
    if win_fac > 0:
        # Compute features in intervals evenly spaced by the hop size
        # but average within "win_fac" intervals of hop_length
        nHops = int(
            (y.size - hop_length * win_fac * wins_per_block) / hop_length)
        intervals = np.arange(0, nHops, win_fac)
    else:
        # Compute features in intervals which are subdivided beats
        # by a factor of |win_fac|
        C = np.abs(librosa.cqt(y=y, sr=sr))
        _, beats = librosa.beat.beat_track(y=y,
                                           sr=sr,
                                           trim=False,
                                           start_bpm=240)
        intervals = librosa.util.fix_frames(beats, x_max=C.shape[1])
        intervals = librosa.segment.subsegment(C,
                                               intervals,
                                               n_segments=abs(win_fac))

    ## Step 3: Compute features
    # 1) CQT chroma with 3x oversampling in pitch
    chroma = librosa.feature.chroma_cqt(y=y,
                                        sr=sr,
                                        hop_length=hop_length,
                                        bins_per_octave=12 * 3)

    # 2) Exponentially liftered MFCCs
    S = librosa.feature.melspectrogram(y,
                                       sr=sr,
                                       n_mels=128,
                                       hop_length=hop_length)
    log_S = librosa.power_to_db(S, ref=np.max)
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=20)
    lifterexp = 0.6
    coeffs = np.arange(mfcc.shape[0])**lifterexp
    coeffs[0] = 1
    mfcc = coeffs[:, None] * mfcc

    # 3) Tempograms
    #  Use a super-flux max smoothing of 5 frequency bands in the oenv calculation
    SUPERFLUX_SIZE = 5
    oenv = librosa.onset.onset_strength(y=y,
                                        sr=sr,
                                        hop_length=hop_length,
                                        max_size=SUPERFLUX_SIZE)
    tempogram = librosa.feature.tempogram(onset_envelope=oenv,
                                          sr=sr,
                                          hop_length=hop_length)

    # 4) Crema
    if do_crema:
        matfilename = "%s_crema.mat" % filename
        if not os.path.exists(matfilename):
            print("****WARNING: PRECOMPUTED CREMA DOES NOT EXIST****")
            do_crema = False
        else:
            data = sio.loadmat(matfilename)
            fac = (float(sr) / 44100.0) * 4096.0 / hop_length
            times_orig = fac * np.arange(len(data['chord_bass']))
            times_new = np.arange(mfcc.shape[1])
            interp = scipy.interpolate.interp1d(times_orig,
                                                data['chord_pitch'].T,
                                                kind='nearest',
                                                fill_value='extrapolate')
            chord_pitch = interp(times_new)

    ## Step 4: Synchronize features to intervals
    n_frames = np.min([chroma.shape[1], mfcc.shape[1], tempogram.shape[1]])
    if do_crema:
        n_frames = min(n_frames, chord_pitch.shape[1])
    # median-aggregate chroma to suppress transients and passing tones
    intervals = librosa.util.fix_frames(intervals, x_min=0, x_max=n_frames)
    times = intervals * float(hop_length) / float(sr)

    chroma = librosa.util.sync(chroma, intervals, aggregate=np.median)
    chroma = chroma[:, :n_frames]
    mfcc = librosa.util.sync(mfcc, intervals)
    mfcc = mfcc[:, :n_frames]
    tempogram = librosa.util.sync(tempogram, intervals)
    tempogram = tempogram[:, :n_frames]
    if do_crema:
        chord_pitch = librosa.util.sync(chord_pitch, intervals)
        chord_pitch = chord_pitch[:, :n_frames]

    ## Step 5: Do a delay embedding and compute SSMs
    XChroma = librosa.feature.stack_memory(chroma,
                                           n_steps=wins_per_block,
                                           mode='edge').T
    DChroma = getCSMCosine(XChroma, XChroma)  #Cosine distance
    XMFCC = librosa.feature.stack_memory(mfcc,
                                         n_steps=wins_per_block,
                                         mode='edge').T
    DMFCC = getCSM(XMFCC, XMFCC)  #Euclidean distance
    XTempogram = librosa.feature.stack_memory(tempogram,
                                              n_steps=wins_per_block,
                                              mode='edge').T
    DTempogram = getCSM(XTempogram, XTempogram)
    if do_crema:
        XChordPitch = librosa.feature.stack_memory(chord_pitch,
                                                   n_steps=wins_per_block,
                                                   mode='edge').T
        DChordPitch = getCSMCosine(XChordPitch, XChordPitch)

    ## Step 6: Run similarity network fusion
    FeatureNames = ['MFCCs', 'Chromas']
    Ds = [DMFCC, DChroma, DTempogram]
    if do_crema:
        FeatureNames.append('Crema')
        Ds.append(DChordPitch)
    # Edge case: If it's too small, zeropad SSMs
    for i, Di in enumerate(Ds):
        if Di.shape[0] < 2 * K:
            D = np.zeros((2 * K, 2 * K))
            D[0:Di.shape[0], 0:Di.shape[1]] = Di
            Ds[i] = D
    pK = K
    if K == -1:
        pK = int(np.round(2 * np.log(Ds[0].shape[0]) / np.log(2)))
        print("Autotuned K = %i" % pK)
    # Do fusion on all features
    Ws = [getW(D, pK) for D in Ds]
    if REC_SMOOTH > 0:
        from scipy.ndimage import median_filter
        df = librosa.segment.timelag_filter(median_filter)
        Ws = [df(W, size=(1, REC_SMOOTH)) for W in Ws]

    WFused = doSimilarityFusionWs(Ws, K=pK, niters=niters, \
        reg_diag=reg_diag, reg_neighbs=reg_neighbs, \
        do_animation=do_animation, PlotNames=FeatureNames, \
        PlotExtents=[times[0], times[-1]])
    WsDict = {}
    for n, W in zip(FeatureNames, Ws):
        WsDict[n] = W
    WsDict['Fused'] = WFused
    # Do fusion with only Chroma and MFCC
    #WsDict['Fused MFCC/Chroma'] = doSimilarityFusionWs(Ws[0:2], K=pK, niters=niters, \
    #    reg_diag=reg_diag, reg_neighbs=reg_neighbs)
    if do_crema:
        # Do fusion with tempograms and Crema if Crema is available
        WsDict['Fused Tgram_Crema'] = doSimilarityFusionWs(Ws[2::], K=pK, niters=niters, \
            reg_diag=reg_diag, reg_neighbs=reg_neighbs)
        # Do fusion with MFCC and Crema
        WsDict['Fused MFCC_Crema'] = doSimilarityFusionWs([Ws[0], Ws[-1]], K=pK, niters=niters, \
            reg_diag=reg_diag, reg_neighbs=reg_neighbs)
        # Do fusion with MFCC, Chroma, and Crema
        WsDict['Fused MFCC_Chroma_Crema'] = doSimilarityFusionWs([Ws[0], Ws[1], Ws[-1]], K=pK, niters=niters, \
            reg_diag=reg_diag, reg_neighbs=reg_neighbs)
    if plot_result:
        plotFusionResults(WsDict, {}, {}, times, win_fac)
        plt.savefig("%s_Plot.png" % filename, bbox_inches='tight')
    return {'Ws': WsDict, 'times': times, 'K': pK}
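
A usage sketch for getFusedSimilarity (the audio path and parameter values are placeholders; the helper functions and module constants it relies on, e.g. getW, doSimilarityFusionWs, MANUAL_AUDIO_LOAD, REC_SMOOTH, are assumed to be defined as in the original project):

res = getFusedSimilarity('song.mp3', sr=22050, hop_length=512,
                         win_fac=10, wins_per_block=20, K=-1,
                         reg_diag=1.0, reg_neighbs=0.5, niters=10,
                         do_animation=False, plot_result=False,
                         do_crema=False)
print(res['Ws'].keys(), res['K'], len(res['times']))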
Example #37
def preprocess_wav_file(file_path_or_bytes, Y_numSlice):
    # returns 1 example (downsampled, cqt, normalized)
    np_array_list = []

    y, sr = auto_load(file_path_or_bytes, sr =None)
    y_downsample = librosa.resample(y, orig_sr=sr, target_sr=DOWNSAMPLED_SR)
    CQT_result = librosa.cqt(y_downsample, sr=DOWNSAMPLED_SR, hop_length=HOP_LENGTH, n_bins=NUM_BINS, bins_per_octave=BINS_PER_OCTAVE)
    CQT_result = np.absolute(CQT_result)
    np_array_list.append(CQT_result)

    # normalize data
    combined = np.concatenate(np_array_list, axis = 1)
    
    ####
    '''
    max_val = combined.max()
    min_val = combined.min()
    
    combined_norm = (combined - min_val) / (max_val - min_val)
    mean_per_label = np.mean(combined_norm, axis = 1)
    mean_per_label = np.reshape(mean_per_label, (-1, 1))
    
    for i in range(len(np_array_list)):
        np_array_list[i] = (np_array_list[i] - min_val) / (max_val - min_val)
        np_array_list[i] = np_array_list[i] - mean_per_label
        
    with h5py.File('minmax_meanlabel.h5', 'w') as h5f:
        h5f.create_dataset('min_max', data=[min_val, max_val], compression='gzip')
        h5f.create_dataset('mean_per_label', data=mean_per_label, compression='gzip')
    '''
    ########

    with h5py.File('sl_data/std/means_stds-nm.h5', 'r') as h5f:
        #cqt_result = np.divide(np.subtract(cqt_result, h5f['means']), h5f['stds'])
    
        mean = h5f['means'][:]#np.mean(combined, axis = 1, keepdims =True)
        std = h5f['stds'][:]#np.std(combined, axis = 1, keepdims=True)
    
    for i in range(len(np_array_list)):
        np_array_list[i] = np.divide(np.subtract(np_array_list[i], mean), std)
    '''    
    with h5py.File('means_stds.h5', 'w') as h5f:
        h5f.create_dataset('means', data=mean, compression='gzip')
        h5f.create_dataset('stds', data=std, compression='gzip')
    '''
    ####

    
    frame_windows_list = []
    numSlices_list = []
    for i in range(len(np_array_list)):
        CQT_result = np_array_list[i]
        # print (CQT_result.shape[0])
        # print ("====")
        # print (CQT_result.shape[1])
        paddedX = np.zeros((CQT_result.shape[0], CQT_result.shape[1] + WINDOW_SIZE - 1), dtype=float)
        pad_amount = WINDOW_SIZE / 2
        pad_amount = int(pad_amount)
        paddedX[:, pad_amount:-pad_amount] = CQT_result
        # print (paddedX[:, pad_amount:-pad_amount])
        frame_windows = np.array([paddedX[:, j:j+WINDOW_SIZE] for j in range(CQT_result.shape[1])])
        frame_windows = np.expand_dims(frame_windows, axis=3)
        
        if Y_numSlice is not None:
            numSlices = min(frame_windows.shape[0], Y_numSlice) #Y_numSlices[i])
        else:
            numSlices = frame_windows.shape[0]

        numSlices_list.append(numSlices)
        frame_windows_list.append(frame_windows[:numSlices])
    
    # return np.concatenate(frame_windows_list, axis=0), numSlices_list
    return frame_windows_list, numSlices_list
Example #38
                        type=int,
                        action='store',
                        help='Program of the instrument in the output MIDI')
    parser.add_argument('--min_midi',
                        default=24,
                        type=int,
                        action='store',
                        help='Minimum MIDI note to transcribe')
    parser.add_argument('--max_midi',
                        default=107,
                        type=int,
                        action='store',
                        help='Maximum MIDI note to transcribe')
    parser.add_argument('--threshold',
                        default=64,
                        type=int,
                        action='store',
                        help='Threshold to activate note on event, 0-127')

    parameters = vars(parser.parse_args(sys.argv[1:]))

    y, sr = librosa.load(parameters['input_audio'])
    min_midi, max_midi = parameters['min_midi'], parameters['max_midi']
    cqt = librosa.cqt(y, sr=sr, fmin=librosa.midi_to_hz(min_midi), n_bins=max_midi - min_midi)
    pr = cqt_to_piano_roll(cqt, min_midi, max_midi, parameters['threshold'])
    # get audio time
    audio_time = len(y) / sr
    # get sampling frequency of cqt spectrogram
    fs = pr.shape[1] / audio_time
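    # e.g. (illustrative): a 10 s clip at the default sr=22050 and hop 512 gives
    # ~431 piano-roll columns, so fs ≈ 43.1 columns per second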
    pm = piano_roll_to_pretty_midi(pr, fs=fs, program=parameters['program'])
    pm.write(parameters['output_midi'])
示例#39
0
def test_cqt_precision(y_cqt, sr_cqt, dtype):
    C = librosa.cqt(y=y_cqt, sr=sr_cqt, dtype=dtype)
    assert np.dtype(C.dtype) == np.dtype(dtype)
示例#40
0
def test_cqt_fail_short_late():

    y = np.zeros(16)
    librosa.cqt(y, sr=22050)
示例#41
0
    def __process_audio(self):
        """ The main audio processing routine for the thread.

        This routine uses Laplacian Segmentation to find and
        group similar beats in the song.

        This code has been adapted from the sample created by Brian McFee at
        https://librosa.github.io/librosa_gallery/auto_examples/plot_segmentation.html#sphx-glr-auto-examples-plot-segmentation-py
        and is based on his 2014 paper published at http://bmcfee.github.io/papers/ismir2014_spectral.pdf

        I have made some performance improvements, but the basic parts remain (mostly) unchanged
        """

        self.__report_progress(.1, "loading file and extracting raw audio")

        #
        # load the file as stereo with a high sample rate and
        # trim the silences from each end
        #

        y, sr = librosa.core.load(self.__filename, mono=False, sr=None)
        y, _ = librosa.effects.trim(y)

        self.duration = librosa.core.get_duration(y, sr)
        self.raw_audio = (y * np.iinfo(np.int16).max).astype(
            np.int16).T.copy(order='C')
        self.sample_rate = sr

        # after the raw audio bytes are saved, convert the samples to mono
        # because the beat detection algorithm in librosa requires it.

        y = librosa.core.to_mono(y)

        self.__report_progress(.2, "computing pitch data...")

        # Compute the constant-q chromagram for the samples.

        BINS_PER_OCTAVE = 12 * 3
        N_OCTAVES = 7

        cqt = librosa.cqt(y=y,
                          sr=sr,
                          bins_per_octave=BINS_PER_OCTAVE,
                          n_bins=N_OCTAVES * BINS_PER_OCTAVE)
        C = librosa.amplitude_to_db(np.abs(cqt), ref=np.max)
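        # with these settings C has 12 * 3 * 7 = 252 rows: three CQT bins per
        # semitone across seven octaves, expressed in dB relative to the peak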

        self.__report_progress(.3, "Finding beats...")

        ##########################################################
        # To reduce dimensionality, we'll beat-synchronize the CQT
        tempo, btz = librosa.beat.beat_track(y=y, sr=sr, trim=False)
        # tempo, btz = librosa.beat.beat_track(y=y, sr=sr)
        Csync = librosa.util.sync(C, btz, aggregate=np.median)
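        # Csync aggregates the CQT between consecutive beat frames (median per
        # interval), so it has roughly one column per beat instead of one per frame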

        self.tempo = tempo

        # For alignment purposes, we'll need the timing of the beats.
        # We use fix_frames to include the non-beat frames 0 and C.shape[1] (the final frame).
        beat_times = librosa.frames_to_time(librosa.util.fix_frames(
            btz, x_min=0, x_max=C.shape[1]),
                                            sr=sr)

        self.__report_progress(.4, "building recurrence matrix...")
        #####################################################################
        # Let's build a weighted recurrence matrix using beat-synchronous CQT
        # (Equation 1)
        # width=3 prevents links within the same bar
        # mode='affinity' here implements S_rep (after Eq. 8)
        R = librosa.segment.recurrence_matrix(Csync,
                                              width=3,
                                              mode='affinity',
                                              sym=True)

        # Enhance diagonals with a median filter (Equation 2)
        df = librosa.segment.timelag_filter(scipy.ndimage.median_filter)
        Rf = df(R, size=(1, 7))

        ###################################################################
        # Now let's build the sequence matrix (S_loc) using mfcc-similarity
        #
        #   :math:`R_\text{path}[i, i\pm 1] = \exp(-\|C_i - C_{i\pm 1}\|^2 / \sigma^2)`
        #
        # Here, we take :math:`\sigma` to be the median distance between successive beats.
        #
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        Msync = librosa.util.sync(mfcc, btz)

        path_distance = np.sum(np.diff(Msync, axis=1)**2, axis=0)
        sigma = np.median(path_distance)
        path_sim = np.exp(-path_distance / sigma)

        R_path = np.diag(path_sim, k=1) + np.diag(path_sim, k=-1)
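        # R_path is symmetric and tri-diagonal: path_sim sits on the first
        # off-diagonals, linking each beat only to its immediate neighbours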

        ##########################################################
        # And compute the balanced combination (Equations 6, 7, 9)

        deg_path = np.sum(R_path, axis=1)
        deg_rec = np.sum(Rf, axis=1)

        mu = deg_path.dot(deg_path + deg_rec) / np.sum((deg_path + deg_rec)**2)

        A = mu * Rf + (1 - mu) * R_path
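        # mu (Eq. 9) weights the combination so that the repetition graph Rf and
        # the local path graph R_path contribute comparable degree to each beat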

        #####################################################
        # Now let's compute the normalized Laplacian (Eq. 10)
        L = scipy.sparse.csgraph.laplacian(A, normed=True)

        # and its spectral decomposition
        _, evecs = scipy.linalg.eigh(L)

        # We can clean this up further with a median filter.
        # This can help smooth over small discontinuities
        evecs = scipy.ndimage.median_filter(evecs, size=(9, 1))

        # cumulative normalization is needed for the symmetrically normalized Laplacian eigenvectors
        Cnorm = np.cumsum(evecs**2, axis=1)**0.5
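        # Cnorm[i, k-1] is the norm of the first k eigenvector components of beat i;
        # dividing by it later (evecs[:, :k] / Cnorm[:, k-1:k]) projects each beat's
        # k-dimensional embedding onto the unit sphere before k-means clustering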

        # If we want k clusters, use the first k normalized eigenvectors.
        # Fun exercise: see how the segmentation changes as you vary k

        self.__report_progress(.5, "clustering...")

        # if a value for clusters wasn't passed in, then we need to auto-cluster

        if self.clusters == 0:

            # use the original auto-clustering algorithm if we've been asked to;
            # otherwise use the newer one that accounts for silhouette scores.

            if self._use_v1:
                self.clusters, seg_ids = self.__compute_best_cluster(
                    evecs, Cnorm)
            else:
                self.clusters, seg_ids = self.__compute_best_cluster_with_sil(
                    evecs, Cnorm)

        else:  # otherwise, just use the cluster value passed in
            k = self.clusters

            self.__report_progress(.51, "using %d clusters" % self.clusters)

            X = evecs[:, :k] / Cnorm[:, k - 1:k]
            seg_ids = sklearn.cluster.KMeans(n_clusters=k,
                                             max_iter=1000,
                                             random_state=0,
                                             n_init=1000).fit_predict(X)

        # Get the amplitudes and beat-align them
        self.__report_progress(.6, "getting amplitudes")

        # newer versions of librosa have renamed the rmse function

        if hasattr(librosa.feature, 'rms'):
            amplitudes = librosa.feature.rms(y=y)
        else:
            amplitudes = librosa.feature.rmse(y=y)

        ampSync = librosa.util.sync(amplitudes, btz)

        # create a list of tuples that include the ordinal position, the start time of the beat,
        # the cluster to which the beat belongs and the mean amplitude of the beat

        zbeat_tuples = zip(range(0, len(btz)), beat_times, seg_ids,
                           ampSync[0].tolist())
        beat_tuples = tuple(zbeat_tuples)

        info = []

        bytes_per_second = int(round(len(self.raw_audio) / self.duration))

        last_cluster = -1
        current_segment = -1
        segment_beat = 0

        for i in range(0, len(beat_tuples)):
            final_beat = {}
            final_beat['start'] = float(beat_tuples[i][1])
            final_beat['cluster'] = int(beat_tuples[i][2])
            final_beat['amplitude'] = float(beat_tuples[i][3])

            if final_beat['cluster'] != last_cluster:
                current_segment += 1
                segment_beat = 0
            else:
                segment_beat += 1

            final_beat['segment'] = current_segment
            final_beat['is'] = segment_beat

            last_cluster = final_beat['cluster']

            if i == len(beat_tuples) - 1:
                final_beat['duration'] = self.duration - final_beat['start']
            else:
                final_beat['duration'] = beat_tuples[i + 1][1] - beat_tuples[i][1]

            if ((final_beat['start'] * bytes_per_second) % 2 > 1.5):
                final_beat['start_index'] = int(
                    math.ceil(final_beat['start'] * bytes_per_second))
            else:
                final_beat['start_index'] = int(final_beat['start'] *
                                                bytes_per_second)

            final_beat['stop_index'] = int(
                math.ceil((final_beat['start'] + final_beat['duration']) *
                          bytes_per_second))

            # save pointers to the raw audio bytes for each beat alongside that beat.
            final_beat['buffer'] = self.raw_audio[
                final_beat['start_index']:final_beat['stop_index']]

            info.append(final_beat)

        self.__report_progress(.7, "truncating to fade point...")

        # get the average amplitude of the beats (the original max-based version
        # is kept below for reference)
        # max_amplitude = max([float(b['amplitude']) for b in info])
        max_amplitude = sum([float(b['amplitude']) for b in info]) / len(info)

        # assume that the fade point of the song is the last beat of the song that is >= 75% of
        # that average amplitude.

        self.max_amplitude = max_amplitude

        fade = len(info) - 1

        for b in reversed(info):
            if b['amplitude'] >= (.75 * max_amplitude):
                fade = info.index(b)
                break

        # truncate the beats to [start:fade + 1]
        beats = info[self.__start_beat:fade + 1]

        loop_bounds_begin = self.__start_beat

        self.__report_progress(.8, "computing final beat array...")

        # assign final beat ids
        for beat in beats:
            beat['id'] = beats.index(beat)
            beat['quartile'] = beat['id'] // (len(beats) / 4.0)

        # compute a coherent 'next' beat to play. This is always just the next ordinal beat
        # unless we're at the end of the song. Then it gets a little trickier.

        for beat in beats:
            if beat == beats[-1]:

                # if we're at the last beat, then we want to find a reasonable 'next' beat to play. It should (a) share the
                # same cluster, (b) be in a logical place in its measure, (c) be after the computed loop_bounds_begin, and
                # is in the first half of the song. If we can't find such an animal, then just return the beat
                # at loop_bounds_begin

                beat['next'] = next(
                    (b['id'] for b in beats
                     if b['cluster'] == beat['cluster'] and b['id'] %
                     4 == (beat['id'] + 1) % 4 and b['id'] <=
                     (.5 * len(beats)) and b['id'] >= loop_bounds_begin),
                    loop_bounds_begin)
            else:
                beat['next'] = beat['id'] + 1

            # find all the beats that (a) are in the same cluster as the NEXT ordinal beat, (b) share the same
            # position within their segment as the next ordinal beat, (c) are in the same place in the measure as the NEXT beat,
            # (d) but AREN'T the next beat, and (e) AREN'T in the same segment as the current beat.
            #
            # THAT collection of beats contains our jump candidates

            jump_candidates = [
                bx['id'] for bx in beats[loop_bounds_begin:]
                if (bx['cluster'] == beats[beat['next']]['cluster']) and (
                    bx['is'] == beats[beat['next']]['is']) and (
                        bx['id'] % 4 == beats[beat['next']]['id'] %
                        4) and (bx['segment'] != beat['segment']) and (
                            bx['id'] != beat['next'])
            ]

            if jump_candidates:
                beat['jump_candidates'] = jump_candidates
            else:
                beat['jump_candidates'] = []

        # save off the segment count

        self.segments = max([b['segment'] for b in beats]) + 1

        # we don't want to ever play past the point where it's impossible to loop,
        # so let's find the latest point in the song where there are still jump
        # candidates and make sure that we can't play past it.

        last_chance = len(beats) - 1

        for b in reversed(beats):
            if len(b['jump_candidates']) > 0:
                last_chance = beats.index(b)
                break

        # if we play our way to the last beat that has jump candidates, then just skip
        # to the earliest jump candidate rather than enter a section from which no
        # jumping is possible.

        beats[last_chance]['next'] = min(beats[last_chance]['jump_candidates'])

        # store the beats that start after the last jumpable point. That's
        # the outro to the song. We can use these
        # beats to create a sane ending for a fixed-length remix

        outro_start = last_chance + 1 + self.__start_beat

        if outro_start >= len(info):
            self.outro = []
        else:
            self.outro = info[outro_start:]

        #
        # This section of the code computes the play_vector -- a 1024*1024 beat length
        # remix of the current song.
        #

        random.seed()

        # how long should our longest contiguous playback blocks be? One way to
        # consider it is that higher bpm songs need longer blocks because
        # each beat takes less time. A simple way to estimate a good value
        # is to scale it by its distance from 120 bpm -- the canonical bpm
        # for popular music. Find that value and round down to the nearest
        # multiple of 4. (There are almost always 4 beats per measure in Western music.)

        max_sequence_len = int(round((self.tempo / 120.0) * 48.0))
        max_sequence_len = max_sequence_len - (max_sequence_len % 4)
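        # e.g. (illustrative): tempo = 96 bpm gives (96 / 120) * 48 = 38.4 -> 38 -> 36 beats;
        # tempo = 150 bpm gives (150 / 120) * 48 = 60 -> 60 beats per contiguous block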

        min_sequence = max(random.randrange(16, max_sequence_len, 4),
                           loop_bounds_begin)

        current_sequence = 0
        beat = beats[0]

        self.__report_progress(.9, "creating play vector")

        play_vector = []

        play_vector.append({
            'beat': 0,
            'seq_len': min_sequence,
            'seq_pos': current_sequence
        })

        # we want to keep a list of recently played segments so we don't accidentally wind up in a local loop
        #
        # the number of segments in a song will vary so we want to set the number of recents to keep
        # at 25% of the total number of segments. Eg: if there are 34 segments, then the depth will
        # be set at round(8.5) == 9.
        #
        # On the off chance that the (# of segments) *.25 < 1 we set a floor queue depth of 1

        recent_depth = int(round(self.segments * .25))
        recent_depth = max(recent_depth, 1)

        recent = collections.deque(maxlen=recent_depth)

        # keep track of the time since the last successful jump. If we go more than
        # 10% of the song length since our last jump, then we will prioritize an
        # immediate jump to a not recently played segment. Otherwise playback will
        # be boring for the listener. This also has the advantage of busting out of
        # local loops.

        max_beats_between_jumps = int(round(len(beats) * .1))
        beats_since_jump = 0
        failed_jumps = 0

        for i in range(0, 1024 * 1024):

            if beat['segment'] not in recent:
                recent.append(beat['segment'])

            current_sequence += 1

            # it's time to attempt a jump if we've played all the beats we wanted in the
            # current sequence. Also, if we've gone more than 10% of the length of the song
            # without jumping we need to immediately prioritize jumping to a non-recent segment.

            will_jump = (current_sequence == min_sequence) or (
                beats_since_jump >= max_beats_between_jumps)

            # since it's time to jump, let's find the most musically pleasing place
            # to go

            if (will_jump):

                # find the jump candidates that haven't been recently played
                non_recent_candidates = [
                    c for c in beat['jump_candidates']
                    if beats[c]['segment'] not in recent
                ]

                # if there aren't any good jump candidates, then we need to fall back
                # to another selection scheme.

                if len(non_recent_candidates) == 0:

                    beats_since_jump += 1
                    failed_jumps += 1

                    # suppose we've been trying to jump but couldn't find a good non-recent candidate. If
                    # the length of time we've been trying (and failing) is >= 10% of the song length
                    # then it's time to relax our criteria. Let's find the jump candidate that's furthest
                    # from the current beat (irrespective of whether it's been played recently) and go there. Ideally
                    # we'd like to jump to a beat that is not in the same quartile of the song as the currently
                    # playing section. That way we maximize our chances of avoiding a long local loop -- such as
                    # might be found in the section preceding the outro of a song.

                    non_quartile_candidates = [
                        c for c in beat['jump_candidates']
                        if beats[c]['quartile'] != beat['quartile']
                    ]

                    if (failed_jumps >=
                        (.1 * len(beats))) and (len(non_quartile_candidates) >
                                                0):

                        furthest_distance = max([
                            abs(beat['id'] - c)
                            for c in non_quartile_candidates
                        ])

                        jump_to = next(c for c in non_quartile_candidates
                                       if abs(beat['id'] -
                                              c) == furthest_distance)

                        beat = beats[jump_to]
                        beats_since_jump = 0
                        failed_jumps = 0

                    # uh oh! That fallback hasn't worked for yet ANOTHER 10%
                    # of the song length. Something is seriously broken. Time
                    # to punt and just start again from the first beat.

                    elif failed_jumps >= (.2 * len(beats)):
                        beats_since_jump = 0
                        failed_jumps = 0
                        beat = beats[loop_bounds_begin]

                    # assuming we're not in one of the failure modes but haven't found a good
                    # candidate that hasn't been recently played, just play the next beat in the
                    # sequence

                    else:
                        beat = beats[beat['next']]

                else:

                    # if it's time to jump and we have at least one good non-recent
                    # candidate, let's just pick randomly from the list and go there

                    beats_since_jump = 0
                    failed_jumps = 0
                    beat = beats[random.choice(non_recent_candidates)]

                # reset our sequence position counter and pick a new target length
                # between 16 and max_sequence_len, making sure it's evenly divisible by
                # 4 beats

                current_sequence = 0
                min_sequence = random.randrange(16, max_sequence_len, 4)

                # if we're in the place where we want to jump but can't because
                # we haven't found any good candidates, then set current_sequence equal to
                # min_sequence. During playback this will show up as having 00 beats remaining
                # until we next jump. That's the signal that we'll jump as soon as we possibly can.
                #
                # Code that reads play_vector and sees this value can choose to visualize this in some
                # interesting way.

                if beats_since_jump >= max_beats_between_jumps:
                    current_sequence = min_sequence

                # add an entry to the play_vector
                play_vector.append({
                    'beat': beat['id'],
                    'seq_len': min_sequence,
                    'seq_pos': current_sequence
                })
            else:

                # if we're not trying to jump then just add the next item to the play_vector
                play_vector.append({
                    'beat': beat['next'],
                    'seq_len': min_sequence,
                    'seq_pos': current_sequence
                })
                beat = beats[beat['next']]
                beats_since_jump += 1

        # save off the beats array and play_vector. Signal
        # the play_ready event (if it's been set)

        self.beats = beats
        self.play_vector = play_vector

        self.__report_progress(1.0, "finished processing")

        if self.play_ready:
            self.play_ready.set()
示例#42
0
for i in range(1, 5):
    COOKED_DIR = 'F:/项目/花城音乐项目/样式数据/ALL/旋律/' + path_index[i - 1] + '/'
    #savepath = 'F:\\mfcc_pic\\'+ str(i) +'\\'
    for root, dirs, files in os.walk(COOKED_DIR):
        print("Root = ", root, "dirs = ", dirs, "files = ", files)

        index = 0
        for filename in files:
            print(filename)
            if filename.find('wav') <= 0:
                continue
            else:
                index = index + 1
            path_one = COOKED_DIR + filename
            y, sr = load_and_trim(path_one)
            CQT = librosa.amplitude_to_db(librosa.cqt(y, sr=16000), ref=np.max)
            librosa.display.specshow(CQT)
            #plt.ylabel('Frequency')
            #plt.xlabel('Time(s)')
            #plt.show()
            fig = matplotlib.pyplot.gcf()
            fig.set_size_inches(4, 4)
            if "." in filename:
                Filename = filename.split(".")[0]
            plt.axis('off')
            plt.axes().get_xaxis().set_visible(False)
            plt.axes().get_yaxis().set_visible(False)
            #plt.rcParams['savefig.dpi'] = 300  # image pixel density
            #plt.figure(figsize=(10, 10))
            #plt.rcParams['figure.dpi'] = 300  # resolution
            if filename.find('标准') > 0:  # '标准' = 'standard' (reference recording)
示例#43
0
			num = int(song_t.split('.')[0])

		if year in [1960, 1965, 1970, 1975, 1980, 1985, 2000, 2005]:
			num = int(song[5:8])

		'''
		if num in arr_2015:
			print num,song,'already processed'
			continue
		'''

		y, sr = librosa.load(year_dir+song)


		C = librosa.amplitude_to_db(librosa.cqt(y=y, sr=sr,
		                                        bins_per_octave=BINS_PER_OCTAVE,
		                                        n_bins=N_OCTAVES * BINS_PER_OCTAVE),
		                            ref=np.max)


		tempo, beats = librosa.beat.beat_track(y=y, sr=sr, trim=False)
		Csync = librosa.util.sync(C, beats, aggregate=np.median)

		beat_times = librosa.frames_to_time(librosa.util.fix_frames(beats,
		                                                            x_min=0,
		                                                            x_max=C.shape[1]),
		                                    sr=sr)

		R = librosa.segment.recurrence_matrix(Csync, width=1, mode='affinity',
		                                      sym=True)
示例#44
0
def get_spec(y, sr):

    C = np.abs(librosa.cqt(y, sr=sr))
    return librosa.stft(y), C, sr
示例#45
0
def C(y, sr):
    return np.abs(librosa.cqt(y, sr=sr))
示例#46
0
def feature_examples(filepath):
    # example of various librosa features
    # please check [https://librosa.github.io/librosa/feature.html]
    threshold = 630000
    try:
        x, sr = librosa.load(filepath, sr=None, mono=True, duration=29.0)
        x = x.tolist()
        if len(x) < threshold:
            raise ValueError('song length is shorter than threshold')
        else:
            x = x[:threshold]
        x = np.array(x)

        # zero_crossing_rate
        # returns (1,t)
        f = librosa.feature.zero_crossing_rate(x,
                                               frame_length=2048,
                                               hop_length=512)

        cqt = np.abs(
            librosa.cqt(x,
                        sr=sr,
                        hop_length=512,
                        bins_per_octave=12,
                        n_bins=7 * 12,
                        tuning=None))
        assert cqt.shape[0] == 7 * 12
        assert np.ceil(
            len(x) / 512) <= cqt.shape[1] <= np.ceil(len(x) / 512) + 1
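        # e.g. with the 630000-sample threshold above, cqt.shape should be
        # (7 * 12, 1231) or (7 * 12, 1232), since ceil(630000 / 512) == 1231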

        # chroma_cqt
        # returns (n_chroma, t)
        f = librosa.feature.chroma_cqt(C=cqt, n_chroma=12, n_octaves=7)

        # chroma_cqt
        # returns (n_chroma, t)
        f = librosa.feature.chroma_cens(C=cqt, n_chroma=12, n_octaves=7)
        del cqt

        stft = np.abs(librosa.stft(x, n_fft=2048, hop_length=512))
        assert stft.shape[0] == 1 + 2048 // 2
        assert np.ceil(
            len(x) / 512) <= stft.shape[1] <= np.ceil(len(x) / 512) + 1
        del x

        # chroma_stft
        # returns (n_chroma, t)
        f = librosa.feature.chroma_stft(S=stft**2, n_chroma=12)

        # rmse
        # returns (1,t)
        f = librosa.feature.rmse(S=stft)

        # spectral_centroid
        # returns (1,t)
        f = librosa.feature.spectral_centroid(S=stft)

        # spectral_bandwidth
        # returns (1,t)
        f = librosa.feature.spectral_bandwidth(S=stft)

        # spectral_contrast
        # returns (n_bands+1, t)
        f = librosa.feature.spectral_contrast(S=stft, n_bands=6)

        # spectral_rolloff
        # returns (1,t)
        f = librosa.feature.spectral_rolloff(S=stft)

        # mfcc
        # returns (n_mfcc, t)
        mel = librosa.feature.melspectrogram(sr=sr, S=stft**2)
        del stft

        f = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=20)

    except Exception as e:
        print('{}: {}'.format(filepath, repr(e)))
D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
plt.subplot(4, 2, 1)
librosa.display.specshow(D, y_axis='linear')
plt.colorbar(format='%+2.0f dB')
plt.title('Linear-frequency power spectrogram')

# Or on a logarithmic scale

plt.subplot(4, 2, 2)
librosa.display.specshow(D, y_axis='log')
plt.colorbar(format='%+2.0f dB')
plt.title('Log-frequency power spectrogram')

# Or use a CQT scale

CQT = librosa.amplitude_to_db(np.abs(librosa.cqt(y, sr=sr)), ref=np.max)
plt.subplot(4, 2, 3)
librosa.display.specshow(CQT, y_axis='cqt_note')
plt.colorbar(format='%+2.0f dB')
plt.title('Constant-Q power spectrogram (note)')

plt.subplot(4, 2, 4)
librosa.display.specshow(CQT, y_axis='cqt_hz')
plt.colorbar(format='%+2.0f dB')
plt.title('Constant-Q power spectrogram (Hz)')

# Draw a chromagram with pitch classes

C = librosa.feature.chroma_cqt(y=y, sr=sr)
plt.subplot(4, 2, 5)
librosa.display.specshow(C, y_axis='chroma')
spt = []
ins = []
n = 0
for instrument, note in itertools.product(range(128), range(50)):
    y, sr = librosa.load('./Audio_Classification/output.wav',
                         sr=None,
                         offset=n,
                         duration=2.0)  # from n to n+2
    n += 2

    # adding white noise for data argumentation (0,1e-4,1e-3).
    for r in (0, 1e-4, 1e-3):
        ret = librosa.cqt(y +
                          ((np.random.rand(*y.shape) - 0.5) * r if r else 0),
                          sr,
                          hop_length=1024,
                          n_bins=24 * 7,
                          bins_per_octave=24)

        ret = np.abs(ret)
        spt.append(ret)  # save spectrogram as a numpy list
        ins.append((instrument,
                    38 + note))  # save instrument's number and note (labeling)
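        # note: (np.random.rand(*y.shape) - 0.5) * r gives uniform noise in
        # [-r/2, r/2]; r == 0 keeps the clean, un-augmented signal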

for note in range(46):
    y, sr = librosa.load('./Audio_Classification/output.wav',
                         sr=None,
                         offset=n,
                         duration=2.0)
    n += 2
def calc_cqt(x, fs=fs, hop_length=hop_length, n_bins=n_bins, mag_exp=mag_exp):
    C = librosa.cqt(x, sr=fs, hop_length=hop_length, fmin=None, n_bins=n_bins)
    C_mag = librosa.magphase(C)[0]**mag_exp
    CdB = librosa.core.amplitude_to_db(C_mag, ref=np.max)
    return CdB
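A minimal usage sketch for calc_cqt above (an editorial illustration; fs, hop_length, n_bins and mag_exp are module-level defaults that do not appear in the excerpt, so the values below are assumptions):

import numpy as np
import librosa

fs = 22050          # assumed sample rate
hop_length = 512    # assumed hop size
n_bins = 72         # assumed: six octaves at the default 12 bins per octave
mag_exp = 4         # assumed magnitude exponent

# two seconds of a 440 Hz sine as a stand-in input signal
t = np.linspace(0, 2.0, int(2.0 * fs), endpoint=False)
x = 0.5 * np.sin(2 * np.pi * 440.0 * t)

CdB = calc_cqt(x, fs=fs, hop_length=hop_length, n_bins=n_bins, mag_exp=mag_exp)
print(CdB.shape)  # (n_bins, n_frames): exponentiated magnitudes in dB relative to the peak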
示例#50
0
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

x, sr = librosa.load('test.wav')

C = librosa.cqt(
    x, sr=sr)  #, fmin=30, n_bins=16, bins_per_octave=2, hop_length=2**8)
# print (C[:,5])
# C[0:3,:] = 0
# C[4:8,:] = 0

librosa.display.specshow(librosa.amplitude_to_db(np.abs(C), ref=np.max),
                         sr=sr,
                         x_axis='time',
                         y_axis='cqt_note')
plt.colorbar(format='%+2.0f dB')
plt.title('Constant-Q power spectrum')
plt.tight_layout()

y = librosa.icqt(C, sr)  #,  fmin=30, bins_per_octave=2, hop_length=2**8)
librosa.output.write_wav('testOut.wav', y, sr, norm=True)

plt.show()
示例#51
0
def test_cqt_fail_short_early():

    # sampling rate is sufficiently above the top octave to trigger early downsampling
    y = np.zeros(16)
    librosa.cqt(y, sr=44100, n_bins=36)
示例#52
0
def get_note_with_cqt_rms(filename):
    y, sr = librosa.load(filename)
    rms = librosa.feature.rmse(y=y)[0]
    rms = [x / np.std(rms) for x in rms]
    time = librosa.get_duration(filename=filename)
    print("time is {}".format(time))
    CQT = librosa.amplitude_to_db(librosa.cqt(y, sr=16000), ref=np.max)
    w, h = CQT.shape
    print("w.h is {},{}".format(w, h))
    onsets_frames = get_real_onsets_frames_rhythm(y)
    CQT = np.where(CQT > -20, np.max(CQT), np.min(CQT))
    result = []
    last = 0
    is_ok = 0
    #print("max is {}".format(np.max(CQT)))
    for i in range(15, h - 10):
        is_ok = 0
        last_j = 100
        for j in range(w - 1, 15, -1):
            if CQT[j, i] == np.max(CQT) and CQT[j, i - 1] == np.min(CQT):
                if np.min(CQT[j, i:i + 5]) == np.max(CQT) and np.max(
                        CQT[j, i - 4:i - 1]) == np.min(CQT) and i - last > 5:
                    if np.min(CQT[j, i:i + 10]) == np.max(CQT) and np.mean(
                            CQT[j, i - 5:i - 1]) == np.min(CQT):
                        #print("3... is {},{},{}".format(CQT[j, i - 4:i - 3],CQT[j, i - 3:i-2],i))
                        is_ok += 2
                        break
                    if last_j - j > 10:
                        is_ok += 1
                        last_j = j
                elif np.min(CQT[j, i:i + 5]) == np.max(CQT) and is_ok == 1:
                    is_ok += 1
                # elif np.min(CQT[j, i+1:i + 2]) == np.max(CQT):
                #     result.append(i)
        if rms[i + 1] > rms[i] and is_ok > 1:
            if len(result) == 0:
                result.append(i)
                last = i
            elif i - result[-1] > 10:
                result.append(i)
                last = i
        elif rms[i + 1] - rms[i - 1] > 0.75 and i > 50 and i < len(rms) - 45:
            if len(result) == 0:
                result.append(i)
                last = i
            elif i - result[-1] > 8:
                result.append(i)
                last = i

    rms_on_frames = [rms[x] for x in result]
    mean_rms_on_frames = np.mean(rms_on_frames)
    onstm = librosa.frames_to_time(result, sr=sr)

    #print("result is {}".format(result))
    longest_note = []
    for i in range(len(result)):
        x = result[i]
        if i < len(result) - 1:
            next_frame = result[i + 1]
        else:
            next_frame = min(result[-1] + 20, CQT.shape[1])
        note_line = get_note_line_by_block_for_frames(x, CQT)
        #print("x,note_line is {},{}".format(x,note_line))
        longest_note_line = find_the_longest_note_line(x, next_frame, CQT)
        longest_note.append(longest_note_line)
        #print("x,longest_note_line is {},{}".format(x, longest_note_line))
    #print("longest_note is {}".format(longest_note))
    # CQT[:,onsets_frames[1]:h] = -100
    plt.subplot(3, 1, 1)
    total_frames_number = get_total_frames_number(filename)
    #print("total_frames_number is {}".format(total_frames_number))
    # librosa.display.specshow(CQT)
    CQT, base_notes = add_base_note_to_cqt_for_filename_by_base_notes(
        filename, result, result[0], CQT, longest_note)
    base_notes = [
        x + int(np.mean(longest_note) - np.mean(base_notes))
        for x in base_notes
    ]
    #print("base_notes is {}".format(base_notes))
    librosa.display.specshow(CQT, y_axis='cqt_note', x_axis='time')
    print(np.max(y))
    # onstm = librosa.frames_to_time(onsets_frames, sr=sr)
    plt.vlines(onstm, 0, sr, color='y', linestyle='solid')

    plt.subplot(3, 1, 2)

    plt.text(onstm[0], 1, result[0])
    max_rms = np.max(rms)
    # rms = np.diff(rms)
    times = librosa.frames_to_time(np.arange(len(rms)))
    # rms_on_onset_frames_cqt = [rms[x] for x in onset_frames_cqt]
    # min_rms_on_onset_frames_cqt = np.min(rms_on_onset_frames_cqt)
    # rms = [1 if x >=min_rms_on_onset_frames_cqt else 0 for x in rms]
    plt.plot(times, rms)
    plt.axhline(mean_rms_on_frames, color='r')
    # plt.axhline(min_rms_on_onset_frames_cqt)

    # plt.vlines(onsets_frames_rms_best_time, 0,np.max(rms), color='y', linestyle='solid')
    plt.vlines(onstm, 0, np.max(rms), color='y', linestyle='solid')
    # plt.vlines(base_onsets, 0, np.max(rms), color='r', linestyle='solid')
    plt.xlim(0, np.max(times))

    plt.subplot(3, 1, 3)
    librosa.display.waveplot(y, sr=sr)

    return plt
示例#53
0
def test_griffinlim_cqt(
    y_chirp,
    hop_length,
    window,
    use_length,
    over_sample,
    fmin,
    res_type,
    pad_mode,
    scale,
    momentum,
    init,
    random_state,
    dtype,
):

    if use_length:
        length = len(y_chirp)
    else:
        length = None

    sr = 22050
    bins_per_octave = 12 * over_sample
    n_bins = 6 * bins_per_octave
    C = librosa.cqt(
        y_chirp,
        sr=sr,
        hop_length=hop_length,
        window=window,
        fmin=fmin,
        bins_per_octave=bins_per_octave,
        n_bins=n_bins,
        scale=scale,
        pad_mode=pad_mode,
        res_type=res_type,
    )

    Cmag = np.abs(C)

    y_rec = librosa.griffinlim_cqt(
        Cmag,
        hop_length=hop_length,
        window=window,
        sr=sr,
        fmin=fmin,
        bins_per_octave=bins_per_octave,
        scale=scale,
        pad_mode=pad_mode,
        n_iter=3,
        momentum=momentum,
        random_state=random_state,
        length=length,
        res_type=res_type,
        init=init,
        dtype=dtype,
    )

    y_inv = librosa.icqt(
        Cmag,
        sr=sr,
        fmin=fmin,
        hop_length=hop_length,
        window=window,
        bins_per_octave=bins_per_octave,
        scale=scale,
        length=length,
        res_type=res_type,
    )

    # First check for length
    if use_length:
        assert len(y_rec) == length

    assert y_rec.dtype == dtype

    # Check that the data is okay
    assert np.all(np.isfinite(y_rec))
示例#54
0
def compute_features(tid):

    features = pd.Series(index=columns(), dtype=np.float32, name=tid)

    # Catch warnings as exceptions (audioread leaks file descriptors).
    warnings.filterwarnings('error', module='librosa')

    def feature_stats(name, values):
        features[name, 'mean'] = np.mean(values, axis=1)
        features[name, 'std'] = np.std(values, axis=1)
        features[name, 'skew'] = stats.skew(values, axis=1)
        features[name, 'kurtosis'] = stats.kurtosis(values, axis=1)
        features[name, 'median'] = np.median(values, axis=1)
        features[name, 'min'] = np.min(values, axis=1)
        features[name, 'max'] = np.max(values, axis=1)

    try:
        filepath = utils.get_audio_path(os.environ.get('AUDIO_DIR'), tid)
        x, sr = librosa.load(filepath, sr=None, mono=True)  # kaiser_fast

        f = librosa.feature.zero_crossing_rate(x, frame_length=2048, hop_length=512)
        feature_stats('zcr', f)

        cqt = np.abs(librosa.cqt(x, sr=sr, hop_length=512, bins_per_octave=12,
                                 n_bins=7*12, tuning=None))
        assert cqt.shape[0] == 7 * 12
        assert np.ceil(len(x)/512) <= cqt.shape[1] <= np.ceil(len(x)/512)+1

        f = librosa.feature.chroma_cqt(C=cqt, n_chroma=12, n_octaves=7)
        feature_stats('chroma_cqt', f)
        f = librosa.feature.chroma_cens(C=cqt, n_chroma=12, n_octaves=7)
        feature_stats('chroma_cens', f)
        f = librosa.feature.tonnetz(chroma=f)
        feature_stats('tonnetz', f)

        del cqt
        stft = np.abs(librosa.stft(x, n_fft=2048, hop_length=512))
        assert stft.shape[0] == 1 + 2048 // 2
        assert np.ceil(len(x)/512) <= stft.shape[1] <= np.ceil(len(x)/512)+1
        del x

        f = librosa.feature.chroma_stft(S=stft**2, n_chroma=12)
        feature_stats('chroma_stft', f)

        f = librosa.feature.rmse(S=stft)
        feature_stats('rmse', f)

        f = librosa.feature.spectral_centroid(S=stft)
        feature_stats('spectral_centroid', f)
        f = librosa.feature.spectral_bandwidth(S=stft)
        feature_stats('spectral_bandwidth', f)
        f = librosa.feature.spectral_contrast(S=stft, n_bands=6)
        feature_stats('spectral_contrast', f)
        f = librosa.feature.spectral_rolloff(S=stft)
        feature_stats('spectral_rolloff', f)

        mel = librosa.feature.melspectrogram(sr=sr, S=stft**2)
        del stft
        f = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=20)
        feature_stats('mfcc', f)

    except Exception as e:
        print('{}: {}'.format(tid, repr(e)))

    return features
示例#55
0
y = librosa.resample(y_orig, sr_orig, sr)
print("Resampled y and sr: ", len(y), sr)

IPython.display.Audio(data=y, rate=sr)

print(librosa.samples_to_time(len(y), sr), "sec")

# Spectral Representations
D = librosa.stft(y)
print(D.shape, D.dtype)

S, phase = librosa.magphase(D)
print(S.dtype, phase.dtype, np.allclose(D, S * phase))

# Constant-Q Transform
C = librosa.cqt(y, sr=sr)
print(C.shape, C.dtype)

#
# librosa.feature
#

melspec = librosa.feature.melspectrogram(y=y, sr=sr)
melspec_stft = librosa.feature.melspectrogram(S=S**2, sr=sr)
print(np.allclose(melspec, melspec_stft))

chroma = librosa.feature.chroma_stft(y=y, sr=sr)

#
# librosa.display
#
示例#56
0
                     offset=5, duration=25)
#y, sr = librosa.load('/home/bmcfee/working/Battles - Tonto-it1CCNCHPc0.mp3',
#                     offset=300, duration=30)
#y, sr = librosa.load('/home/bmcfee/working/Conlon Nancarrow, Study for Player Piano No. 21 (Canon X)-f2gVhBxwRqg.m4a',
#                     duration=30, offset=60)


# In[787]:

over_sample = 3
res_factor = 1
C = librosa.cqt(y,
                sr=sr,
                hop_length=hop_length,  # assumes a hop_length variable defined earlier in the notebook
                bins_per_octave=int(12*over_sample),
                n_bins=int(8 * 12 * over_sample),
                real=False,
                filter_scale=res_factor, 
                fmin=librosa.note_to_hz('C1'),
                scale=True)


# # TODO
# 
# - Implement `scale` for icqt

# In[789]:

y2 = icqt(C, sr=sr,
          hop_length=hop_length,  # same assumed hop_length as above
          bins_per_octave=int(12 * over_sample),
示例#57
0
import matplotlib.pyplot as plt
import IPython.display as ipd
import numpy as np

import librosa.display

sr = 22050  # sample rate
T = 2.0  # seconds
t = np.linspace(0, T, int(T * sr), endpoint=False)  # time variable
x = 0.5 * np.sin(2 * np.pi * 440 * t)  # pure sine wave at 440 Hz
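# note: the synthesized sine above is only a warm-up signal; x and sr are
# immediately overwritten by the file load below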

x, sr = librosa.load("audio/piano.wav")

fmin = librosa.midi_to_hz(36)
hop_length = 512
C = librosa.cqt(x, sr=sr, fmin=fmin, n_bins=72, hop_length=hop_length)

logC = librosa.amplitude_to_db(np.abs(C))
plt.figure(figsize=(15, 5))
librosa.display.specshow(logC,
                         sr=sr,
                         x_axis='time',
                         y_axis='cqt_note',
                         fmin=fmin,
                         cmap='coolwarm')

chromagram = librosa.feature.chroma_stft(x, sr=sr, hop_length=hop_length)
plt.figure(figsize=(15, 5))
librosa.display.specshow(chromagram,
                         x_axis='time',
                         y_axis='chroma',
示例#58
0
    audio = audio[indices[0]:indices[-1]] if indices.size else audio[0:0]

    return audio, sr


#y, sr = librosa.load(filename)
y, sr = load_and_trim(filename)

chroma_orig = librosa.feature.chroma_cqt(y=y, sr=sr)

# For display purposes, let's zoom in on a 15-second chunk from the middle of the song
#idx = tuple([slice(None), slice(*list(librosa.time_to_frames([45, 60])))])

# And for comparison, we'll show the CQT matrix as well.
#C = np.abs(librosa.cqt(y=y, sr=sr, bins_per_octave=12*3, n_bins=7*12*3))
C = np.abs(librosa.cqt(y=y, sr=sr))

plt.figure(figsize=(12, 4))
plt.subplot(2, 1, 1)
librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max),
                         y_axis='cqt_note')
plt.colorbar()
plt.subplot(2, 1, 2)
librosa.display.specshow(chroma_orig, y_axis='chroma')
plt.colorbar()
plt.ylabel('Original')
plt.tight_layout()

chroma_os = librosa.feature.chroma_cqt(y=y, sr=sr)

plt.figure(figsize=(12, 4))
life_on_mars_fname = "/Users/bgeelen/Music/iTunes/iTunes Media/Music/Compilations/Life on Mars/02 Life on Mars_.mp3"
# life_on_mars_fname = '/Users/bgeelen/Downloads/Tones and I - Dance Monkey (Lyrics).mp3'
life_on_mars_wav, sr = librosa.load(life_on_mars_fname)
my_funny_valentine_fname = "/Users/bgeelen/Music/iTunes/iTunes Media/Music/Chet Baker/Chet Baker Sings/10 My Funny Valentine.mp3"

#%%
chromae = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
n_octaves = 5
n_bins = 12 * n_octaves + 1
hop_length = 128
sparsity = 0.9
fmin = librosa.note_to_hz("C2")
C = librosa.cqt(
    life_on_mars_wav,
    sr=sr,
    hop_length=hop_length,
    fmin=fmin,
    sparsity=sparsity,
    n_bins=n_bins,
)
abs_C = np.abs(C)
log_C = np.log(abs_C + 0.01)
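# the 0.01 offset keeps log() finite for near-silent bins; librosa.amplitude_to_db
# would be the more conventional way to get a dB-scaled view of abs_C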

#%%
h, w = C.shape

plt.figure(figsize=(7.5, 3), dpi=300)
plt.imshow(log_C[:, :10 * sr // hop_length],
           interpolation="nearest",
           aspect="auto")
plt.gca().invert_yaxis()
示例#60
0
            #
            # plz = cutter * sr
            # whatthe = y.shape
            #
            # if y.size > cutter * sr:
            #     y = y[0:sr]

            # Normalize
            y = librosa.util.normalize(y, norm=1)  # normalize returns a new array; assign it back

            # Let's make a spectrogram (freq, power)
            Spec = librosa.amplitude_to_db(abs(librosa.stft(y, n_fft=2048)),
                                           ref=np.max)

            # Let's make a CQT
            C = librosa.amplitude_to_db(abs(librosa.cqt(y, sr=sr)))

            # Let's make and display a mel-scaled power (energy-squared) spectrogram
            S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)

            # Convert to log scale (dB). We'll use the peak power (max) as reference.
            log_S = librosa.power_to_db(S, ref=np.max)

            # Next, we'll extract the top 13 Mel-frequency cepstral coefficients (MFCCs)
            mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=13)

            # Let's pad on the first and second deltas while we're at it
            delta_mfcc = librosa.feature.delta(mfcc)
            delta2_mfcc = librosa.feature.delta(mfcc, order=2)

            if len(Spec[0]) == len(C[0]) == len(S[0]) == len(log_S[0]) == len(