Example 1
def get_key(file_in):
    """
    Estimates the key and scale for an audio file.
    """
    loader = streaming.MonoLoader(filename=file_in)
    framecutter = streaming.FrameCutter()
    windowing = streaming.Windowing(type="blackmanharris62")
    spectrum = streaming.Spectrum()
    spectralpeaks = streaming.SpectralPeaks(orderBy="magnitude",
                                            magnitudeThreshold=1e-05,
                                            minFrequency=40,
                                            maxFrequency=5000,
                                            maxPeaks=10000)
    pool = Pool()
    hpcp = streaming.HPCP()
    key = streaming.Key()

    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> spectralpeaks.spectrum
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    hpcp.hpcp >> key.pcp
    key.key >> (pool, 'tonal.key_key')
    key.scale >> (pool, 'tonal.key_scale')
    key.strength >> (pool, 'tonal.key_strength')

    run(loader)

    return Key(pool['tonal.key_key'], pool['tonal.key_scale'])
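A minimal usage sketch for this snippet, assuming the usual Essentia imports; the Key result container and the file path are assumptions, since the surrounding module is not shown:
import collections
import essentia.streaming as streaming
from essentia import Pool, run

# Hypothetical result container matching the call at the end of get_key()
Key = collections.namedtuple('Key', ['key', 'scale'])

result = get_key('track.wav')  # placeholder path
print(result.key, result.scale)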
Example 2
    def estimate_chroma(self, uid):
        loader = esstr.MonoLoader(
            filename=self.audio_path_extractor.audio_path_name(uid))
        framecutter = esstr.FrameCutter(hopSize=self.hop_size,
                                        frameSize=self.frame_size)
        windowing = esstr.Windowing(type="blackmanharris62")
        spectrum = esstr.Spectrum()
        spectralpeaks = esstr.SpectralPeaks(orderBy="magnitude",
                                            magnitudeThreshold=1e-05,
                                            minFrequency=40,
                                            maxFrequency=5000,
                                            maxPeaks=10000)
        hpcp = esstr.HPCP(size=12,
                          referenceFrequency=self.tuning_freq,
                          harmonics=8,
                          bandPreset=True,
                          minFrequency=float(40),
                          maxFrequency=float(5000),
                          bandSplitFrequency=500.0,
                          weightType="cosine",
                          nonLinear=True,
                          windowSize=1.0)
        """
        hpcp = esstr.HPCP(
            size=12,
            referenceFrequency = tuningFreq,
            harmonics = 8,
            bandPreset = True,
            minFrequency = 40.0,
            maxFrequency = 5000.0,
            bandSplitFrequency = 250.0,
            weightType = "cosine",
            nonLinear = False,
            windowSize = 1.0)
        """
        pool = essentia.Pool()
        # connect algorithms together
        loader.audio >> framecutter.signal
        framecutter.frame >> windowing.frame >> spectrum.frame
        spectrum.spectrum >> spectralpeaks.spectrum
        spectrum.spectrum >> (pool, 'spectrum.magnitude')
        spectralpeaks.magnitudes >> hpcp.magnitudes
        spectralpeaks.frequencies >> hpcp.frequencies
        hpcp.hpcp >> (pool, 'chroma.hpcp')

        essentia.run(loader)
        # HPCP index 0 corresponds to A (the reference pitch class); roll by -3 so index 0 is C
        chroma = pool['chroma.hpcp']
        chroma = np.roll(chroma, shift=-3, axis=1)
        return chroma
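This method presumably relies on module-level imports along these lines; the class attributes frame_size, hop_size, tuning_freq, and audio_path_extractor are set elsewhere and not shown:
import essentia
import essentia.streaming as esstr
import numpy as np

# Each row of the returned array is a 12-bin, C-based chroma vector; frame i starts
# roughly at i * hop_size / 44100.0 seconds (44100 Hz is MonoLoader's default rate).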
Example 3
    def tonalAnalysis(self, signal):
        vectorinput = ess.VectorInput(np.single(signal))
        framecutter = ess.FrameCutter(frameSize=4096,
                                      hopSize=2048,
                                      silentFrames='noise')
        windowing = ess.Windowing(type='blackmanharris62')
        spectrum = ess.Spectrum()
        spectralpeaks = ess.SpectralPeaks(orderBy='frequency',
                                          magnitudeThreshold=1e-5,
                                          minFrequency=20,
                                          maxFrequency=3500,
                                          maxPeaks=60)

        dissonance = ess.Dissonance()
        tuning_frequency = ess.TuningFrequency()
        inharmonicity = ess.Inharmonicity()

        # Use pool to store data
        pool = essentia.Pool()

        # Connect streaming algorithms
        vectorinput.data >> framecutter.signal
        framecutter.frame >> windowing.frame >> spectrum.frame
        spectrum.spectrum >> spectralpeaks.spectrum
        spectralpeaks.magnitudes >> dissonance.magnitudes
        spectralpeaks.frequencies >> dissonance.frequencies
        spectralpeaks.magnitudes >> tuning_frequency.magnitudes
        spectralpeaks.frequencies >> tuning_frequency.frequencies
        spectralpeaks.magnitudes >> inharmonicity.magnitudes
        spectralpeaks.frequencies >> inharmonicity.frequencies

        dissonance.dissonance >> (pool, 'tonal.dissonance')
        inharmonicity.inharmonicity >> (pool, 'tonal.inharmonicity')
        tuning_frequency.tuningFrequency >> (pool, 'tonal.tuningFrequency')
        tuning_frequency.tuningCents >> (pool, 'tonal.tuningCents')

        # Run streaming network
        essentia.run(vectorinput)

        return (pool['tonal.dissonance'], pool['tonal.inharmonicity'],
                pool['tonal.tuningFrequency'])
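The method returns per-frame values; a small sketch of reducing them to track-level descriptors, where analyzer and signal are hypothetical placeholders for an instance holding this method and a mono float array:
import numpy as np

diss, inharm, tuning = analyzer.tonalAnalysis(signal)
print(np.mean(diss), np.mean(inharm), np.atleast_1d(tuning)[-1])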
Example 4
]  # closes a truncated `names = [...]` list of genre folder names (start of list not shown)

for name in names:

    for i in range(10):
        song_name = name + '.' + '0000' + str(i) + '.au'
        print(song_name)
        loader = ess.MonoLoader(filename="genres/" + name + "/" + song_name)
        framecutter = ess.FrameCutter(frameSize=4096,
                                      hopSize=2048,
                                      silentFrames='noise')
        windowing = ess.Windowing(type='blackmanharris62')
        spectrum = ess.Spectrum()
        spectralpeaks = ess.SpectralPeaks(orderBy='magnitude',
                                          magnitudeThreshold=0.00001,
                                          minFrequency=20,
                                          maxFrequency=3500,
                                          maxPeaks=60)

        # Use default HPCP parameters
        hpcp = ess.HPCP()

        # Use pool to store data
        pool = essentia.Pool()

        # Connect streaming algorithms
        loader.audio >> framecutter.signal
        framecutter.frame >> windowing.frame >> spectrum.frame
        spectrum.spectrum >> spectralpeaks.spectrum
        spectralpeaks.magnitudes >> hpcp.magnitudes
        spectralpeaks.frequencies >> hpcp.frequencies
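        # The listing appears to be cut off here; a plausible continuation, following
        # the pattern of the other examples (not part of the original snippet):
        hpcp.hpcp >> (pool, 'chroma.hpcp')
        essentia.run(loader)
        print(pool['chroma.hpcp'].shape)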
Example 5
soundfiles = os.listdir(audio_folder)
if '.DS_Store' in soundfiles:
    soundfiles.remove('.DS_Store')

print "\nANALYSIS..."
for item in soundfiles:
    loader = estr.MonoLoader(filename=audio_folder+'/'+item, 
                             sampleRate=sample_rate)
    framecutter = estr.FrameCutter(frameSize=window_size, 
                                   hopSize=hop_size)
    windowing = estr.Windowing(size=window_size, 
                               type=window_type)
    spectrum = estr.Spectrum(size=window_size)
    spectralpeaks = estr.SpectralPeaks(magnitudeThreshold=magnitude_threshold, 
                                       minFrequency=min_frequency, 
                                       maxFrequency=max_frequency, 
                                       maxPeaks=max_peaks,
                                       sampleRate=sample_rate)      
    hpcp = estr.HPCP(bandPreset=band_preset,
                     harmonics = harmonics,
                     minFrequency=min_frequency, 
                     maxFrequency=max_frequency,
                     nonLinear=non_linear,
                     normalized=normalize,
                     referenceFrequency=reference_frequency,
                     sampleRate=sample_rate,
                     weightType=weight_type,
                     windowSize=weight_window_size)
    pool = e.Pool()
    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
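    # The listing appears to be cut off here; a plausible continuation, mirroring the
    # other examples and assuming `e` is the essentia module (as e.Pool() suggests):
    spectrum.spectrum >> spectralpeaks.spectrum
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    hpcp.hpcp >> (pool, 'chroma.hpcp')
    e.run(loader)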
Example 6
def compute_features(path, f_mfcc_kl, f_mfcc_euclid, f_notes, f_chroma, f_bh):
    gc.enable()
    # Load the audio with essentia standard (MonoLoader resamples if the file's
    # sample rate differs from fs)
    try:
        audio = es.MonoLoader(filename=path, sampleRate=fs)()
    except Exception:
        print("Erroneous file detected by essentia standard: skipping!")
        # return order: bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl
        return 0, [], 0, 0, [], [], [], [], [], []
    # Streaming loader for the chroma/key network below (also resamples if needed)
    try:
        loader = ess.MonoLoader(filename=path, sampleRate=44100)
    except Exception:
        print("Erroneous file detected by essentia streaming: skipping!")
        return 0, [], 0, 0, [], [], [], [], [], []
    #Initialize algorithms we will use
    frameSize = 4096  #512
    hopSize = 2048  #256
    #######################################
    # Filtering: the commented-out lines below show the same band-pass chain with
    # essentia standard (not applied to the MFCC step); the streaming HighPass/LowPass
    # created after them are connected into the chroma/key network further down
    #HP = es.HighPass(cutoffFrequency=128)
    #LP = es.LowPass(cutoffFrequency=4096)
    #lp_f = LP(audio)
    #hp_f = HP(lp_f)
    #audio = hp_f
    #MonoWriter(filename='music/filtered.wav')(filtered_audio)
    HP = ess.HighPass(cutoffFrequency=128)
    LP = ess.LowPass(cutoffFrequency=4096)
    #loader = ess.MonoLoader(filename=path, sampleRate=44100)
    #writer = ess.MonoWriter(filename='music/filtered.wav')
    #frameCutter = FrameCutter(frameSize = 1024, hopSize = 512)
    #pool = essentia.Pool()
    # Connect streaming algorithms
    #loader.audio >> HP.signal
    #HP.signal >> LP.signal
    #LP.signal >> writer.audio
    # Run streaming network
    #essentia.run(loader)
    bpm = 0
    histogram = 0
    key = 0
    scale = 0
    notes = 0
    chroma_matrix = 0
    mean = 0
    cov = 0
    var = 0
    cov_kl = 0
    #####################################
    # extract mfcc
    #####################################
    if f_mfcc_kl == 1 or f_mfcc_euclid == 1:
        #features, features_frames = es.MusicExtractor(analysisSampleRate=44100, mfccStats=['mean', 'cov'])(path)
        #m, n = features['lowlevel.mfcc.cov'].shape
        #print m
        #iu1 = np.triu_indices(m)
        #cov = features['lowlevel.mfcc.cov'][iu1]
        #mean = features['lowlevel.mfcc.mean']
        #print(features['lowlevel.mfcc.cov'])
        hamming_window = es.Windowing(type='hamming')
        spectrum = es.Spectrum()  # we just want the magnitude spectrum
        mfcc = es.MFCC(numberCoefficients=13)
        frame_sz = 2048  #512
        hop_sz = 1024  #256
        mfccs = np.array([
            mfcc(spectrum(hamming_window(frame)))[1] for frame in
            es.FrameGenerator(audio, frameSize=frame_sz, hopSize=hop_sz)
        ])
        #Let's scale the MFCCs such that each coefficient dimension has zero mean and unit variance:
        #mfccs = sklearn.preprocessing.scale(mfccs)
        #print mfccs.shape
        mean = np.mean(mfccs.T, axis=1)
        #print(mean)
        var = np.var(mfccs.T, axis=1)
        #print(var)
        cov = np.cov(mfccs.T)
        cov_kl = cov  #.flatten()
        #get only upper triangular matrix values to shorten length
        iu1 = np.triu_indices(13)
        cov = cov[iu1]
        #plt.imshow(mfccs.T, origin='lower', aspect='auto', interpolation='nearest')
        #plt.ylabel('MFCC Coefficient Index')
        #plt.xlabel('Frame Index')
        #plt.colorbar()
    #####################################
    # extract beat features and histogram
    #####################################
    if f_bh == 1 or f_chroma == 1 or f_notes == 1:
        # Compute beat positions and BPM
        rhythm_extractor = es.RhythmExtractor2013(method="multifeature")
        bpm, beats, beats_confidence, _, beats_intervals = rhythm_extractor(
            audio)
        if f_bh == 1:
            (peak1_bpm, peak1_weight, peak1_spread,
             peak2_bpm, peak2_weight, peak2_spread,
             histogram) = es.BpmHistogramDescriptors()(beats_intervals)
        tempo = bpm
        times = beats
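        # Convert beat positions from seconds to frame indices of the streaming
        # chroma network below (fs samples per second, hopSize samples per frame)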
        beats_frames = (beats * fs) / hopSize
        beats_frames = beats_frames.astype(int)

        #fig, ax = plt.subplots()
        #ax.bar(range(len(histogram)), histogram, width=1)
        #ax.set_xlabel('BPM')
        #ax.set_ylabel('Frequency')
        #plt.title("BPM histogram")
        #ax.set_xticks([20 * x + 0.5 for x in range(int(len(histogram) / 20))])
        #ax.set_xticklabels([str(20 * x) for x in range(int(len(histogram) / 20))])
        #plt.show()

    #####################################
    # extract full beat aligned chroma
    #####################################

    framecutter = ess.FrameCutter(frameSize=frameSize,
                                  hopSize=hopSize,
                                  silentFrames='noise')
    windowing = ess.Windowing(type='blackmanharris62')
    spectrum = ess.Spectrum()
    spectralpeaks = ess.SpectralPeaks(orderBy='magnitude',
                                      magnitudeThreshold=0.00001,
                                      minFrequency=20,
                                      maxFrequency=3500,
                                      maxPeaks=60)
    # Use default HPCP parameters for the plots; key estimation below needs
    # higher resolution and custom parameters
    hpcp = ess.HPCP()
    hpcp_key = ess.HPCP(
        size=36,  # we will need higher resolution for Key estimation
        referenceFrequency=440,  # assume a tuning frequency of 440 Hz
        bandPreset=False,
        minFrequency=20,
        maxFrequency=3500,
        weightType='cosine',
        nonLinear=False,
        windowSize=1.)
    key = ess.Key(
        profileType='edma',  # Use profile for electronic music
        numHarmonics=4,
        pcpSize=36,
        slope=0.6,
        usePolyphony=True,
        useThreeChords=True)
    # Use pool to store data
    pool = essentia.Pool()
    # Connect streaming algorithms
    ###################################
    # USE FILTER - comment next lines in
    loader.audio >> HP.signal
    HP.signal >> LP.signal
    LP.signal >> framecutter.signal
    ###################################
    ###################################
    # NO FILTER - comment next line in
    #loader.audio >> framecutter.signal
    ###################################
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> spectralpeaks.spectrum
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    spectralpeaks.magnitudes >> hpcp_key.magnitudes
    spectralpeaks.frequencies >> hpcp_key.frequencies
    hpcp_key.hpcp >> key.pcp
    hpcp.hpcp >> (pool, 'tonal.hpcp')
    key.key >> (pool, 'tonal.key_key')
    key.scale >> (pool, 'tonal.key_scale')
    key.strength >> (pool, 'tonal.key_strength')
    # Run streaming network
    essentia.run(loader)
    #print("Estimated key and scale:", pool['tonal.key_key'] + " " + pool['tonal.key_scale'])
    #print(pool['tonal.hpcp'].T)
    chroma = pool['tonal.hpcp'].T
    key = pool['tonal.key_key']
    scale = pool['tonal.key_scale']
    if f_chroma == 1:
        # Plot HPCP
        #imshow(pool['tonal.hpcp'].T, aspect='auto', origin='lower', interpolation='none')
        #plt.title("HPCPs in frames (the 0-th HPCP coefficient corresponds to A)")
        #show()
        #print beats_frames.shape[0]
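        # Beat-synchronous chroma: average the HPCP frames between consecutive
        # beats and L1-normalize each averaged vector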
        chroma_matrix = np.zeros((beats_frames.shape[0], 12))
        prev_beat = 0
        act_beat = 0
        sum_key = np.zeros(12)
        chroma_align = chroma
        chroma_align = chroma_align.transpose()
        mat_index = 0
        for i in beats_frames:
            act_beat = i
            value = sum(
                chroma_align[prev_beat:act_beat]) / (act_beat - prev_beat)
            chroma_align[prev_beat:act_beat] = value
            prev_beat = i
            if np.linalg.norm(value, ord=1) != 0:
                value = value / np.linalg.norm(value, ord=1)
            chroma_matrix[mat_index] = value
            mat_index = mat_index + 1

        #chroma_align = chroma_align.transpose()
        #plt.figure(figsize=(10, 4))
        #librosa.display.specshow(chroma_align, y_axis='chroma', x_axis='time')
        #plt.vlines(times, 0, 12, alpha=0.5, color='r', linestyle='--', label='Beats')
        #plt.colorbar()
        #plt.title('Chromagram')
        #plt.tight_layout()
        #chroma_align = chroma_align.transpose()
    #print(chroma_align[24:28])
    #####################################
    # extract full chroma text
    #####################################
    if f_notes == 1:
        #print(chroma.shape)
        chroma = chroma.transpose()
        m, n = chroma.shape
        avg = 0
        for j in chroma:
            avg = avg + np.sum(j)
        avg = avg / m
        threshold = avg / 2
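        # Binarize each frame: frames whose summed energy exceeds half the average
        # frame energy keep their strongest bin (set to 1) and any bins >= 0.8;
        # quieter frames are zeroed entirely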
        for i in chroma:
            if np.sum(i) > threshold:
                ind = np.where(i == np.max(i))
                max_val = i[ind]  #is always 1!
                i[ind] = 0

                ind2 = np.where(i == np.max(i))
                i[ind] = 1

                #if np.any(i[ind2][0] >= 0.8 * max_val):
                #i[ind2] = i[ind2]
                #pass
                #low_values_flags = i < 1
                low_values_flags = i < 0.8

                i[low_values_flags] = 0
            else:
                i.fill(0)
        # Snap each inter-beat segment to its single dominant pitch class
        prev_beat = 0
        act_beat = 0
        sum_key = np.zeros(12)
        for i in beats_frames:
            act_beat = i
            sum_key = sum(chroma[prev_beat:act_beat])
            #print(sum_key)
            #print(chroma[prev_beat:act_beat])

            ind = np.where(sum_key == np.max(sum_key))
            ind = ind[0]
            #print("debug")
            fill = np.zeros(12)  # one slot per pitch class
            if (np.all(chroma[prev_beat:act_beat] == 0)):
                fill[ind] = 0
            else:
                fill[ind] = 1
            chroma[prev_beat:act_beat] = fill
            #print(chroma[prev_beat:act_beat])
            prev_beat = i
            #print("BEAT")
        notes = []
        prev_beat = 0
        act_beat = 0
        for i in beats_frames:
            act_beat = i
            sum_key = sum(chroma[prev_beat:act_beat])
            ind = np.where(sum_key == np.max(sum_key))
            notes.append(ind[0][0])
            prev_beat = i
        #chroma = chroma.transpose()
        #plt.figure(figsize=(10, 4))
        #librosa.display.specshow(chroma, y_axis='chroma', x_axis='time')
        #plt.vlines(times, 0, 12, alpha=0.5, color='r', linestyle='--', label='Beats')
        #plt.colorbar()
        #plt.title('Chromagram')
        #plt.tight_layout()
        #chroma = chroma.transpose()
    gc.collect()
    return bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl
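The returned mean and cov_kl look intended for Gaussian-model MFCC comparison between tracks (the f_mfcc_kl flag suggests a KL-style distance). A minimal sketch of a symmetric KL divergence between two such Gaussians, under that assumption; the helper name is hypothetical:
import numpy as np

def symmetric_kl(mean_a, cov_a, mean_b, cov_b):
    """Symmetric KL divergence between two multivariate Gaussians N(mean, cov)."""
    k = mean_a.shape[0]
    inv_a, inv_b = np.linalg.inv(cov_a), np.linalg.inv(cov_b)
    diff = mean_a - mean_b
    kl_ab = 0.5 * (np.trace(inv_b @ cov_a) + diff @ inv_b @ diff - k
                   + np.log(np.linalg.det(cov_b) / np.linalg.det(cov_a)))
    kl_ba = 0.5 * (np.trace(inv_a @ cov_b) + diff @ inv_a @ diff - k
                   + np.log(np.linalg.det(cov_a) / np.linalg.det(cov_b)))
    return kl_ab + kl_ba

# e.g. symmetric_kl(mean_1, cov_kl_1, mean_2, cov_kl_2) on the values returned
# by compute_features() for two different tracks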