示例#1
0
def HPFilter(audio, cutoff, filename='holst_test.wav', sampleRate=44100):
    """High-pass filter a mono signal, write the result to disk and return it.

    Parameters
    ----------
    audio : array of float samples accepted by ``es.HighPass``.
    cutoff : cutoff frequency passed to ``es.HighPass`` as
        ``cutoffFrequency``.
    filename : path of the file written by ``es.MonoWriter``. Defaults to
        the previously hard-coded ``'holst_test.wav'``.
    sampleRate : sampling rate handed to both the filter and the writer.
        The default of 44100 is the value Essentia uses when the parameter
        is omitted, so existing two-argument calls behave as before.

    Returns
    -------
    The filtered signal, exactly as produced by ``es.HighPass``.
    """
    high_pass = es.HighPass(cutoffFrequency=cutoff, sampleRate=sampleRate)
    filtered_audio = high_pass(audio)

    # Persist the filtered signal as a side effect before returning it.
    writer = es.MonoWriter(filename=filename, sampleRate=sampleRate)
    writer(filtered_audio)

    return filtered_audio
示例#2
0
def filter_loops():
    """Band-pass every not-yet-processed loop from CHOPPED_PATH.

    Each ``*.wav`` file found in ``CHOPPED_PATH`` is loaded as mono at the
    module-level ``sampleRate``, band-pass filtered (280 Hz centre, 100 Hz
    bandwidth) and written to ``EQ_NEW_PATH`` as ``bpf_<name>``. Files whose
    ``bpf_`` counterpart already exists in ``EQ_NEW_PATH`` are skipped, so
    the function can be re-run to resume an interrupted batch.

    A running count of directory entries visited is printed every 50
    entries. Returns ``None``.
    """
    # A set makes the "already processed?" check O(1) per file instead of
    # scanning the whole directory listing for every loop.
    already_done = set(os.listdir(EQ_NEW_PATH))

    bp_filter = es.BandPass(bandwidth=100, cutoffFrequency=280,
                            sampleRate=sampleRate)

    for count, loop in enumerate(os.listdir(CHOPPED_PATH), start=1):
        if count % 50 == 0:
            print(str(count))

        # Match on the extension only: a substring test would also pick up
        # side-car files such as "name.wav.asd", which are not audio.
        if not loop.endswith(".wav"):
            continue

        out_name = "bpf_" + loop
        if out_name in already_done:
            continue

        loader = es.MonoLoader(filename=os.path.join(CHOPPED_PATH, loop),
                               sampleRate=sampleRate)
        sf.write(os.path.join(EQ_NEW_PATH, out_name),
                 bp_filter(loader()),
                 sampleRate)
示例#3
0
def filter_loops_eval():
    """Re-filter generated evaluation loops according to the tag in their name.

    For each of the two hard-coded output directories, every ``*.wav`` file
    whose name contains ``lpf``, ``bpf`` or ``hpf`` is loaded as mono at the
    module-level ``sampleRate``, passed through the matching low-, band- or
    high-pass filter and written under the same file name into the ``eq/``
    sub-directory of that directory (created if it does not exist yet).

    Files without any of the three tags are ignored. Returns ``None``.
    """
    loops_paths = ["icassp2021_outputs/outputs_stft_coherence/",
                   "icassp2021_outputs/outputs_wavstft_coherence/"]

    # Tags are tested in this order. A name containing several tags is
    # written once per matching tag to the same target, so the last match
    # determines the final file content.
    tagged_filters = [
        ("lpf", es.LowPass(cutoffFrequency=90, sampleRate=sampleRate)),
        ("bpf", es.BandPass(bandwidth=100, cutoffFrequency=280,
                            sampleRate=sampleRate)),
        ("hpf", es.HighPass(cutoffFrequency=9000, sampleRate=sampleRate)),
    ]

    for path in loops_paths:
        out_dir = os.path.join(path, "eq")
        for loop in os.listdir(path):
            # Extension check rather than substring check, so that names
            # like "x.wav.bak" are not handed to the audio loader.
            if not loop.endswith(".wav"):
                continue

            matching = [flt for tag, flt in tagged_filters if tag in loop]
            if not matching:
                continue

            if not os.path.isdir(out_dir):
                os.makedirs(out_dir)

            # Decode the file once, however many tags matched.
            audio = es.MonoLoader(filename=os.path.join(path, loop),
                                  sampleRate=sampleRate)()
            out_file = os.path.join(out_dir, loop)
            for flt in matching:
                sf.write(out_file, flt(audio), sampleRate)
示例#4
0
import essentia.standard as ess
import numpy as np
import pickle
import glob
import utilFunctions as UF
import scipy.spatial.distance as DS

import parameters as params
import csv

# Essentia algorithm instances shared by the rest of the module; they are
# configured once here, at import time, from the values in ``params``.
rms = ess.RMS()
window = ess.Windowing(type="hamming")
spec = ess.Spectrum(size=params.Nfft)
# float32 buffer of ``params.zeropadLen`` zeros.
zz = np.zeros((params.zeropadLen,), dtype='float32')
# Floor division keeps ``inputSize`` an integer on Python 3 as well; true
# division would hand a float to an integer-typed Essentia parameter.
genmfcc = ess.MFCC(highFrequencyBound=22000.0,
                   inputSize=params.Nfft // 2 + 1,
                   sampleRate=params.Fs)
hps = ess.HighPass(cutoffFrequency=240.0)
onsets = ess.Onsets()

# Stroke label names, listed in alphabetical order.
strokeLabels = ['dha', 'dhen', 'dhi', 'dun', 'ge', 'kat', 'ke', 'na', 'ne', 're', 'tak', 'te', 'tit', 'tun']

# Per-taal settings keyed by taal name. "nmatra" is an integer count and
# "accents" holds exactly that many integer values (one per matra).
# NOTE(review): presumably larger accent values mark stronger beats -
# confirm against the code that consumes this table.
taals = {"teen": {"nmatra": 16, "accents": np.array([4, 1, 1, 1, 3, 1, 1, 1, 2, 1, 1, 1, 3, 1, 1, 1])}, 
         "ek": {"nmatra": 12, "accents": np.array([4, 1, 1, 2, 1, 1, 3, 1, 1, 2, 1, 1])},
         "jhap": {"nmatra": 10, "accents": np.array([4, 1, 2, 1, 1, 3, 1, 2, 1, 1])},
         "rupak": {"nmatra": 7, "accents": np.array([2, 1, 1, 3, 1, 3, 1])}
         }

rolls = [{"bol": ['dha/dha_02', 'te/te_05', 're/re_04', 'dha/dha_02'], "dur": np.array([1.0, 1.0, 1, 1]), "amp": np.array([1.0, 1.0, 1.0, 1.0])},
         {"bol": ['te/te_02', 're/re_05', 'ke/ke_04', 'te/te_02'], "dur": np.array([1.0, 1.0, 1, 1]), "amp": np.array([1.0, 1.0, 1.0, 1.0])},
         {"bol": ['ge/ge_02', 'ge/ge_05', 'te/te_04', 'te/te_02'], "dur": np.array([1.0, 1.0, 1, 1]), "amp": np.array([1.0, 1.0, 1.0, 1.0])},
         {"bol": ['ge/ge_02', 'ge/ge_05', 'dhi/dhi_04', 'na/na_02'], "dur": np.array([1.0, 1.0, 1, 1]), "amp": np.array([1.0, 1.0, 1.0, 1.0])},
         {"bol": ['dha/dha_02', 'dha/dha_02', 'te/te_05', 'te/te_06'], "dur": np.array([1.0, 1.0, 1, 1]), "amp": np.array([1.0, 1.0, 1.0, 1.0])},
示例#5
0
def estimate_key(input_audio_file, output_text_file=None, key_profile=None):
    """
    Estimate the overall key of an audio track, optionally with extra
    modal information.

    The track is loaded as mono, optionally high-pass filtered, and cut into
    frames. A pitch-class profile (HPCP) is computed per frame and the
    profiles are summed into one chroma vector, which is matched against
    key templates. All analysis settings come from module-level constants.

    :param input_audio_file: path of the audio file to analyse.
    :type input_audio_file: str
    :param output_text_file: if given, the result is written to this path
        as ``key<TAB>correlation`` followed by a newline.
    :type output_text_file: str
    :param key_profile: if given, overrides the module-level ``KEY_PROFILE``
        and switches ``USE_THREE_PROFILES`` and ``WITH_MODAL_DETAILS`` off.
        The override is written to the module globals, so it stays in
        effect for later calls.
    :return: ``(key, correlation_value)`` where ``key`` is a tab-separated
        string built from the first two template-matching results.
    :raises NameError: if detuning correction is enabled and
        ``DETUNING_CORRECTION_SCOPE`` is neither ``'frame'`` nor
        ``'average'``.
    """
    # A ``global`` statement applies to the whole function body wherever it
    # is written, so declaring these up front does not change behaviour.
    global USE_THREE_PROFILES
    global WITH_MODAL_DETAILS
    global KEY_PROFILE

    if key_profile is not None:
        KEY_PROFILE = key_profile
        USE_THREE_PROFILES = False
        WITH_MODAL_DETAILS = False

    loader = estd.MonoLoader(filename=input_audio_file, sampleRate=SAMPLE_RATE)
    cut = estd.FrameCutter(frameSize=WINDOW_SIZE, hopSize=HOP_SIZE)
    window = estd.Windowing(size=WINDOW_SIZE, type=WINDOW_SHAPE)
    rfft = estd.Spectrum(size=WINDOW_SIZE)
    sw = estd.SpectralWhitening(maxFrequency=MAX_HZ, sampleRate=SAMPLE_RATE)
    speaks = estd.SpectralPeaks(magnitudeThreshold=SPECTRAL_PEAKS_THRESHOLD,
                                maxFrequency=MAX_HZ,
                                minFrequency=MIN_HZ,
                                maxPeaks=SPECTRAL_PEAKS_MAX,
                                sampleRate=SAMPLE_RATE)
    hpcp = estd.HPCP(
        bandPreset=HPCP_BAND_PRESET,
        #bandSplitFrequency=HPCP_SPLIT_HZ,
        harmonics=HPCP_HARMONICS,
        maxFrequency=MAX_HZ,
        minFrequency=MIN_HZ,
        nonLinear=HPCP_NON_LINEAR,
        normalized=HPCP_NORMALIZE,
        referenceFrequency=HPCP_REFERENCE_HZ,
        sampleRate=SAMPLE_RATE,
        size=HPCP_SIZE,
        weightType=HPCP_WEIGHT_TYPE,
        windowSize=HPCP_WEIGHT_WINDOW_SEMITONES,
        maxShifted=HPCP_SHIFT)

    audio = loader()
    if HIGHPASS_CUTOFF is not None:
        hpf = estd.HighPass(cutoffFrequency=HIGHPASS_CUTOFF,
                            sampleRate=SAMPLE_RATE)
        # The same filter is applied three times in a row.
        audio = hpf(hpf(hpf(audio)))

    duration = len(audio)
    n_slices = 1 + (duration // HOP_SIZE)
    chroma = np.empty([n_slices, HPCP_SIZE], dtype='float64')
    for slice_n in range(n_slices):
        # ``cut`` is called once per slice with the full signal.
        # NOTE(review): this relies on the frame cutter advancing to the
        # next frame on every call - confirm against the Essentia docs.
        spek = rfft(window(cut(audio)))
        p1, p2 = speaks(spek)
        if SPECTRAL_WHITENING:
            p2 = sw(spek, p1, p2)
        pcp = hpcp(p1, p2)
        if not DETUNING_CORRECTION or DETUNING_CORRECTION_SCOPE == 'average':
            chroma[slice_n] = pcp
        elif DETUNING_CORRECTION_SCOPE == 'frame':
            # Reaching this branch already implies DETUNING_CORRECTION.
            chroma[slice_n] = shift_pcp(pcp, HPCP_SIZE)
        else:
            raise NameError("SHIFT_SCOPE must be set to 'frame' or 'average'.")

    chroma = np.sum(chroma, axis=0)
    if PCP_THRESHOLD is not None:
        chroma = normalize_pcp_peak(chroma)
        chroma = pcp_gate(chroma, PCP_THRESHOLD)
    if DETUNING_CORRECTION and DETUNING_CORRECTION_SCOPE == 'average':
        chroma = shift_pcp(chroma, HPCP_SIZE)

    # Adjust to essentia's HPCP calculation starting on A. Three semitones
    # are HPCP_SIZE // 12 bins each, so the shift scales with the profile
    # resolution (it is exactly -3 for a 12-bin profile).
    chroma = np.roll(chroma, -3 * (HPCP_SIZE // 12))

    if USE_THREE_PROFILES:
        estimation_1 = template_matching_3(chroma, KEY_PROFILE)
    else:
        estimation_1 = template_matching_2(chroma, KEY_PROFILE)
    key_1 = estimation_1[0] + '\t' + estimation_1[1]
    correlation_value = estimation_1[2]

    key = key_1
    if WITH_MODAL_DETAILS:
        estimation_2 = template_matching_modal(chroma)
        key_2 = estimation_2[0] + '\t' + estimation_2[1]
        fields = (key_1 + '\t' + key_2).split('\t')
        # Assign monotonic tracks to minor:
        if fields[3] == 'monotonic' and fields[0] == fields[2]:
            key = '{0}\tminor'.format(fields[0])

    if output_text_file is not None:
        # ``with`` guarantees the file is closed even if the write fails.
        with open(output_text_file, 'w') as textfile:
            textfile.write(key + '\t' + str(correlation_value) + '\n')
    return key, correlation_value
示例#6
0
def key_aes(input_audio_file, output_text_file, **kwargs):
    """
    Estimate the overall key of an audio track, optionally with extra
    modal information, and write it to a text file.

    Settings are read from ``kwargs``; when no keyword arguments are passed
    the module-level ``KEY_SETTINGS`` mapping is used instead.

    :param input_audio_file: path of the audio file to analyse.
    :type input_audio_file: str
    :param output_text_file: path of the text file that receives the key.
    :type output_text_file: str
    :return: ``(key, correlation_value)``. When no frame yields a non-zero
        pitch-class profile the bare string ``'Silence'`` is returned
        instead and nothing is written to ``output_text_file``.
    :raises NameError: if detuning correction is enabled and
        ``DETUNING_CORRECTION_SCOPE`` is neither ``'frame'`` nor
        ``'average'``.
    """
    if not kwargs:
        kwargs = KEY_SETTINGS

    loader = estd.MonoLoader(filename=input_audio_file,
                             sampleRate=kwargs["SAMPLE_RATE"])

    cut = estd.FrameCutter(frameSize=kwargs["WINDOW_SIZE"],
                           hopSize=kwargs["HOP_SIZE"])

    window = estd.Windowing(size=kwargs["WINDOW_SIZE"],
                            type=kwargs["WINDOW_SHAPE"])

    rfft = estd.Spectrum(size=kwargs["WINDOW_SIZE"])

    sw = estd.SpectralWhitening(maxFrequency=kwargs["MAX_HZ"],
                                sampleRate=kwargs["SAMPLE_RATE"])

    speaks = estd.SpectralPeaks(
        magnitudeThreshold=kwargs["SPECTRAL_PEAKS_THRESHOLD"],
        maxFrequency=kwargs["MAX_HZ"],
        minFrequency=kwargs["MIN_HZ"],
        maxPeaks=kwargs["SPECTRAL_PEAKS_MAX"],
        sampleRate=kwargs["SAMPLE_RATE"])

    hpcp = estd.HPCP(bandPreset=kwargs["HPCP_BAND_PRESET"],
                     splitFrequency=kwargs["HPCP_SPLIT_HZ"],
                     harmonics=kwargs["HPCP_HARMONICS"],
                     maxFrequency=kwargs["MAX_HZ"],
                     minFrequency=kwargs["MIN_HZ"],
                     nonLinear=kwargs["HPCP_NON_LINEAR"],
                     normalized=kwargs["HPCP_NORMALIZE"],
                     referenceFrequency=kwargs["HPCP_REFERENCE_HZ"],
                     sampleRate=kwargs["SAMPLE_RATE"],
                     size=kwargs["HPCP_SIZE"],
                     weightType=kwargs["HPCP_WEIGHT_TYPE"],
                     windowSize=kwargs["HPCP_WEIGHT_WINDOW_SEMITONES"],
                     maxShifted=kwargs["HPCP_SHIFT"])

    audio = loader()

    if kwargs["HIGHPASS_CUTOFF"] is not None:
        hpf = estd.HighPass(cutoffFrequency=kwargs["HIGHPASS_CUTOFF"],
                            sampleRate=kwargs["SAMPLE_RATE"])
        # The same filter is applied three times in a row.
        audio = hpf(hpf(hpf(audio)))

    if kwargs["DURATION"] is not None:
        # NOTE(review): the slice ends at DURATION * SAMPLE_RATE, not at
        # (START_TIME + DURATION) * SAMPLE_RATE, so DURATION acts as an end
        # time whenever START_TIME is non-zero - confirm this is intended.
        first_sample = kwargs["START_TIME"] * kwargs["SAMPLE_RATE"]
        last_sample = kwargs["DURATION"] * kwargs["SAMPLE_RATE"]
        audio = audio[first_sample:last_sample]

    duration = len(audio)
    number_of_frames = int(duration / kwargs["HOP_SIZE"])
    chroma = []
    for bang in range(number_of_frames):
        # ``cut`` is called once per frame with the full signal.
        # NOTE(review): this relies on the frame cutter advancing to the
        # next frame on every call - confirm against the Essentia docs.
        spek = rfft(window(cut(audio)))
        p1, p2 = speaks(spek)
        if kwargs["SPECTRAL_WHITENING"]:
            p2 = sw(spek, p1, p2)

        pcp = hpcp(p1, p2)

        # Frames with an all-zero profile do not contribute.
        if np.sum(pcp) > 0:
            if not kwargs["DETUNING_CORRECTION"] or kwargs[
                    "DETUNING_CORRECTION_SCOPE"] == 'average':
                chroma.append(pcp)
            elif kwargs["DETUNING_CORRECTION_SCOPE"] == 'frame':
                # Reaching this branch already implies DETUNING_CORRECTION.
                chroma.append(_detuning_correction(pcp, kwargs["HPCP_SIZE"]))
            else:
                raise NameError(
                    "SHIFT_SCOPE musts be set to 'frame' or 'average'.")

    if not chroma:
        # Unlike the normal path this returns a plain string, not a tuple.
        return 'Silence'

    chroma = np.sum(chroma, axis=0)
    chroma = norm_peak(chroma)

    if kwargs["PCP_THRESHOLD"] is not None:
        chroma = vector_threshold(chroma, kwargs["PCP_THRESHOLD"])

    if kwargs["DETUNING_CORRECTION"] and kwargs[
            "DETUNING_CORRECTION_SCOPE"] == 'average':
        chroma = _detuning_correction(chroma, kwargs["HPCP_SIZE"])

    # Adjust to essentia's HPCP calculation starting on A (pc = 9)
    chroma = np.roll(chroma, -3 * (kwargs["HPCP_SIZE"] // 12))

    estimation_1 = estimate_key(chroma,
                                kwargs["KEY_PROFILE"],
                                kwargs["PROFILE_INTERPOLATION"],
                                conf_thres=kwargs["NOKEY_THRESHOLD"],
                                vocabulary=kwargs["KEY_VOCABULARY"])

    key_1 = estimation_1[0]
    correlation_value = estimation_1[1]

    key = key_1
    if kwargs["WITH_MODAL_DETAILS"]:
        estimation_2 = _key7(chroma, kwargs["PROFILE_INTERPOLATION"])
        key_2 = estimation_2[0] + '\t' + estimation_2[1]
        fields = (key_1 + '\t' + key_2).split('\t')

        # Assign monotonic track to minor:
        if fields[3] == 'monotonic' and fields[0] == fields[2]:
            key = '{0}\tminor'.format(fields[0])

    # ``with`` guarantees the file is closed even if the write fails.
    with open(output_text_file, 'w') as textfile:
        textfile.write(key)

    return key, correlation_value
示例#7
0
def key_ecir(input_audio_file, output_text_file, **kwargs):
    """
    Estimate the key of an audio track with Essentia's ``Key`` algorithm
    and write it to a text file.

    Settings are read from ``kwargs``; when no keyword arguments are passed
    the module-level ``KEY_SETTINGS`` mapping is used instead. Per-frame
    pitch-class profiles (HPCP) with a non-zero sum are averaged and the
    mean profile is handed to ``estd.Key``.

    :param input_audio_file: path of the audio file to analyse.
    :type input_audio_file: str
    :param output_text_file: path of the text file that receives the key
        followed by a newline.
    :type output_text_file: str
    :return: ``(key, confidence)`` where ``key`` is the first two outputs
        of ``estd.Key`` joined by a tab and ``confidence`` is a tuple of
        its third and fourth outputs.
    """
    if not kwargs:
        kwargs = KEY_SETTINGS

    loader = estd.MonoLoader(filename=input_audio_file,
                             sampleRate=kwargs["SAMPLE_RATE"])
    cut = estd.FrameCutter(frameSize=kwargs["WINDOW_SIZE"],
                           hopSize=kwargs["HOP_SIZE"])
    window = estd.Windowing(size=kwargs["WINDOW_SIZE"],
                            type=kwargs["WINDOW_SHAPE"])
    rfft = estd.Spectrum(size=kwargs["WINDOW_SIZE"])
    sw = estd.SpectralWhitening(maxFrequency=kwargs["MAX_HZ"],
                                sampleRate=kwargs["SAMPLE_RATE"])
    speaks = estd.SpectralPeaks(
        magnitudeThreshold=kwargs["SPECTRAL_PEAKS_THRESHOLD"],
        maxFrequency=kwargs["MAX_HZ"],
        minFrequency=kwargs["MIN_HZ"],
        maxPeaks=kwargs["SPECTRAL_PEAKS_MAX"],
        sampleRate=kwargs["SAMPLE_RATE"])
    hpcp = estd.HPCP(bandPreset=kwargs["HPCP_BAND_PRESET"],
                     splitFrequency=kwargs["HPCP_SPLIT_HZ"],
                     harmonics=kwargs["HPCP_HARMONICS"],
                     maxFrequency=kwargs["MAX_HZ"],
                     minFrequency=kwargs["MIN_HZ"],
                     nonLinear=kwargs["HPCP_NON_LINEAR"],
                     normalized=kwargs["HPCP_NORMALIZE"],
                     referenceFrequency=kwargs["HPCP_REFERENCE_HZ"],
                     sampleRate=kwargs["SAMPLE_RATE"],
                     size=kwargs["HPCP_SIZE"],
                     weightType=kwargs["HPCP_WEIGHT_TYPE"],
                     windowSize=kwargs["HPCP_WEIGHT_WINDOW_SEMITONES"],
                     maxShifted=kwargs["HPCP_SHIFT"])

    # Kept under its own name so the algorithm object is not rebound to
    # its own result further down.
    key_estimator = estd.Key(numHarmonics=kwargs["KEY_HARMONICS"],
                             pcpSize=kwargs["HPCP_SIZE"],
                             profileType=kwargs["KEY_PROFILE"],
                             slope=kwargs["KEY_SLOPE"],
                             usePolyphony=kwargs["KEY_POLYPHONY"],
                             useThreeChords=kwargs["KEY_USE_THREE_CHORDS"])

    audio = loader()

    if kwargs["HIGHPASS_CUTOFF"] is not None:
        hpf = estd.HighPass(cutoffFrequency=kwargs["HIGHPASS_CUTOFF"],
                            sampleRate=kwargs["SAMPLE_RATE"])
        # The same filter is applied three times in a row.
        audio = hpf(hpf(hpf(audio)))

    if kwargs["DURATION"] is not None:
        # NOTE(review): the slice ends at DURATION * SAMPLE_RATE, not at
        # (START_TIME + DURATION) * SAMPLE_RATE, so DURATION acts as an end
        # time whenever START_TIME is non-zero - confirm this is intended.
        first_sample = kwargs["START_TIME"] * kwargs["SAMPLE_RATE"]
        last_sample = kwargs["DURATION"] * kwargs["SAMPLE_RATE"]
        audio = audio[first_sample:last_sample]

    duration = len(audio)
    number_of_frames = int(duration / kwargs["HOP_SIZE"])
    chroma = []
    for bang in range(number_of_frames):
        # ``cut`` is called once per frame with the full signal.
        # NOTE(review): this relies on the frame cutter advancing to the
        # next frame on every call - confirm against the Essentia docs.
        spek = rfft(window(cut(audio)))
        p1, p2 = speaks(spek)  # p1 = frequencies; p2 = magnitudes
        if kwargs["SPECTRAL_WHITENING"]:
            p2 = sw(spek, p1, p2)
        vector = hpcp(p1, p2)

        # Frames with an all-zero profile do not contribute.
        if np.sum(vector) > 0:
            if not kwargs["DETUNING_CORRECTION"] or kwargs[
                    "DETUNING_CORRECTION_SCOPE"] == 'average':
                chroma.append(vector)
            elif kwargs["DETUNING_CORRECTION_SCOPE"] == 'frame':
                # Reaching this branch already implies DETUNING_CORRECTION.
                chroma.append(
                    _detuning_correction(vector, kwargs["HPCP_SIZE"]))
            else:
                # Best effort: the frame is dropped and analysis continues.
                print("SHIFT_SCOPE must be set to 'frame' or 'average'")

    # NOTE(review): if no frame was kept, ``chroma`` is an empty list here
    # and the mean is not a usable profile - callers should not pass
    # silent or too-short audio until this case is given explicit handling.
    chroma = np.mean(chroma, axis=0)

    if kwargs["DETUNING_CORRECTION"] and kwargs[
            "DETUNING_CORRECTION_SCOPE"] == 'average':
        chroma = _detuning_correction(chroma, kwargs["HPCP_SIZE"])

    estimation = key_estimator(chroma.tolist())
    confidence = (estimation[2], estimation[3])
    key = estimation[0] + '\t' + estimation[1]

    # ``with`` guarantees the file is closed even if the write fails.
    with open(output_text_file, 'w') as textfile:
        textfile.write(key + '\n')
    return key, confidence
示例#8
0
def analysis_function(loop, sampleRate=16000):
    """Extract a drum pattern, an HPCP and per-band timbral features of a loop.

    Steps, in order:

    1. ``ADT`` is run on the file and its activations are interpolated from
       160 frames to 29538 points, clipped to [0, 1] and divided by their
       per-column maximum.
    2. The loop is loaded as mono and split into three bands (low-pass at
       90 Hz, band-pass around 280 Hz, high-pass at 9 kHz); each band is
       written to ``analysis/<lpf_|bpf_|hpf_><basename>``.
    3. ``timbral_models.timbral_extractor`` is run on each band file and
       seven of its features are divided by fixed per-band constants.
    4. ``file_to_hpcp`` is applied to the unfiltered audio.

    :param loop: path of the audio file to analyse.
    :param sampleRate: rate used for loading, filtering and writing.
    :return: ``(final_pattern, hpcp, features_kick, features_snare,
        features_hh)``. ``final_pattern`` has a leading axis of length 1;
        the kick and snare features are plain lists, while the hi-hat
        features are a NumPy array clipped to [0, 1].
    """
    # Order in which the timbral features are placed in the output lists.
    feature_names = ('warmth', 'roughness', 'brightness', 'hardness',
                     'boominess', 'depth', 'sharpness')
    # Per-band divisors, aligned with ``feature_names``.
    kick_scales = (69.738235, 71.95989, 82.336105, 75.53646, 71.00043,
                   100.0, 81.7323)
    snare_scales = (69.57681, 67.66642, 80.19115, 71.689445, 61.422714,
                    100.0, 71.406494)
    hh_scales = (32.789112, 1.0, 85.24432, 67.71172, 2.491137, 0.5797179,
                 87.83693)

    def _scaled(features, scales):
        # Pick the seven features in order and divide each by its constant.
        return [features[name] / scale
                for name, scale in zip(feature_names, scales)]

    lp_filter = es.LowPass(cutoffFrequency=90, sampleRate=sampleRate)
    bp_filter = es.BandPass(bandwidth=20,
                            cutoffFrequency=280,
                            sampleRate=sampleRate)
    hp_filter = es.HighPass(cutoffFrequency=9000, sampleRate=sampleRate)

    [_, pattern] = ADT([loop],
                       output_act='yes',
                       tab='no',
                       save_dir="analysis/")
    pattern = np.array(pattern)[0]
    # NOTE(review): 29538 samples, 16000 Hz and 160 activation frames are
    # hard-coded here and do not follow ``sampleRate`` - presumably every
    # loop has this fixed length; confirm with the caller.
    time_audio = np.linspace(0, float(29538) / 16000, 29538)
    time_act = np.linspace(0, float(29538) / 16000, 160)
    final_pattern = np.clip(
        np.array([
            interp1d(time_act, pattern[channel, :, 0])(time_audio)
            for channel in range(3)
        ]).T, 0.0, 1.0)
    # A column that is zero everywhere divides by zero here.
    final_pattern = final_pattern / final_pattern.max(axis=0)
    final_pattern = np.expand_dims(final_pattern, 0)

    # Decode the file once and reuse the samples for all three bands and
    # for the HPCP, instead of re-reading it for every use.
    audio = es.MonoLoader(filename=loop, sampleRate=sampleRate)()

    loop_basename = ntpath.basename(loop)
    band_files = []
    for prefix, band_filter in (("lpf_", lp_filter),
                                ("bpf_", bp_filter),
                                ("hpf_", hp_filter)):
        band_file = "analysis/" + prefix + loop_basename
        sf.write(band_file, band_filter(audio), sampleRate)
        band_files.append(band_file)

    unordered_kick_features, unordered_snare_features, \
        unordered_hh_features = [
            timbral_models.timbral_extractor(band_file, clip_output=True)
            for band_file in band_files
        ]

    features_kick = _scaled(unordered_kick_features, kick_scales)
    features_snare = _scaled(unordered_snare_features, snare_scales)
    features_hh = _scaled(unordered_hh_features, hh_scales)

    hpcp = file_to_hpcp(audio)

    # Only the hi-hat features are clipped; kick and snare are returned
    # as computed.
    return final_pattern, hpcp, features_kick, features_snare, np.clip(
        features_hh, 0, 1)