示例#1
0
    def predict_onsets(self):
        """Detect onsets in ``self.track_name`` and pair each with a confidence.

        The track is run through a ``CNNOnsetProcessor`` and the resulting
        activation is peak-picked with ``OnsetPeakPickingProcessor``
        (threshold 0.95, 100 fps).

        :return: list of ``(onset, 1)`` tuples, one per picked onset
        """
        activation_fn = CNNOnsetProcessor()
        peak_picker = OnsetPeakPickingProcessor(threshold=0.95, fps=100)
        onset_times = peak_picker(activation_fn(self.track_name))

        # Debugging aid: the ground-truth onsets can be substituted here, e.g.
        #   onset_times = [onset.prediction for onset in self.true_tablature.onsets]

        # TODO: the confidence is a constant placeholder (1) until a real
        # per-onset confidence is available.
        return [(onset, 1) for onset in onset_times]
示例#2
0
def get_onsets(path):
    """
    Detect the onsets of an audio file.

    An ``RNNOnsetProcessor`` activation is computed for the file and then
    peak-picked with ``OnsetPeakPickingProcessor(threshold=0.45, combine=.2)``.

    :param path: Path to audio file
    :return: List of times (in seconds) corresponding to onsets
    """
    peak_picker = OnsetPeakPickingProcessor(threshold=0.45, combine=.2)
    activation = RNNOnsetProcessor()(path)
    # The picker result is converted to a plain Python list via .tolist().
    return peak_picker(activation).tolist()
示例#3
0
def GuitarOnsetDetector(audio_filename, fs=44100):
    """Detect guitar note onsets in an audio file.

    Pipeline:

    1. Compute a SuperFlux spectral onset detection function (SODF).
    2. Zero the SODF wherever the smoothed, scaled RMS curve is not rising.
    3. Peak-pick the SODF and merge close peaks with ``combine_series``.
    4. For every remaining onset, compute a weighted average of the ``cs``
       values returned by ``rms_centroids`` over a window of at most 0.5 s
       (shorter if the next onset comes earlier), weighted by the falling
       half of a Hann window, and keep the onset only if that average is
       below ``max_spectral_centroid``.

    :param audio_filename: audio file passed to the SODF processor and to
        ``rms_centroids``
    :param fs: sample rate used to derive the hop size and passed to
        ``rms_centroids`` (default 44100); it should be a multiple of the
        internal frame rate (180 fps) so that the hop size is an integer
    :return: list of the onsets that passed the spectral-centroid filter
    """
    # TODO: ad hoc!!!! Derive it automatically.
    onset_threshold = 2
    series_delta = 0.22
    fps = 180
    max_spectral_centroid = 3500
    # fps must be a divisor of fs to obtain integer hopSize
    # (it just simplifies the code below)
    hopSize = int(fs / fps)

    sodf = SpectralOnsetProcessor(onset_method='superflux',
                                  fps=fps,
                                  filterbank=LogarithmicFilterbank,
                                  num_bands=24,
                                  log=np.log10)
    sodf_onsets = sodf(audio_filename)

    # "fusion" with rms-diff.
    rms, cs = rms_centroids(audio_filename,
                            frameSize=1024,
                            hopSize=hopSize,
                            sampleRate=fs)
    rms = signal.smooth(rms, int(fs / hopSize * 0.2))
    rms = preprocessing.scale(rms, with_mean=False, copy=False)
    # First difference: positive where the RMS curve is rising.
    rms = rms[1:] - rms[:-1]

    # Suppress detection-function values where the RMS is flat or falling.
    # NOTE(review): assumes sodf_onsets and the differenced rms have the same
    # length -- confirm against rms_centroids' framing.
    sodf_onsets[rms <= 0] = 0

    proc = OnsetPeakPickingProcessor(fps=fps, threshold=onset_threshold)
    p_onsets = combine_series(proc(sodf_onsets), series_delta)

    result = []
    last_index = len(p_onsets) - 1
    for i, onset in enumerate(p_onsets):
        # Analysis window: up to 0.5 s, cut short by the next onset.
        duration = 0.5
        if i < last_index:
            duration = min((p_onsets[i + 1] - onset), duration)
        window_len = int(duration * fs / hopSize)
        s = int(float(onset) * fs / hopSize)
        # Do not read past the end of the centroid track.
        d = min(window_len, len(cs) - s)
        # Falling half of a Hann window, normalised to sum to one.
        # NOTE(review): for d <= 1 the half window sums to zero, so the
        # weights are NaN/empty -- confirm how such onsets should be handled.
        w = np.hanning(2 * d)[d:]
        w = np.reshape(w / np.sum(w), (1, d))
        smoothed_centroid = np.dot(w, cs[s:s + d])[0]
        if smoothed_centroid < max_spectral_centroid:
            result.append(onset)
    return result
示例#4
0
文件: libod.py 项目: siyarvurucu/SAAT
def ninos(filename,gamma=0.94):
    """
    Onset detection based on the NINOS spectral sparsity measure.

    For every frame the J = int(N*gamma/2) largest FFT magnitudes are kept,
    and the detection function is computed from them as
    ``sum(x^2) / (sum(x^4)^(1/4) * J^(1/4))``. The detection function is then
    peak-picked with ``OnsetPeakPickingProcessor(threshold=0.03)``.

    Frames whose kept magnitudes are all zero (e.g. digital silence) yield a
    detection value of 0 instead of the NaN that 0/0 would produce.

    reference: Mounir, M., Karsmakers, P., & Van Waterschoot, T. (2016). Guitar note onset detection based on a spectral sparsity measure.
    European Signal Processing Conference. https://doi.org/10.1109/EUSIPCO.2016.7760394

    :param filename: audio file, loaded as mono at 44100 Hz
    :param gamma: fraction used to derive how many magnitude bins are kept
        per frame (J = int(N*gamma/2))
    :return: result of the peak picker applied to the detection function
    """
    sampleRate = 44100
    N = 2048
    hopSize = int(N/10)
    J = int(N*gamma/2)
    audio = MonoLoader(filename=filename, sampleRate=sampleRate)()

    # Instantiate the per-frame algorithms once instead of once per frame.
    window = Windowing(type='hann')
    fft = FFT()
    to_polar = CartesianToPolar()

    mag = []
    for frame in FrameGenerator(audio, frameSize = N, hopSize = hopSize):
        # First output of CartesianToPolar is the magnitude.
        m = np.asarray(to_polar(fft(window(frame)))[0])
        # Keep the J largest magnitudes (in descending order).
        mag.append(m[np.argsort(m)[::-1][:J]])
    mag = np.asarray(mag)

    x2 = mag*mag
    energy = np.sum(x2,axis=1)
    l4_norm = np.sum(x2*x2,axis=1)**(0.25)
    # Guarded division: leave 0 where the denominator is 0 (silent frames).
    inos = np.divide(energy, l4_norm, out=np.zeros_like(energy), where=l4_norm > 0)
    ninos_odf = inos/(J**(0.25))
    return OnsetPeakPickingProcessor(threshold=0.03,fps=sampleRate/hopSize)(ninos_odf)
示例#5
0
import numpy
from madmom.features.chords import DeepChromaChordRecognitionProcessor
from madmom.audio.chroma import DeepChromaProcessor
from madmom.features.beats import DBNBeatTrackingProcessor
from madmom.features.beats import RNNBeatProcessor
from madmom.features.onsets import OnsetPeakPickingProcessor
from madmom.features.onsets import RNNOnsetProcessor

# Chord recognition: deep chroma features decoded into chords.
dcp = DeepChromaProcessor()
decode = DeepChromaChordRecognitionProcessor()
audio_path = sys.argv[1]  # first CLI argument: the audio file to analyse
chroma = dcp(audio_path)
chords = decode(chroma)

# Onset detection: RNN activation followed by peak picking.
peak_picking_options = dict(fps=100,
                            threshold=0.7,
                            pre_avg=0.25,
                            post_avg=0.25,
                            smooth=0.01)
proc = OnsetPeakPickingProcessor(**peak_picking_options)
onset_activation = RNNOnsetProcessor()
act = onset_activation(audio_path)
# NOTE: despite the name, `beats` holds the picked onsets.
beats = proc(act)

# msi: mean difference between consecutive picked onsets, times 1000
# (presumably seconds -> milliseconds; confirm the unit of `beats`).
beatsArray = numpy.array(beats)
intervals = beatsArray[1:] - beatsArray[:-1]
msi = numpy.mean(intervals) * 1000

# Build the beatmap from onsets and chords, then print it.
beatmap = bmaFunctions.assignKeys(beats, chords, sys.argv[3])
bmaFunctions.fancyPrint(beatmap, msi, sys.argv[2])
示例#6
0
        results_cnn[test_idx] = result.cpu().numpy()

    return results_cnn


# In[ ]:

predicted = None
picked_beats = []

if PREDICT:
    # beat_picker = BeatTrackingProcessor(fps=FPS) # TODO: replace with OnsetPeakPickingProcessor(fps=FPS)
    beat_picker = OnsetPeakPickingProcessor(
        fps=FPS,
        threshold=THRESHOLD,
        pre_avg=PRE_AVG,
        post_avg=POST_AVG,
        pre_max=PRE_MAX,
        post_max=POST_MAX
    )  # TODO: replace with OnsetPeakPickingProcessor(fps=FPS)

    # predict beats
    if VERBOSE:
        print('predicting...')
    predicted = run_prediction(test_f)  #[test_t[0], test_t[1]]

    # pick peaks
    if VERBOSE:
        print('picking beats...')

    for i, pred in enumerate(predicted):
        picked = beat_picker(
示例#7
0
def predictOneSong(audioPath, featureOption, clfModelPath):
    """Transcribe the drum events of one audio file.

    Steps:

    1. Extract frame-wise features according to ``featureOption``.
    2. Detect onsets with ``CNNOnsetProcessor`` + ``OnsetPeakPickingProcessor``
       (100 fps) and convert each onset to a feature-frame index using the
       module-level ``HOPSIZE`` and ``FS``.
    3. Splice the features around every onset (0 frames before, 2 after).
    4. Load three classifiers and the min/max normalisation vectors from
       ``clfModelPath``, scale the spliced features and classify each onset.

    :param audioPath: path of the audio file to transcribe
    :param featureOption: one of ``'convRandom'``, ``'convAe'``,
        ``'convDae'`` or ``'baseline'``
    :param clfModelPath: archive readable by ``np.load`` whose ``'arr_0'``
        holds the three classifiers and ``'arr_1'`` the max/min vectors
    :return: list of ``(onset, label)`` tuples, where label is ``'KD'``,
        ``'SD'`` or ``'HH'``; one onset may appear with several labels
    :raises ValueError: if ``featureOption`` is not a known option
    """
    #==== feature extraction
    if featureOption == 'convRandom':
        modelSavePath = './autoencoder/savedRandomAeModels/'
        features = extractRandomConvFeatures(audioPath, modelSavePath)  #64 x M
    elif featureOption == 'convAe':
        modelSavePath = './autoencoder/savedAeModels/'
        features = extractConvFeatures(audioPath, modelSavePath)
    elif featureOption == 'convDae':
        modelSavePath = './autoencoder/savedDaeModels/'
        features = extractConvFeatures(audioPath, modelSavePath)
    elif featureOption == 'baseline':
        features = extractBaselineFeatures(audioPath)  #60 x M
    else:
        # Fail fast: without features the later steps cannot run, so do not
        # spend time on onset detection first.
        raise ValueError('unknown feature option: %r' % (featureOption,))

    #==== onset detection
    onsetDetector = CNNOnsetProcessor()
    nvt = onsetDetector(audioPath)
    peakPicker = OnsetPeakPickingProcessor(fps=100)
    onsets = peakPicker(nvt)

    #==== collect feature of interest
    frontFrame = 0
    rearFrame = 2
    X = []
    timeStamp = []
    for curTime in onsets:
        # Onset converted to the nearest feature-frame index.
        midIndex = int(round(np.divide(curTime, HOPSIZE / FS)))
        X.append(featureSplicing(features, midIndex, frontFrame, rearFrame))
        timeStamp.append(curTime)

    #==== drum transcription
    # NOTE(review): if the archive stores pickled estimator objects, recent
    # NumPy versions need allow_pickle=True here; only enable that for
    # trusted model files.
    with np.load(clfModelPath) as tmp:
        classifiers = tmp['arr_0']
        normParams = tmp['arr_1']
    maxVec = normParams[0]
    minVec = normParams[1]
    XScaled = scaleMatrixWithMinMax(X, maxVec, minVec)

    # One classifier per drum label, evaluated in this order.
    labelledClassifiers = (
        (classifiers[0], 'KD'),
        (classifiers[1], 'SD'),
        (classifiers[2], 'HH'),
    )

    predictions = []
    for i, curTime in enumerate(timeStamp):
        # Add a leading axis so each classifier receives a single-row input.
        curFeature = np.expand_dims(XScaled[i], axis=0)
        for clf, label in labelledClassifiers:
            if clf.predict(curFeature):
                predictions.append((curTime, label))
    return predictions
示例#8
0
文件: libod.py 项目: siyarvurucu/SAAT
def rectifiedComplexDomain(filename):
    """Pick onsets from the 'rectified_complex_domain' spectral onset function.

    The file is loaded as mono at 44100 Hz and both processors are used with
    their default settings apart from the onset method.
    """
    peak_picker = OnsetPeakPickingProcessor()
    samples = MonoLoader(filename=filename, sampleRate=44100)()
    detection_fn = SpectralOnsetProcessor(onset_method='rectified_complex_domain')
    return peak_picker(detection_fn(samples))
示例#9
0
文件: libod.py 项目: siyarvurucu/SAAT
def PhaseDev(filename):
    """Pick onsets from the 'phase_deviation' spectral onset function.

    The file is loaded as mono at 44100 Hz and both processors are used with
    their default settings apart from the onset method.
    """
    peak_picker = OnsetPeakPickingProcessor()
    samples = MonoLoader(filename=filename, sampleRate=44100)()
    detection_fn = SpectralOnsetProcessor(onset_method='phase_deviation')
    return peak_picker(detection_fn(samples))
示例#10
0
文件: libod.py 项目: siyarvurucu/SAAT
def modifiedKL(filename):
    """Pick onsets from the 'modified_kullback_leibler' spectral onset function.

    The file is loaded as mono at 44100 Hz and both processors are used with
    their default settings apart from the onset method.
    """
    peak_picker = OnsetPeakPickingProcessor()
    samples = MonoLoader(filename=filename, sampleRate=44100)()
    detection_fn = SpectralOnsetProcessor(onset_method='modified_kullback_leibler')
    return peak_picker(detection_fn(samples))
示例#11
0
文件: libod.py 项目: siyarvurucu/SAAT
def RNNOnsetDetector(filename):
    """Pick onsets from the activation of an ``RNNOnsetProcessor``.

    The file is loaded as mono at 44100 Hz and both processors are used with
    their default settings.
    """
    peak_picker = OnsetPeakPickingProcessor()
    samples = MonoLoader(filename=filename, sampleRate=44100)()
    activation_fn = RNNOnsetProcessor()
    return peak_picker(activation_fn(samples))