def run_NMFdiag():
    """Run NMFdiag on the bees / "Let It Be" pair and collect the results.

    Reads the source and target recordings from ``../data/``, computes the
    STFT of both, builds the templates ``W0`` from the source magnitude
    divided by its sums along axis 0, loads the initial activations ``H0``
    from ``matrices/NMFdiag/H0.mat`` and calls ``NMFdiag`` on the target
    magnitude with fixed templates for three iterations.

    Returns:
        dict: ``'nmfdiagW_div'`` and ``'nmfdiagH_div'``, the two values
        returned by ``NMFdiag``.
    """
    data_dir = '../data/'
    matlab_dir = 'matrices/NMFdiag/'
    source_name = 'Bees_Buzzing.wav'
    target_name = 'Beatles_LetItBe.wav'

    # Read both signals; the sample rate is not needed by this driver.
    _, source = wav.read(os.path.join(data_dir, source_name))
    _, target = wav.read(os.path.join(data_dir, target_name))

    # Make monaural if necessary.
    source = make_monaural(source)
    target = make_monaural(target)

    # Convert from int16 PCM to float32.
    source = pcmInt16ToFloat32Numpy(source)
    target = pcmInt16ToFloat32Numpy(target)

    block_size = 2048
    stft_params = {
        'blockSize': block_size,
        'hopSize': 1024,
        'winFunc': np.hanning(block_size),
        'reconstMirror': True,
        'appendFrame': True,
        # The same parameter dict (with the target length) is used for
        # both signals, exactly as before.
        'numSamples': len(target),
    }

    # Only the magnitudes (second return value) are used below.
    _, source_mag, _ = forwardSTFT(source, stft_params)
    _, target_mag, _ = forwardSTFT(target, stft_params)

    # The initial activations are NOT drawn randomly here: they are loaded
    # from a .mat file (per the original note, a matrix that was randomly
    # initialized in MATLAB).
    init_h = load_matlab_dict(os.path.join(matlab_dir, 'H0.mat'), 'H0')

    # Initialize the templates from the source frames; EPS keeps the
    # denominator away from zero.
    init_w = source_mag * 1. / (EPS + np.sum(source_mag, axis=0))

    nmfdiag_params = {
        'fixW': True,
        'numOfIter': 3,
        'continuity': {
            'polyphony': 10,
            'length': 7,
            'grid': 1,
            'sparsen': [1, 7],
        },
    }

    # Per the original comment: reference implementation as provided by
    # Jonathan Driedger, with divergence update rules.
    nmfdiag_w, nmfdiag_h = NMFdiag(target_mag, init_w, init_h,
                                   nmfdiag_params)

    return {
        'nmfdiagW_div': nmfdiag_w,
        'nmfdiagH_div': nmfdiag_h,
    }
def run_logFreqLogMag():
    """Compute the log-frequency / log-magnitude view of the running example.

    Reads ``runningExample_IGotYouMixture.wav`` from ``../data/``, computes
    its STFT magnitude and passes it, together with the frequency resolution
    ``fs / blockSize``, to ``logFreqLogMag``.

    Returns:
        dict: ``'logFreqLogMagA'`` and ``'logFreqAxis'``, the two values
        returned by ``logFreqLogMag``.
    """
    data_dir = '../data/'
    wav_name = 'runningExample_IGotYouMixture.wav'

    # Read, downmix to mono if necessary, then convert int16 -> float32.
    sample_rate, signal = wav.read(os.path.join(data_dir, wav_name))
    signal = pcmInt16ToFloat32Numpy(make_monaural(signal))

    # Spectral parameters.
    block_size = 2048
    stft_params = {
        'blockSize': block_size,
        'hopSize': 512,
        'winFunc': np.hanning(block_size),
        'reconstMirror': True,
        'appendFrame': True,
        'numSamples': len(signal),
    }

    # Only the magnitude (second return value) is needed.
    _, magnitude, _ = forwardSTFT(signal, stft_params)

    # Frequency resolution of one STFT bin.
    freq_resolution = sample_rate / stft_params['blockSize']

    # Logarithmically spaced frequency-axis version for visualization.
    log_mag, log_axis = logFreqLogMag(magnitude, freq_resolution)

    return {'logFreqLogMagA': log_mag, 'logFreqAxis': log_axis}
def run_LSEE_MSTFTM_GriffinLim():
    """Run ``LSEE_MSTFTM_GriffinLim`` on the running-example mixture.

    Reads ``runningExample_IGotYouMixture.wav`` from ``../data/``, computes
    its STFT magnitude and hands it, with the same STFT parameter dict, to
    ``LSEE_MSTFTM_GriffinLim``.

    Returns:
        dict: a single key ``'res'`` holding the third value returned by
        ``LSEE_MSTFTM_GriffinLim``; the first two are discarded.
    """
    data_dir = '../data/'
    wav_name = 'runningExample_IGotYouMixture.wav'

    # Read the signal (sample rate is not used), downmix if necessary and
    # convert int16 -> float32.
    _, signal = wav.read(os.path.join(data_dir, wav_name))
    signal = pcmInt16ToFloat32Numpy(make_monaural(signal))

    # Spectral parameters.
    block_size = 2048
    stft_params = {
        'blockSize': block_size,
        'hopSize': 512,
        'winFunc': np.hanning(block_size),
        'reconstMirror': True,
        'appendFrame': True,
        'numSamples': len(signal),
    }

    # Only the magnitude is kept from the forward transform.
    _, magnitude, _ = forwardSTFT(signal, stft_params)

    _, _, residual = LSEE_MSTFTM_GriffinLim(magnitude, stft_params)

    return {'res': residual}
def run_initActivations():
    """Build pitched, drum and uniform activations for the running example.

    Reads the mixture recording and the melody / drum transcription text
    files from ``../data/``, derives the frame count and frame period from
    the STFT, and calls ``initActivations`` three times with the
    ``'pitched'``, ``'drums'`` and ``'uniform'`` strategies.

    Returns:
        dict: ``'pitchedH'``, ``'drumsH'`` and ``'uniformH'``, one entry per
        ``initActivations`` call.
    """
    data_dir = '../data/'
    wav_name = 'runningExample_IGotYouMixture.wav'

    # Read, downmix to mono if necessary, then convert int16 -> float32.
    sample_rate, signal = wav.read(os.path.join(data_dir, wav_name))
    signal = pcmInt16ToFloat32Numpy(make_monaural(signal))

    # Transcriptions that accompany the recording.
    melody_events = np.loadtxt(
        os.path.join(data_dir, 'runningExample_IGotYouMelody.txt'))
    drum_events = np.loadtxt(
        os.path.join(data_dir, 'runningExample_IGotYouDrums.txt'))

    # Spectral parameters.
    block_size = 2048
    stft_params = {
        'blockSize': block_size,
        'hopSize': 512,
        'winFunc': np.hanning(block_size),
        'reconstMirror': True,
        'appendFrame': True,
        'numSamples': len(signal),
    }

    # Only the shape of the complex spectrogram is needed here.
    spectrogram, _, _ = forwardSTFT(signal, stft_params)
    _, num_frames = spectrogram.shape
    frame_period = stft_params['hopSize'] / sample_rate

    # Score-informed activations for the melodic part: column 0 holds the
    # onsets, column 1 the pitches, column 2 the durations.
    activation_params = {
        'deltaT': frame_period,
        'numFrames': num_frames,
        'pitches': melody_events[:, 1],
        'onsets': melody_events[:, 0],
        'durations': melody_events[:, 2],
    }
    pitched_h = initActivations(activation_params, 'pitched')

    # Score-informed activations for the drum part. The same dict object is
    # reused, so the melody entries other than 'onsets' remain in it.
    activation_params.update({
        'drums': drum_events[:, 1],
        'onsets': drum_events[:, 0],
        'decay': 0.75,
    })
    drums_h = initActivations(activation_params, 'drums')

    # Uniform activations, from a fresh parameter dict.
    uniform_params = {'numComp': 30, 'numFrames': num_frames}
    uniform_h = initActivations(uniform_params, 'uniform')

    return {'pitchedH': pitched_h, 'drumsH': drums_h, 'uniformH': uniform_h}
def run_initTemplates():
    """Build pitched, drum and uniform templates for the running example.

    Reads the mixture recording and the melody transcription from
    ``../data/``, derives the bin count and frequency resolution from the
    STFT, and calls ``initTemplates`` three times with the ``'pitched'``,
    ``'drums'`` and ``'uniform'`` strategies, reusing one parameter dict.

    Returns:
        dict: ``'pitchedW'``, ``'drumsW'`` and ``'uniformW'``, one entry per
        ``initTemplates`` call.
    """
    data_dir = '../data/'
    wav_name = 'runningExample_IGotYouMixture.wav'

    # Read, downmix to mono if necessary, then convert int16 -> float32.
    sample_rate, signal = wav.read(os.path.join(data_dir, wav_name))
    signal = pcmInt16ToFloat32Numpy(make_monaural(signal))

    # Transcription that accompanies the recording.
    melody_events = np.loadtxt(
        os.path.join(data_dir, 'runningExample_IGotYouMelody.txt'))

    # Spectral parameters.
    block_size = 2048
    stft_params = {
        'blockSize': block_size,
        'hopSize': 512,
        'winFunc': np.hanning(block_size),
        'reconstMirror': True,
        'appendFrame': True,
        'numSamples': len(signal),
    }

    # Only the shape of the complex spectrogram is needed here.
    spectrogram, _, _ = forwardSTFT(signal, stft_params)
    num_bins, _ = spectrogram.shape
    freq_resolution = sample_rate / stft_params['blockSize']

    # Common settings.
    drum_components = 3
    template_frames = 8

    # Score-informed templates for the melodic part (pitches are column 1
    # of the transcription).
    template_params = {
        'deltaF': freq_resolution,
        'numBins': num_bins,
        'numTemplateFrames': template_frames,
        'pitches': melody_events[:, 1],
    }
    pitched_w = initTemplates(template_params, 'pitched')

    # Audio-informed templates for the drum part; the component count is
    # added to the same dict.
    template_params['numComp'] = drum_components
    drums_w = initTemplates(template_params, 'drums')

    # Uniform templates, again with the same (now extended) dict.
    uniform_w = initTemplates(template_params, 'uniform')

    return {'pitchedW': pitched_w, 'drumsW': drums_w, 'uniformW': uniform_w}
def run_HPSS_KAM():
    """Run ``HPSS_KAM_Fitzgerald`` twice on the running-example mixture.

    Reads ``runningExample_IGotYouMixture.wav`` from ``../data/``, computes
    its STFT magnitude and calls ``HPSS_KAM_Fitzgerald`` for one iteration
    with the fourth argument set to ``True`` and then to ``False``.

    Returns:
        dict: ``'medFitzGeraldA'``, ``'Kern'`` and ``'KernOrd'`` from the
        first call, and ``'convFitzGeraldA'`` (first return value) from the
        second call.
    """
    data_dir = '../data/'
    wav_name = 'runningExample_IGotYouMixture.wav'

    # Read the signal (sample rate is not used), downmix if necessary and
    # convert int16 -> float32.
    _, signal = wav.read(os.path.join(data_dir, wav_name))
    signal = pcmInt16ToFloat32Numpy(make_monaural(signal))

    block_size = 2048
    stft_params = {
        'blockSize': block_size,
        'hopSize': 512,
        'winFunc': np.hanning(block_size),
        'reconstMirror': True,
        'appendFrame': True,
        'numSamples': len(signal),
    }

    # Only the magnitude is used below.
    _, magnitude, _ = forwardSTFT(signal, stft_params)

    kam_iterations = 1

    # NOTE(review): judging by the result names, the boolean presumably
    # switches between median filtering (True) and convolution (False) --
    # confirm against HPSS_KAM_Fitzgerald.
    med_result, kernel, kernel_ord = HPSS_KAM_Fitzgerald(
        magnitude, kam_iterations, 15, True, 2)
    conv_result, _, _ = HPSS_KAM_Fitzgerald(
        magnitude, kam_iterations, 15, False, 2)

    # WARNING (from the original author): conv2 in MATLAB and convolve2d in
    # Python do not give the same result!
    return {
        'medFitzGeraldA': med_result,
        'convFitzGeraldA': conv_result,
        'Kern': kernel,
        'KernOrd': kernel_ord,
    }
def run_NMFconv():
    """Run ``NMFconv`` on the Amen break example and collect the results.

    Reads ``runningExample_AmenBreak.wav`` from ``../data``, computes its
    STFT, builds ``'drums'`` templates and ``'uniform'`` activations for
    three components, and runs ``NMFconv`` on the magnitude for three
    iterations with ``beta = 0``.

    Returns:
        dict: ``'nmfconvW'``, ``'nmfconvH'``, ``'nmfconvV'`` as returned by
        ``NMFconv``, and ``'divBeta'`` reshaped to a single row.
    """
    data_dir = '../data'
    wav_name = 'runningExample_AmenBreak.wav'

    # Read, downmix to mono if necessary, then convert int16 -> float32.
    sample_rate, signal = wav.read(os.path.join(data_dir, wav_name))
    signal = pcmInt16ToFloat32Numpy(make_monaural(signal))

    block_size = 2048
    stft_params = {
        'blockSize': block_size,
        'hopSize': 512,
        'winFunc': np.hanning(block_size),
        'reconstMirror': True,
        'appendFrame': True,
        'numSamples': len(signal),
    }

    # The complex spectrogram is only needed for its shape; the phase is
    # not used.
    spectrogram, magnitude, _ = forwardSTFT(signal, stft_params)
    num_bins, num_frames = spectrogram.shape
    freq_resolution = sample_rate / stft_params['blockSize']

    # Common settings.
    num_comp = 3
    num_iter = 3
    num_template_frames = 8

    # Initial guess for the templates.
    template_params = {
        'deltaF': freq_resolution,
        'numComp': num_comp,
        'numBins': num_bins,
        'numTemplateFrames': num_template_frames,
    }
    init_w = initTemplates(template_params, 'drums')

    # Initial activations.
    activation_params = {
        'numComp': num_comp,
        'numFrames': num_frames,
    }
    init_h = initActivations(activation_params, 'uniform')

    nmfconv_params = {
        'numComp': num_comp,
        'numFrames': num_frames,
        'numIter': num_iter,
        'numTemplateFrames': num_template_frames,
        'initW': init_w,
        'initH': init_h,
        'beta': 0,
    }

    # NMFconv core method.
    nmfconv_w, nmfconv_h, nmfconv_v, div_beta = NMFconv(magnitude,
                                                        nmfconv_params)

    return {
        'nmfconvW': nmfconv_w,
        'nmfconvH': nmfconv_h,
        'nmfconvV': nmfconv_v,
        # Stored as one row.
        'divBeta': div_beta.reshape(1, -1),
    }
def run_NMF():
    """Run ``NMF`` with three cost functions on the Amen break example.

    Reads ``runningExample_AmenBreak.wav`` from ``../data/`` and computes its
    STFT. A first ``NMFconv`` pass (three components, three iterations,
    ``beta = 0``) produces templates that are concatenated along axis 1 into
    the initial ``W`` for ``NMF``. ``NMF`` is then run on the magnitude with
    the ``'EucDist'``, ``'KLDiv'`` and ``'ISDiv'`` cost functions, reusing
    one parameter dict.

    Returns:
        dict: for each cost function ``C`` the keys ``'nmf<C>W'``,
        ``'nmf<C>H'`` and ``'nmf<C>V'`` holding the three values returned by
        ``NMF``.
    """
    data_dir = '../data/'
    wav_name = 'runningExample_AmenBreak.wav'

    # Read, downmix to mono if necessary, then convert int16 -> float32.
    sample_rate, signal = wav.read(os.path.join(data_dir, wav_name))
    signal = pcmInt16ToFloat32Numpy(make_monaural(signal))

    # Spectral parameters.
    block_size = 2048
    stft_params = {
        'blockSize': block_size,
        'hopSize': 512,
        'winFunc': np.hanning(block_size),
        'reconstMirror': True,
        'appendFrame': True,
        'numSamples': len(signal),
    }

    # The complex spectrogram is only needed for its shape; the phase is
    # not used.
    spectrogram, magnitude, _ = forwardSTFT(signal, stft_params)
    num_bins, num_frames = spectrogram.shape
    freq_resolution = sample_rate / stft_params['blockSize']

    # --- NMFconv pre-pass -------------------------------------------------
    num_comp = 3
    num_iter = 3
    num_template_frames = 8

    # Initial guess for the templates.
    template_params = {
        'deltaF': freq_resolution,
        'numComp': num_comp,
        'numBins': num_bins,
        'numTemplateFrames': num_template_frames,
    }
    conv_init_w = initTemplates(template_params, 'drums')

    # Initial activations.
    conv_init_h = initActivations(
        {'numComp': num_comp, 'numFrames': num_frames}, 'uniform')

    nmfconv_params = {
        'numComp': num_comp,
        'numFrames': num_frames,
        'numIter': num_iter,
        'numTemplateFrames': num_template_frames,
        'initW': conv_init_w,
        'initH': conv_init_h,
        'beta': 0,
    }
    nmfconv_w, _, nmfconv_v, _ = NMFconv(magnitude, nmfconv_params)

    # Alpha-Wiener filtering. Its output is not used by this driver; the
    # call is kept so the sequence of executed operations is unchanged.
    alphaWienerFilter(magnitude, nmfconv_v, 1)

    # --- NMF proper -------------------------------------------------------
    # Join the NMFconv templates side by side to form the initial W.
    init_w = np.concatenate(nmfconv_w, axis=1)
    num_comp = init_w.shape[1]
    num_iter = 3

    # Uniform (not random) initialization for the activations.
    init_h = initActivations(
        {'numComp': num_comp, 'numFrames': num_frames}, 'uniform')

    nmf_params = {
        'numComp': num_comp,
        'numFrames': num_frames,
        'numIter': num_iter,
        'initW': init_w,
        'initH': init_h,
    }

    # Run NMF once per cost function, in the original order, updating the
    # shared parameter dict in place between runs.
    python_res = {}
    for cost_func in ('EucDist', 'KLDiv', 'ISDiv'):
        nmf_params['costFunc'] = cost_func
        nmf_w, nmf_h, nmf_v = NMF(magnitude, nmf_params)
        python_res['nmf' + cost_func + 'W'] = nmf_w
        python_res['nmf' + cost_func + 'H'] = nmf_h
        python_res['nmf' + cost_func + 'V'] = nmf_v

    return python_res