def writeSeparatedSignals(self, suffix='.wav'):
    """Write the separated lead ("voc") and accompaniment ("mus")
    signals to the files registered in self.files.

    If ``suffix`` contains 'VUIMM', the unvoiced-augmented matrices
    WUF0/HUF0 are used instead of WF0/HF0, so the written lead signal
    also contains the estimated unvoiced elements.

    Parameters
    ----------
    suffix : string, optional
        Appended to the base output names (default '.wav'). Output
        names are built as files[...][:-4] + suffix, so the registered
        names are assumed to end with a 4-character extension ('.wav').
    """
    if 'VUIMM' in suffix:
        # unvoiced-augmented source matrices for the lead
        WF0 = self.SIMMParams['WUF0']
        HF0 = self.SIMMParams['HUF0']
    else:
        WF0 = self.SIMMParams['WF0']
        HF0 = self.SIMMParams['HF0']
    WGAMMA = self.SIMMParams['WGAMMA']
    HGAMMA = self.SIMMParams['HGAMMA']
    HPHI = self.SIMMParams['HPHI']
    HM = self.SIMMParams['HM']
    WM = self.SIMMParams['WM']
    alphaR = self.SIMMParams['alphaR']
    alphaL = self.SIMMParams['alphaL']
    betaR = self.SIMMParams['betaR']
    betaL = self.SIMMParams['betaL']
    windowSizeInSamples = self.stftParams['windowSizeInSamples']

    def synthesize(maskedSpectrum):
        # Inverse STFT of a Wiener-masked spectrum, then rescale back
        # to the original integer range/dtype of the input WAV data.
        samples = slf.istft(maskedSpectrum,
                            hopsize=self.stftParams['hopsize'],
                            nfft=self.stftParams['NFT'],
                            window=slf.sinebell(windowSizeInSamples)) / 4.0
        return np.array(np.round(samples * self.scaleData),
                        dtype=self.dataType)

    # Lead spectrum model: filter part (SPHI) times source part (SF0).
    SPHI = np.dot(np.dot(WGAMMA, HGAMMA), HPHI)
    SF0 = np.dot(WF0, HF0)

    # Per-channel accompaniment spectra (computed once per channel; the
    # original recomputed them for the music mask) and total mixture
    # estimates, floored at eps to avoid division by zero.
    SMR = np.dot(np.dot(WM, betaR ** 2), HM)
    SML = np.dot(np.dot(WM, betaL ** 2), HM)
    hatSXR = np.maximum((alphaR ** 2) * SF0 * SPHI + SMR, eps)
    hatSXL = np.maximum((alphaL ** 2) * SF0 * SPHI + SML, eps)

    # Wiener-filtered lead, one temporary per channel (the left-channel
    # masks were previously built in right-named variables).
    vestR = synthesize((alphaR ** 2) * SPHI * SF0 / hatSXR * self.XR)
    vestL = synthesize((alphaL ** 2) * SPHI * SF0 / hatSXL * self.XL)
    wav.write(self.files['voc_output_file'][:-4] + suffix,
              self.fs,
              np.array([vestR, vestL]).T)

    # Wiener-filtered accompaniment.
    mestR = synthesize(SMR / hatSXR * self.XR)
    mestL = synthesize(SML / hatSXL * self.XL)
    wav.write(self.files['mus_output_file'][:-4] + suffix,
              self.fs,
              np.array([mestR, mestL]).T)
def writeSeparatedSignals(self, suffix='.wav'):
    """Writes the separated signals to the files in self.files.

    If suffix contains 'VUIMM', then this method will take the WF0 and
    HF0 that contain the estimated unvoiced elements.
    """
    params = self.SIMMParams
    useUnvoiced = 'VUIMM' in suffix
    WF0 = params['WUF0'] if useUnvoiced else params['WF0']
    HF0 = params['HUF0'] if useUnvoiced else params['HF0']
    winSize = self.stftParams['windowSizeInSamples']

    # lead spectrum model: filter part times source part
    SPHI = np.dot(np.dot(params['WGAMMA'], params['HGAMMA']),
                  params['HPHI'])
    SF0 = np.dot(WF0, HF0)

    def render(maskedSpec):
        # inverse STFT followed by quantization back to the scale and
        # dtype of the original input samples
        wave = slf.istft(maskedSpec,
                         hopsize=self.stftParams['hopsize'],
                         nfft=self.stftParams['NFT'],
                         window=slf.sinebell(winSize)) / 4.0
        return np.array(np.round(wave * self.scaleData),
                        dtype=self.dataType)

    # process right then left channel with the same Wiener machinery
    vocChannels = []
    musChannels = []
    for alpha, beta, X in ((params['alphaR'], params['betaR'], self.XR),
                           (params['alphaL'], params['betaL'], self.XL)):
        SM = np.dot(np.dot(params['WM'], beta ** 2), params['HM'])
        hatSX = np.maximum((alpha ** 2) * SF0 * SPHI + SM, eps)
        vocChannels.append(render((alpha ** 2) * SPHI * SF0 / hatSX * X))
        musChannels.append(render(SM / hatSX * X))

    wav.write(self.files['voc_output_file'][:-4] + suffix,
              self.fs, np.array(vocChannels).T)
    wav.write(self.files['mus_output_file'][:-4] + suffix,
              self.fs, np.array(musChannels).T)
def __init__(self, inputAudioFilename, windowSize=0.0464, nbIter=10,
             numCompAccomp=40, minF0=60, maxF0=2000, stepNotes=16,
             K_numFilters=4, P_numAtomFilters=30, imageCanvas=None,
             wavCanvas=None, progressBar=None, verbose=True,
             outputDirSuffix='/'):
    """During init, process is initiated, STFTs are computed,
    and the parameters are stored.

    Parameters
    ----------
    inputAudioFilename : string
        filename of the input audio file
    windowSize : double, optional
        analysis frame ('windows') size, in s. By default, 0.0464s
    nbIter : integer, optional
        number of iterations for the estimation algorithm. By default, 10
    numCompAccomp : integer, optional
        number of components for the accompaniment, default = 40
    minF0 : double/integer, optional
        lowest F0 candidate (in Hz), default=60Hz
    maxF0 : double/integer, optional
        highest F0 candidate (in Hz), default=2000Hz
    stepNotes : integer, optional
        number of F0 candidates in one semitone, default=16 F0s/semitone
    K_numFilters : integer, optional
        number of filter spectral shapes, default=4
    P_numAtomFilters : integer, optional
        number of atomic filter smooth spectral shapes, default=30
    imageCanvas : MplCanvas/MplCanvas3Axes, optional
        an instance of the MplCanvas/MplCanvas3Axes, giving access to
        the axes where to draw the HF0 image. By default=None
    wavCanvas : MplCanvas/MplCanvas3Axes, optional
        an instance of the MplCanvas/MplCanvas3Axes, giving access to
        the axes to draw the waveform of the input signal.
    progressBar : boolean, optional
        ???
    verbose : boolean, optional
        Whether to write out or not information about the evolution of
        the algorithm. By default=False.
    outputDirSuffix : string, optional
        the subfolder name (to be appended to the full path to the audio
        signal), where the output files are going to be written.
        By default ='/'
    """
    # NOTE(review): self.files (and self.stftParams / self.SIMMParams,
    # written further down) are assumed to already exist, e.g. as
    # class-level dicts - their creation is not visible in this chunk.
    # If they really are class attributes, all instances share them;
    # confirm against the class definition.
    self.files['inputAudioFilename'] = str(inputAudioFilename)
    self.imageCanvas = imageCanvas
    self.wavCanvas = wavCanvas
    self.displayEvolution = True
    self.verbose = verbose
    # HF0 evolution can only be displayed if an image canvas was given:
    if self.imageCanvas is None:
        self.displayEvolution = False

    if inputAudioFilename[-4:] != ".wav":
        raise ValueError("File not WAV file? Only WAV format support, "+\
                         "for now...")

    # all output files go to <directory of input>/<outputDirSuffix>/
    self.files['outputDirSuffix'] = outputDirSuffix
    self.files['outputDir'] = str('/').join(\
        self.files['inputAudioFilename'].split('/')[:-1])+\
        '/'+self.files['outputDirSuffix'] +'/'
    if os.path.isdir(self.files['outputDir']):
        print "Output directory already existing - "+\
              "NB: overwriting files in:"
        print self.files['outputDir']
    else:
        print "Creating output directory"
        print self.files['outputDir']
        os.mkdir(self.files['outputDir'])

    # base name: output dir + input file name stripped of its extension
    self.files['pathBaseName'] = self.files['outputDir'] + \
                                 self.files['inputAudioFilename'\
                                     ].split('/')[-1][:-4]
    self.files['mus_output_file'] = str(self.files['pathBaseName']+\
                                        '_acc.wav')
    self.files['voc_output_file'] = str(self.files['pathBaseName']+\
                                        '_lead.wav')
    self.files['pitch_output_file'] = str(self.files['pathBaseName']+\
                                          '_pitches.txt')

    print "Writing the different following output files:"
    print " separated lead in", \
        self.files['voc_output_file']
    print " separated accompaniment in", \
        self.files['mus_output_file']
    print " separated lead + unvoc in", \
        self.files['voc_output_file'][:-4] + '_VUIMM.wav'
    print " separated acc - unvoc in", \
        self.files['mus_output_file'][:-4] + '_VUIMM.wav'
    print " estimated pitches in", \
        self.files['pitch_output_file']

    # read the WAV file and store the STFT
    self.fs, data = wav.read(self.files['inputAudioFilename'])
    # for some bad format wav files, data is a str?
    # cf. files from beat/tempo evaluation campaign of MIREX
    ## print self.fs, data
    self.scaleData = 1.2 * np.abs(data).max() # to rescale the data.
    self.dataType = data.dtype
    data = np.double(data) / self.scaleData # makes data vary from -1 to 1
    # shape[0] == size only holds for a 1-D array, i.e. a mono signal
    # (the previous comment here wrongly said "multi-channel"):
    if data.shape[0] == data.size:
        print "The audio file is not stereo. Making stereo out of mono."
        print "(You could also try the older separateLead.py...)"
        data = np.vstack([data,data]).T
    if data.shape[1] != 2:
        print "The data is multichannel, but not stereo... \n"
        print "Unfortunately this program does not scale well. Data is \n"
        print "reduced to its 2 first channels.\n"
        data = data[:,0:2]

    # parameters for the STFT:
    self.stftParams['windowSizeInSamples'] = \
        slf.nextpow2(np.round(windowSize * self.fs))
    # NOTE(review): Python-2 integer division of a power of 2 - the
    # hopsize is 1/8th of the window, in samples.
    self.stftParams['hopsize'] = self.stftParams['windowSizeInSamples']/8
    self.stftParams['NFT'] = self.stftParams['windowSizeInSamples']
    self.SIMMParams['niter'] = nbIter
    self.SIMMParams['R'] = numCompAccomp

    print "Some parameter settings:"
    print " Size of analysis windows: ", \
        self.stftParams['windowSizeInSamples']
    print " Hopsize: ", self.stftParams['hopsize']
    print " Size of Fourier transforms: ", self.stftParams['NFT']
    print " Number of iterations to be done: ",self.SIMMParams['niter']
    print " Number of elements in WM: ", self.SIMMParams['R']

    # STFT of each channel; F and N are the frequency-bin and frame
    # counts (identical for both channels, kept from the last call).
    self.XR, F, N = slf.stft(data[:,0], fs=self.fs,
                             hopsize=self.stftParams['hopsize'] ,
                             window=slf.sinebell(\
                                 self.stftParams['windowSizeInSamples']),
                             nfft=self.stftParams['NFT'] )
    self.XL, F, N = slf.stft(data[:,1], fs=self.fs,
                             hopsize=self.stftParams['hopsize'] ,
                             window=slf.sinebell(\
                                 self.stftParams['windowSizeInSamples']),
                             nfft=self.stftParams['NFT'] )
    # non need to store this.
    ## self.SXR = np.abs(self.XR) ** 2
    ## self.SXL = np.abs(self.XL) ** 2

    # drawing the waveform to wavCanvas:
    if not(self.wavCanvas is None):
        if self.wavCanvas==self.imageCanvas:
            # shared canvas: draw on its second axes, x-axis in frames
            # (sample index divided by hopsize)
            self.wavCanvas.ax2.clear()
            self.wavCanvas.ax2.plot(np.arange(data.shape[0]) / \
                                    np.double(self.stftParams['hopsize']), \
                                    data)
            #self.wavCanvas.ax2.plot(np.arange(data.shape[0]) / \
            #                        np.double(self.fs), \
            #                        data)
            self.wavCanvas.ax2.axis('tight')
            self.wavCanvas.draw()
        else:
            # dedicated canvas: x-axis in seconds
            self.wavCanvas.ax.clear()
            self.wavCanvas.ax.plot(np.arange(data.shape[0]) / \
                                   np.double(self.fs), \
                                   data)
            self.wavCanvas.ax.axis('tight')
            self.wavCanvas.draw()

    # raw samples no longer needed once the STFTs are stored:
    del data, F, N

    # TODO: also process these as options:
    self.SIMMParams['minF0'] = minF0
    self.SIMMParams['maxF0'] = maxF0
    self.F, self.N = self.XR.shape
    # this is the number of F0s within one semitone
    self.SIMMParams['stepNotes'] = stepNotes
    # number of spectral shapes for the filter part
    self.SIMMParams['K'] = K_numFilters
    # number of elements in dictionary of smooth filters
    self.SIMMParams['P'] = P_numAtomFilters
    # number of chirped spectral shapes between each F0
    # this feature should be further studied before
    # we find a good way of doing that.
    self.SIMMParams['chirpPerF0'] = 1
    self.scopeAllowedHF0 = 4.0 / 1.0

    # Create the harmonic combs, for each F0 between minF0 and maxF0:
    self.SIMMParams['F0Table'], WF0 = \
        slf.generate_WF0_chirped(minF0=self.SIMMParams['minF0'],
                                 maxF0=self.SIMMParams['maxF0'],
                                 Fs=self.fs,
                                 Nfft=self.stftParams['NFT'],\
                                 stepNotes=self.SIMMParams['stepNotes'],\
                                 lengthWindow=\
                                     self.stftParams['windowSizeInSamples'],
                                 Ot=0.25,\
                                 perF0=self.SIMMParams['chirpPerF0'],\
                                 depthChirpInSemiTone=.15, loadWF0=True,\
                                 analysisWindow='sinebell')
    self.SIMMParams['WF0'] = WF0[:self.F, :] # ensure same size as SX
    # number of harmonic combs
    self.SIMMParams['NF0'] = self.SIMMParams['F0Table'].size
    # Normalization:
    # by max or by sum?
    # each comb (column) is normalized so its bins sum to 1:
    self.SIMMParams['WF0'] = self.SIMMParams['WF0'] / \
        np.outer(np.ones(self.F), \
                 np.sum(self.SIMMParams['WF0'], axis=0))

    # for debug:
    if False: #DEBUG
        self.imageCanvas.ax.imshow(np.log(np.abs(self.XR)),
                                   aspect='auto',origin='lower')
        self.imageCanvas.draw()
        raise KeyboardInterrupt("Check these matrices !")
    if False: #DEBUG
        from IPython.Shell import IPShellEmbed
        ipshell = IPShellEmbed()
        ipshell()
        plt.figure()
        plt.imshow(np.log(self.SIMMParams['WF0']),
                   aspect='auto', origin='lower',)
        plt.figure()
        plt.imshow(np.log(np.abs(self.XR)),aspect='auto',origin='lower')

    # Create the dictionary of smooth filters, for the filter part of
    # the lead isntrument:
    self.SIMMParams['WGAMMA'] = \
        slf.generateHannBasis(numberFrequencyBins=self.F,
                              sizeOfFourier=self.stftParams['NFT'],
                              Fs=self.fs,
                              frequencyScale='linear',
                              numberOfBasis=self.SIMMParams['P'],
                              overlap=.75)
def __init__(self, inputAudioFilename, windowSize=0.0464, nbIter=10,
             numCompAccomp=40, minF0=60, maxF0=2000, stepNotes=16,
             K_numFilters=4, P_numAtomFilters=30, imageCanvas=None,
             wavCanvas=None, progressBar=None, verbose=True,
             outputDirSuffix='/'):
    """During init, process is initiated, STFTs are computed,
    and the parameters are stored.

    Parameters
    ----------
    inputAudioFilename : string
        filename of the input audio file
    windowSize : double, optional
        analysis frame ('windows') size, in s. By default, 0.0464s
    nbIter : integer, optional
        number of iterations for the estimation algorithm. By default, 10
    numCompAccomp : integer, optional
        number of components for the accompaniment, default = 40
    minF0 : double/integer, optional
        lowest F0 candidate (in Hz), default=60Hz
    maxF0 : double/integer, optional
        highest F0 candidate (in Hz), default=2000Hz
    stepNotes : integer, optional
        number of F0 candidates in one semitone, default=16 F0s/semitone
    K_numFilters : integer, optional
        number of filter spectral shapes, default=4
    P_numAtomFilters : integer, optional
        number of atomic filter smooth spectral shapes, default=30
    imageCanvas : MplCanvas/MplCanvas3Axes, optional
        an instance of the MplCanvas/MplCanvas3Axes, giving access to
        the axes where to draw the HF0 image. By default=None
    wavCanvas : MplCanvas/MplCanvas3Axes, optional
        an instance of the MplCanvas/MplCanvas3Axes, giving access to
        the axes to draw the waveform of the input signal.
    progressBar : boolean, optional
        ???
    verbose : boolean, optional
        Whether to write out or not information about the evolution of
        the algorithm. By default=False.
    outputDirSuffix : string, optional
        the subfolder name (to be appended to the full path to the audio
        signal), where the output files are going to be written.
        By default ='/'
    """
    # NOTE(review): self.files (and self.stftParams / self.SIMMParams,
    # written further down) are assumed to already exist, e.g. as
    # class-level dicts - their creation is not visible in this chunk.
    # If they really are class attributes, all instances share them;
    # confirm against the class definition.
    self.files['inputAudioFilename'] = str(inputAudioFilename)
    self.imageCanvas = imageCanvas
    self.wavCanvas = wavCanvas
    self.displayEvolution = True
    self.verbose = verbose
    # HF0 evolution can only be displayed if an image canvas was given:
    if self.imageCanvas is None:
        self.displayEvolution = False

    if inputAudioFilename[-4:] != ".wav":
        raise ValueError("File not WAV file? Only WAV format support, "+\
                         "for now...")

    # all output files go to <directory of input>/<outputDirSuffix>/
    self.files['outputDirSuffix'] = outputDirSuffix
    self.files['outputDir'] = str('/').join(\
        self.files['inputAudioFilename'].split('/')[:-1])+\
        '/'+self.files['outputDirSuffix'] +'/'
    if os.path.isdir(self.files['outputDir']):
        print "Output directory already existing - "+\
              "NB: overwriting files in:"
        print self.files['outputDir']
    else:
        print "Creating output directory"
        print self.files['outputDir']
        os.mkdir(self.files['outputDir'])

    # base name: output dir + input file name stripped of its extension
    self.files['pathBaseName'] = self.files['outputDir'] + \
                                 self.files['inputAudioFilename'\
                                     ].split('/')[-1][:-4]
    self.files['mus_output_file'] = str(self.files['pathBaseName']+\
                                        '_acc.wav')
    self.files['voc_output_file'] = str(self.files['pathBaseName']+\
                                        '_lead.wav')
    self.files['pitch_output_file'] = str(self.files['pathBaseName']+\
                                          '_pitches.txt')

    print "Writing the different following output files:"
    print " separated lead in", \
        self.files['voc_output_file']
    print " separated accompaniment in", \
        self.files['mus_output_file']
    print " separated lead + unvoc in", \
        self.files['voc_output_file'][:-4] + '_VUIMM.wav'
    print " separated acc - unvoc in", \
        self.files['mus_output_file'][:-4] + '_VUIMM.wav'
    print " estimated pitches in", \
        self.files['pitch_output_file']

    # read the WAV file and store the STFT
    self.fs, data = wav.read(self.files['inputAudioFilename'])
    # for some bad format wav files, data is a str?
    # cf. files from beat/tempo evaluation campaign of MIREX
    ## print self.fs, data
    self.scaleData = 1.2 * np.abs(data).max() # to rescale the data.
    self.dataType = data.dtype
    data = np.double(data) / self.scaleData # makes data vary from -1 to 1
    # shape[0] == size only holds for a 1-D array, i.e. a mono signal
    # (the previous comment here wrongly said "multi-channel"):
    if data.shape[0] == data.size:
        print "The audio file is not stereo. Making stereo out of mono."
        print "(You could also try the older separateLead.py...)"
        data = np.vstack([data, data]).T
    if data.shape[1] != 2:
        print "The data is multichannel, but not stereo... \n"
        print "Unfortunately this program does not scale well. Data is \n"
        print "reduced to its 2 first channels.\n"
        data = data[:, 0:2]

    # parameters for the STFT:
    self.stftParams['windowSizeInSamples'] = \
        slf.nextpow2(np.round(windowSize * self.fs))
    # NOTE(review): Python-2 integer division of a power of 2 - the
    # hopsize is 1/8th of the window, in samples.
    self.stftParams['hopsize'] = self.stftParams['windowSizeInSamples'] / 8
    self.stftParams['NFT'] = self.stftParams['windowSizeInSamples']
    self.SIMMParams['niter'] = nbIter
    self.SIMMParams['R'] = numCompAccomp

    print "Some parameter settings:"
    print " Size of analysis windows: ", \
        self.stftParams['windowSizeInSamples']
    print " Hopsize: ", self.stftParams['hopsize']
    print " Size of Fourier transforms: ", self.stftParams['NFT']
    print " Number of iterations to be done: ", self.SIMMParams['niter']
    print " Number of elements in WM: ", self.SIMMParams['R']

    # STFT of each channel; F and N are the frequency-bin and frame
    # counts (identical for both channels, kept from the last call).
    self.XR, F, N = slf.stft(data[:,0], fs=self.fs,
                             hopsize=self.stftParams['hopsize'] ,
                             window=slf.sinebell(\
                                 self.stftParams['windowSizeInSamples']),
                             nfft=self.stftParams['NFT'] )
    self.XL, F, N = slf.stft(data[:,1], fs=self.fs,
                             hopsize=self.stftParams['hopsize'] ,
                             window=slf.sinebell(\
                                 self.stftParams['windowSizeInSamples']),
                             nfft=self.stftParams['NFT'] )
    # non need to store this.
    ## self.SXR = np.abs(self.XR) ** 2
    ## self.SXL = np.abs(self.XL) ** 2

    # drawing the waveform to wavCanvas:
    if not (self.wavCanvas is None):
        if self.wavCanvas == self.imageCanvas:
            # shared canvas: draw on its second axes, x-axis in frames
            # (sample index divided by hopsize)
            self.wavCanvas.ax2.clear()
            self.wavCanvas.ax2.plot(np.arange(data.shape[0]) / \
                                    np.double(self.stftParams['hopsize']), \
                                    data)
            #self.wavCanvas.ax2.plot(np.arange(data.shape[0]) / \
            #                        np.double(self.fs), \
            #                        data)
            self.wavCanvas.ax2.axis('tight')
            self.wavCanvas.draw()
        else:
            # dedicated canvas: x-axis in seconds
            self.wavCanvas.ax.clear()
            self.wavCanvas.ax.plot(np.arange(data.shape[0]) / \
                                   np.double(self.fs), \
                                   data)
            self.wavCanvas.ax.axis('tight')
            self.wavCanvas.draw()

    # raw samples no longer needed once the STFTs are stored:
    del data, F, N

    # TODO: also process these as options:
    self.SIMMParams['minF0'] = minF0
    self.SIMMParams['maxF0'] = maxF0
    self.F, self.N = self.XR.shape
    # this is the number of F0s within one semitone
    self.SIMMParams['stepNotes'] = stepNotes
    # number of spectral shapes for the filter part
    self.SIMMParams['K'] = K_numFilters
    # number of elements in dictionary of smooth filters
    self.SIMMParams['P'] = P_numAtomFilters
    # number of chirped spectral shapes between each F0
    # this feature should be further studied before
    # we find a good way of doing that.
    self.SIMMParams['chirpPerF0'] = 1
    self.scopeAllowedHF0 = 4.0 / 1.0

    # Create the harmonic combs, for each F0 between minF0 and maxF0:
    self.SIMMParams['F0Table'], WF0 = \
        slf.generate_WF0_chirped(minF0=self.SIMMParams['minF0'],
                                 maxF0=self.SIMMParams['maxF0'],
                                 Fs=self.fs,
                                 Nfft=self.stftParams['NFT'],\
                                 stepNotes=self.SIMMParams['stepNotes'],\
                                 lengthWindow=\
                                     self.stftParams['windowSizeInSamples'],
                                 Ot=0.25,\
                                 perF0=self.SIMMParams['chirpPerF0'],\
                                 depthChirpInSemiTone=.15, loadWF0=True,\
                                 analysisWindow='sinebell')
    self.SIMMParams['WF0'] = WF0[:self.F, :] # ensure same size as SX
    # number of harmonic combs
    self.SIMMParams['NF0'] = self.SIMMParams['F0Table'].size
    # Normalization:
    # by max or by sum?
    # each comb (column) is normalized so its bins sum to 1:
    self.SIMMParams['WF0'] = self.SIMMParams['WF0'] / \
        np.outer(np.ones(self.F), \
                 np.sum(self.SIMMParams['WF0'], axis=0))

    # for debug:
    if False: #DEBUG
        self.imageCanvas.ax.imshow(np.log(np.abs(self.XR)),
                                   aspect='auto', origin='lower')
        self.imageCanvas.draw()
        raise KeyboardInterrupt("Check these matrices !")
    if False: #DEBUG
        from IPython.Shell import IPShellEmbed
        ipshell = IPShellEmbed()
        ipshell()
        plt.figure()
        plt.imshow(
            np.log(self.SIMMParams['WF0']),
            aspect='auto',
            origin='lower',
        )
        plt.figure()
        plt.imshow(np.log(np.abs(self.XR)), aspect='auto', origin='lower')

    # Create the dictionary of smooth filters, for the filter part of
    # the lead isntrument:
    self.SIMMParams['WGAMMA'] = \
        slf.generateHannBasis(numberFrequencyBins=self.F,
                              sizeOfFourier=self.stftParams['NFT'],
                              Fs=self.fs,
                              frequencyScale='linear',
                              numberOfBasis=self.SIMMParams['P'],
                              overlap=.75)