def __call__(self, combo): c = np.array(combo) chunk_size = self.chunk_size db = self.db tt=transformFFT(frameSize=4096, hopSize=512, sampleRate=44100, window=blackmanharris) maxLength=0 for i in range(len(self.sources)): instlen = int(util.getMidiLength(self.sources_midi[i]+'_g'+self.style_midi[s],os.path.join(db,f))) if instlen>maxLength: maxLength = instlen if chunk_size>maxLength: chunk_size = maxLength for chnk in range(int(np.floor(maxLength/chunk_size))): chunk_start = chunk_size * chnk chunk_end = (chnk+1) * chunk_size if not os.path.isfile(os.path.join(feature_path,f,self.style[s],f+'_'+str(c)+'_'+str(chnk)+'.data')): try: for i in range(len(self.sources)): nframes = int(np.ceil(chunk_size*self.sampleRate / np.double(tt.hopSize))) + 2 size = int(chunk_size*self.sampleRate-int(np.max( c[:,0].astype(float))*self.sampleRate)) if self.sampleRate != 44100: print 'sample rate is not consistent' if i==0: audio = np.zeros((size,len(self.sources)+1)) melody,melodyBegin,melodyEnd,melNotes = util.getMidi(self.sources_midi[i]+'_g'+self.style_midi[s],os.path.join(db,f),chunk_start,chunk_end,self.sampleRate,tt.hopSize,tt.frameSize,c[i,0],c[i,0],nframes,1) #generate the audio, note by note for m in range(len(melNotes)): note = self.instruments[i].getNote(melNotes[m],self.allowed_dynamics[int(c[i,1])],self.allowed_styles[int(c[i,2])],int(c[i,3])) if note is None: raise GetOutOfLoop else: segment = note.getAudio(max_duration=melodyEnd[m]-melodyBegin[m]) if len(segment)>(len(audio)-int(np.floor(melodyBegin[m]*self.sampleRate))): audio[int(np.floor(melodyBegin[m]*self.sampleRate)):int(np.floor(melodyBegin[m]*self.sampleRate)+len(segment)),i+1] = segment[:len(audio)-int(np.floor(melodyBegin[m]*self.sampleRate))] else: audio[int(np.floor(melodyBegin[m]*self.sampleRate)):int(np.floor(melodyBegin[m]*self.sampleRate)+len(segment)),i+1] = segment segment = None note = None segment = None audio[:,0] = np.sum(audio[:,1:len(self.sources)+1],axis=1) 
tt.compute_transform(audio,os.path.join(feature_path,f,self.style[s],f+'_'+str(c).encode('base64','strict')+'_'+str(chnk)+'.data'),phase=False) audio = None melody= None except GetOutOfLoop: pass
# NOTE(review): whitespace-mangled fragment -- many statements collapsed onto
# one physical line. It opens with a dangling `else:` whose matching `if` lies
# before this chunk, and the `for f in os.listdir(...)` loop body is truncated
# at the end of the line ("#read pitches ..."), so the original indentation
# cannot be recovered with confidence; the line is preserved byte-for-byte.
#
# What it does (as far as visible): sets feature_path to db/transforms/t1,
# asserts the iKala dataset directory exists, iterates the .wav files under
# db/Wavfile, reads each with util.readAudioScipy, lazily constructs a
# transformFFT (frameSize=1024, hopSize=512) from the first file's sample
# rate, computes pitchhop = 0.032 * sampleRate (seconds-to-frames factor),
# asserts the sample rate is 44100, and builds a 3-column array:
# column 0 = mixture (ch0 + ch1), column 1 = voice (ch1),
# column 2 = accompaniment (ch0), then drops the raw audio object.
else: feature_path = os.path.join(db, 'transforms', 't1') assert os.path.isdir( db ), "Please input the directory for the iKala dataset with --db path_to_iKala" tt = None for f in os.listdir(os.path.join(db, "Wavfile")): if f.endswith(".wav"): #read the audio file audioObj, sampleRate, bitrate = util.readAudioScipy( os.path.join(db, "Wavfile", f)) if tt is None: #initialize the transform object which will compute the STFT tt = transformFFT(frameSize=1024, hopSize=512, sampleRate=sampleRate, window=blackmanharris) pitchhop = 0.032 * float(sampleRate) #seconds to frames assert sampleRate == 44100, "Sample rate needs to be 44100" audio = np.zeros((audioObj.shape[0], 3)) audio[:, 0] = audioObj[:, 0] + audioObj[:, 1] #create mixture voice + accompaniment audio[:, 1] = audioObj[:, 1] #voice audio[:, 2] = audioObj[:, 0] #accompaniment audioObj = None #read pitches so they can be written as separate features
# Training-script setup: build the file list and dataset statistics.
# NOTE(review): this fragment reads names defined earlier in the original
# script (kwargs, db, feature_path, nsamples, batch_size, batch_memory,
# time_context, overlap, nprocs, scale_factor) -- confirm they are in scope.
if getattr(kwargs, 'skip'):
    # idiomatic getattr() replaces the unusual kwargs.__getattribute__('skip')
    skip = int(getattr(kwargs, 'skip'))
else:
    # NOTE(review): falsy default kept as False (not 0) to preserve behavior,
    # although the other branch produces an int.
    skip = False

testfile_list = []
# collect test subdirectories whose names start with a digit
for f in sorted(os.listdir(db)):
    if os.path.isdir(os.path.join(db, f)) and f[0].isdigit():
        testfile_list.append(f)

# FIX: removed the dead store `path_in = []` that was immediately
# overwritten by the assignment below.
path_in = feature_path

# tt object needs to be the same as the one in compute_features
tt = transformFFT(frameSize=4096, hopSize=512, sampleRate=44100, window=blackmanharris)
ld1 = LargeDataset(path_transform_in=path_in, nsources=4, nsamples=nsamples,
                   batch_size=batch_size, batch_memory=batch_memory,
                   time_context=time_context, overlap=overlap, nprocs=nprocs,
                   mult_factor_in=scale_factor, mult_factor_out=scale_factor)
logging.info(" Maximum:\t\t{:.6f}".format(ld1.getMax()))
logging.info(" Mean:\t\t{:.6f}".format(ld1.getMean()))
logging.info(" Standard dev:\t\t{:.6f}".format(ld1.getStd()))
def __call__(self, combo):
    """Render the parameter combination *combo* to audio and save STFT features
    plus per-note melody tensors.

    Like the simpler variant of this method, but additionally builds two
    note-event tensors per chunk via util.expandMidi -- melody_g (nominal
    timing) and melody_e (timing shifted by +0.2 with a fermata at +0.5;
    exact semantics depend on util.expandMidi, not visible here) -- and
    saves them with tt.saveTensor.
    """
    c = np.array(combo)
    chunk_size = self.chunk_size
    db = self.db
    feature_path = self.feature_path
    # STFT parameters; must match the transform used at training time.
    tt = transformFFT(frameSize=4096, hopSize=512, sampleRate=44100, window=blackmanharris)

    # Longest MIDI track among the sources bounds the total render length.
    maxLength = 0
    for i in range(len(self.sources)):
        instlen = util.getMidiLength(
            self.sources_midi[i] + '_g' + self.style_midi, db)
        if instlen > maxLength:
            maxLength = instlen
    if chunk_size > maxLength:
        chunk_size = maxLength

    # Process the piece in fixed-size chunks.
    for chnk in range(int(np.floor(maxLength / chunk_size))):
        chunk_start = float(chunk_size * chnk)
        chunk_end = float((chnk + 1) * chunk_size)
        # Skip chunks whose feature file already exists (name encodes the
        # combination via base64 so it is filesystem-safe).
        if not os.path.isfile(
                os.path.join(
                    feature_path, self.style,
                    str(c).encode('base64', 'strict') + '_' + str(chnk) + '.data')):
            try:
                # nelem_g = max number of note events over all sources; used to
                # size the (sources, events, features) melody tensors uniformly.
                nelem_g = 1
                for i in range(len(self.sources)):
                    ng = util.getMidiNum(
                        self.sources_midi[i] + '_g' + self.style_midi, db,
                        chunk_start, chunk_end)
                    nelem_g = np.maximum(ng, nelem_g)
                # 2*nharmonics+3 feature columns per event; layout defined by
                # util.expandMidi (columns 0/1 look like start/end frames and
                # column 2 the note number, judging by their use below).
                melody_g = np.zeros((len(self.sources), int(nelem_g),
                                     2 * self.nharmonics + 3))
                melody_e = np.zeros((len(self.sources), int(nelem_g),
                                     2 * self.nharmonics + 3))
                for i in range(len(self.sources)):
                    # number of STFT frames covering the chunk (+2 padding)
                    nframes = int(
                        np.ceil(chunk_size * self.sampleRate /
                                np.double(tt.hopSize))) + 2
                    # shorten the buffer by the largest per-source offset in c[:, 0]
                    size = int(chunk_size * self.sampleRate - int(
                        np.max(c[:, 0].astype(float)) * self.sampleRate))
                    if self.sampleRate != 44100:
                        print 'sample rate is not consistent'
                    if i == 0:
                        # column 0 = mixture, columns 1..n = individual sources
                        audio = np.zeros((size, len(self.sources) + 1))
                    # nominal-timing note events for this source
                    tmp = util.expandMidi(
                        self.sources_midi[i] + '_g' + self.style_midi, db,
                        chunk_start, chunk_end, self.interval,
                        self.tuning_freq, self.nharmonics, self.sampleRate,
                        tt.hopSize, tt.frameSize, c[i, 0], c[i, 0], nframes)
                    melody_g[i, :tmp.shape[0], :] = tmp
                    tmp = None
                    # shifted-timing variant (+0.2 offset, fermata at +0.5)
                    tmp = util.expandMidi(self.sources_midi[i] + '_g' +
                                          self.style_midi, db, chunk_start,
                                          chunk_end, self.interval,
                                          self.tuning_freq, self.nharmonics,
                                          self.sampleRate, tt.hopSize,
                                          tt.frameSize, c[i, 0] + 0.2,
                                          c[i, 0] + 0.2, nframes,
                                          fermata=c[i, 0] + 0.5)
                    melody_e[i, :tmp.shape[0], :] = tmp
                    tmp = None
                    #generate the audio, note by note
                    for m in range(nelem_g):
                        # column 2 > 0 marks a real note event (zero rows are padding)
                        if melody_g[i, m, 2] > 0:
                            note = self.instruments[i].getNote(
                                melody_g[i, m, 2],
                                self.allowed_dynamics[int(c[i, 1])],
                                self.allowed_styles[int(c[i, 2])], int(c[i, 3]))
                            if note is None:
                                # no sample for this note/dynamics/style:
                                # abort the whole chunk via the sentinel exception
                                raise GetOutOfLoop
                            else:
                                # note duration in seconds: (end-start) frames
                                # * hopSize / sampleRate
                                segment = note.getAudio(
                                    max_duration=float(melody_g[i, m, 1] -
                                                       melody_g[i, m, 0]) *
                                    tt.hopSize / self.sampleRate)
                                # frame index * hopSize = sample index; clip the
                                # note if it would run past the buffer end
                                if len(segment) > (len(audio) - int(
                                        np.floor(melody_g[i, m, 0] * tt.hopSize))):
                                    audio[int(
                                        np.floor(melody_g[i, m, 0] * tt.hopSize)
                                    ):int(
                                        np.floor(melody_g[i, m, 0] * tt.hopSize)
                                        + len(segment)), i + 1] = segment[:len(audio) - int(
                                            np.floor(melody_g[i, m, 0] * tt.hopSize))]
                                else:
                                    audio[int(
                                        np.floor(melody_g[i, m, 0] * tt.hopSize)
                                    ):int(
                                        np.floor(melody_g[i, m, 0] * tt.hopSize)
                                        + len(segment)), i + 1] = segment
                                segment = None
                            note = None
                            segment = None
                # mixture = sum of all rendered sources
                audio[:, 0] = np.sum(audio[:, 1:len(self.sources) + 1], axis=1)
                tt.compute_transform(
                    audio,
                    os.path.join(
                        feature_path, self.style,
                        str(c).encode('base64', 'strict') + '_' + str(chnk) +
                        '.data'),
                    phase=False)
                # persist the note-event tensors alongside the features
                tt.saveTensor(melody_g, '__g_')
                tt.saveTensor(melody_e, '__e_')
                audio = None
                melody_g = None
                melody_e = None
            except GetOutOfLoop:
                # a required sample was missing -- skip this chunk silently
                pass
# NOTE(review): whitespace-mangled fragment -- many statements collapsed onto
# one physical line. It starts mid-`if`: the first assignment to nsamples is
# the body of an `if` whose head lies before this chunk, followed by its
# dangling `else:`. Original indentation cannot be recovered with confidence;
# the line is preserved byte-for-byte.
#
# What it does (as far as visible): reads nsamples and the model-builder
# function name from kwargs (defaulting to 'build_ca' and validating it
# against the funcs dispatch dict), builds a transformFFT matching
# compute_features (frameSize=1024, hopSize=512, hanning window), constructs
# a LargeDatasetMulti over [feature_path], logs its input/output statistics
# (max/min/mean/std), and creates the output/<model> directory if missing.
# Note the stray "\ " mid-line: an embedded line-continuation from the
# original source. kwargs.__getattribute__('x') is an unusual spelling of
# getattr(kwargs, 'x').
nsamples = int(kwargs.__getattribute__('nsamples')) else: nsamples = 0 if kwargs.__getattribute__('function'): function = kwargs.__getattribute__('function') else: function = 'build_ca' funcs = {'build_ca': build_ca} if function not in funcs: function = 'build_ca' path_in = [feature_path] #tt object needs to be the same as the one in compute_features tt = transformFFT(frameSize=1024, hopSize=512, sampleRate=44100, window=hanning) ld1 = LargeDatasetMulti(path_transform_in=path_in, nsources=4, nsamples=nsamples, batch_size=batch_size, batch_memory=batch_memory, time_context=time_context, overlap=overlap, nprocs=nprocs,mult_factor_in=scale_factor,mult_factor_out=scale_factor,\ sampleRate=tt.sampleRate,tensortype=theano.config.floatX) logging.info(" Maximum input:\t\t{:.6f}".format(ld1.getMax())) logging.info(" Minimum input:\t\t{:.6f}".format(ld1.getMin())) logging.info(" Mean input:\t\t{:.6f}".format(ld1.getMean())) logging.info(" Standard dev input:\t\t{:.6f}".format(ld1.getStd())) logging.info(" Maximum:\t\t{:.6f}".format(ld1.getMax(inputs=False))) logging.info(" Minimum:\t\t{:.6f}".format(ld1.getMin(inputs=False))) logging.info(" Mean:\t\t{:.6f}".format(ld1.getMean(inputs=False))) logging.info(" Standard dev:\t\t{:.6f}".format(ld1.getStd(inputs=False))) if not os.path.exists(os.path.join(output, 'output', model)): os.makedirs(os.path.join(output, 'output', model))