def pyaudioread_wrapper(fn, tlast=-1, fs_target=-1, stats_only=False):
    """Read an audio file through audiolab (internal use only).

    Parameters:
      fn          path passed to audiolab's ``sndfile``.
      tlast       last second to read; -1 (default) reads every frame.
                  Otherwise ``int(fs * tlast + .5)`` frames are read, capped
                  at the number of frames in the file.
      fs_target   desired sampling rate.  When positive and different from
                  the file's rate, the frames are resampled with
                  pysamplerate ('sinc_fastest').
      stats_only  when true, nothing is decoded and the triple
                  ``(len_sec, fs, channel_count)`` is returned instead.

    Returns:
      ``(x, fs)`` -- the frames read (resampled if requested) and the
      sampling rate they are at -- or the stats triple described above.
    """
    try:
        import pyaudiolab as audiolab
    except ImportError:
        # Fall back to the scikits packaging of the same library.
        import scikits.audiolab as audiolab

    snd = audiolab.sndfile(fn)
    try:
        fs = snd.get_samplerate()
        nframes = snd.get_nframes()
        if stats_only:
            # len_sec, fs, channel count -- as stored on disk.
            return (int(round(float(nframes) / fs)), fs, snd.get_channels())
        if tlast == -1:
            fcount = nframes
        else:
            fcount = min(nframes, int(fs * tlast + .5))
        x = snd.read_frames(fcount)
    finally:
        # Release the handle on every path, including the stats_only
        # early return and a failing read.
        snd.close()

    if fs_target > 0 and not fs_target == fs:
        import pysamplerate
        x = pysamplerate.resample(x, float(fs_target) / float(fs),
                                  pysamplerate.converter_format('sinc_fastest'),
                                  verbose=False)
        fs = fs_target
    return (x, fs)
def pyaudioread_wrapper(fn, tlast=-1, fs_target=-1, stats_only=False):
    """Read an audio file through audiolab (internal use only).

    Parameters:
      fn          path passed to audiolab's ``sndfile``.
      tlast       last second to read; -1 (default) reads every frame.
                  Otherwise ``int(fs * tlast + .5)`` frames are read, capped
                  at the number of frames in the file.
      fs_target   desired sampling rate.  When positive and different from
                  the file's rate, the frames are resampled with
                  pysamplerate ('sinc_fastest').
      stats_only  when true, nothing is decoded and the triple
                  ``(len_sec, fs, channel_count)`` is returned instead.

    Returns:
      ``(x, fs)`` -- the frames read (resampled if requested) and the
      sampling rate they are at -- or the stats triple described above.
    """
    try:
        import pyaudiolab as audiolab
    except ImportError:
        # Fall back to the scikits packaging of the same library.
        import scikits.audiolab as audiolab

    snd = audiolab.sndfile(fn)
    try:
        fs = snd.get_samplerate()
        nframes = snd.get_nframes()
        if stats_only:
            # len_sec, fs, channel count -- as stored on disk.
            return (int(round(float(nframes) / fs)), fs, snd.get_channels())
        if tlast == -1:
            fcount = nframes
        else:
            fcount = min(nframes, int(fs * tlast + .5))
        x = snd.read_frames(fcount)
    finally:
        # Release the handle on every path, including the stats_only
        # early return and a failing read.
        snd.close()

    if fs_target > 0 and not fs_target == fs:
        import pysamplerate
        x = pysamplerate.resample(x, float(fs_target) / float(fs),
                                  pysamplerate.converter_format('sinc_fastest'),
                                  verbose=False)
        fs = fs_target
    return (x, fs)
def audioread(fn, mono=False, tlast=-1, fs_target=-1, stripzeros='none', stats_only=False, decoder='madplay'):
    """Read several formats of audio (mp3, midi, anything audiolab opens).

    Parameters:
      fn          path of the file; the extension selects the reader.
      mono        yields a mono signal (channels are averaged, or mpg123
                  is asked to downmix).
      tlast       desired last sample, in seconds; values <= 0 keep all.
      fs_target   target sampling rate; decoder decimation and/or
                  pysamplerate resampling are used to reach it.
      stripzeros  removes near-zero values from the beginning and/or end
                  of the signal: 'none', 'both', 'leading' or 'trailing'.
                  Any other value is reported and treated as 'none'.
      stats_only  return the triple (n_sec, fs, num_channels) of the song
                  as written on disk, ignoring stripzeros, fs_target and
                  tlast.  Seconds rather than samples are reported because
                  that is available without decoding.  Honoured for mp3
                  and audiolab files only; the midi branch does not look
                  at it.
      decoder     mp3 decoder: 'pymad', 'madplay' or 'mpg123'.  madplay is
                  the default because it uses the same decoder as pymad
                  but supports downsampling.

    Returns:
      ``(x, fs, svals)`` where ``svals`` is ``[]`` when stripzeros is
      'none' and otherwise the second value returned by ``strip_zeros``;
      or the stats triple when ``stats_only`` is set.

    Raises:
      IOError      if ``fn`` does not exist.
      ImportError  if neither the python mad library nor mp3info can
                   provide the mp3 stats.

    Messages are printed through single pre-formatted arguments so the
    text is the same under the print statement and the print function.
    """
    if not os.path.exists(fn):
        raise IOError('File %s not found' % fn)
    stub = fn.lower().split('.')[-1]

    if stub == 'mp3':
        # If pymad is missing, default to mpg123.
        if decoder == 'pymad':
            try:
                import mad
            except ImportError:
                decoder = 'mpg123'

        # Load stats by default using mad, otherwise try mp3info.
        try:
            import mad
            # Work around an oddity of mad: the sampling rate is sometimes
            # wrong for the very first frame, so open and then reset.
            mf = mad.MadFile(fn)
            try:
                mf.seek_time(0)
            except Exception:
                pass  # best effort only; the stats below are still read
            fs = mf.samplerate()
            stereo_modes = (mad.MODE_DUAL_CHANNEL, mad.MODE_JOINT_STEREO, mad.MODE_STEREO)
            # Single channel and any unexpected mode value count as mono.
            channel_count = 2 if mf.mode() in stereo_modes else 1
            secs = int(mf.total_time() / 1000.0)
        except Exception:
            print('Python mad library not loaded. Trying to use mp3info')
            try:
                cmd = 'mp3info -p \"%%Q %%S %%o\" %s' % slashify(fn)
                data = command_with_output(cmd)
                vals = data.split(' ')
                fs = int(vals[0])
                secs = int(vals[1])
                stereo_mono_mode = ' '.join(vals[2:])
                channel_count = 1
                if stereo_mono_mode.find('stereo') >= 0:
                    channel_count = 2
            except Exception:
                raise ImportError("You must load the python mad library or have mp3info available")

        if stats_only:
            return (secs, fs, channel_count)

        if decoder == 'mpg123':
            print('Warning mpg123 decoder doe not work very well compared to madplay')
            cmd = decoder
            if mono:
                cmd = '%s -m' % (cmd)
                channel_count = 1  # force the channel count to be 1
            if not fs_target == -1:
                # mpg123 downsamples by 2:1 or 4:1
                fsratio = float(fs) / float(fs_target)
                if fsratio >= 4.0:
                    cmd = '%s -%i' % (cmd, 4)
                    fs //= 4
                elif fsratio >= 2.0:
                    cmd = '%s -%i' % (cmd, 2)
                    fs //= 2
            cmd = '%s -q -s %s' % (cmd, slashify(fn))
            data = command_with_output(cmd)
            # Raw output is read as 'short' samples and scaled by 32768.
            x = N.frombuffer(data, N.short) / float(32768)
            if channel_count > 1:
                x = x.reshape(-1, channel_count)

        elif decoder == 'madplay':
            cmd = decoder
            if not fs_target == -1:
                # madplay downsamples by 2:1, but the downsampling is low
                # quality.  Maybe don't use it?
                fsratio = float(fs) / float(fs_target)
                if fsratio >= 2.0:
                    cmd = '%s --downsample' % (cmd)
                    fs //= 2
            # Decode to stdout.
            cmd = '%s -Q --output=raw:- %s' % (cmd, slashify(fn))
            data = command_with_output(cmd)
            x = N.frombuffer(data, N.short) / float(32768)
            # Override the channel count from above using a time test:
            # some mp3s report SINGLE_CHANNEL when they are in fact stereo.
            if float(fs) * secs > 0:
                test_channel_count = int(round(len(x) / (float(fs) * secs)))
                if test_channel_count != channel_count:
                    print(' Overriding channel_count %s with %s for %s'
                          % (channel_count, test_channel_count, fn))
                    channel_count = test_channel_count
            if channel_count > 1:
                x = x.reshape(-1, channel_count)

        elif decoder == 'pymad':
            # The reshape(-1, 2) below treats the decoded stream as
            # interleaved 2-channel data, so budget two values per frame.
            # (mf.mode() is a mode constant, not a channel count.)
            # NOTE(review): assumes mf.read() always yields stereo frames,
            # also for mono files -- confirm against the pymad docs.
            pymad_channels = 2
            samps_per_channel = int((mf.samplerate() * mf.total_time()) / 1000.0)
            samples = samps_per_channel * pymad_channels
            # One extra second of room is kept at the end of the song.
            x = N.zeros((samps_per_channel + fs) * pymad_channels)
            st = 0
            while True:
                buf = mf.read()
                if buf is None:
                    break
                bsamps = len(buf) // 2  # two bytes per 'short' value
                if st + bsamps > samples:
                    break
                x[st:st + bsamps] = N.frombuffer(buf, N.short) / float(32768)
                st += bsamps
            x = x.reshape(-1, 2)
            x = x[0:st // 2, :]

        else:
            # Kept as in the original interface: report and exit.
            print(' unknown decoder %s' % (decoder,))
            sys.exit(0)

        if fs_target > 0 and not fs == fs_target:
            import pysamplerate as P
            x = P.resample(x, float(fs_target) / float(fs), P.converter_format('sinc_fastest'), verbose=False)
            fs = fs_target

        if tlast > 0:
            # Slice bounds must be integers; slicing the first axis works
            # for both the mono (1-D) and multi-channel (2-D) layout.
            x = x[0:int(tlast * float(fs))]

    elif stub == 'mid' or stub == 'midi':
        import pymidi as P
        mf = P.MidiFile(fn)
        if fs_target == -1:
            fs_target = 1000
        (x, fs) = mf.timeseries(fs=fs_target)

    else:
        if stats_only:
            return pyaudioread_wrapper(fn, tlast=tlast, fs_target=fs_target, stats_only=stats_only)
        (x, fs) = pyaudioread_wrapper(fn, tlast=tlast, fs_target=fs_target)

    if mono and len(N.shape(x)) > 1:
        x = N.mean(x, 1)

    assert (len(N.shape(x)) <= 2)  # better be stereo or mono!

    if stripzeros not in ('leading', 'trailing', 'both', 'none'):
        print('Invalid value for stripzeros; must be leading, trailing, both or none')
        print('Setting to none')
        stripzeros = 'none'

    if stripzeros == 'none':
        return (x, fs, [])
    (x, svals) = strip_zeros(x, stripzeros)
    return (x, fs, svals)
def audioread(fn, mono=False, tlast=-1, fs_target=-1, stripzeros='none', stats_only=False, decoder='madplay'):
    """Read several formats of audio (mp3, midi, anything audiolab opens).

    Parameters:
      fn          path of the file; the extension selects the reader.
      mono        yields a mono signal (channels are averaged, or mpg123
                  is asked to downmix).
      tlast       desired last sample, in seconds; values <= 0 keep all.
      fs_target   target sampling rate; decoder decimation and/or
                  pysamplerate resampling are used to reach it.
      stripzeros  removes near-zero values from the beginning and/or end
                  of the signal: 'none', 'both', 'leading' or 'trailing'.
                  Any other value is reported and treated as 'none'.
      stats_only  return the triple (n_sec, fs, num_channels) of the song
                  as written on disk, ignoring stripzeros, fs_target and
                  tlast.  Seconds rather than samples are reported because
                  that is available without decoding.  Honoured for mp3
                  and audiolab files only; the midi branch does not look
                  at it.
      decoder     mp3 decoder: 'pymad', 'madplay' or 'mpg123'.  madplay is
                  the default because it uses the same decoder as pymad
                  but supports downsampling.

    Returns:
      ``(x, fs, svals)`` where ``svals`` is ``[]`` when stripzeros is
      'none' and otherwise the second value returned by ``strip_zeros``;
      or the stats triple when ``stats_only`` is set.

    Raises:
      IOError      if ``fn`` does not exist.
      ImportError  if neither the python mad library nor mp3info can
                   provide the mp3 stats.

    Messages are printed through single pre-formatted arguments so the
    text is the same under the print statement and the print function.
    """
    if not os.path.exists(fn):
        raise IOError('File %s not found' % fn)
    stub = fn.lower().split('.')[-1]

    if stub == 'mp3':
        # If pymad is missing, default to mpg123.
        if decoder == 'pymad':
            try:
                import mad
            except ImportError:
                decoder = 'mpg123'

        # Load stats by default using mad, otherwise try mp3info.
        try:
            import mad
            # Work around an oddity of mad: the sampling rate is sometimes
            # wrong for the very first frame, so open and then reset.
            mf = mad.MadFile(fn)
            try:
                mf.seek_time(0)
            except Exception:
                pass  # best effort only; the stats below are still read
            fs = mf.samplerate()
            stereo_modes = (mad.MODE_DUAL_CHANNEL, mad.MODE_JOINT_STEREO, mad.MODE_STEREO)
            # Single channel and any unexpected mode value count as mono.
            channel_count = 2 if mf.mode() in stereo_modes else 1
            secs = int(mf.total_time() / 1000.0)
        except Exception:
            print('Python mad library not loaded. Trying to use mp3info')
            try:
                cmd = 'mp3info -p \"%%Q %%S %%o\" %s' % slashify(fn)
                data = command_with_output(cmd)
                vals = data.split(' ')
                fs = int(vals[0])
                secs = int(vals[1])
                stereo_mono_mode = ' '.join(vals[2:])
                channel_count = 1
                if stereo_mono_mode.find('stereo') >= 0:
                    channel_count = 2
            except Exception:
                raise ImportError("You must load the python mad library or have mp3info available")

        if stats_only:
            return (secs, fs, channel_count)

        if decoder == 'mpg123':
            print('Warning mpg123 decoder doe not work very well compared to madplay')
            cmd = decoder
            if mono:
                cmd = '%s -m' % (cmd)
                channel_count = 1  # force the channel count to be 1
            if not fs_target == -1:
                # mpg123 downsamples by 2:1 or 4:1
                fsratio = float(fs) / float(fs_target)
                if fsratio >= 4.0:
                    cmd = '%s -%i' % (cmd, 4)
                    fs //= 4
                elif fsratio >= 2.0:
                    cmd = '%s -%i' % (cmd, 2)
                    fs //= 2
            cmd = '%s -q -s %s' % (cmd, slashify(fn))
            data = command_with_output(cmd)
            # Raw output is read as 'short' samples and scaled by 32768.
            x = N.frombuffer(data, N.short) / float(32768)
            if channel_count > 1:
                x = x.reshape(-1, channel_count)

        elif decoder == 'madplay':
            cmd = decoder
            if not fs_target == -1:
                # madplay downsamples by 2:1, but the downsampling is low
                # quality.  Maybe don't use it?
                fsratio = float(fs) / float(fs_target)
                if fsratio >= 2.0:
                    cmd = '%s --downsample' % (cmd)
                    fs //= 2
            # Decode to stdout.
            cmd = '%s -Q --output=raw:- %s' % (cmd, slashify(fn))
            data = command_with_output(cmd)
            x = N.frombuffer(data, N.short) / float(32768)
            # Override the channel count from above using a time test:
            # some mp3s report SINGLE_CHANNEL when they are in fact stereo.
            if float(fs) * secs > 0:
                test_channel_count = int(round(len(x) / (float(fs) * secs)))
                if test_channel_count != channel_count:
                    print(' Overriding channel_count %s with %s for %s'
                          % (channel_count, test_channel_count, fn))
                    channel_count = test_channel_count
            if channel_count > 1:
                x = x.reshape(-1, channel_count)

        elif decoder == 'pymad':
            # The reshape(-1, 2) below treats the decoded stream as
            # interleaved 2-channel data, so budget two values per frame.
            # (mf.mode() is a mode constant, not a channel count.)
            # NOTE(review): assumes mf.read() always yields stereo frames,
            # also for mono files -- confirm against the pymad docs.
            pymad_channels = 2
            samps_per_channel = int((mf.samplerate() * mf.total_time()) / 1000.0)
            samples = samps_per_channel * pymad_channels
            # One extra second of room is kept at the end of the song.
            x = N.zeros((samps_per_channel + fs) * pymad_channels)
            st = 0
            while True:
                buf = mf.read()
                if buf is None:
                    break
                bsamps = len(buf) // 2  # two bytes per 'short' value
                if st + bsamps > samples:
                    break
                x[st:st + bsamps] = N.frombuffer(buf, N.short) / float(32768)
                st += bsamps
            x = x.reshape(-1, 2)
            x = x[0:st // 2, :]

        else:
            # Kept as in the original interface: report and exit.
            print(' unknown decoder %s' % (decoder,))
            sys.exit(0)

        if fs_target > 0 and not fs == fs_target:
            import pysamplerate as P
            x = P.resample(x, float(fs_target) / float(fs), P.converter_format('sinc_fastest'), verbose=False)
            fs = fs_target

        if tlast > 0:
            # Slice bounds must be integers; slicing the first axis works
            # for both the mono (1-D) and multi-channel (2-D) layout.
            x = x[0:int(tlast * float(fs))]

    elif stub == 'mid' or stub == 'midi':
        import pymidi as P
        mf = P.MidiFile(fn)
        if fs_target == -1:
            fs_target = 1000
        (x, fs) = mf.timeseries(fs=fs_target)

    else:
        if stats_only:
            return pyaudioread_wrapper(fn, tlast=tlast, fs_target=fs_target, stats_only=stats_only)
        (x, fs) = pyaudioread_wrapper(fn, tlast=tlast, fs_target=fs_target)

    if mono and len(N.shape(x)) > 1:
        x = N.mean(x, 1)

    assert (len(N.shape(x)) <= 2)  # better be stereo or mono!

    if stripzeros not in ('leading', 'trailing', 'both', 'none'):
        print('Invalid value for stripzeros; must be leading, trailing, both or none')
        print('Setting to none')
        stripzeros = 'none'

    if stripzeros == 'none':
        return (x, fs, [])
    (x, svals) = strip_zeros(x, stripzeros)
    return (x, fs, svals)