def new_numeral_captcha_on_words(self, fname):
    wordstr = wordstrgen.get_random_wordstr(self.wordbank, self.nwords)
    numstr = wordstrgen.get_random_numstr(self.nnums)
    ensure_dir('temp')
    # these are the filenames of the audio files
    wordaudio = speechsynth.make_audio(wordstr, 'words', './temp/')
    numaudio = speechsynth.make_audio(numstr, 'nums', './temp/')
    # read audio data
    wordaudio_data, fs_word, enc_word = wavread(wordaudio)
    numaudio_data, fs_num, enc_num = wavread(numaudio)
    wordaudio_data = ensure_equal_length(wordaudio_data, numaudio_data)
    # combine audio data, modifying volumes
    captcha_audio = self.noise_vol * wordaudio_data + self.captcha_vol * numaudio_data
    outputfname = self.outputdir + fname
    if os.path.exists(outputfname):
        os.remove(outputfname)
    wavwrite(captcha_audio, outputfname, 22050)
    # return output filename and the answer
    return outputfname, prettify(numstr)
def main():
    # import soundfiles
    snd = wavread('trumpet.wav')[0]
    kick = wavread('kick.wav')[0]
    amb = wavread('amb.wav')[0]
    amb = amb * 0.8  # reduce gain of this soundfile a little bit
    print len(amb)

    #low_demo(snd, 10., 500.)
    #high_demo(snd, 10000., 10.)
    #allpass_demo(snd, 1000, -find_c(1000., fs), find_c(1000., fs), 1.0)
    #iir_comb_demo(kick, 100, 0.5, -0.5)

    t = len(amb) / fs
    period = 1.0 / fs
    t_v = arange(0.0, t, period)
    delayTime = 2.0
    width = 1.0
    freq = 1
    breakPoint = sin(2. * pi * freq * t_v)
    #breakPoint = linspace(1, -1, len(amb))

    #var_allpass_demo(snd, delayTime / 1000., width / 1000., -find_c(8000, fs), find_c(8000, fs), 1.0, breakPoint)
    #var_allpass_demo(amb, delayTime / 1000., width / 1000., 0.5, -0.5, 0.0, breakPoint)

    # flanger
    var_allpass_demo(amb, delayTime, width, 0.7, 0.7, 0.7, breakPoint)
def noise_reduce_test():
    sample = wavread('../../sounds/single-bloop-trimmed.wav')[0]
    noise = wavread('../../sounds/single-bloop-noise.wav')[0]
    sample = bandpass(sample, 30000, 50000)
    t0 = time.time()
    sample = noise_reduce(sample, noise, NoiseReduceSettings())
    print 'noise filter in time:', round(time.time() - t0, 2)
def __init__(self, audio, mic_amount, trials, proc_number):
    logging.info('Starting.')
    self.proc_number = proc_number

    # prepare the audio data: from numpy arrays to a flattened list with
    # the duplicated (second-channel) elements removed
    self.wave = wavread(audio)[0]  # keep only the audio data, not the wav metadata
    self.wave = [list(pair) for pair in self.wave]
    audio_data = numpy.array(self.wave)
    self.wave = list(audio_data.flatten())
    self.wave = self.wave[::2]
    self.wave = numpy.array(self.wave).reshape(-1, 1)
    self.scale = 0.8 / max(self.wave)
    self.wave = numpy.multiply(self.scale, self.wave)
    self.sample = float(wavread(audio)[1])
    print '\nSampling rate used: ' + str(self.sample)

    self.trials = trials
    self.__microphone_amount = mic_amount
    self.subArrays_X = []
    self.subArrays_Y = []
    self.subArrays_Z = []
    self.element = [0]
    self.generate_combinations(X_dict, Y_dict, Z_dict, 4)
    #print self.subArrays

    X_receiver = []
    Y_receiver = []
    Z_receiver = []
    indices_X = self.subArrays_X[0]
    indices_Y = self.subArrays_Y[0]
    indices_Z = self.subArrays_Z[0]
    for i, j, k in zip(indices_X, indices_Y, indices_Z):
        X_receiver.append(i)
        Y_receiver.append(j)
        Z_receiver.append(k)
    self.X = [X_receiver[i] for i in range(4)]
    self.Y = [Y_receiver[i] for i in range(4)]
    self.Z = [Z_receiver[i] for i in range(4)]

    self.sensor_positions = numpy.column_stack((self.X, self.Y, self.Z))
    self.true_positions = numpy.zeros((self.trials, 3))
    self.estimated_positions = numpy.zeros((self.trials, 3))
    self.distances = []
    self.time_delays = []
    self.padding = []

    print '\nReceiver Locations:'
    for i in range(len(self.X)):
        print 'Receiver ' + str(i + 1) + ': X: ' + str(self.X[i]) + \
              ' Y: ' + str(self.Y[i]) + ' Z: ' + str(self.Z[i])
    print '\n'
    logging.info('Inited core.')
def cut_silence_in_sound(source, target, rmsTreshhold=-40, WndSize=128):
    """
    source : source audio file
    target : output sound file

    This function cuts the silence at the beginning and at the end of an
    audio file. It's useful for normalizing the length of the audio stimuli
    in an experiment. The default parameters were tested with normal speech.
    """
    x, fs, enc = wavread(str(source))
    index = 0
    # Find where the silence at the beginning ends
    while index + WndSize < len(x):
        DataArray = x[int(index):int(index + WndSize)]
        rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
        rms = lin2db(rms)
        index = 0.5 * WndSize + index
        if rms > rmsTreshhold:
            beginning = index
            print beginning / float(fs)
            break

    # Find where the silence at the end begins (scan the reversed signal)
    x, fs, enc = wavread(str(source))
    WndSize = 128
    index = 0
    x = list(reversed(x))
    while index + WndSize < len(x):
        DataArray = x[int(index):int(index + WndSize)]
        rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
        rms = lin2db(rms)
        index = 0.5 * WndSize + index
        if rms > rmsTreshhold:
            final = index
            print (len(x) - final) / float(fs)
            break

    # Write the sound source without the silences
    x, fs, enc = wavread(str(source))
    wavwrite(x[int(beginning):len(x) - int(final)], target, fs, enc='pcm24')
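# A hedged usage sketch for the function above: trim every file in a
# hypothetical 'stimuli' folder, writing a '*_trim.wav' next to each source.
import glob
for src in glob.glob('stimuli/*.wav'):
    cut_silence_in_sound(src, src.replace('.wav', '_trim.wav'))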
def main():
    """
    Main function for processing the specified soundfile through this reverb.
    """
    parser = argparse.ArgumentParser(description='Artificial Reverb')
    # the soundfile is the first argument, with parameter values to follow
    parser.add_argument('soundfile', help='audio file to process', type=validInput)
    parser.add_argument('outfile', help='path to output file', type=validInput)
    parser.add_argument('-w', '--wetdry', default=0.2, type=float,
                        help='amount of wet signal in the mix')
    parser.add_argument('-da', '--damping', default=0.25, type=float,
                        help='amount of high frequency damping')
    parser.add_argument('-de', '--decay', default=0.4, type=float,
                        help='amount of attenuation applied to signal to make it decay')
    parser.add_argument('-pd', '--predelay', default=30, type=float,
                        help='amount of time before starting reverb')
    parser.add_argument('-b', '--bandwidth', default=0.6, type=float,
                        help='amount of high frequency attenuation on input')
    parser.add_argument('-t', '--tankoffset', default=0, type=float,
                        help='amount of time (ms) to increase the last tank delay time')

    # Parse the command-line arguments
    args = parser.parse_args()

    # Build the full path to the soundfile
    soundfilePath = os.path.join(os.getcwd(), args.soundfile)

    # From here on, x refers to the input signal
    x, sampleRate, wavType = wavread(soundfilePath)
    dry = x.copy()
    y = reverbTest(x, sampleRate, args.damping, args.decay, args.predelay,
                   args.bandwidth, args.tankoffset)

    # Apply wet/dry mix
    output = dryWet(dry, y, args.wetdry)

    # Finally write the output file
    wavwrite(transpose(output), args.outfile, sampleRate)
def wavread(path):
    """
    Wrapper around scikits.audiolab functions.

    Returns: wavdata, sample rate, encoding type

    See pyaudiolab or scikits.audiolab for more information.
    """
    return AUDIOLAB.wavread(path)
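# Example use of the wrapper above; 'example.wav' is a hypothetical path.
# scikits.audiolab's wavread returns (samples, sample rate, encoding string).
data, fs, enc = wavread('example.wav')
print 'fs = %d Hz, encoding = %s, %d samples' % (fs, enc, len(data))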
def computeFeaturesForFullSong(file_path, feature_list, pack_size):
    """
    Computes each of the features (must be full_song features) for the song
    recording. This method is used for one-shot computation of a song's features.
    :param file_path:
    :param feature_list:
    :param pack_size:
    :return: a tuple of values with length = len(feature_list). Each item is
        the resulting feature value corresponding to feature_list[].
    """
    # will hold the evaluated feature values
    feature_values = []
    raw_data, fs, enc = wavread(file_path)
    raw_chunks = chunks(raw_data, pack_size)
    for feature_name in feature_list:
        class_ = getattr(features, feature_name)
        if class_.requireFullSong is False:  # ensure full song
            raise ValueError("Every feature must be a full song feature")
        feature = class_(raw_chunks)
        feature_values.append(feature.value)
    return feature_values
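# Hedged usage sketch for the function above: the file path and feature names
# are hypothetical; real names must match classes defined in the `features`
# module, each with requireFullSong set to True.
feature_names = ['ZeroCrossingRate', 'SpectralCentroid']
values = computeFeaturesForFullSong('song.wav', feature_names, 1024)
print zip(feature_names, values)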
def sound_wav():
    clf()
    (snd, sampFreq, nBits) = audiolab.wavread('temp.wav')
    wave_form = []
    signal = snd[:, 0]
    if len(signal) < 500000:
        timeArray = arange(0, float(len(signal)), 1)
        timeArray = timeArray / sampFreq
        wave_form = signal
    else:
        downsample_factor = len(signal) / 30000
        i = 0
        while i < len(signal):
            wave_form = wave_form + [signal[i]]
            i = i + downsample_factor
        timeArray = arange(0, float(len(wave_form)), 1)
        timeArray = timeArray * downsample_factor / sampFreq
    timeArray = timeArray * 1000
    plot(timeArray, wave_form, color='k')
    ylabel('Amplitude')
    xlabel('Time (ms)')
    savefig('wave_form.png', bbox_inches=0)
    # show()

# setup('skream.wav')
# sound_wav()
# teardown()
def __init__(self, filepath):
    self.filepath = filepath
    (self.audio_array, self.sample_rate, self.format) = wavread(filepath)
    self.name = os.path.basename(filepath)
    samples = len(self.audio_array)
    self.length = float(samples) / float(self.sample_rate)
def get_RMS_over_time(audio_file, window_size=1024, in_db=True):
    """
    parameters:
        audio_file  : file to analyse
        window_size : window size for the RMS computation

    returns : time series with the RMS values and their time tags

    warning : this function only works for mono files
    """
    from scikits.audiolab import wavread, aiffread
    import numpy as np

    try:
        sound_in, fs, enc = aiffread(audio_file)
    except ValueError:
        sound_in, fs, enc = wavread(audio_file)

    begin = 0
    values = []
    time_tags = []
    while (begin + window_size) < len(sound_in):
        data = sound_in[begin:begin + window_size]
        time_tag = (begin + (window_size / 2)) / np.float(fs)
        values.append(get_rms_from_data(data, in_db=in_db))
        time_tags.append(time_tag)
        begin = begin + window_size
    return time_tags, values
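# A possible follow-up for the function above (file name hypothetical):
# plot the RMS trajectory over time.
import matplotlib.pyplot as plt
time_tags, values = get_RMS_over_time('speech.wav', window_size=1024, in_db=True)
plt.plot(time_tags, values)
plt.xlabel('time (s)')
plt.ylabel('RMS (dB)')
plt.show()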
def loadFiles(path):
    """reads wave files from path and returns a list of dictionaries with fields:
    - "name" - name of the file
    - "nameGender" - the sex read from the filename
    - "signal" - the sound signal read from the file
    - "sampleRate" - sample rate of the file
    and a dictionary that contains the numbers of male and female voices
    """
    print "reading files..."
    files = [f for f in listdir(path)
             if isfile(join(path, f)) and splitext(f)[1] == ".wav"]
    samples = []
    maleCount = 0
    femaleCount = 0
    for f in files:
        p = path + '/' + f
        print "...", f
        data, rate, encoding = wavread(p)
        # average the channels to get a mono signal
        sig = [mean(d) for d in data]
        samples.append({'name': f, 'nameGender': f[-5:-4],
                        'signal': sig, 'sampleRate': rate})
        if f[-5:-4] == "M":
            maleCount += 1
        else:
            femaleCount += 1
    counters = {"maleCount": maleCount, "femaleCount": femaleCount}
    return samples, counters
def __init__(self, filepath=None, units=None):
    """
    Can be initialised from a ``filepath`` or from an array of Unit objects,
    or with no audio at all - an empty container.

    If initialised with units or a filepath, the data is parsed and
    ``self._calculate_metadata`` is called to populate ``Mosaic`` attributes.
    """
    self.units = []
    if filepath:
        self.filepath = filepath
        self.name = os.path.basename(filepath)
        if os.path.isfile(self.filepath):
            (self.data, self.sample_rate, type_format) = wavread(filepath)
            self._calculate_metadata()
        else:
            self.samples = 0
            self.sample_rate = 44100
            self.length = 0
            self.data = None
    elif units:
        self.units = units
        self.data = self._make_data(units)
        self.sample_rate = 44100
        self._calculate_metadata()
    else:
        self.samples = 0
        self.sample_rate = 44100
        self.length = 0
        self.data = None
def _analyse(self, filepath):
    audio = to_mono(wavread(filepath)[0])
    audio = audio.astype('float32')

    w = Windowing(type='hann')
    fft = FFT()                 # this gives us a complex FFT
    c2p = CartesianToPolar()    # and this turns it into a pair (magnitude, phase)
    hfc_detect = OnsetDetection(method='hfc')
    complex_detect = OnsetDetection(method='complex')
    rms_detect = RMS()
    spec = Spectrum()
    #pd = PitchDetection()
    flux = Flux()
    pool = Pool()
    #wap = WarpedAutoCorrelation()

    # let's get down to business
    print 'Computing onset detection functions...'
    for frame in FrameGenerator(audio, frameSize=self.frame_size,
                                hopSize=self.hop_size):
        mag, phase = c2p(fft(w(frame)))
        spectrum = spec(w(frame))
        f = flux(spectrum)
        #pitch = pd(spectrum)
        pool.add('hfc', hfc_detect(mag, phase))
        pool.add('complex', complex_detect(mag, phase))
        pool.add('rms', rms_detect(frame))
        pool.add('flux', f)
        #pool.add('pitch', pitch[0])
        #print pool['pitch']
        #pool.add('autoc', wap(pool['pitch']))

    return pool, audio
def estimate_f0s(self, audio_path):
    if not os.path.exists(audio_path):
        raise ValueError('Invalid audio path')

    x, fs, _ = wavread(audio_path)

    # make x mono if stereo
    if x.ndim > 1:
        _, n_channels = x.shape
        x = x.sum(axis=1) / n_channels

    X = self._stft(x, fs)

    # Section 2.1: spectrally whiten the signal to suppress timbral information
    Y = self._spectral_whitening(X, fs)

    # perform iterative estimation of the fundamental periods in the audio file
    f0_estimations = self._iterative_est(Y, fs)

    # get the notes which correspond to these frequency estimates
    notes = []
    for frame_ests in f0_estimations:
        notes.append([self._freq_to_note(f) for f in frame_ests])

    return f0_estimations, notes
def sibilant_detector(filename):
    """
    The aim of this algorithm is to detect the parts of `filename` where the
    energy around a target frequency is maximal. It works as follows:
        1- First compute the spectrogram
        2- Then compute a Gaussian curve centered on the frequency of
           interest. For sibilants it's usually around 6000 Hz
        3- Multiply the spectrum and the Gaussian in order to weight the
           spectrum
        4- Sum the resulting signal per window and normalize
        5- The peaks in the resulting signal are the parts in time where the
           energy in the frequency area of interest is the most important
    """
    sound_data, fs, enc = wavread(filename)

    # Gaussian coefficients
    sigma = 5
    mu = 10000  # mean frequency
    NFFT = 512

    # Spectrogram
    Pxx, freqs, bins, im = specgram(sound_data, NFFT=NFFT, noverlap=128, Fs=fs)
    show()

    # Sibilant detector
    nb_of_windows = Pxx.shape[1]
    nb_of_fft_coefs = Pxx.shape[0]

    # Compute the Gaussian weighting vector and plot it
    weights = weighting_vector(nb_of_fft_coefs, sigma, mu, fs)
    f_weights = np.linspace(0, fs / 2, len(weights), endpoint=True)
    plot(f_weights, weights)
    show()

    sibilant_desc = []
    weighted_ffts = []
    # Multiply the weights and the spectrum and show the result
    for i in range(nb_of_windows):
        weighted_fft = Pxx[:, i] * weights
        if len(weighted_ffts) == 0:
            weighted_ffts = weighted_fft
        else:
            weighted_ffts = np.c_[weighted_ffts, weighted_fft]
        sibilant_desc.append(sum(weighted_fft))
    imshow(weighted_ffts, interpolation='nearest', aspect='auto')
    show()

    # Normalize to get a single descriptor per window
    sibilant_desc = [float(i) / max(sibilant_desc) for i in sibilant_desc]
    plot(sibilant_desc)
    show()

    # Export the audio around the strongest peak
    max_index, max_value = max(enumerate(sibilant_desc),
                               key=operator.itemgetter(1))
    wavwrite(sound_data[(max_index - 5) * NFFT:(max_index + 5) * NFFT],
             'test.wav', fs=44100)
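# The `weighting_vector` helper above is not defined in this snippet. A
# minimal sketch of one plausible implementation: a Gaussian over the FFT bin
# axis, assuming `mu` is in Hz and `sigma` is in bins (the original units are
# not shown, so treat this as an illustration only).
import numpy as np

def weighting_vector(nb_of_fft_coefs, sigma, mu, fs):
    bins = np.arange(nb_of_fft_coefs)
    # map the centre frequency (Hz) onto a bin index of the half-spectrum
    mu_bin = mu / (fs / 2.0) * (nb_of_fft_coefs - 1)
    return np.exp(-0.5 * ((bins - mu_bin) / float(sigma)) ** 2)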
def test_bad_wavread(self):
    """ Check wavread on bad file"""
    # Create a tmp audio file with non-wav format, write some random data
    # into it, and check it cannot be opened by wavread
    rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
    try:
        nbuff = 22050
        noise = 0.1 * N.random.randn(nbuff)

        # Open the copy file for writing
        format = audio_format('aiff', 'pcm16')
        b = Sndfile(cfilename, 'w', format, 1, nbuff)
        b.write_frames(noise)
        b.close()

        b = Sndfile(cfilename, 'r')
        rcnoise = b.read_frames(nbuff)
        b.close()

        try:
            rnoise = wavread(cfilename)[0]
            raise Exception("wavread on non wav file succeeded, expected to fail")
        except ValueError, e:
            pass
            #print str(e) + ", as expected"
    finally:
        close_tmp_file(rfd, cfilename)
def feature_extraction(wav_fd, fe_fd):
    names = [na for na in os.listdir(wav_fd) if na.endswith('.wav')]
    names = sorted(names)
    for na in names:
        print na
        path = wav_fd + '/' + na
        wav, fs, enc = wavread(path)
        if wav.ndim == 2:
            wav = np.mean(wav, axis=-1)  # mix down to mono
        ham_win = np.hamming(n_fft)
        [f, t, x] = signal.spectral.spectrogram(x=wav, window=ham_win,
                                                nperseg=n_fft, noverlap=0,
                                                detrend=False,
                                                return_onesided=True,
                                                mode='magnitude')
        x = x.T
        if globals().get('melW') is None:
            global melW
            melW = librosa.filters.mel(sr=fs, n_fft=n_fft, n_mels=64,
                                       fmin=0., fmax=22100)
        x = np.dot(x, melW.T)
        out_path = fe_fd + '/' + na[0:-4] + '.f'
        cPickle.dump(x, open(out_path, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL)
def load_data(self):
    """
    Load the audio file data.

    This function works only for mono wav files!
    """
    self._log("Loading audio data")

    # check the file can be read
    if self.file_path is None:
        raise AttributeError("File path is None")
    if not os.path.isfile(self.file_path):
        self._log(["File '%s' cannot be read", self.file_path], Logger.CRITICAL)
        raise OSError("File cannot be read")

    self._log("Loading wav file...")
    self.audio_data, self.audio_sample_rate, self.audio_format = wavread(self.file_path)
    self.audio_length = float(len(self.audio_data)) / self.audio_sample_rate
    self._log(["Sample length: %f", self.audio_length])
    self._log(["Sample rate: %f", self.audio_sample_rate])
    self._log(["Audio format: %s", self.audio_format])
    self._log("Loading wav file... done")
def open_wav_audiolab(self, filename):
    # http://scikits.appspot.com/audiolab
    from scikits.audiolab import wavread
    results, sample_frequency, encoding = wavread(filename)
    self.sample_rate = sample_frequency
    print 'Sample Rate is ', sample_frequency
    return results, self.sample_rate
def raw_specs(filestring):
    from scikits.audiolab import wavread
    import pylab
    import matplotlib.pyplot as plt
    import os

    if filestring.find('wav') > 0 or filestring.find('WAV') > 0:
        nomewav = os.path.basename(filestring)
        filename = os.path.splitext(nomewav)[0]
        maindir = "temp/" + filename + "/"
        for fnamefiles in os.listdir(maindir):
            if os.path.isdir(maindir + fnamefiles) or os.stat(maindir + fnamefiles).st_size == 0:
                print "not a file."
            else:
                if fnamefiles.find('wav') > 0 or fnamefiles.find('WAV') > 0:
                    if not os.path.exists(maindir + "/Spec/"):
                        os.makedirs(maindir + "/Spec/")
                    signal, fs, enc = wavread(maindir + fnamefiles)
                    NFFT = 256     # the length of the windowing segments
                    Fs = int(300)  # the sampling frequency
                    pylab.figure(num=None, figsize=(4, 8), frameon=False)
                    Pxx, freqs, bins, im = pylab.specgram(signal, NFFT=NFFT, Fs=Fs,
                                                          noverlap=int(NFFT - 1),
                                                          cmap=pylab.cm.gist_heat)
                    if fnamefiles.find('wav') > 0:
                        figname = maindir + "/Spec/" + os.sep + fnamefiles.replace('wav', 'png')
                    else:
                        figname = maindir + "/Spec/" + os.sep + fnamefiles.replace('WAV', 'png')
                    pylab.savefig(figname)
                    plt.close('all')
    print "Spectrograms generated."
def convert_wav(File, ofile):
    import scikits.audiolab as audiolab
    from scikits.samplerate import resample
    # the latest scikits.audiolab includes a sound-record lib, based on python-alsaaudio
    # if you want to do the downsampling with scipy.signal instead:
    #import scipy.signal

    # use audiolab to read the wav file
    Signal, fs = audiolab.wavread(File)[:2]

    # change the original sample rate to 16000 Hz using the 'sinc_best' converter
    fr = 16000
    Signal = resample(Signal, fr / float(fs), 'sinc_best')
    # changing the sample rate with scipy is a bit slower:
    #Signal = scipy.signal.resample(Signal, int(round(len(Signal) * fr) / float(fs)), window=None)

    # file format type
    fmt = audiolab.Format('flac', 'pcm16')
    nchannels = 1

    # write the converted signal into the .flac file given by ofile
    out = audiolab.Sndfile(ofile, 'w', fmt, nchannels, fr)
    out.write_frames(Signal)
    # return out
def process(file):
    # read in the file
    f, sr, enc = wavread(file)

    # compute the Fourier transform & compute the window times:
    D = librosa.stft(f)
    times = librosa.frames_to_samples(np.arange(D.shape[1]))

    # compute the onset strength envelope:
    env = librosa.onset.onset_strength(y=f, sr=sr)
    assert (len(times) == len(env))

    # compute the onsets we are actually interested in, convert to samples:
    onsets = librosa.onset.onset_detect(y=f, sr=sr)
    onset_samps = librosa.frames_to_samples(onsets)
    assert (onset_samps[-1] <= len(f))

    # create a lookup table for retrieving onset strengths:
    lookup = []
    prevval = 0
    for v in onset_samps:
        for i in xrange(prevval, len(times)):
            if times[i] == v:
                lookup.append(i)
                prevval = i + 1
                break

    # create an empty audio buffer (result):
    result = np.zeros(len(f))

    # write the envelope onset strength values at every onset point
    # computed by the envelope:
    for i in xrange(len(lookup)):
        result[onset_samps[i]] = env[lookup[i]]

    # write the result:
    wavwrite(result, file[:-4] + '_proc.wav', sr, enc)
    return
def gather_training_data(path=SAMPLE_PATH):
    instr_names = os.walk(path).next()[1]
    samples = dict()
    pitch_pattern = re.compile("([A-G][sb]?)(\d+)")
    # NOTE: Could potentially make subdirs for different qualities
    for instr in instr_names:
        #if instr not in ('guitar', 'trumpet'): continue
        instr_samples = []
        instr_sample_dir = "%s\%s" % (path, instr)
        for samp in [f for f in os.listdir(instr_sample_dir)
                     if os.path.isfile(os.path.join(instr_sample_dir, f))
                     and os.path.splitext(f)[1].lower() == ".wav"]:
            data, fs, enc = skal.wavread("%s\%s" % (instr_sample_dir, samp))
            matches = pitch_pattern.search(samp)
            assert matches is not None
            chroma, octave = matches.groups()
            chroma = canonical_chroma[chroma]
            # NOTE: It's quite possible that using a dictionary
            #       instead of a list will be helpful, but we'll
            #       cross that bridge when we get to it
            instr_samples.append((data, chroma, octave))
        samples[instr] = instr_samples
    return samples
def normalize_target_audio(input_file='moviehires_endpos_beta02.imatsh.wav',
                           sources_expr='/home/mkc/Music/GoldbergVariations/*48_1.wav',
                           write_me=False, amp_factor=0.5, proc_audio=True):
    """
    Per-variation normalization of a concatenated imatsh file using the
    individual sources as locators.
    Assumes that the input_file and the source files have the same sample rate.
    inputs:
        input_file   - the file to be processed (locally normalized)
        sources_expr - glob expression matching the source files
        write_me     - write output files when True [False]
        amp_factor   - amplitude change factor (proportion of full-scale normalization) [0.5]
        proc_audio   - whether to process target audio using source audio info [True]
    outputs:
        sample_locators - sample locators for each variation
        audio_summaries - peak and rms values for each variation
    output files:
        output_file = {input_file_stem}+'norm.'+{input_ext}
    """
    # Compute peak and rms per source file
    flist = glob.glob(sources_expr)
    flist.sort()
    sample_locators = [0]
    audio_summaries = []
    ext_pos = input_file.rindex('.')
    outfile_stem, ext = input_file[:ext_pos], input_file[ext_pos + 1:]
    for i, f in enumerate(flist):
        x, sr, fmt = skaud.wavread(f)
        print f, sr, fmt
        if len(x.shape) > 1:
            x = x[:, 0]  # Take the left channel only
        sample_locators.extend([len(x)])
        audio_summaries.append([max(abs(x)), np.sqrt(np.mean(x**2))])
        if proc_audio:
            y, sr_y, fmt_y = skaud.wavread(input_file,
                                           first=np.cumsum(sample_locators)[-2],
                                           last=sample_locators[-1])
            if sr != sr_y:
                raise ValueError("input and source sample rates don't match: %d,%d" % (sr, sr_y))
            audio_summaries.append([max(abs(y[:, 0])), np.sqrt(np.mean(y[:, 0]**2))])
            max_val = audio_summaries[-1][0]
            rms_val = audio_summaries[-1][1]
            norm_cf = amp_factor / max_val + (1 - amp_factor)
            outfile = outfile_stem + '_%02d.%s' % (i + 1, ext)
            max_amp_val = norm_cf * max_val
            rms_amp_val = norm_cf * rms_val
            print '%s: nrm=%05.2fdB, peak=%05.2fdB, *peak=%05.2fdB, rms=%05.2fdB, *rms=%05.2fdB' % (
                outfile, dB(norm_cf), dB(max_val), dB(max_amp_val), dB(rms_val), dB(rms_amp_val))
            if write_me:
                skaud.wavwrite(norm_cf * y, outfile, sr, fmt)
    return np.cumsum(sample_locators), np.array(audio_summaries)
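# The `dB` helper used above is not defined in this snippet; the conventional
# amplitude-to-decibel conversion its printout suggests would be:
import numpy as np

def dB(x):
    return 20. * np.log10(x)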
def train_codebook(basedirectory, spectral, desired_fs, clfs, n_samples):
    """Train the codebooks.

    Arguments:
    :param basedirectory: root directory of the audio corpus
    :param spectral: Spectral feature extraction. Object should be picklable
        and implement the \c Spectral abc; i.e. provide a \c transform method.
    :param clfs: list of clusterers. valid clusterers have a \c fit method
        and a \c predict method. optionally, for soft vq, also implement
        a \c predict_proba method.
    :param n_samples: number of spectral frames to sample from the audio corpus.

    :returns: a list of Codebook objects, of the same length as the output
        of spectral_func
    """
    wavs = list(rglob(basedirectory, '*.wav'))
    np.random.shuffle(wavs)
    inds = None
    idx = 0
    X = None
    for i, wav in enumerate(wavs):
        if i % 10 == 0 and i > 0:
            print 'samples: {3}/{4}; loading file: {0} ({1}/{2})'.format(
                wavs[i], i + 1, len(wavs), X.shape[0], n_samples)
        sig, fs, _ = audiolab.wavread(wav)
        start, stop = trim_silence(sig, fs)
        specs = spectral.transform(samplerate.resample(sig[start:stop],
                                                       desired_fs / fs,
                                                       'sinc_best'))
        if inds is None:
            inds = [0] + list(np.cumsum([spec.shape[1] for spec in specs]))
        spec = np.hstack(specs)
        if idx + spec.shape[0] >= n_samples:
            spec = spec[:n_samples - idx, :]
        if X is None:
            X = spec
        else:
            X = np.vstack((X, spec))
        idx += spec.shape[0]
        if idx >= n_samples:
            break
    cdbs = [Codebook(clf) for clf in clfs]
    for i, cdb in enumerate(cdbs):
        cdb.train(X[:, inds[i]:inds[i + 1]])
    return cdbs
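# Hedged usage sketch: scikit-learn's KMeans exposes the fit/predict interface
# the docstring requires of a clusterer. The corpus path and the `MySpectral`
# feature extractor are hypothetical stand-ins for real objects.
from sklearn.cluster import KMeans
clfs = [KMeans(n_clusters=64), KMeans(n_clusters=128)]
# codebooks = train_codebook('/data/corpus', MySpectral(), 16000., clfs, n_samples=50000)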
def wav_to_aif(source, target):
    """
    source : source audio file
    target : target audio file
    """
    x, fs, enc = wavread(str(source))
    # write the data as AIFF (24-bit PCM)
    aiffwrite(x, str(target), fs, enc='pcm24')
def get_sound_duration(file):
    """ returns sound duration in seconds """
    from scikits.audiolab import wavread
    sound_in, sr, pcm = wavread(file)
    return len(sound_in) / float(sr)
def set_filepath(self, path):
    """
    When passed a path to a valid wav file, the file is read and the
    current data is replaced by the new data.
    """
    self.filepath = path
    (self.data, self.sample_rate, self.format) = wavread(path)
    self.recalculate()
def analyzeWAV(inputFile):
    """
    inputFile = .wav audio file
    returns array of audio data and the sampling rate
    """
    data, fs, nbits = audiolab.wavread(inputFile)
    samplingRate = fs
    return [data, samplingRate]
def loadSignal(fileName):
    try:
        x, Fs, encFmt = al.wavread(fileName)
    except IOError:
        print('Could not import file "%s"' % fileName)
        return None
    return (x, Fs)
def generateMfcc(wavFile):
    filteredFile = filtering(wavFile, 2800, 3400)
    audio, fs, enc = wavread(filteredFile)
    size = getFrameSize(filteredFile)
    ceps, mspec, spec = mfcc(audio, nwin=size, nfft=size, fs=fs, nceps=13)
    return ceps
def wavread(filename):
    """
    wav, fs, nbits = wavread(filename)

    Read file FILENAME. WAV is a numpy array, FS is the sampling rate,
    and NBITS is the encoding string (e.g. 'pcm16').
    """
    return audiolab.wavread(filename)
def envelope(file, attack=1, release=10):
    # read in the file:
    f, sr, enc = wavread(file)
    env = Envelope()
    env.configure(attackTime=attack, releaseTime=release)
    result = env(essentia.array(f))
    # wavwrite(result, file[:-4] + '_env.wav', sr, enc)
    return result.reshape(-1)
def get_rms_from_wav(audio_file):
    """
    Returns the root-mean-square (power) of the audio buffer
    """
    from scikits.audiolab import wavread
    data, fs, enc = wavread(audio_file)
    return get_rms_from_data(data)
def main(args):
    # Load up the song (must be .wav) into memory
    amp_data, fs, enc = wavread(args[0])
    AMP_MAX = 10000

    # list of BPMs to match
    BPMs = range(30, 180, 5)
    # period = (60 / x seconds / beat) * (44100 samples / second)
    #        = (60 / x) * (44100) samples / beat

    # analyze 5 seconds from the middle of the song
    middle = len(amp_data) / 2
    five_seconds = [item[0] + item[1]
                    for item in amp_data[middle:middle + 44100 * 5]]

    # compute the FFT of the 5 second portion
    N = len(five_seconds)
    song_fft = np.fft.fft(five_seconds)[0:N / 2]  # cut off repeated FFT data

    numb_bands = 6
    bands = [[] for i in range(0, numb_bands)]
    # for i in range(len(song_fft)):
    #     bands[get_freq_band_index(i, 8, len(song_fft))].append(song_fft[i])

    hz_0 = 0
    hz_200 = indexOfFFTFreq(200, len(song_fft))    # 200 Hz
    hz_400 = indexOfFFTFreq(400, len(song_fft))    # 400 Hz
    hz_800 = indexOfFFTFreq(800, len(song_fft))    # 800 Hz
    hz_1600 = indexOfFFTFreq(1600, len(song_fft))  # 1600 Hz
    hz_3200 = indexOfFFTFreq(3200, len(song_fft))  # 3200 Hz
    hz_6400 = indexOfFFTFreq(6400, len(song_fft))  # 6400 Hz

    bands[0] = song_fft[hz_0:hz_200]
    bands[1] = song_fft[hz_200:hz_400]
    bands[2] = song_fft[hz_400:hz_800]
    bands[3] = song_fft[hz_800:hz_1600]
    bands[4] = song_fft[hz_1600:hz_3200]
    bands[5] = song_fft[hz_3200:-1]

    # mirror each band (concatenate a reversed copy) before the inverse FFT
    for b in range(numb_bands):
        bands[b] = list(bands[b]) + list(reversed(bands[b]))

    fft_bands = [np.fft.ifft(band) for band in bands]

    fig, axs = plt.subplots(nrows=2, ncols=1)
    x = range(0, len(bands[0]))
    y = bands[0]
    axs[0].bar(x, y, facecolor='b', alpha=0.5, linewidth=1, width=1)
    axs[0].set_ylabel('')
    axs[0].set_xlabel('')
    plt.savefig("graphs/" + args[0].split('/')[-1] + "_graph.png")
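# `indexOfFFTFreq` is not defined in this snippet; a minimal sketch of a
# plausible implementation maps a frequency in Hz to the nearest bin of a
# half-spectrum of length n_bins, assuming the 44.1 kHz rate used above.
def indexOfFFTFreq(freq, n_bins):
    nyquist = 44100 / 2.0
    return int(round(freq / nyquist * n_bins))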
def get_sound_without_silence(source, rmsTreshhold=-40, WndSize=128):
    """
    source : source audio file

    This function returns time tags for the beginning and the end of the
    audio in a file.
    """
    x, fs, enc = wavread(str(source))
    index = 0
    # Find where the silence at the beginning ends
    while index + WndSize < len(x):
        DataArray = x[int(index):int(index + WndSize)]
        rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
        rms = lin2db(rms)
        index = 0.5 * WndSize + index
        if rms > rmsTreshhold:
            beginning = index
            break

    # Find where the silence at the end begins (scan the reversed signal)
    x, fs, enc = wavread(str(source))
    WndSize = 128
    index = 0
    x = list(reversed(x))
    while index + WndSize < len(x):
        DataArray = x[int(index):int(index + WndSize)]
        rms = np.sqrt(np.mean(np.absolute(DataArray)**2))
        rms = lin2db(rms)
        index = 0.5 * WndSize + index
        if rms > rmsTreshhold:
            final = index
            break

    # Convert the sample positions into time tags
    x, fs, enc = wavread(str(source))
    end = len(x) - final
    return beginning / fs, end / fs
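# Hedged usage sketch for the function above: slice a file down to its
# non-silent span. 'stim.wav' is a hypothetical path.
start, end = get_sound_without_silence('stim.wav')
x, fs, enc = wavread('stim.wav')
trimmed = x[int(start * fs):int(end * fs)]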
def parse_audio_w(self):
    """
    Read the raw audio samples from the file.
    """
    # sig: raw audio data
    # fs: sample rate
    sig, fs = wavread(self.filename)[:2]
    return sig
def fastICA(mix_file, jamming_file):
    sig1, fs1, enc1 = wavread(mix_file)
    sig2, fs2, enc2 = wavread(jamming_file)
    sig1, sig2 = chop_sig(sig1, sig2)
    wavwrite(array([sig1, sig2]).T, "mixed.wav", fs1, enc1)

    # Load in the stereo file
    recording, fs, enc = wavread("mixed.wav")

    # Perform the FastICA algorithm on the two channels
    sources = fastica(recording)

    # The output levels of this algorithm are arbitrary, so normalize them to 1.0.
    m = []
    for k in sources:
        m.append(k[0])

    # Write back to a file
    wavwrite(array(m), "sources.wav", fs, enc)
def samples(directory):
    for filename in map(lambda _: directory + _, os.listdir(directory)):
        signal, sample_frequency, _ = wavread(filename)
        if signal.ndim > 1:
            # mix stereo down to mono
            transposed = signal.transpose()
            signal = (transposed[0] + transposed[1]) / 2.0
        yield (os.path.basename(filename)[4], signal, sample_frequency)
def wavopen(file):
    # file => character string
    from scikits.audiolab import wavread
    import numpy
    data, fs, encoding = wavread(file)
    data = data.tolist()
    # duration in whole seconds, rounded up
    t = int(numpy.ceil(len(data) / float(fs)))
    return data, fs, t, encoding
def main(args):
    # Load up the song (must be .wav) into memory
    amp_data, fs, enc = wavread(args[0])
    AMP_MAX = 10000

    # list of BPMs to match
    BPMs = range(30, 180, 5)
    # period = (60 / x seconds / beat) * (44100 samples / second)
    #        = (60 / x) * (44100) samples / beat

    # analyze 5 seconds from the middle of the song
    middle = len(amp_data) / 2
    five_seconds = [item[0] + item[1]
                    for item in amp_data[middle:middle + 44100 * 5]]

    # compute the FFT of the 5 second portion
    N = len(five_seconds)
    song_fft = np.fft.fft(five_seconds)[0:N]

    correlation_energies = []

    # create an FFT for each impulse train generated from the corresponding BPM
    for bpm in BPMs:
        period = int((60.0 / bpm) * 44100)
        impulse_train = [0] * N

        # generate the impulse train
        for k in range(0, N):
            if k % period == 0:
                impulse_train[k] = AMP_MAX

        imptrain_fft = np.fft.fft(impulse_train)[0:N]  # ti[k] & tj[k]

        # calculate the correlation between the two waves imptrain_fft & song_fft
        correlation_energy = 0
        for k in range(0, len(song_fft)):
            correlation_energy += abs(song_fft[k] * imptrain_fft[k])
        correlation_energies.append(correlation_energy)
        #print str(bpm) + ": \t" + str(correlation_energy)

    largest_i = 0
    for i in reversed(range(0, len(BPMs))):
        if correlation_energies[i] > correlation_energies[largest_i]:
            largest_i = i
    print "BPM: " + str(BPMs[largest_i])

    fig, axs = plt.subplots(nrows=2, ncols=1)
    x = BPMs
    y = correlation_energies
    axs[0].bar(x, y, facecolor='b', alpha=0.5, linewidth=1, width=1)
    axs[0].set_ylabel('E_BPMs')
    axs[0].set_xlabel('BPM')
    plt.savefig("graphs/" + args[0].split('/')[-1] + "_graph.png")
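# Quick sanity check of the period formula in the comment above (pure
# arithmetic, no audio file needed): at 120 BPM a beat lasts 0.5 s, which is
# 22050 samples at 44.1 kHz; at 60 BPM it is a full second.
assert int((60.0 / 120) * 44100) == 22050
assert int((60.0 / 60) * 44100) == 44100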
def _read_wav_file(self):
    """
    Simply read raw audio data into class var.
    """
    fullsndpath = os.path.join(os.path.expanduser(self.rootpath), 'snd', self.filename)
    try:
        self.rawaudio, self.sr, self.fmt = wavread(fullsndpath)
    except IOError:
        return "IOError! WAV read failed!"
    return self.rawaudio
def file_preprocessing(path):
    data, fs, enc = wavread(path)
    # average the channels to get a mono signal
    signal = [mean(d) for d in data]
    f = wave.open(path, "r")
    frames = f.getnframes()
    fs = f.getframerate()
    f.close()
    return (signal, fs, frames)
def wavopen(file):
    # file => character string
    from scikits.audiolab import wavread
    import numpy
    data, fs, encoding = wavread(file)
    data = data.tolist()
    del encoding
    # duration in whole seconds, rounded up
    t = int(numpy.ceil(len(data) / float(fs)))
    return data, fs, t
def read_file(filename):
    from os import path
    ext = path.splitext(filename)[1].lower()
    if ext == WAV_EXT:
        amplitudes_array, sample_frequency, fmt = audiolab.wavread(filename)
    else:
        raise NotImplementedError(
            "Format '%s' not supported. Supported formats are: %s"
            % (ext, ', '.join(SUPPORTED_FORMATS)))
    return amplitudes_array, sample_frequency
def readfiles(files):
    '''
    Given an array of file paths, reads all of the files and generates a
    list of audiofile objects as defined above.
    '''
    allfiles = []
    for f in files:
        data, sr, enc = wavread(f)
        allfiles.append(audiofile(data, sr, enc, f[:-4]))
    return allfiles
def retrieve_file_data(self):
    # keep only the audio data and drop the second channel
    wave = wavread(self.__audio)[0]
    wave = [list(pair) for pair in wave]
    audio_data = numpy.array(wave)
    wave = list(audio_data.flatten())
    wave = wave[::2]
    wave = numpy.array(wave).reshape(-1, 1)
    scale = 0.8 / max(wave)
    self.__wave = numpy.multiply(scale, wave)
def file_misclass_error_printf(dnn_model, aux_model, which_layers, data_dir,
                               file_list, filter_cutoff, dnn_save_file,
                               aux_save_file):
    # closures
    def dnn_classify(X):
        batch = dnn_model.get_input_space().make_theano_batch()
        fprop = theano.function([batch], dnn_model.fprop(batch))
        prediction = np.argmax(np.sum(fprop(X), axis=0))
        return prediction

    def aux_classify(X):
        Xagg = aggregate_features(dnn_model, X, which_layers)
        prediction = np.argmax(
            np.bincount(np.array(aux_model.predict(Xagg), dtype='int')))
        return prediction

    # filter coefficients
    b, a = sp.signal.butter(4, filter_cutoff / (22050. / 2.))

    dnn_file = open(dnn_save_file, 'w')
    aux_file = open(aux_save_file, 'w')

    label_list = {
        'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4,
        'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9
    }

    for i, fname in enumerate(file_list):
        print 'Processing file {} of {}'.format(i + 1, len(file_list))

        true_label = label_list[fname.split('/')[0]]

        x, _, _ = audiolab.wavread(os.path.join(data_dir, fname))
        x = sp.signal.lfilter(b, a, x)
        X, _ = compute_fft(x)
        X = np.array(X[:, :513], dtype=np.float32)

        dnn_pred = dnn_classify(X)
        dnn_file.write('{fname}\t{true_label}\t{pred_label}\n'.format(
            fname=fname, true_label=true_label, pred_label=dnn_pred))

        aux_pred = aux_classify(X)
        aux_file.write('{fname}\t{true_label}\t{pred_label}\n'.format(
            fname=fname, true_label=true_label, pred_label=aux_pred))

    dnn_file.close()
    aux_file.close()
def createRings(filename, samples):
    data, fs, enc = wavread(filename)
    if data.ndim == 1:  # mono
        maxValues = data
    else:               # stereo
        maxValues = data.max(axis=1)
    perRing = len(data) / samples
    steps = int((maxD - minD) / step) + 1
    highest = [maxValues[(perRing * i):(perRing * (i + 1))].max()
               for i in range(samples)]
    biggest = max(highest)
    perStep = biggest / steps
    return [int(math.ceil(x / perStep)) for x in highest]