def test_simple(self):
    ofilename = join(TEST_DATA_DIR, 'test.wav')
    # Open the test file for reading
    a = sndfile(ofilename, 'read')
    nframes = a.get_nframes()

    buffsize = 1024
    buffsize = min(nframes, buffsize)

    # First, read some frames, go back, and compare buffers
    # (whence == 0 == SEEK_SET)
    buff = a.read_frames(buffsize)
    a.seek(0)
    buff2 = a.read_frames(buffsize)
    assert_array_equal(buff, buff2)
    a.close()

    # Now, read some frames, go back, and compare buffers
    # (check whence == 1 == SEEK_CUR)
    a = sndfile(ofilename, 'read')
    a.read_frames(buffsize)
    buff = a.read_frames(buffsize)
    a.seek(-buffsize, 1)
    buff2 = a.read_frames(buffsize)
    assert_array_equal(buff, buff2)
    a.close()

    # Now, read some frames, go back, and compare buffers
    # (check whence == 2 == SEEK_END)
    a = sndfile(ofilename, 'read')
    buff = a.read_frames(nframes)
    a.seek(-buffsize, 2)
    buff2 = a.read_frames(buffsize)
    assert_array_equal(buff[-buffsize:], buff2)
    a.close()

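# For quick reference, a minimal sketch of the seek semantics the test above
# exercises; the whence values follow the usual SEEK_SET/SEEK_CUR/SEEK_END
# convention. The file name is a placeholder, and the file is assumed to hold
# at least 1024 frames.
import scikits.audiolab as audiolab

f = audiolab.sndfile('test.wav', 'read')
f.seek(1024)       # whence defaults to 0 (SEEK_SET): absolute frame position
f.seek(-512, 1)    # whence == 1 (SEEK_CUR): relative to the current position
f.seek(-512, 2)    # whence == 2 (SEEK_END): relative to the end of the file
f.close()
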
def test_basic_io(self):
    """ Check open, close and basic read/write"""
    # dirty !
    ofilename = join(TEST_DATA_DIR, 'test.wav')
    rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
    try:
        nbuff = 22050

        # Open the test file for reading
        a = sndfile(ofilename, 'read')
        nframes = a.get_nframes()

        # Open the copy file for writing
        format = audio_format('wav', 'pcm16')
        b = sndfile(fd, 'write', format, a.get_channels(), a.get_samplerate())

        # Copy the data
        for i in range(nframes / nbuff):
            tmpa = a.read_frames(nbuff)
            assert tmpa.dtype == np.float
            b.write_frames(tmpa, nbuff)
        nrem = nframes % nbuff
        tmpa = a.read_frames(nrem)
        assert tmpa.dtype == np.float
        b.write_frames(tmpa, nrem)

        a.close()
        b.close()
    finally:
        close_tmp_file(rfd, cfilename)

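# The copy loop above is a reusable pattern; a hedged standalone sketch using
# the same calls from this test module (the helper name and default block
# size are my own choices, not part of the original suite):
def copy_sndfile(src_path, dst_path, nbuff=22050):
    src = sndfile(src_path, 'read')
    dst = sndfile(dst_path, 'write', audio_format('wav', 'pcm16'),
                  src.get_channels(), src.get_samplerate())
    nframes = src.get_nframes()
    # whole blocks first, then the remainder
    for i in range(nframes / nbuff):
        dst.write_frames(src.read_frames(nbuff), nbuff)
    nrem = nframes % nbuff
    if nrem > 0:
        dst.write_frames(src.read_frames(nrem), nrem)
    src.close()
    dst.close()
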
def test_rw(self):
    """Test read/write pointers for seek."""
    ofilename = join(TEST_DATA_DIR, 'test.wav')
    rfd, fd, cfilename = open_tmp_file('rwseektest.wav')
    try:
        ref = sndfile(ofilename, 'read')
        test = sndfile(fd, 'rwrite', format=ref._format,
                       channels=ref.get_channels(),
                       samplerate=ref.get_samplerate())
        n = 1024

        rbuff = ref.read_frames(n, dtype=np.int16)
        test.write_frames(rbuff)
        tbuff = test.read_frames(n, dtype=np.int16)

        assert_array_equal(rbuff, tbuff)

        # Test seeking both read and write pointers
        test.seek(0, 0)
        test.write_frames(rbuff)
        tbuff = test.read_frames(n, dtype=np.int16)

        assert_array_equal(rbuff, tbuff)

        # Test seeking only read pointer
        rbuff1 = rbuff.copy()
        rbuff2 = rbuff1 * 2 + 1
        rbuff2.clip(-30000, 30000)
        test.seek(0, 0, 'r')
        test.write_frames(rbuff2)
        tbuff1 = test.read_frames(n, dtype=np.int16)
        try:
            tbuff2 = test.read_frames(n, dtype=np.int16)
        except IOError, e:
            msg = "write pointer was updated in read seek !"
            msg += "\n(msg is %s)" % e
            raise AssertionError(msg)

        assert_array_equal(rbuff1, tbuff1)
        assert_array_equal(rbuff2, tbuff2)
        if np.all(rbuff2 == tbuff1):
            raise AssertionError("write pointer was updated"
                                 " in read seek !")

        # Test seeking only write pointer
        rbuff3 = rbuff1 * 2 - 1
        rbuff3.clip(-30000, 30000)
        test.seek(0, 0, 'rw')
        test.seek(n, 0, 'w')
        test.write_frames(rbuff3)
        tbuff1 = test.read_frames(n, np.int16)
        try:
            assert_array_equal(tbuff1, rbuff1)
        except AssertionError:
            raise AssertionError("read pointer was updated in write seek !")
        try:
            tbuff3 = test.read_frames(n, np.int16)
        except IOError, e:
            msg = "read pointer was updated in write seek !"
            msg += "\n(msg is %s)" % e
            raise AssertionError(msg)
    finally:
        close_tmp_file(rfd, cfilename)

def test_int_io(self):
    # TODO: check if neg or pos value is the highest in abs
    rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
    try:
        nb = 2 ** 25
        nbuff = 22050
        fs = 22050
        a = np.random.random_integers(-nb, nb, nbuff)
        a = a.astype(np.int32)

        # Open the file for writing
        format = audio_format('wav', 'pcm32')
        b = sndfile(fd, 'write', format, 1, fs)

        b.write_frames(a, nbuff)
        b.close()

        b = sndfile(cfilename, 'read')
        read_a = b.read_frames(nbuff, dtype=np.int32)
        b.close()

        assert_array_equal(a, read_a)
    finally:
        close_tmp_file(rfd, cfilename)

def make_rawaudio_segment(self, seg_time=1):
    '''
    Cut the file into segments of seg_time seconds each (the caller sweeps
    seg_time over 1s, 2s, 3s, ... 10s) and stack them column-wise; the
    trailing partial segment is dropped.
    '''
    samplerate = 0
    stack = np.array(())
    with closing(sndfile(self.filename)) as f:
        fs = f.get_samplerate()
        # number of samples in 1 second
        samplerate = fs
        num_samples = fs * seg_time  # total number of samples per segment
        nframes = f.get_nframes()
        j = 0
        if num_samples < nframes:
            while j < nframes:
                if num_samples < (nframes - j):
                    samples = f.read_frames(num_samples)
                    if j == 0:
                        stack = np.column_stack(np.array(samples)).T
                    else:
                        stack = np.column_stack((stack, np.array(samples)))
                    j = j + num_samples
                else:
                    # trailing partial segment is read but not stacked
                    samples = f.read_frames(nframes - j)
                    # stack = np.column_stack((stack, samples))
                    j = j + num_samples
        else:
            samples = f.read_frames(nframes)
            stack = np.column_stack((samples))
    return stack, samplerate

def create_png(input_filename, output_filename_w, output_filename_s,
               image_width, image_height, fft_size, f_max, f_min,
               wavefile, palette, channel):
    print "processing file %s:\n\t" % input_filename,

    audio_file = audiolab.sndfile(input_filename, 'read')

    samples_per_pixel = audio_file.get_nframes() / float(image_width)
    nyquist_freq = (audio_file.get_samplerate() / 2) + 0.0
    processor = AudioProcessor(audio_file, fft_size, channel, numpy.hanning)

    if wavefile == 1:
        waveform = WaveformImage(image_width, image_height, palette)
    spectrogram = SpectrogramImage(image_width, image_height, fft_size,
                                   f_max, f_min, nyquist_freq, palette)

    for x in range(image_width):
        if x % (image_width / 10) == 0:
            sys.stdout.write('.')
            sys.stdout.flush()

        seek_point = int(x * samples_per_pixel)
        next_seek_point = int((x + 1) * samples_per_pixel)

        (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point)

        if wavefile == 1:
            peaks = processor.peaks(seek_point, next_seek_point)
            waveform.draw_peaks(x, peaks, spectral_centroid)

        spectrogram.draw_spectrum(x, db_spectrum)

    if wavefile == 1:
        waveform.save(output_filename_w)
    spectrogram.save(output_filename_s)

    print " done"

def main():
    # Run some tests
    # Combine same-size arrays w/various offsets
    a = 1 * np.ones(3)
    b = 2 * np.ones(3)
    assert (crude_combine(a, b, 0, 0) == np.array([1.5, 1.5, 1.5])).all()
    assert (crude_combine(a, b, 0, 1) == np.array([2, 1.5, 1.5, 1])).all()
    assert (crude_combine(a, b, 1, 0) == np.array([1, 1.5, 1.5, 2])).all()
    assert (crude_combine(a, b, 0, 2) == np.array([2, 2, 1.5, 1, 1])).all()
    assert (crude_combine(a, b, 2, 0) == np.array([1, 1, 1.5, 2, 2])).all()
    assert (crude_combine(a, b, 0, 3) == np.array([])).all()
    assert (crude_combine(a, b, 3, 0) == np.array([])).all()

    # Now try it out with some audio
    import scikits.audiolab as audiolab
    s0 = audiolab.sndfile("AfterTheBattle01.wav")
    f0 = s0.read_frames(s0.get_nframes())

    # Break into overlapping pieces
    f1 = f0[:200]
    f2 = f0[100:]

    # Recombine
    f3 = crude_combine(f1, f2, 100, 0)
    assert (f3 == f0).all()

def main():
    # open original signal
    sound = audiolab.sndfile("AfterTheBattle01.wav")
    signal = sound.read_frames(sound.get_nframes())

    cutoff_set = np.logspace(np.log10(500), np.log10(2e4), 20)
    offset_set = np.zeros(cutoff_set.size)
    i = 0
    for cutoff in cutoff_set:
        print "Cutoff:", cutoff
        filtered = butter_lowpass(signal, cutoff)
        offset = find_offset(filtered, signal)
        print "Offset:", offset
        offset_set[i] = offset
        i += 1

    # attempt to plot
    pyplot.subplot(2, 1, 1)
    pyplot.plot(cutoff_set, offset_set)
    pyplot.xscale('log')
    pyplot.xlabel("Lowpass butterworth cutoff freq")
    pyplot.ylabel("Frame offset from unfiltered signal")
    #pyplot.yscale('log')
    pyplot.show()

    # Save cutoff_set and offset_set to file
    pass

def test_nofile(self):
    """ Check the failure when opening a non existing file."""
    try:
        f = sndfile("floupi.wav", "read")
        raise AssertionError("opening a non existing file should not succeed")
    except IOError:
        pass
    except Exception, e:
        raise AssertionError("opening a non existing file should raise an "
                             "IOError exception, got %s instead" % e.__class__)

def show(self, example):
    sound = audiolab.sndfile(self.base + example.file)
    frames = sound.read_frames(sound.get_nframes()) * 0.8
    mfcc = features.mfcc(frames[example.start:example.stop:2], fs=41000)
    print mfcc[0].shape
    fig = plt.figure()
    fig.set_size_inches(20, 20)
    ax = fig.add_subplot(111)
    ax.imshow(mfcc[0].transpose()[:, :100])

def test_float64(self):
    """Check float64 write/read works"""
    # dirty !
    ofilename = join(TEST_DATA_DIR, 'test.wav')
    rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
    try:
        nbuff = 22050

        # Open the test file for reading
        a = sndfile(ofilename, 'read')
        nframes = a.get_nframes()

        # Open the copy file for writing
        format = audio_format('wav', 'float64')
        b = sndfile(fd, 'write', format, a.get_channels(), a.get_samplerate())

        # Copy the data in the wav file
        for i in range(nframes / nbuff):
            tmpa = a.read_frames(nbuff, dtype=np.float64)
            assert tmpa.dtype == np.float64
            b.write_frames(tmpa, nbuff)
        nrem = nframes % nbuff
        tmpa = a.read_frames(nrem)
        b.write_frames(tmpa, nrem)

        a.close()
        b.close()

        # Now, reopen both files for reading, and check data are the same
        a = sndfile(ofilename, 'read')
        b = sndfile(cfilename, 'read')
        for i in range(nframes / nbuff):
            tmpa = a.read_frames(nbuff, dtype=np.float64)
            tmpb = b.read_frames(nbuff, dtype=np.float64)
            assert_array_equal(tmpa, tmpb)

        a.close()
        b.close()
    finally:
        close_tmp_file(rfd, cfilename)

def show_specgram(file):
    """ Reads in wav and displays spectrogram """
    sound = audiolab.sndfile(file, 'read')
    # get_nframes() returns number of frames
    sound_info = sound.read_frames(sound.get_nframes())
    spectrogram = specgram(sound_info)
    sound.close()
    show()
    return spectrogram

def create_png(input_filename, output_filename_w, image_width, image_height,
               channels, fft_size, f_max, f_min):
    print "processing file %s:\n\t" % input_filename

    audio_file = audiolab.sndfile(input_filename, 'read')

    samples_per_pixel = audio_file.get_nframes() / float(image_width)
    nyquist_freq = (audio_file.get_samplerate() / 2) + 0.0
    processor = AudioProcessor(audio_file, fft_size, numpy.hanning)

    path_split = os.path.split(output_filename_w)
    for channel in range(channels):
        waveform = WaveformImage(image_width, image_height / channels)
        for x in range(image_width):
            if x % (image_width / 10) == 0:
                sys.stdout.write('.')
                sys.stdout.flush()

            seek_point = int(x * samples_per_pixel)
            next_seek_point = int((x + 1) * samples_per_pixel)

            (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point, channel)
            peaks = processor.peaks(seek_point, next_seek_point, channel)

            waveform.draw_peaks(x, peaks, spectral_centroid)

        # If we have only one channel, don't bother with renaming
        if channels == 1:
            waveform.save(output_filename_w)
        else:
            waveform.save(os.path.join(path_split[0],
                                       str(channel) + path_split[1]))
    print " done"

    if channels > 1:
        combined = Image.new("RGBA", (image_width, image_height))
        # Delete the segments
        for channel in range(channels):
            cur = Image.open(os.path.join(path_split[0],
                                          str(channel) + path_split[1]))
            combined.paste(cur, (0, channel * (image_height / channels)))
            os.remove(os.path.join(path_split[0], str(channel) + path_split[1]))
        combined.save(output_filename_w)
        print " done"

def get_framer_audio(filename, size, hop):
    from scikits import audiolab
    loader = audiolab.sndfile(filename)
    sr = loader.get_samplerate()
    nframes = loader.get_nframes()
    nchannels = loader.get_channels()
    framer = framer_audio(loader, size, hop)
    return framer, sr, nframes, nchannels, loader

def processWav(filename, channel):
    """
    filename: path to a wav file
    channel: 1 for left, 2 for right

    Returns centroids, frequencies, volumes
    """
    # open file
    audio_file = audiolab.sndfile(filename, 'read')

    # duration: length of the audio file in seconds * 30
    import contextlib
    import wave
    with contextlib.closing(wave.open(filename, 'r')) as f:
        frames = f.getnframes()
        rate = f.getframerate()
        duration = frames / float(rate)
    duration *= 30  # 30 data points for every second of audio
    duration = int(duration)  # can only process an integer number of frames
    #print duration

    # Not really samples per pixel, but samples per data point
    samples_per_pixel = audio_file.get_nframes() / float(duration)
    # the Nyquist frequency is half the sample rate
    nyquist_freq = (audio_file.get_samplerate() / 2) + 0.0
    # fft_size stays 2048; smaller size == more efficient, fewer frequency samples
    processor = AudioProcessor(audio_file, 2048, channel, numpy.hanning)

    centroids = []
    frequencies = []
    volumes = []
    for x in range(duration):
        seek_point = int(x * samples_per_pixel)
        next_seek_point = int((x + 1) * samples_per_pixel)
        (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point)
        peaks = processor.peaks(seek_point, next_seek_point)
        centroids.append(spectral_centroid)
        frequencies.append(db_spectrum)
        volumes.append(peaks)
    #print "Centroids:" + str(centroids)
    #print "Frequencies:" + str(frequencies)
    #print "Volumes:" + str(volumes)

    # convert volumes[] from peaks to actual volumes
    for i in range(len(volumes)):
        volumes[i] = abs(volumes[i][0]) + abs(volumes[i][1])

    # round frequencies to save resources
    for i in range(len(frequencies)):
        for j in range(len(frequencies[i])):
            frequencies[i][j] = round(frequencies[i][j], 4)

    return centroids, frequencies, volumes

def test_bigframes(self):
    """ Try to seek really far"""
    rawname = join(TEST_DATA_DIR, 'test.wav')
    a = sndfile(rawname, 'read')
    try:
        try:
            a.seek(2 ** 60)
            raise Exception("Seek really succeeded ! This should not happen")
        except PyaudioIOError, e:
            pass
    finally:
        a.close()

def show_specgram(speech):
    sound = audiolab.sndfile(speech, 'read')
    sound_info = sound.read_frames(sound.get_nframes())
    #spectrogram = plt.specgram(sound_info)
    mfcc = talkfeat.mfcc(sound_info)
    #print mfcc
    plt.imshow(mfcc[0].transpose())
    plt.title('Spectrogram of %s' % sys.argv[1])
    plt.show()
    sound.close()

def parse_audio(self):
    """Read the whole file; return (signal, samplerate, channels)."""
    with closing(sndfile(self.filename)) as f:
        # print("sampling rate = {} Hz \nlength = {} samples\nchannels = {}\n"
        #       "encoding={}\nendianness={}\n".format(
        #           f.get_samplerate(), f.get_nframes(), f.get_channels(),
        #           f.get_encoding(), f.get_endianness()))
        sig = f.read_frames(f.get_nframes())
        # self.plot_time_domain_signal(sig)
        # plt.plot(sig)
        # plt.show()
        return sig, f.get_samplerate(), f.get_channels()

def split_wav(wav_file, ref_file, is_save=False, dir_name="split"):
    from scikits.audiolab import formatinfo as format
    import scikits.audiolab as audiolab
    import shutil

    fr = audiolab.sndfile(wav_file, "read")
    n_channels = fr.get_channels()
    fmt = format("wav", fr.get_encoding())
    fs = fr.get_samplerate()

    if is_save:
        shutil.rmtree(dir_name, ignore_errors=True)
        shutil.os.mkdir(dir_name)

    slices = dict()
    with open(ref_file, "r") as ref_read:
        for i, line in enumerate(ref_read.readlines()):
            fields = line.strip().split(" ")
            assert len(fields) == 3
            begin = int(fields[0])
            end = int(fields[1])
            word = fields[2]

            fr.seek(begin)
            sli = fr.read_frames(end - begin + 1)
            if word not in slices:
                slices[word] = []
            # if len(sli) == 1:
            #     print word, line
            slices[word].append(sli)

            if is_save:
                path = shutil.os.path.join(dir_name, str(i) + "-" + word + ".wav")
                afile = audiolab.sndfile(path, "write", fmt, n_channels, fs)
                afile.write_frames(sli, len(sli))
                afile.close()
    return slices

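# A hedged usage sketch for split_wav: the file names are placeholders, and
# the reference file must contain "<begin-frame> <end-frame> <word>" per line,
# matching the parser above.
slices = split_wav("speech.wav", "speech.ref", is_save=True, dir_name="split")
for word in slices:
    print word, len(slices[word]), "segment(s)"
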
def test_float_frames(self):
    """ Check nframes can be a float"""
    rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
    try:
        # Open the file for writing
        format = audio_format('wav', 'pcm16')
        a = sndfile(fd, 'rwrite', format, channels=1, samplerate=22050)
        tmp = np.random.random_integers(-100, 100, 1000)
        tmp = tmp.astype(np.short)
        a.write_frames(tmp, tmp.size)
        a.seek(0)
        a.sync()
        ctmp = a.read_frames(1e2, dtype=np.short)
        a.close()
    finally:
        close_tmp_file(rfd, cfilename)

def play_rand(file, name):
    sound = audiolab.sndfile(file)
    limit = sound.get_nframes()
    frames = sound.read_frames(sound.get_nframes()) * 0.8
    if limit < size:
        return
    for i in range(5):
        start = random.randint(0, limit - size)
        print("Is this a voice? [(y)es, (n)o, " +
              "(s)kip/significant-portions-of-both/can't-tell, (r)eplay]")
        audiolab.play(frames[start:start + size][:, 0])
        while True:
            input = raw_input()
            if input == "r":
                audiolab.play(frames[start:start + size][:, 0])
            elif input in ["y", "n", "s"]:
                break
        print >> outfile, name, start, start + size, input

def test_mismatch(self):
    # This test opens a file for writing, but with bad args (channels and
    # nframes inverted)
    rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
    try:
        # Open the file for writing
        format = audio_format('wav', 'pcm16')
        try:
            b = sndfile(fd, 'write', format, channels=22000, samplerate=1)
            raise Exception("Try to open a file with more than 256 "
                            "channels, this should not succeed !")
        except ValueError, e:
            #print "Gave %d channels, error detected is \"%s\"" % (22000, e)
            pass
    finally:
        close_tmp_file(rfd, cfilename)

def main():
    # Run some tests
    # Combine same-size arrays w/various offsets
    a = 1 * numpy.ones(3)
    b = 2 * numpy.ones(3)
    assert (crude_combine(a, b, 0, 0) == numpy.array([1.5, 1.5, 1.5])).all()
    assert (crude_combine(a, b, 0, 1) == numpy.array([2, 1.5, 1.5, 1])).all()
    assert (crude_combine(a, b, 1, 0) == numpy.array([1, 1.5, 1.5, 2])).all()
    assert (crude_combine(a, b, 0, 2) == numpy.array([2, 2, 1.5, 1, 1])).all()
    assert (crude_combine(a, b, 2, 0) == numpy.array([1, 1, 1.5, 2, 2])).all()
    assert (crude_combine(a, b, 0, 3) == numpy.array([])).all()
    assert (crude_combine(a, b, 3, 0) == numpy.array([])).all()

    # Let's try with one bigger than the other
    a = 1 * numpy.ones(5)
    b = 2 * numpy.ones(2)
    assert (crude_combine(a, b, 0, 0) == numpy.array([1.5, 1.5, 1, 1, 1])).all()
    assert (crude_combine(a, b, 0, 1) == numpy.array([2, 1.5, 1, 1, 1, 1])).all()
    assert (crude_combine(a, b, 1, 0) == numpy.array([1, 1.5, 1.5, 1, 1])).all()

    # Now try it out with some audio
    import scikits.audiolab as audiolab
    s0 = audiolab.sndfile("AfterTheBattle01.wav")
    f0 = s0.read_frames(s0.get_nframes())

    # Break into overlapping pieces
    f1 = f0[:200]
    f2 = f0[100:]

    # Recombine
    f3 = crude_combine(f1, f2, 100, 0)
    assert (f3 == f0).all()

    # Found a broken case
    # Fixed, now it passes
    f1 = f0
    f2 = f0[f0.size - 160000 : f0.size - 160000 + 80000]
    f3 = crude_combine(f2, f1, 0, f1.size - 160000)
    assert (f3 == f0).all()

def the_soothing_sound_of_hash_collisions():
    # it's a 3 minute song
    SECONDS_PER_FRAME = 4
    TREE_SIZE = 256
    frames = []
    with open('/dev/urandom', 'rb') as fp:
        output = a.sndfile("merkle_chord.wav", format=a.formatinfo(),
                           mode='write', channels=1, samplerate=44100)
        round_size = TREE_SIZE
        round_hashes = []
        # we will seed this with 2 * TREE_SIZE random values that will be used
        # by the algorithm to compute the leaves
        for i in range(0, TREE_SIZE * 2):
            this_seed = fp.read(32)
            round_hashes.append(this_seed)
        while True:
            round_hashes = compute_tree_level(round_hashes)
            # we exhausted the tree by now
            if not round_hashes:
                break
            frame_frequencies = compute_frequencies_for_tree_level(round_hashes)
            print(frame_frequencies)
            frame_pcm = compute_and_flatten_notes(frame_frequencies, SECONDS_PER_FRAME)
            output.write_frames(frame_pcm)
            output.sync()

def play(self, example):
    sound = audiolab.sndfile(self.base + example.file)
    frames = sound.read_frames(sound.get_nframes()) * 0.8
    audiolab.play(frames[example.start:example.stop][:, 0])

def create_png(input_filename, output_filename_w, output_filename_s,
               image_width, image_height, fft_size, f_max, f_min,
               wavefile, palette, channel):
    """
    Given command line arguments this basically does everything.

    WHAT I HAVE GATHERED:
    db_spectrum has the frequencies of the sound file.
    spectral_centroid tells us what the color of the sound is.
    peaks tell us what the amplitude of the sound is.

    Should be trivial to adapt this from image output to output to our
    JavaScript visualizer now.
    """
    print "processing file %s:\n\t" % input_filename,

    # opens the wav file; audio_file is an object now
    audio_file = audiolab.sndfile(input_filename, 'read')

    samples_per_pixel = audio_file.get_nframes() / float(image_width)
    nyquist_freq = (audio_file.get_samplerate() / 2) + 0.0

    # Initializes AudioProcessor class, which does FFT analysis and spits out
    # amplitudes and frequencies to the SpectrogramImage and WaveformImage
    # classes below later. For a stereo wav file, this selects a single
    # channel to analyze. We might want to analyze both channels to give more
    # input to the visualizer, though.
    processor = AudioProcessor(audio_file, fft_size, channel, numpy.hanning)

    if wavefile == 1:
        waveform = WaveformImage(image_width, image_height, palette)
    spectrogram = SpectrogramImage(image_width, image_height, fft_size,
                                   f_max, f_min, nyquist_freq, palette)

    for x in range(image_width):
        # shows progress
        if x % (image_width / 10) == 0:
            sys.stdout.write('.')
            sys.stdout.flush()

        seek_point = int(x * samples_per_pixel)
        next_seek_point = int((x + 1) * samples_per_pixel)

        (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point)
        # let's have a look at the spectral centroid and the db_spectrum
        #print "Spectral Centroid:" + str(spectral_centroid)
        #print "DB Spectrum:" + str(db_spectrum)

        if wavefile == 1:
            # aha! The peaks and spectral centroid make up the waveform.
            # Since the spectral centroid indicates timbre (often referred to
            # as color), it's probably what colors the waveform.
            peaks = processor.peaks(seek_point, next_seek_point)
            # let's have a look at these peaks
            #print "Peaks:" + str(peaks)
            waveform.draw_peaks(x, peaks, spectral_centroid)

        spectrogram.draw_spectrum(x, db_spectrum)

    if wavefile == 1:
        waveform.save(output_filename_w)
    spectrogram.save(output_filename_s)

    print " done"

#!/usr/bin/env python
from numpy import *
from scipy import *  # after numpy, to import scipy's fft
from scikits import audiolab

filename = 'audio/lv1.aif'
fft_window_size = 512
window_size = 2 * fft_window_size

audiofile = audiolab.sndfile(filename)
samples = audiofile.read_frames(audiofile.get_nframes())

# pad to the right size so we can reshape it easily
extras = repeat(0, window_size - len(samples) % window_size)
samples = append(samples, extras)

fft_shape = (samples.size / window_size, window_size)
fft_windows = map(fft, samples.reshape(fft_shape))
print "got %d fft windows" % len(fft_windows)

import numpy as N
import scikits.audiolab as audiolab

filename = 'test.wav'
a = audiolab.sndfile(filename, 'read')
tmp = a.read_frames(1e4)
float_tmp = a.read_frames(1e4, dtype=N.float32)

import pylab as P
P.plot(tmp[:])

# This example plots 2 wav files, one above the other
import numpy
import scikits.audiolab as audiolab
import pylab

s1 = audiolab.sndfile("AfterTheBattle01.wav")
s2 = audiolab.sndfile("AfterTheBattle02.wav")
f1 = s1.read_frames(s1.get_nframes())
f2 = s2.read_frames(s2.get_nframes())

# subplot(nrows, ncols, plot_number)
# f1 will be plotted above f2
pylab.subplot(2, 1, 1)
pylab.plot(f1)
pylab.subplot(2, 1, 2)
pylab.plot(f2)
pylab.show()

def frames_from_wav(filename):
    sound = audiolab.sndfile(filename)
    return sound.read_frames(sound.get_nframes())

#!python
import numpy as np
import scikits.audiolab as audiolab
import pylab as plt
import mel
from scipy.fftpack import dct

sound = audiolab.sndfile('prueba.wav')
data = sound.read_frames(sound.get_samplerate())  # read one second of audio
data = data[:, 0]  # keep the left channel

ham = np.hamming(1024)  # Hamming analysis window matching the 1024-sample frames
mfccs = []
for i in range(len(data) / 512 - 1):
    win = data[512 * i:512 * i + 1024]
    s = np.fft.rfft(win * ham, 512)
    p = (s.real ** 2 + s.imag ** 2) / len(win)
    m = np.log(np.dot(p, mel.MELfilterbank_speech).clip(1e-5, np.inf))
    d = dct(m)
    mfcc = d[1:13]
    mfccs.append(mfcc)

plt.imshow(np.array(mfccs).T)
plt.show()

import scikits.audiolab as audiolab

filename = 'test.wav'
a = audiolab.sndfile(filename, 'read')
print a

def test_basic_io_fd(self):
    """ Check open from fd works"""
    ofilename = join(TEST_DATA_DIR, 'test.wav')
    fd = os.open(ofilename, os.O_RDONLY)
    hdl = sndfile(fd, 'read')
    hdl.close()

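# The fd-based open above also works outside the test harness; a minimal
# sketch (path and block size are placeholders) that reads a block before
# closing:
fd = os.open('test.wav', os.O_RDONLY)
hdl = sndfile(fd, 'read')
block = hdl.read_frames(1024)
hdl.close()
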
import scikits.audiolab as audiolab

a = audiolab.sndfile('test.wav', 'read')
data = a.read_frames(1000)
a.close()

import scikits.audiolab as al
import numpy as np

fps = 30.0
nband = 256
fft_size = 2 ** 11
bandsize = fft_size / (2 * nband)

w = al.sndfile('loop.wav', 'read')
sr = w.get_samplerate()
ns = w.get_nframes()
s = w.read_frames(ns)

binsize = int(sr / fps)
nbins = int(ns / binsize)
window = 2 * binsize

tmp = np.zeros(window + len(s), dtype=np.float32)
tmp[binsize:-binsize] = s[:, 0]  # might want to avg channels?
s = tmp

ffts = np.zeros((nbins, nband), dtype=np.float32)
print nbins
for i in xrange(nbins):
    tmp = s[i * binsize:i * binsize + window]
    clip = (window - fft_size) / 2
    tmp = tmp[clip:-clip] * np.hanning(fft_size)
    tmp2 = np.zeros(nband, dtype=np.float32)
    fft_tmp = np.fft.fft(tmp)[:fft_size / 2].real ** 2
    for j in xrange(nband):
        tmp2[j] = np.log(np.average(fft_tmp[j * bandsize:(j + 1) * bandsize]))
    ffts[i] = tmp2

bpm = 120.0

from tempfile import mkstemp
from os import remove

import numpy as N
from scikits.audiolab import formatinfo as format
import scikits.audiolab as audiolab

# Create a temp file in the system temporary dir, and always remove
# it at the end
cd, filename = mkstemp('tmptest.wav')
try:
    fmt = format('wav', 'pcm24')
    nchannels = 2
    fs = 44100
    afile = audiolab.sndfile(filename, 'write', fmt, nchannels, fs)

    # Create a stereo white noise, with Gaussian distribution
    tmp = 0.1 * N.random.randn(1000, nchannels)

    # Write the first 500 frames of the signal
    # Note that the write_frames method uses tmp's numpy dtype to determine
    # how to write to the file; sndfile also converts the data on the fly
    # if necessary
    afile.write_frames(tmp, 500)
    afile.close()

    # Let's check that the written file has the expected meta data
    afile = audiolab.sndfile(filename, 'read')
    assert afile.get_samplerate() == fs
    assert afile.get_channels() == nchannels
    afile.close()
finally:
    remove(filename)

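# As the comment above notes, write_frames keys the on-disk conversion off
# the array's dtype; a hedged follow-up sketch (reusing the imports above;
# the second temp file and all names here are my own) writing int16 data to
# a pcm16 file:
fd16, fname16 = mkstemp('tmptest16.wav')
try:
    f16 = audiolab.sndfile(fname16, 'write', format('wav', 'pcm16'), 1, 44100)
    data = (32767 * 0.1 * N.random.randn(1000)).astype(N.int16)
    f16.write_frames(data, data.size)  # integral dtype: written without rescaling
    f16.close()
finally:
    remove(fname16)
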
def test_raw(self):
    rawname = join(TEST_DATA_DIR, 'test.raw')
    format = audio_format('raw', 'pcm16', 'little')
    a = sndfile(rawname, 'read', format, 1, 11025)
    assert a.get_nframes() == 11290
    a.close()

import matplotlib.pyplot as plt
import scikits.audiolab as audiolab

sound = audiolab.sndfile('Violin_for_spectogram.ogg', 'read')
y = sound.read_frames(sound.get_nframes())

Pxx, freqs, bins, im = plt.specgram(y, NFFT=512, Fs=44100)
plt.xlim(0, len(y) / 44100.0)
plt.ylim(0, 22050.0)
plt.colorbar(im).set_label(u'Intensity (dB)')
plt.xlabel(u'Time (s)')
plt.ylabel(u'Frequency (Hz)')
plt.show()
