def test_buf_size_not_power_of_two(self):
    """Expect ``pvoc`` to raise ``RuntimeError`` for a window size of 320.

    If the constructor does not raise, the test is skipped instead of
    failing (see the comment in the ``except`` branch).
    """
    win_s = 320
    hop_s = win_s // 2
    try:
        self.assertRaises(RuntimeError, pvoc, win_s, hop_s)
    except AssertionError:
        # when compiled with fftw3, aubio supports non power of two fft sizes
        self.skipTest('creating aubio.pvoc with size %d did not fail' % win_s)
def mfcc(self, **options):
    """Compute MFCCs for every hop read from ``self.audioFilename``.

    Keyword options:
        nFilters: number of filterbank bands; a falsy or missing value
            falls back to 40.
        nCoefs: number of coefficients per frame; a falsy or missing
            value falls back to 13.

    Returns:
        A 2-D array with ``nCoeffs`` columns. Row 0 is an all-zero seed row
        (kept for backward compatibility); each following row holds the
        coefficients of one hop, in reading order.
    """
    nFilters = options.get("nFilters") or 40  # must be 40 for mfcc
    nCoeffs = options.get("nCoefs") or 13

    sourceBuffer = source(self.audioFilename, self.samplerate, self.hopSize)
    pvocBuffer = pvoc(self.fftWindowSize, self.hopSize)
    mfccBuffer = mfcc(self.fftWindowSize, nFilters, nCoeffs, self.samplerate)

    # Collect rows and stack once: re-stacking the whole array on every hop
    # is quadratic in the number of hops.
    rows = [np.zeros([nCoeffs, ])]
    while True:
        samples, read = sourceBuffer()
        mfcc_out = mfccBuffer(pvocBuffer(samples))
        # Copy, in case the extractor hands back the same buffer each call.
        rows.append(np.array(mfcc_out))
        if read < self.hopSize:
            # A short read marks the last hop of the file.
            break
    return np.vstack(rows)
def Extract_MFCC(self, player_pos_x, player_pos_y):
    """Return the MFCCs of ``./Audios/Hello.wav`` scaled by inverse distance.

    The scale factor is ``1 / d`` where ``d`` is the Euclidean distance
    between the player position and the module-level target position
    (``target_position_x``, ``target_position_y``); it is 1 when ``d`` is 0.

    Returns:
        A 2-D array with 13 columns. Row 0 is an all-zero seed row (kept for
        backward compatibility); each following row is one hop of the file.
    """
    from aubio import source, pvoc, mfcc
    from numpy import vstack, zeros

    player = [player_pos_x, player_pos_y]
    target = [target_position_x, target_position_y]
    distance = scipy.spatial.distance.euclidean(player, target)

    n_filters = 40  # must be 40 for mfcc
    n_coeffs = 13
    source_filename = './Audios/Hello.wav'
    win_s = 512
    hop_s = 128

    s = source(source_filename, 44100, hop_s)
    samplerate = s.samplerate
    p = pvoc(win_s, hop_s)
    m = mfcc(win_s, n_filters, n_coeffs, samplerate)

    # Collect rows and stack once instead of re-stacking on every hop.
    rows = [zeros([n_coeffs, ])]
    while True:
        samples, read = s()
        # Copy, in case the extractor hands back the same buffer each call.
        rows.append(m(p(samples)).copy())
        if read < hop_s:
            break
    mfccs = vstack(rows)

    factor = 1 if distance == 0 else 1 / distance
    return mfccs * factor
def vocoder(file_id):
    """Write a zero-phase, half-amplitude resynthesis of an audio file.

    Reads ``/tmp/don_robot/output/<file_id>.mp3`` hop by hop, halves the
    spectral magnitudes, sets every phase to zero, resynthesises the hop and
    writes it to ``/tmp/don_robot/output/<file_id>-voc.mp3``.

    Args:
        file_id: base name (string) of the input file, without extension.

    Returns:
        The output file path.
    """
    in_file = '/tmp/don_robot/output/' + file_id + ".mp3"
    out_file = '/tmp/don_robot/output/' + file_id + "-voc.mp3"
    samplerate = 44100
    win_s = 512  # fft size
    hop_s = win_s // 2  # hop size

    f = source(in_file, samplerate, hop_s)
    try:
        g = sink(out_file, samplerate)
        try:
            pv = pvoc(win_s, hop_s)  # phase vocoder
            total_frames, read = 0, hop_s
            while read:
                samples, read = f()
                spectrum = pv(samples)  # compute spectrum
                spectrum.norm *= .5  # reduce amplitude a bit .8
                spectrum.phas[:] = 0.  # zero phase
                new_samples = pv.rdo(spectrum)  # compute modified samples
                g(new_samples, read)  # write to output
                total_frames += read
            format_str = "read {:d} samples from {:s}, written to {:s}"
            print(format_str.format(total_frames, f.uri, g.uri))
        finally:
            # Close explicitly so the output is finalised before the path
            # is handed back to the caller.
            g.close()
    finally:
        f.close()
    return out_file
def get_spectrogram(row, col):
    """Return the magnitude spectrogram of ``Test.wav`` for a grid cell.

    For the last cell of the grid (``row == grid_size.nRow - 1`` and
    ``col == grid_size.nCol - 1``) a debug message is printed and 100 is
    added to every value.

    Args:
        row: grid row index of the cell.
        col: grid column index of the cell.

    Returns:
        A 2-D array with one row per hop and ``win_s // 2 + 1`` columns.
    """
    from aubio import pvoc, source, float_type
    from numpy import zeros, vstack

    win_s = 512  # fft window size
    hop_s = win_s // 2  # hop size
    fft_s = win_s // 2 + 1  # spectrum bins
    filename = 'Test.wav'

    a = source(filename, 0, hop_s)  # source file
    pv = pvoc(win_s, hop_s)  # phase vocoder

    # Collect the norm vectors and stack once instead of on every hop.
    frames = [zeros([0, fft_s], dtype=float_type)]
    while True:
        samples, read = a()  # read file
        # Copy, in case the vocoder hands back the same buffer each call.
        frames.append(pv(samples).norm.copy())
        if read < a.hop_size:
            break
    specgram = vstack(frames)

    # The column test used to compare ``grid_size`` itself with an integer,
    # so this branch could never be taken.
    if row == grid_size.nRow - 1 and col == grid_size.nCol - 1:
        print("True")
        print("row {} col {}".format(row, col))
        specgram = specgram + 100
    return specgram
def __init__(self):
    """Create the redis client, the aubio analysers and the audio stream.

    The PyAudio stream is opened last. A callback stream may begin invoking
    ``self.callback`` as soon as it is opened, so every attribute is
    initialised first. NOTE(review): presumably the callback reads the
    analysers and counters set here - confirm against ``self.callback``.
    """
    self.redis = redis.StrictRedis(host=redishost, port=6379, password="",
                                   decode_responses=True)

    # Analysis objects, all sharing the same window, hop and sample rate.
    self.a_onset = aubio.onset("default", self.CHUNK, self.hop_s, self.RATE)
    self.a_tempo = aubio.tempo("specflux", self.CHUNK, self.hop_s, self.RATE)
    self.a_pitch = aubio.pitch("default", self.CHUNK, self.hop_s, self.RATE)
    self.a_notes = aubio.notes("default", self.CHUNK, self.hop_s, self.RATE)
    n_filters = 40  # required
    n_coeffs = 13  # I wonder if i made this 1....
    self.a_pvoc = aubio.pvoc(self.CHUNK, self.hop_s)
    self.a_mfcc = aubio.mfcc(self.CHUNK, n_filters, n_coeffs, self.RATE)

    self.tolerance = 0.8
    self.a_pitch.set_tolerance(self.tolerance)

    # Running pitch statistics and display state.
    self.highest_pitch = 0
    self.lowest_pitch = 99999999
    self.average_pitch = 0
    self.average_pitch_samples = 0
    self.last_average = 0
    self.colors = None
    self.pitch_range = None
    self.range_counter = 0
    self.all_notes = set()

    # Open the duplex callback stream only once the object is fully set up.
    self.p = pyaudio.PyAudio()
    stream = self.p.open(format=self.FORMAT,
                         channels=self.CHANNELS,
                         rate=self.RATE,
                         input=True,
                         output=True,
                         input_device_index=self.get_input_device_index(),
                         output_device_index=self.get_output_device_index(),
                         frames_per_buffer=self.CHUNK,
                         stream_callback=self.callback)
    stream.start_stream()
def Extract_MFCC(row, col):
    """Return the MFCCs of ``Test.wav`` for a grid cell.

    For the last cell of the grid (``row == grid_size.nRow - 1`` and
    ``col == grid_size.nCol - 1``) 100 is added to every value.

    Args:
        row: grid row index of the cell.
        col: grid column index of the cell.

    Returns:
        A 2-D array with 13 columns. Row 0 is an all-zero seed row (kept for
        backward compatibility); each following row is one hop of the file.
    """
    from aubio import source, pvoc, mfcc
    from numpy import vstack, zeros

    n_filters = 40  # must be 40 for mfcc
    n_coeffs = 13
    source_filename = 'Test.wav'
    win_s = 512
    hop_s = 128

    s = source(source_filename, 44100, hop_s)
    samplerate = s.samplerate
    p = pvoc(win_s, hop_s)
    m = mfcc(win_s, n_filters, n_coeffs, samplerate)

    # Collect rows and stack once instead of re-stacking on every hop.
    rows = [zeros([n_coeffs, ])]
    while True:
        samples, read = s()
        # Copy, in case the extractor hands back the same buffer each call.
        rows.append(m(p(samples)).copy())
        if read < hop_s:
            break
    mfccs = vstack(rows)

    # The column test used to compare ``grid_size`` itself with an integer,
    # so the offset was never applied.
    if row == grid_size.nRow - 1 and col == grid_size.nCol - 1:
        mfccs = mfccs + 100
    return mfccs
def getMelEnergy(path):
    """Return the 40-band filterbank energies of every hop of an audio file.

    Args:
        path: path of the audio file to analyse.

    Returns:
        A 2-D array with 40 columns. Row 0 is an all-zero seed row (kept for
        backward compatibility); each following row is one hop of the file.
    """
    win_s = 512  # fft size
    # Integer division: the hop size must stay an int on Python 3 as well.
    hop_s = win_s // 4  # hop size

    s = source(path, 0, hop_s)
    samplerate = s.samplerate
    pv = pvoc(win_s, hop_s)
    f = filterbank(40, win_s)
    f.set_mel_coeffs_slaney(samplerate)

    # Collect rows and stack once instead of re-stacking on every hop.
    rows = [np.zeros((40, ))]
    while True:
        samples, read = s()
        # Copy, in case the filterbank hands back the same buffer each call.
        rows.append(np.array(f(pv(samples))))
        if read < hop_s:
            break
    return np.vstack(rows)
def aubio(source_filename):
    """Return the 26-coefficient MFCCs of every hop of an audio file.

    Args:
        source_filename: path of the audio file to analyse.

    Returns:
        A 2-D array with 26 columns. Row 0 is an all-zero seed row (kept for
        backward compatibility); each following row is one hop of the file.
    """
    n_filters = 40  # must be 40 for mfcc
    n_coeffs = 26
    win_s = 512
    hop_s = win_s // 4

    s = source(source_filename, 0, hop_s)
    samplerate = s.samplerate
    p = pvoc(win_s, hop_s)
    m = mfcc(win_s, n_filters, n_coeffs, samplerate)

    # Collect rows and stack once instead of re-stacking on every hop.
    rows = [zeros([n_coeffs, ])]
    while True:
        samples, read = s()
        # Copy, in case the extractor hands back the same buffer each call.
        rows.append(m(p(samples)).copy())
        if read < hop_s:
            break
    return vstack(rows)
def __init__(self, samplerate=44100, winsize=1024, hopsize=512, filters=40,
             coeffs=13):
    """Build the spectral descriptors, phase vocoder and MFCC extractor.

    Args:
        samplerate: sample rate passed to the MFCC extractor.
        winsize: window size shared by the descriptors, vocoder and MFCC.
        hopsize: hop size of the phase vocoder.
        filters: number of filters passed to the MFCC extractor.
        coeffs: number of MFCC coefficients; also the length of
            ``self.mfccs``.
    """
    super(AubioAnalyser, self).__init__()
    self.winsize = winsize
    self.hopsize = hopsize
    self.coeffs = coeffs
    self.filters = filters
    self.methods = [
        "default", "energy", "hfc", "complex", "phase", "specdiff", "kl",
        "mkl", "specflux", "centroid", "slope", "rolloff", "spread",
        "skewness", "kurtosis", "decrease"
    ]
    # One descriptor object per method name.
    self.descriptors = {
        name: aubio.specdesc(name, self.winsize) for name in self.methods
    }
    self.pvocoder = aubio.pvoc(self.winsize, self.hopsize)
    self.mfcc_feature = aubio.mfcc(winsize, filters, coeffs, samplerate)
    self.mfccs = numpy.zeros([self.coeffs, ])
def mfcc(frame, audiofile):
    '''
    Compute the mel-frequency cepstral coefficients (MFCC) of one frame.

    A frame shorter than ``audiofile['frame_size']`` is zero padded at the
    end up to that size. The spectrum is computed with a phase vocoder whose
    FFT size is the power of two nearest to the frame size, then passed
    through a 40-band MFCC extractor that returns 12 coefficients.

    Args:
        frame: 1-D numpy array of samples.
        audiofile: mapping providing ``'sample_rate'`` and ``'frame_size'``.

    Returns:
        The 12 coefficients computed for the frame.
    '''
    coefficientsCount = 12
    sampleRate = audiofile['sample_rate']
    frame_size = audiofile['frame_size']

    # Round to nearest power of 2 to facilitate FFT
    # NOTE(review): rounding to the *nearest* power can yield an FFT size
    # smaller than frame_size (the hop size used below) - confirm intended.
    fftsize = pow(2, int(math.log(frame_size, 2) + 0.5))
    m = aub.mfcc(fftsize, 40, coefficientsCount, sampleRate)

    # The vocoder turns the frame into a spectrum.
    p = aub.pvoc(fftsize, int(frame_size))

    # Pad short frames up to the hop size the vocoder expects. This used to
    # test against a hard-coded 16000 instead of frame_size.
    if len(frame) < frame_size:
        frame = np.pad(frame, (0, frame_size - len(frame)), 'constant',
                       constant_values=0)

    spec = p(frame.astype(np.float32))
    return m(spec)
def dataIn():
    """Yield one phase-vocoder output per full hop read from ``self.source``.

    The hop on which fewer than ``self.source.hop_size`` samples are read
    is still fed to the vocoder, but its output is not yielded.
    """
    vocoder = pvoc(self.win_s, self.hop_s)
    while True:
        block, n_read = self.source()  # Read the file
        spectrum = vocoder(block)
        if n_read < self.source.hop_size:
            return  # Out of samples
        yield spectrum
def test_no_overlap(self):
    """Run four analysis/resynthesis passes with the hop equal to the window.

    After each pass the all-zero input vector is checked to still be zero.
    """
    win_s = hop_s = 1024
    vocoder = pvoc(win_s, hop_s)
    silence = fvec(hop_s)
    for _ in range(4):
        spectrum = vocoder(silence)
        resynth = vocoder.rdo(spectrum)
        assert_equal(silence, 0.)
def test_no_overlap(self):
    """Run four analysis/resynthesis passes with the hop equal to the window.

    After each pass the all-zero input vector is checked to still be zero.
    """
    win_s = hop_s = 1024
    vocoder = pvoc(win_s, hop_s)
    silence = fvec(hop_s)
    for _ in range(4):
        spectrum = vocoder(silence)
        resynth = vocoder.rdo(spectrum)
        assert_equal(silence, 0.)
def get_spectrogram(filename, samplerate=0):
    """Plot the log-magnitude spectrogram of an audio file.

    Args:
        filename: path of the audio file; its base name becomes the title.
        samplerate: rate used to open the file. When 0, the rate reported by
            the opened source is used for the axis labels.

    Returns:
        The image object returned by ``plt.imshow``.
    """
    win_s = 512  # fft window size
    hop_s = win_s // 2  # hop size
    fft_s = win_s // 2 + 1  # spectrum bins

    a = source(filename, samplerate, hop_s)  # source file
    if samplerate == 0:
        samplerate = a.samplerate
    pv = pvoc(win_s, hop_s)  # phase vocoder

    # analysis: collect the norm vectors and stack once instead of re-stacking
    # the whole spectrogram on every hop
    frames = [zeros([0, fft_s], dtype=float_type)]
    while True:
        samples, read = a()  # read file
        # Copy, in case the vocoder hands back the same buffer each call.
        frames.append(pv(samples).norm.copy())
        if read < a.hop_size:
            break
    specgram = vstack(frames)

    # plotting ('lower' is the valid spelling of the former 'bottom' origin)
    fig = plt.imshow(log10(specgram.T + .001), origin='lower', aspect='auto',
                     cmap=plt.cm.gray_r)
    ax = fig.axes
    ax.axis([0, len(specgram), 0, len(specgram[0])])

    # show axes in Hz and seconds
    time_step = hop_s / float(samplerate)
    total_time = len(specgram) * time_step
    outstr = "total time: %0.2fs" % total_time
    print(outstr + ", samplerate: %.2fkHz" % (samplerate / 1000.))
    n_xticks = 10
    n_yticks = 10

    def get_rounded_ticks(top_pos, step, n_ticks):
        """Return (positions, labels) for ``n_ticks`` rounded ticks plus the top."""
        top_label = top_pos * step
        # get the first label
        ticks_first_label = top_pos * step / n_ticks
        # round to the closest .1
        ticks_first_label = round(ticks_first_label * 10.) / 10.
        # compute all labels from the first rounded one
        ticks_labels = [ticks_first_label * n for n in range(n_ticks)] + [top_label]
        # get the corresponding positions
        ticks_positions = [ticks_labels[n] / step for n in range(n_ticks)] + [top_pos]
        # convert to string
        ticks_labels = ["%.1f" % x for x in ticks_labels]
        # return position, label tuple to use with x/yticks
        return ticks_positions, ticks_labels

    # apply to the axis
    x_ticks, x_labels = get_rounded_ticks(len(specgram), time_step, n_xticks)
    y_ticks, y_labels = get_rounded_ticks(
        len(specgram[0]), (samplerate / 1000. / 2.) / len(specgram[0]), n_yticks)
    ax.set_xticks(x_ticks)
    ax.set_yticks(y_ticks)
    ax.set_xticklabels(x_labels)
    ax.set_yticklabels(y_labels)
    ax.set_ylabel('Frequency (kHz)')
    ax.set_xlabel('Time (s)')
    ax.set_title(os.path.basename(filename))
    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                 ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize('x-small')
    return fig
def get_spectrogram(filename, samplerate=0):
    """Plot the log-magnitude spectrogram of an audio file.

    Args:
        filename: path of the audio file; its base name becomes the title.
        samplerate: rate used to open the file. When 0, the rate reported by
            the opened source is used for the axis labels.

    Returns:
        The image object returned by ``plt.imshow``.
    """
    win_s = 512  # fft window size
    hop_s = win_s // 2  # hop size
    fft_s = win_s // 2 + 1  # spectrum bins

    a = source(filename, samplerate, hop_s)  # source file
    if samplerate == 0:
        samplerate = a.samplerate
    pv = pvoc(win_s, hop_s)  # phase vocoder

    # analysis: collect the norm vectors and stack once instead of re-stacking
    # the whole spectrogram on every hop
    frames = [zeros([0, fft_s], dtype=float_type)]
    while True:
        samples, read = a()  # read file
        # Copy, in case the vocoder hands back the same buffer each call.
        frames.append(pv(samples).norm.copy())
        if read < a.hop_size:
            break
    specgram = vstack(frames)

    # plotting ('lower' is the valid spelling of the former 'bottom' origin)
    fig = plt.imshow(log10(specgram.T + .001), origin='lower', aspect='auto',
                     cmap=plt.cm.gray_r)
    ax = fig.axes
    ax.axis([0, len(specgram), 0, len(specgram[0])])

    # show axes in Hz and seconds
    time_step = hop_s / float(samplerate)
    total_time = len(specgram) * time_step
    outstr = "total time: %0.2fs" % total_time
    print(outstr + ", samplerate: %.2fkHz" % (samplerate / 1000.))
    n_xticks = 10
    n_yticks = 10

    def get_rounded_ticks(top_pos, step, n_ticks):
        """Return (positions, labels) for ``n_ticks`` rounded ticks plus the top."""
        top_label = top_pos * step
        # get the first label
        ticks_first_label = top_pos * step / n_ticks
        # round to the closest .1
        ticks_first_label = round(ticks_first_label * 10.) / 10.
        # compute all labels from the first rounded one
        ticks_labels = [ticks_first_label * n for n in range(n_ticks)] + [top_label]
        # get the corresponding positions
        ticks_positions = [ticks_labels[n] / step for n in range(n_ticks)] + [top_pos]
        # convert to string
        ticks_labels = ["%.1f" % x for x in ticks_labels]
        # return position, label tuple to use with x/yticks
        return ticks_positions, ticks_labels

    # apply to the axis
    x_ticks, x_labels = get_rounded_ticks(len(specgram), time_step, n_xticks)
    y_ticks, y_labels = get_rounded_ticks(
        len(specgram[0]), (samplerate / 1000. / 2.) / len(specgram[0]), n_yticks)
    ax.set_xticks(x_ticks)
    ax.set_yticks(y_ticks)
    ax.set_xticklabels(x_labels)
    ax.set_yticklabels(y_labels)
    ax.set_ylabel('Frequency (kHz)')
    ax.set_xlabel('Time (s)')
    ax.set_title(os.path.basename(filename))
    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                 ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize('x-small')
    return fig
def activate(self):
    """Set up the analysis chain and start capturing from the input device.

    Creates the PyAudio instance if needed, the optional pre-emphasis
    filter, the phase vocoder and the frequency-domain buffers, logs every
    device that has input channels, then opens and starts a mono float32
    callback stream on the configured device.
    """
    cfg = self._config
    if self._audio is None:
        self._audio = pyaudio.PyAudio()

    # Setup a pre-emphasis filter to help balance the highs
    self.pre_emphasis = None
    if cfg['pre_emphasis']:
        self.pre_emphasis = aubio.digital_filter(3)
        # Alternative coefficient sets noted by the original author:
        #   old, do not use:       (1., -pre_emphasis, 0, 0, 0)
        #   scott_mel or others:   (1.3662, -1.9256, 0.5621, -1.9256, 0.9283)
        #   matt_mel, weaker bass: (0.87492, -1.74984, 0.87492, -1.74799, 0.75169)
        # In use: bass heavier, overall more balanced
        self.pre_emphasis.set_biquad(0.85870, -1.71740, 0.85870, -1.71605,
                                     0.71874)

    # Samples per buffer, shared by the vocoder hop and the stream.
    hop = cfg['mic_rate'] // cfg['sample_rate']

    # Setup the phase vocoder to perform a windowed FFT
    self._phase_vocoder = aubio.pvoc(cfg['fft_size'], hop)
    self._frequency_domain_null = aubio.cvec(cfg['fft_size'])
    self._frequency_domain = self._frequency_domain_null
    self._frequency_domain_x = np.linspace(0, cfg['mic_rate'],
                                           (cfg["fft_size"] // 2) + 1)

    # Log every device of host API 0 that has at least one input channel
    _LOGGER.info("Audio Input Devices:")
    query = self._audio.get_device_info_by_host_api_device_index
    info = self._audio.get_host_api_info_by_index(0)
    for i in range(0, info.get('deviceCount')):
        if query(0, i).get('maxInputChannels') > 0:
            _LOGGER.info(" [{}] {}".format(i, query(0, i).get('name')))

    # Open the audio stream and start processing the input
    self._stream = self._audio.open(
        input_device_index=cfg['device_index'],
        format=pyaudio.paFloat32,
        channels=1,
        rate=cfg['mic_rate'],
        input=True,
        frames_per_buffer=hop,
        stream_callback=self._audio_sample_callback)
    self._stream.start_stream()
    _LOGGER.info("Audio source opened.")
def __init__(self, sample_rate=11025, buf_size=1024, hop_size=256,
             n_mfcc_filters=40, n_mfcc_coeffs=13, pitch_method='yin'):
    """Initialize feature extractor."""
    rate = int(sample_rate)
    self._sample_rate = rate
    self._pvoc = aubio.pvoc(buf_size, hop_size)
    self._mfcc = aubio.mfcc(buf_size, n_mfcc_filters, n_mfcc_coeffs, rate)
    pitch_options = dict(method=pitch_method, buf_size=buf_size,
                         hop_size=hop_size, samplerate=rate)
    self._pitch = aubio.pitch(**pitch_options)
def get_spectrogram(filename, samplerate=0):
    """Draw the log-magnitude spectrogram of an audio file on the current axes.

    Runs on Python 2 and 3: sizes use integer division and the summary line
    is printed with a single ``print`` call.

    Args:
        filename: path of the audio file to analyse.
        samplerate: rate used to open the file. When 0, the rate reported by
            the opened source is used for the axis labels.
    """
    win_s = 512  # fft window size
    hop_s = win_s // 2  # hop size
    fft_s = win_s // 2 + 1  # spectrum bins

    a = source(filename, samplerate, hop_s)  # source file
    if samplerate == 0:
        samplerate = a.samplerate
    pv = pvoc(win_s, hop_s)  # phase vocoder

    # analysis: collect the norm vectors and stack once instead of re-stacking
    # the whole spectrogram on every hop
    frames = [zeros([0, fft_s], dtype='float32')]
    while True:
        samples, read = a()  # read file
        # Copy, in case the vocoder hands back the same buffer each call.
        frames.append(pv(samples).norm.copy())
        if read < a.hop_size:
            break
    specgram = vstack(frames)

    # plotting ('lower' is the valid spelling of the former 'bottom' origin)
    imshow(log10(specgram.T + .001), origin='lower', aspect='auto',
           cmap=cm.gray_r)
    axis([0, len(specgram), 0, len(specgram[0])])

    # show axes in Hz and seconds
    time_step = hop_s / float(samplerate)
    total_time = len(specgram) * time_step
    print("total time: %0.2fs" % total_time
          + ", samplerate: %.2fkHz" % (samplerate / 1000.))
    n_xticks = 10
    n_yticks = 10

    def get_rounded_ticks(top_pos, step, n_ticks):
        """Return (positions, labels) for ``n_ticks`` rounded ticks plus the top."""
        top_label = top_pos * step
        # get the first label
        ticks_first_label = top_pos * step / n_ticks
        # round to the closest .1
        ticks_first_label = round(ticks_first_label * 10.) / 10.
        # compute all labels from the first rounded one
        ticks_labels = [ticks_first_label * n for n in range(n_ticks)] + [top_label]
        # get the corresponding positions
        ticks_positions = [ticks_labels[n] / step for n in range(n_ticks)] + [top_pos]
        # convert to string
        ticks_labels = ["%.1f" % x for x in ticks_labels]
        # return position, label tuple to use with x/yticks
        return ticks_positions, ticks_labels

    # apply to the axis
    xticks(*get_rounded_ticks(len(specgram), time_step, n_xticks))
    yticks(*get_rounded_ticks(len(specgram[0]),
                              (samplerate / 2. / 1000.) / len(specgram[0]),
                              n_yticks))
    ylabel('Frequency (kHz)')
    xlabel('Time (s)')
def setup(self, channels=None, samplerate=None, blocksize=None,
          totalframes=None):
    """Run the parent setup, then rebuild the vocoder and the filterbank.

    The filterbank has 40 filters over ``self.input_blocksize`` and is
    configured with Slaney mel coefficients for ``samplerate``. The read
    counter and the result list are reset.
    """
    super(AubioMelEnergy, self).setup(channels, samplerate, blocksize,
                                      totalframes)
    self.n_filters = 40
    self.n_coeffs = 13
    self.pvoc = pvoc(self.input_blocksize, self.input_stepsize)
    bank = filterbank(self.n_filters, self.input_blocksize)
    self.melenergy = bank
    bank.set_mel_coeffs_slaney(samplerate)
    self.block_read = 0
    self.melenergy_results = []
def activate(self):
    """Set up the analysis chain and start capturing from the input device.

    Creates the PyAudio instance if needed, the optional pre-emphasis
    filter, the phase vocoder and the frequency-domain buffers, logs every
    device that has input channels, resets an implausible ``device_index``
    to 0, then opens and starts a mono float32 callback stream.
    """
    if self._audio is None:
        self._audio = pyaudio.PyAudio()

    # Setup a pre-emphasis filter to help balance the highs
    self.pre_emphasis = None
    if self._config['pre_emphasis']:
        self.pre_emphasis = aubio.digital_filter(3)
        self.pre_emphasis.set_biquad(1., -self._config['pre_emphasis'], 0, 0, 0)

    # Samples per buffer, shared by the vocoder hop and the stream.
    frames_per_buffer = self._config['mic_rate'] // self._config['sample_rate']

    # Setup the phase vocoder to perform a windowed FFT
    self._phase_vocoder = aubio.pvoc(self._config['fft_size'],
                                     frames_per_buffer)
    self._frequency_domain_null = aubio.cvec(self._config['fft_size'])
    self._frequency_domain = self._frequency_domain_null
    self._frequency_domain_x = np.linspace(
        0, self._config['mic_rate'], (self._config["fft_size"] // 2) + 1)

    # Log every device of host API 0 that has at least one input channel,
    # querying each device once.
    _LOGGER.info("Audio Input Devices:")
    info = self._audio.get_host_api_info_by_index(0)
    device_count = info.get('deviceCount')
    for i in range(device_count):
        device = self._audio.get_device_info_by_host_api_device_index(0, i)
        if device.get('maxInputChannels') > 0:
            _LOGGER.info(" [{}] {}".format(i, device.get('name')))

    # PyAudio may segfault, reset device index if it seems implausible
    if (self._config['device_index'] >= device_count
            or self._audio.get_device_info_by_host_api_device_index(
                0, self._config['device_index']).get('maxInputChannels') <= 0):
        # Logger.warn is a deprecated alias; warning() is the supported name.
        _LOGGER.warning("Invalid device_index setting, resetting it to 0")
        self._config['device_index'] = 0

    # Open the audio stream and start processing the input
    self._stream = self._audio.open(
        input_device_index=self._config['device_index'],
        format=pyaudio.paFloat32,
        channels=1,
        rate=self._config['mic_rate'],
        input=True,
        frames_per_buffer=frames_per_buffer,
        stream_callback=self._audio_sample_callback)
    self._stream.start_stream()
    _LOGGER.info("Audio source opened.")
def test_zeros(self):
    """Check that analysing and resynthesising zeros gives zeros."""
    win_s, hop_s = 1024, 256
    f = pvoc(win_s, hop_s)
    t = fvec(hop_s)
    # Integer division: range() rejects the float that ``/`` yields on
    # Python 3.
    for _ in range(4 * win_s // hop_s):
        s = f(t)
        r = f.rdo(s)
        assert_equal(array(t), 0)
        assert_equal(s.norm, 0)
        assert_equal(s.phas, 0)
        assert_equal(r, 0)
def reconstruction(self, sigin, hop_s, ratio):
    """Assert that ``sigin`` is recovered after ``ratio`` resynthesis steps.

    The signal goes through the vocoder once, followed by ``ratio - 1``
    hops of zeros. Every squared error between the last output and
    ``sigin`` must be below ``max_sq_error``.
    """
    vocoder = pvoc(hop_s * ratio, hop_s)
    silence = fvec(hop_s)
    resynth = vocoder.rdo(vocoder(sigin))
    for _ in range(ratio - 1):
        resynth = vocoder.rdo(vocoder(silence))
    # make sure all square errors are less than desired precision
    assert_array_less((resynth - sigin) ** 2, max_sq_error)
def analyzeMFCC(grain):
    """Return the 13 MFCCs of the first block of a grain, as a list.

    ``grain`` provides ``"frameCount"`` (used as both window and hop size),
    ``"file"`` and ``"sampleRate"``. Only one block is read from the file.
    """
    windowSize = int(float(grain["frameCount"]))
    reader = source(grain["file"], int(grain["sampleRate"]), windowSize)
    sampleRate = reader.samplerate
    vocoder = pvoc(windowSize, windowSize)
    extractor = mfcc(windowSize, 40, 13, sampleRate)
    samples, read = reader()
    return extractor(vocoder(samples)).tolist()
def reconstruction(self, sigin, hop_s, ratio):
    """Assert that ``sigin`` is recovered after ``ratio`` resynthesis steps.

    The signal goes through the vocoder once, followed by ``ratio - 1``
    hops of zeros. Every squared error between the last output and
    ``sigin`` must be below ``max_sq_error``.
    """
    vocoder = pvoc(hop_s * ratio, hop_s)
    silence = fvec(hop_s)
    resynth = vocoder.rdo(vocoder(sigin))
    for _ in range(ratio - 1):
        resynth = vocoder.rdo(vocoder(silence))
    # make sure all square errors are less than desired precision
    assert_array_less((resynth - sigin) ** 2, max_sq_error)
def analyzeMFCC(grain):
    """Return the 13 MFCCs of the first block of a grain, as a list.

    ``grain`` provides ``"frameCount"`` (the window size; the hop size is
    one less), ``"file"`` and ``"sampleRate"``. Only one block is read.
    """
    windowSize = int(float(grain["frameCount"]))
    hopSize = windowSize - 1
    reader = source(grain["file"], int(grain["sampleRate"]), hopSize)
    sampleRate = reader.samplerate
    vocoder = pvoc(windowSize, hopSize)
    extractor = mfcc(windowSize, 40, 13, sampleRate)
    samples, read = reader()
    return extractor(vocoder(samples)).tolist()
def test_zeros(self):
    """Check that analysing and resynthesising zeros gives zeros."""
    win_s, hop_s = 1024, 256
    f = pvoc(win_s, hop_s)
    t = fvec(hop_s)
    # Integer division: range() rejects the float that ``/`` yields on
    # Python 3.
    for _ in range(4 * win_s // hop_s):
        s = f(t)
        r = f.rdo(s)
        assert_equal(array(t), 0)
        assert_equal(s.norm, 0)
        assert_equal(s.phas, 0)
        assert_equal(r, 0)
def __init__(self):
    """Set the block and step sizes and create the phase vocoder.

    The MFCC extractor is left as ``None`` here.
    """
    super(AubioMfcc, self).__init__()
    block = 1024
    self.input_blocksize = block
    self.input_stepsize = block // 4

    # Aubio MFCC Initialisation
    self.n_filters, self.n_coeffs = 40, 13
    self.pvoc = pvoc(self.input_blocksize, self.input_stepsize)
    self.mfcc = None
    self.block_read = 0
    self.mfcc_results = np.zeros([self.n_coeffs, ])
def test_steps_three_random_channels(self):
    """Check that a random hop is recovered after three hops of zeros.

    Uses a window of 64 and a hop of 16; the input is compared with the
    fourth resynthesised hop to 6 decimals.
    """
    from random import random
    f = pvoc(64, 16)
    t0 = fvec(16)
    t1 = fvec(16)
    # range() instead of the Python 2 only xrange()
    for i in range(16):
        t1[i] = random() * 2. - 1.
    t2 = f.rdo(f(t1))
    t2 = f.rdo(f(t0))
    t2 = f.rdo(f(t0))
    t2 = f.rdo(f(t0))
    assert_almost_equal(t1, t2, decimal=6)
def test_zeros(self):
    """ check the resynthesis of zeros gives zeros """
    win_s, hop_s = 1024, 256
    f = pvoc(win_s, hop_s)
    t = fvec(hop_s)
    # Integer division: range() rejects the float that ``/`` yields on
    # Python 3.
    for _ in range(4 * win_s // hop_s):
        s = f(t)
        r = f.rdo(s)
        assert_equal(array(t), 0)
        assert_equal(s.norm, 0)
        assert_equal(s.phas, 0)
        assert_equal(r, 0)
def __init__(self):
    """Set the block and step sizes and create the phase vocoder.

    The MFCC extractor is left as ``None`` here.
    """
    super(AubioMfcc, self).__init__()
    self.input_blocksize = 1024
    # Integer division keeps the step size an int on Python 3 as well.
    self.input_stepsize = self.input_blocksize // 4

    # Aubio MFCC Initialisation
    self.n_filters = 40
    self.n_coeffs = 13
    self.pvoc = pvoc(self.input_blocksize, self.input_stepsize)
    self.mfcc = None
    self.block_read = 0
    self.mfcc_results = np.zeros([self.n_coeffs, ])
def test_zeros(self):
    """ check the resynthesis of zeros gives zeros """
    win_s, hop_s = 1024, 256
    f = pvoc(win_s, hop_s)
    t = fvec(hop_s)
    # Integer division: range() rejects the float that ``/`` yields on
    # Python 3.
    for _ in range(4 * win_s // hop_s):
        s = f(t)
        r = f.rdo(s)
        assert_equal(array(t), 0)
        assert_equal(s.norm, 0)
        assert_equal(s.phas, 0)
        assert_equal(r, 0)
def test_steps_three_random_channels(self):
    """Check that a random hop is recovered after three hops of zeros.

    Uses a window of 64 and a hop of 16; the input is compared with the
    fourth resynthesised hop to 6 decimals.
    """
    from random import random
    f = pvoc(64, 16)
    t0 = fvec(16)
    t1 = fvec(16)
    # range() instead of the Python 2 only xrange()
    for i in range(16):
        t1[i] = random() * 2. - 1.
    t2 = f.rdo(f(t1))
    t2 = f.rdo(f(t0))
    t2 = f.rdo(f(t0))
    t2 = f.rdo(f(t0))
    assert_almost_equal(t1, t2, decimal=6)
def __init__(self):
    """Set the block and step sizes and create the vocoder and filterbank."""
    super(AubioMelEnergy, self).__init__()
    self.input_blocksize = 1024
    # Integer division keeps the step size an int on Python 3 as well.
    self.input_stepsize = self.input_blocksize // 4

    # Aubio Melenergy Initialisation
    self.n_filters = 40
    self.n_coeffs = 13
    self.pvoc = pvoc(self.input_blocksize, self.input_stepsize)
    self.melenergy = filterbank(self.n_filters, self.input_blocksize)
    self.block_read = 0
    self.melenergy_results = []
def setup(self, channels=None, samplerate=None, blocksize=None,
          totalframes=None):
    """Run the parent setup, then rebuild the vocoder and MFCC extractor.

    The extractor uses 40 filters and 13 coefficients at ``samplerate``.
    The read counter and the result array are reset.
    """
    super(AubioMfcc, self).setup(channels, samplerate, blocksize,
                                 totalframes)
    self.n_filters, self.n_coeffs = 40, 13
    block = self.input_blocksize
    self.pvoc = pvoc(block, self.input_stepsize)
    self.mfcc = mfcc(block, self.n_filters, self.n_coeffs, samplerate)
    self.block_read = 0
    self.mfcc_results = numpy.zeros([self.n_coeffs, ])
def mfccs(windows):
    """Compute the MFCCs of each window in ``windows``.

    Uses the module-level ``window_size``, ``hop_size``, ``n_filters``,
    ``n_coeffs`` and ``sample_rate``.

    Args:
        windows: sequence of sample windows; converted to float32.

    Returns:
        A 2-D array with ``n_coeffs`` columns whose row 0 is an all-zero
        seed row (kept for backward compatibility), followed by one row per
        window. For empty input the 1-D seed row alone is returned.
    """
    p = aubio.pvoc(window_size, hop_size)
    m = aubio.mfcc(window_size, n_filters, n_coeffs, sample_rate)
    windows = np.float32(windows)

    # Seed with n_coeffs columns: the former hard-coded 13 only stacked
    # correctly when n_coeffs happened to be 13.
    rows = [np.zeros([n_coeffs, ])]
    for samples in windows:
        # Copy, in case the extractor hands back the same buffer each call.
        rows.append(np.array(m(p(samples))))
    if len(rows) == 1:
        return rows[0]
    # Stack once instead of re-stacking on every window.
    return np.vstack(rows)
def __init__(self, args):
    """Create the phase vocoder and MFCC extractor from parsed options.

    ``self.options`` is re-parsed before each constructor so that each one
    receives only its own keyword arguments. The final parse stores the
    union of both option sets before the parent initialiser runs.
    """
    pvoc_opts = ['hop_size', 'buf_size']
    mfcc_opts = ['buf_size', 'n_filters', 'n_coeffs', 'samplerate']

    self.parse_options(args, pvoc_opts)
    self.remap_pvoc_options(self.options)
    self.pv = aubio.pvoc(**self.options)

    self.parse_options(args, mfcc_opts)
    self.mfcc = aubio.mfcc(**self.options)

    # remember all options
    all_opts = list(set(pvoc_opts + mfcc_opts))
    self.parse_options(args, all_opts)
    super(process_mfcc, self).__init__(args)
def test_resynth_three_steps(self):
    """ check the resynthesis of steps is correct with 25% overlap """
    hop_s = 16
    buf_s = hop_s * 4
    sigin = fvec(hop_s)
    zeros = fvec(hop_s)
    f = pvoc(buf_s, hop_s)
    # range() instead of the Python 2 only xrange()
    for i in range(hop_s):
        sigin[i] = random() * 2. - 1.
    t2 = f.rdo(f(sigin))
    t2 = f.rdo(f(zeros))
    t2 = f.rdo(f(zeros))
    t2 = f.rdo(f(zeros))
    assert_almost_equal(sigin, t2, decimal=precision)
def __init__(self, args):
    """Create the phase vocoder and mel filterbank from parsed options.

    ``self.options`` is re-parsed and remapped before each constructor so
    that each one receives only its own keyword arguments. The filterbank
    is configured with Slaney mel coefficients for ``args.samplerate``.
    """
    self.args = args

    self.parse_options(args, ['hop_size', 'buf_size'])
    self.remap_pvoc_options(self.options)
    self.pv = aubio.pvoc(**self.options)

    self.parse_options(args, ['buf_size', 'n_filters'])
    self.remap_pvoc_options(self.options)
    self.filterbank = aubio.filterbank(**self.options)
    self.filterbank.set_mel_coeffs_slaney(args.samplerate)

    super(process_melbands, self).__init__(args)
def test_resynth_three_steps(self):
    """ check the resynthesis of steps is correct with 25% overlap """
    hop_s = 16
    signal = fvec(hop_s)
    silence = fvec(hop_s)
    vocoder = pvoc(hop_s * 4, hop_s)
    for idx in range(hop_s):
        signal[idx] = random() * 2. - 1.
    # one hop of signal, then three hops of zeros
    resynth = vocoder.rdo(vocoder(signal))
    for _ in range(3):
        resynth = vocoder.rdo(vocoder(silence))
    assert_almost_equal(signal, resynth, decimal=precision)
def __init__(self, options):
    """Build the filter bank, phase vocoder and energy buffers from settings.

    ``options.settings['fbuckets']`` may be the string ``'third-octave'`` or
    ``'octave'``; it is then replaced in place by the matching frequency
    list. The filter bank gets ``len(fbuckets) - 2`` bands.
    """
    settings = options.settings
    if settings['fbuckets'] == 'third-octave':
        settings['fbuckets'] = [22.4, 25, 31.5, 40, 50, 63, 80, 100, 125,
                                160, 200, 250, 315, 400, 500, 630, 800, 1000,
                                1250, 1600, 2000, 2500, 3150, 4000, 5000,
                                6300, 8000, 10000, 12500, 16000, 20000, 22390]
    elif settings['fbuckets'] == 'octave':
        settings['fbuckets'] = [22, 31.5, 63, 125, 250, 500, 1000, 2000,
                                4000, 8000, 16000, 22720]

    self.midi_processor = None
    # Each token is parsed with base 0, so prefixed literals are accepted.
    self.sysex_command_array = [
        int(command, 0) for command in settings['fsysexnum'].split(' ')
    ]

    n_bands = len(settings['fbuckets']) - 2
    self.filter_bank = filterbank(
        n_bands, (int(settings['framesize']) * int(settings['fframemult'])))
    self.frequencies = fvec(settings['fbuckets'])
    self.filter_bank.set_triangle_bands(self.frequencies,
                                        int(settings['samplerate']))

    frame_size = float(settings['framesize'])
    frame_mult = float(settings['fframemult'])
    self.phase_vocoder = pvoc(
        int(frame_size * frame_mult),
        int(frame_size * frame_mult * float(settings['fhopmult'])))

    self.frame_arrays = np.zeros((int(frame_mult), int(frame_size)),
                                 dtype=np.float32)
    self.frame_count = 0
    self.maximum_frequencies = np.zeros((n_bands,), dtype=np.float32)
    self.last_energies = np.zeros((n_bands,), dtype=np.float32)
    self.count_energies = np.zeros((int(settings['fcount']), n_bands),
                                   dtype=np.float32)
    self.energy_count = 0
    self.rest_stop = 0
    self.frame_multiplier = int(settings['fframemult'])
    self.count = int(settings['fcount'])
    self.graceful = float(settings['fgraceful'])
def __iter__(self):
    """Yield one phase-vocoder output per full hop read from ``self.source``.

    The hop on which fewer than ``self.source.hop_size`` samples are read
    is still fed to the vocoder, but its output is not yielded. Debug
    messages are printed when ``self.verbose`` is set.
    """
    debug = self.verbose
    if debug:
        print('[DEBUG] PhaseVocPR.__iter__()')
    vocoder = pvoc(self.win_s, self.hop_s)
    if self.verbose:
        print(' Created Phase Vocoder (pv = pvoc(self.win_s, self.hop_s)')
    while True:
        block, n_read = self.source()  # Read the file
        spectrum = vocoder(block)
        if n_read < self.source.hop_size:
            return  # Out of samples
        yield spectrum
def test_steps_two_channels(self):
    """ check the resynthesis of steps is correct """
    hop = 512
    vocoder = pvoc(1024, hop)
    stepped = fvec(hop)
    silence = fvec(hop)
    # positive step on samples 100..199
    stepped[100:200] = .1
    # negative step on samples 20..49
    stepped[20:50] = -.1
    # first pass feeds the stepped hop, second pass feeds zeros
    first_out = vocoder.rdo(vocoder(stepped))
    second_out = vocoder.rdo(vocoder(silence))
    assert_almost_equal(stepped, second_out, decimal=6)
def test_steps_two_channels(self):
    """ check the resynthesis of steps is correct """
    hop = 512
    vocoder = pvoc(1024, hop)
    stepped = fvec(hop)
    silence = fvec(hop)
    # positive step on samples 100..199
    stepped[100:200] = .1
    # negative step on samples 20..49
    stepped[20:50] = -.1
    # first pass feeds the stepped hop, second pass feeds zeros
    first_out = vocoder.rdo(vocoder(stepped))
    second_out = vocoder.rdo(vocoder(silence))
    assert_almost_equal(stepped, second_out, decimal=6)
def mfccs(windows):
    """Compute the MFCCs of each window in ``windows``.

    Uses the module-level ``window_size``, ``hop_size``, ``n_filters``,
    ``n_coeffs`` and ``sample_rate``.

    Args:
        windows: sequence of sample windows; converted to float32.

    Returns:
        A 2-D array with ``n_coeffs`` columns whose row 0 is an all-zero
        seed row (kept for backward compatibility), followed by one row per
        window. For empty input the 1-D seed row alone is returned.
    """
    p = aubio.pvoc(window_size, hop_size)
    m = aubio.mfcc(window_size, n_filters, n_coeffs, sample_rate)
    windows = np.float32(windows)

    # Seed with n_coeffs columns: the former hard-coded 13 only stacked
    # correctly when n_coeffs happened to be 13.
    rows = [np.zeros([n_coeffs, ])]
    for samples in windows:
        # Copy, in case the extractor hands back the same buffer each call.
        rows.append(np.array(m(p(samples))))
    if len(rows) == 1:
        return rows[0]
    # Stack once instead of re-stacking on every window.
    return np.vstack(rows)
def get_spectrogram(filename, samplerate=0):
    """Draw the log-magnitude spectrogram of an audio file on the current axes.

    Runs on Python 2 and 3: sizes use integer division and the summary line
    is printed with a single ``print`` call.

    Args:
        filename: path of the audio file to analyse.
        samplerate: rate used to open the file. When 0, the rate reported by
            the opened source is used for the axis labels.
    """
    win_s = 512  # fft window size
    hop_s = win_s // 2  # hop size
    fft_s = win_s // 2 + 1  # spectrum bins

    a = source(filename, samplerate, hop_s)  # source file
    if samplerate == 0:
        samplerate = a.samplerate
    pv = pvoc(win_s, hop_s)  # phase vocoder

    # analysis: collect the norm vectors and stack once instead of re-stacking
    # the whole spectrogram on every hop
    frames = [zeros([0, fft_s], dtype='float32')]
    while True:
        samples, read = a()  # read file
        # Copy, in case the vocoder hands back the same buffer each call.
        frames.append(pv(samples).norm.copy())
        if read < a.hop_size:
            break
    specgram = vstack(frames)

    # plotting ('lower' is the valid spelling of the former 'bottom' origin)
    imshow(log10(specgram.T + .001), origin='lower', aspect='auto',
           cmap=cm.gray_r)
    axis([0, len(specgram), 0, len(specgram[0])])

    # show axes in Hz and seconds
    time_step = hop_s / float(samplerate)
    total_time = len(specgram) * time_step
    print("total time: %0.2fs" % total_time
          + ", samplerate: %.2fkHz" % (samplerate / 1000.))
    n_xticks = 10
    n_yticks = 10

    def get_rounded_ticks(top_pos, step, n_ticks):
        """Return (positions, labels) for ``n_ticks`` rounded ticks plus the top."""
        top_label = top_pos * step
        # get the first label
        ticks_first_label = top_pos * step / n_ticks
        # round to the closest .1
        ticks_first_label = round(ticks_first_label * 10.) / 10.
        # compute all labels from the first rounded one
        ticks_labels = [ticks_first_label * n for n in range(n_ticks)] + [top_label]
        # get the corresponding positions
        ticks_positions = [ticks_labels[n] / step for n in range(n_ticks)] + [top_pos]
        # convert to string
        ticks_labels = ["%.1f" % x for x in ticks_labels]
        # return position, label tuple to use with x/yticks
        return ticks_positions, ticks_labels

    # apply to the axis
    xticks(*get_rounded_ticks(len(specgram), time_step, n_xticks))
    yticks(*get_rounded_ticks(len(specgram[0]),
                              (samplerate / 2. / 1000.) / len(specgram[0]),
                              n_yticks))
    ylabel('Frequency (kHz)')
    xlabel('Time (s)')
def test_resynth_two_steps(self):
    """ check the resynthesis of steps is correct with 50% overlap """
    hop_s = 512
    vocoder = pvoc(hop_s * 2, hop_s)
    signal = fvec(hop_s)
    silence = fvec(hop_s)
    # negative step
    signal[20:50] = -.1
    # positive step
    signal[100:200] = .1
    # first pass feeds the stepped hop, second pass feeds zeros
    first_out = vocoder.rdo(vocoder(signal))
    second_out = vocoder.rdo(vocoder(silence))
    assert_almost_equal(second_out, signal, decimal=precision)
def __init__(self):
    """Set the block and step sizes and create one descriptor per method.

    ``self.specdesc`` maps each method name to its descriptor object and
    ``self.specdesc_results`` maps each method name to an empty list.
    """
    super(AubioSpecdesc, self).__init__()
    self.input_blocksize = 1024
    # Integer division keeps the step size an int on Python 3 as well.
    self.input_stepsize = self.input_blocksize // 4

    # Aubio Specdesc Initialisation
    self.block_read = 0
    self.pvoc = pvoc(self.input_blocksize, self.input_stepsize)
    self.methods = [
        'default', 'energy', 'hfc', 'complex', 'phase', 'specdiff', 'kl',
        'mkl', 'specflux', 'centroid', 'slope', 'rolloff', 'spread',
        'skewness', 'kurtosis', 'decrease']
    self.specdesc = {}
    self.specdesc_results = {}
    for method in self.methods:
        self.specdesc[method] = specdesc(method, self.input_blocksize)
        self.specdesc_results[method] = []
def get_spectrogram(filename, samplerate=0):
    """Write the magnitude spectrogram of an audio file to ``specgram.txt``.

    The spectrogram (one row per hop, ``win_s // 2 + 1`` columns) is
    flattened and saved as text, one value per line.

    Args:
        filename: path of the audio file to analyse.
        samplerate: rate used to open the file.
    """
    win_s = 320  # fft window size
    hop_s = 160  # hop size
    fft_s = win_s // 2 + 1  # spectrum bins

    a = aubio.source(filename, samplerate, hop_s)  # source file
    pv = aubio.pvoc(win_s, hop_s)  # phase vocoder

    # analysis: collect the norm vectors and stack once instead of re-stacking
    # the whole spectrogram on every hop
    frames = [np.zeros([0, fft_s], dtype=aubio.float_type)]
    while True:
        samples, read = a()  # read file
        # Copy, in case the vocoder hands back the same buffer each call.
        frames.append(np.array(pv(samples).norm))
        if read < a.hop_size:
            break
    specgram = np.vstack(frames)

    np.savetxt("specgram.txt", specgram.flatten(), fmt='%f')
def __init__(self, samplerate=44100, winsize=1024, hopsize=512, filters=40,
             coeffs=13):
    """Build the spectral descriptors, phase vocoder and MFCC extractor.

    Args:
        samplerate: sample rate passed to the MFCC extractor.
        winsize: window size shared by the descriptors, vocoder and MFCC.
        hopsize: hop size of the phase vocoder.
        filters: number of filters passed to the MFCC extractor.
        coeffs: number of MFCC coefficients; also the length of
            ``self.mfccs``.
    """
    super(AubioAnalyser, self).__init__()
    self.winsize = winsize
    self.hopsize = hopsize
    self.coeffs = coeffs
    self.filters = filters
    self.methods = ["default", "energy", "hfc", "complex", "phase",
                    "specdiff", "kl", "mkl", "specflux", "centroid", "slope",
                    "rolloff", "spread", "skewness", "kurtosis", "decrease"]
    # One descriptor object per method name.
    self.descriptors = {
        name: aubio.specdesc(name, self.winsize) for name in self.methods
    }
    self.pvocoder = aubio.pvoc(self.winsize, self.hopsize)
    self.mfcc_feature = aubio.mfcc(winsize, filters, coeffs, samplerate)
    self.mfccs = numpy.zeros([self.coeffs, ])
def test_zeros(self):
    """ check the resynthesis of zeros gives zeros """
    win_s, hop_s = 1024, 256
    vocoder = pvoc(win_s, hop_s)
    silence = fvec(hop_s)
    n_passes = (4 * win_s) // hop_s
    for _ in range(n_passes):
        spectrum = vocoder(silence)
        resynth = vocoder.rdo(spectrum)
        assert_equal(silence, 0.)
        assert_equal(spectrum.norm, 0.)
        try:
            assert_equal(spectrum.phas, 0)
        except AssertionError:
            # Non-zero phases are tolerated only when they are exactly
            # +pi or -pi; in that case the test is skipped, not failed.
            phases = spectrum.phas
            assert_equal(phases[phases > 0], +np.pi)
            assert_equal(phases[phases < 0], -np.pi)
            assert_equal(np.abs(phases[np.abs(phases) != np.pi]), 0)
            self.skipTest('pvoc(fvec(%d)).phas != +0, ' % win_s
                          + 'This is expected when using fftw3 on powerpc.')
        assert_equal(resynth, 0.)
def setup(self, channels=None, samplerate=None, blocksize=None,
          totalframes=None):
    """Run the parent setup, then rebuild the vocoder and the descriptors.

    ``self.specdesc`` maps each method name to its descriptor object and
    ``self.specdesc_results`` maps each method name to an empty list.
    """
    super(AubioSpecdesc, self).setup(channels, samplerate, blocksize,
                                     totalframes)
    self.block_read = 0
    self.pvoc = pvoc(self.input_blocksize, self.input_stepsize)
    self.methods = [
        'default', 'energy', 'hfc', 'complex', 'phase', 'specdiff', 'kl',
        'mkl', 'specflux', 'centroid', 'slope', 'rolloff', 'spread',
        'skewness', 'kurtosis', 'decrease']
    self.specdesc = {}
    self.specdesc_results = {}
    for name in self.methods:
        self.specdesc[name] = specdesc(name, self.input_blocksize)
        self.specdesc_results[name] = []
def __init__(self, sample_rate=None, buffersize=None):
    """Create the onset, tempo, energy, vocoder and pitch analysers.

    Args:
        sample_rate: overrides ``self.sample_rate`` when truthy.
        buffersize: overrides ``self.buffersize`` when truthy; it is used
            as the hop size, and the window size is twice this value.
    """
    self.sample_rate = sample_rate or self.sample_rate
    self.buffersize = buffersize or self.buffersize
    self.window_size = self.buffersize * 2
    self.stream = None

    win, hop, rate = self.window_size, self.buffersize, self.sample_rate

    self.onset = aubio.onset('specflux', win, hop, rate)
    self.onset.set_threshold(0.3)
    self.onset.set_silence(-20.)

    self.tempo = aubio.tempo('default', win, hop, rate)

    self.energy = aubio.specdesc('specflux', self.buffersize * 2)
    self.pv = aubio.pvoc(self.buffersize * 2, self.buffersize)

    self.pitch = aubio.pitch("yinfft", win, hop, rate)
    self.pitch.set_unit("midi")
    self.pitch.set_tolerance(0.8)

    self.py_audio = pyaudio.PyAudio()