def run(self):
    global hitFound
    # Initialize and set the properties of the PCM object
    card = 'default'
    audioInput = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL, card)
    audioInput.setchannels(2)
    audioInput.setrate(44100)
    audioInput.setformat(alsaaudio.PCM_FORMAT_S16_LE)
    audioInput.setperiodsize(160)
    oldL = 1
    oldR = 1
    try:
        # Start an infinite loop that gets and analyzes audio data
        count = 0
        spike = 0
        while True:
            l, data = audioInput.read()
            if l > 0:
                spike = 0
                lchan = audioop.tomono(data, 2, 1, 0)
                rchan = audioop.tomono(data, 2, 0, 1)
                lmax = audioop.max(lchan, 2)
                rmax = audioop.max(rchan, 2)
                # print(str(lmax) + " " + str(rmax))
                if spike < lmax:
                    spike = lmax
                if spike < rmax:
                    spike = rmax
                # print(spike)
                if spike > 1000:
                    hitFound = True
            time.sleep(.001)  # audio refresh rate
    except KeyboardInterrupt:
        sys.exit()  # TODO make it actually exit
def load_wav(fn, stereo=True):
    with wave.open(fn, mode='rb') as audio_file:
        params = audio_file.getparams()
        print(f'''Audio file "{fn}":
    Channels     : {params.nchannels}
    Framerate    : {params.framerate}
    Sample width : {params.sampwidth} Bytes
    Sample num   : {params.nframes}
''')
        sample_bytes = audio_file.readframes(params.nframes)
    ch_left = audioop.tomono(sample_bytes, params.sampwidth, 1, 0)
    if stereo:
        ch_right = audioop.tomono(sample_bytes, params.sampwidth, 0, 1)
        samples = [(l, r) for l, r in zip(list_samples(ch_left, params),
                                          list_samples(ch_right, params))]
    else:
        samples = list_samples(ch_left, params)
    return samples, params
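load_wav() above calls a list_samples() helper that the snippet does not show. A plausible implementation, an assumption rather than the original project's code, unpacks the mono byte string into integer samples with audioop.getsample():

import audioop

def list_samples(mono_bytes, params):
    # after tomono() there is exactly one sample of params.sampwidth bytes per frame
    return [audioop.getsample(mono_bytes, params.sampwidth, i)
            for i in range(params.nframes)]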
def listen():
    # Initialize and set the properties of the PCM object
    card = 'default'
    audioInput = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL, card)
    audioInput.setchannels(2)
    audioInput.setrate(44100)
    audioInput.setformat(alsaaudio.PCM_FORMAT_S16_LE)
    audioInput.setperiodsize(160)
    oldL = 1
    oldR = 1
    try:
        # Start a big loop that gets and analyzes audio data
        for i in range(0, 5000):
            l, data = audioInput.read()
            if l > 0:
                spike = 0
                lchan = audioop.tomono(data, 2, 1, 0)
                rchan = audioop.tomono(data, 2, 0, 1)
                lmax = audioop.max(lchan, 2)
                rmax = audioop.max(rchan, 2)
                if spike < lmax:
                    spike = lmax
                if spike < rmax:
                    spike = rmax
                if spike > 1000:
                    startStopTime()
            time.sleep(.001)  # audio refresh rate
    except KeyboardInterrupt:
        sys.exit()  # TODO make it actually exit
def write_sound(self, path, suffix='.wav', devide_stereo=False):
    i = 0
    if devide_stereo:
        if self.__channels == 2:
            frames_r = []
            frames_l = []
            for frames in self.__sound_frames:
                for frame in frames:
                    frame_l = audioop.tomono(frame, 2, 1, 0)
                    frames_l.append(frame_l)
                    frame_r = audioop.tomono(frame, 2, 0, 1)
                    frames_r.append(frame_r)
                filename = path + str(i)
                self.__write_soundfile(frames_r, filename + '_r' + suffix, 1)
                self.__write_soundfile(frames_l, filename + '_l' + suffix, 1)
                i = i + 1
                frames_l.clear()
                frames_r.clear()
        else:
            print('Recording Sound is not Stereo Sound.')
            print('So, Write only Monaural Sound')
            i = 0
            for frame in self.__sound_frames:
                filename = path + str(i) + suffix
                self.__write_soundfile(frame, filename)
                i = i + 1
    self.__write_log(path)
    return
def __db_level(self, rms_mode: bool = False) -> Tuple[float, float]:
    """
    Returns the average audio volume level measured in dB (range -60 db to 0 db)
    If the sample is stereo, you get back a tuple: (left_level, right_level)
    If the sample is mono, you still get a tuple but both values will be the same.
    This method is probably only useful if processed on very short sample fragments in sequence,
    so the db levels could be used to show a level meter for the duration of the sample.
    """
    maxvalue = 2 ** (8 * self.__samplewidth - 1)
    if self.nchannels == 1:
        if rms_mode:
            peak_left = peak_right = (audioop.rms(self.__frames, self.__samplewidth) + 1) / maxvalue
        else:
            peak_left = peak_right = (audioop.max(self.__frames, self.__samplewidth) + 1) / maxvalue
    else:
        left_frames = audioop.tomono(self.__frames, self.__samplewidth, 1, 0)
        right_frames = audioop.tomono(self.__frames, self.__samplewidth, 0, 1)
        if rms_mode:
            peak_left = (audioop.rms(left_frames, self.__samplewidth) + 1) / maxvalue
            peak_right = (audioop.rms(right_frames, self.__samplewidth) + 1) / maxvalue
        else:
            peak_left = (audioop.max(left_frames, self.__samplewidth) + 1) / maxvalue
            peak_right = (audioop.max(right_frames, self.__samplewidth) + 1) / maxvalue
    # cut off at the bottom at -60 instead of all the way down to -infinity
    return max(20.0 * math.log(peak_left, 10), -60.0), max(20.0 * math.log(peak_right, 10), -60.0)
def __init__(self):
    '''AudioStream allows streaming of audio data received from a microphone via ROS'''
    self._running = False
    # global audio recorder (initialized in startStream)
    self._recorder = None
    # init ROS
    rospy.init_node('audiostream', anonymous=True)
    # global publisher variables (initialized in startStream)
    self._publisherLeft = None
    self._publisherRight = None
    # global service variables (startStream initializes the other ones)
    self._getWidthSrv = None
    self._getRateSrv = None
    self._startSrv = rospy.Service('nico/audiostream/startStream',
                                   nicomsg.srv.StartAudioStream,
                                   self._ROSPY_startStream)
    self._stopSrv = None
    # actual streaming
    sampleIndex = 0
    while not rospy.is_shutdown():
        if self._running and self._recorder.get_number_of_samples() > sampleIndex:
            sample = self._recorder.get_chunk(sampleIndex)
            sampleIndex += 1
            msg = nicomsg.msg.hs()
            msg.header.stamp = rospy.Time.now()
            if self._channels[0]:
                msg.param1 = audioop.tomono(sample, self._recorder.get_sample_width(), 1, 0)
                self._publisherLeft.publish(msg)
            if self._channels[1]:
                msg.param1 = audioop.tomono(sample, self._recorder.get_sample_width(), 0, 1)
                self._publisherRight.publish(msg)
    if self._running:
        self.stopStream()
def get_recognition_result(self):
    print("Convert bytes to 1 channel")
    audio_bytes_4channels = b''.join(self.audio_fragment_array)
    # collapse the 4 interleaved channels down to 1 in two tomono passes
    audio_bytes_2channels = audioop.tomono(audio_bytes_4channels, 2, 0.5, 0.5)
    audio_bytes_1channel = audioop.tomono(audio_bytes_2channels, 2, 0.5, 0.5)
    print("Start speech interaction request to google")
    request_json = self.services.google_handle.speech_to_text_api(
        base64.b64encode(audio_bytes_1channel), self.sample_frequency)
    dict_output = {"success": False, "results": ()}
    if request_json:
        print(request_json)
        dict_output["success"] = True
        recognition_result = str(
            request_json['results'][0]['alternatives'][0]['transcript'])
        confidence_result = float(
            request_json['results'][0]['alternatives'][0]['confidence'])
        language_result = request_json['results'][0]['languageCode']
        print("Request succeeded, result: {}, confidence: {}".format(
            recognition_result, confidence_result))
        dict_output["results"] = (recognition_result, language_result,
                                  confidence_result)
    return dict_output
def play(self, fileName):
    # Initialise matrix
    matrix = [0, 0, 0, 0, 0, 0, 0, 0]
    # Set up audio
    wavfile = wave.open(fileName, 'r')
    chunk = 1024
    output = aa.PCM(aa.PCM_PLAYBACK, aa.PCM_NORMAL)
    output.setchannels(1)
    output.setrate(22050)
    output.setformat(aa.PCM_FORMAT_S16_LE)
    output.setperiodsize(chunk)
    data = wavfile.readframes(chunk)
    try:
        while data:  # readframes() returns b'' at EOF, so test truthiness, not != ''
            output.write(data)
            # Split channel data and find maximum volume
            channel_l = audioop.tomono(data, 2, 1.0, 0.0)
            channel_r = audioop.tomono(data, 2, 0.0, 1.0)
            max_vol_factor = 5000
            # integer division so the shift below gets an int
            max_l = audioop.max(channel_l, 2) // max_vol_factor
            max_r = audioop.max(channel_r, 2) // max_vol_factor
            for i in range(1, 8):
                self.generateMouthSignal((1 << max_r) - 1)
            data = wavfile.readframes(chunk)
    except:
        data = None
    os.system('/etc/init.d/alsa-utils restart')
    sleep(.25)
def recv():
    global client, inp, etc, trig_this_time, trig_last_time, sin
    if aout_jack:
        # get audio (with 16 bit signed format)
        data_l = 32767 * inp[0].get_array()
        data_r = 32767 * inp[1].get_array()
    else:
        # get audio from alsa
        l, data = inp.read()
        if l > 0:
            ldata = audioop.tomono(data, 2, 1, 0)
            rdata = audioop.tomono(data, 2, 0, 1)
    peak = 0
    # try :
    for i in range(0, 100):
        avg_l = 0
        avg_r = 0
        for j in range(3):
            if aout_jack:
                avg_l += data_l[3 * i + j]
                avg_r += data_r[3 * i + j]
            else:
                if l > 0:
                    # the following 2 lines crashed the python engine -> optimize alsa settings? probably wrong buffer size?
                    # avg_l += audioop.getsample(ldata, 2, (i * 3) + j)
                    # avg_r += audioop.getsample(rdata, 2, (i * 3) + j)
                    # this is not a solution, only a fix
                    avg_l = avg_l
                    avg_r = avg_r
        # except :
        #     pass
        avg_l = avg_l / 3
        avg_r = avg_r / 3
        avg = (avg_l + avg_r) / 2
        # scale it
        avg = int(avg * etc.audio_scale)
        avg_l = int(avg_l * etc.audio_scale)
        avg_r = int(avg_r * etc.audio_scale)
        if avg > 20000:
            trig_this_time = time.time()
            if (trig_this_time - trig_last_time) > .05:
                if etc.audio_trig_enable:
                    etc.audio_trig = True
                trig_last_time = trig_this_time
        if avg > peak:
            etc.audio_peak = avg
            peak = avg
        # if the trigger button is held
        if etc.trig_button:
            etc.audio_in[i] = sin[i]
            etc.audio_left[i] = sin[i]
            etc.audio_right[i] = sin[i]
        else:
            etc.audio_in[i] = avg
            etc.audio_left[i] = avg_l
            etc.audio_right[i] = avg_r
def mul_stereo(fileName, width, lfactor, rfactor):
    # note: despite its name, fileName receives raw frame bytes, not a path
    # split the interleaved stereo fragment into two mono fragments
    lsample = audioop.tomono(fileName, width, 1, 0)
    rsample = audioop.tomono(fileName, width, 0, 1)
    # apply an independent gain factor to each channel
    lsample = audioop.mul(lsample, width, lfactor)
    rsample = audioop.mul(rsample, width, rfactor)
    # re-expand each mono fragment into its own half of the stereo image and recombine
    lsample = audioop.tostereo(lsample, width, 1, 0)
    rsample = audioop.tostereo(rsample, width, 0, 1)
    return audioop.add(lsample, rsample, width)
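A minimal driver for mul_stereo() above, assuming a 16-bit stereo WAV; the file names are placeholders:

import wave

with wave.open("in.wav", "rb") as src:
    params = src.getparams()
    frames = src.readframes(params.nframes)

# boost the left channel slightly and attenuate the right one
balanced = mul_stereo(frames, params.sampwidth, 1.2, 0.8)

with wave.open("out.wav", "wb") as dst:
    dst.setparams(params)
    dst.writeframes(balanced)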
def get_dr(filename, floats=False):
    with wave.open(filename, "rb") as f:
        channels = f.getnchannels()
        if channels not in (1, 2):
            # TODO unpack n channels
            raise NotImplementedError("We only handle mono or stereo at the moment")
        framesize = f.getsampwidth()
        if framesize != 2:
            # TODO map framesize to struct module constants
            raise NotImplementedError("We only handle 16 bit formats at the moment")
        framerate = f.getframerate()
        total = f.getnframes()
        read = 0
        peaks = [[] for i in range(channels)]
        rmss = [[] for i in range(channels)]
        while True:
            # read three seconds of data
            block = f.readframes(framerate * 3)
            expected = framerate * 3 * channels * framesize
            if len(block) < expected:
                # EOF
                break
            read += 3 * framerate
            # unpack
            if channels == 2:
                chansamples = [
                    audioop.tomono(block, framesize, 1, 0),
                    audioop.tomono(block, framesize, 0, 1)
                ]
            else:
                chansamples = [block]
            for i, chan in enumerate(chansamples):
                peak = audioop.max(chan, framesize) / NORM
                rms = math.sqrt(2) * audioop.rms(chan, framesize) / NORM
                peaks[i].append(peak)
                rmss[i].append(rms)
    drs = []
    for c in range(channels):
        peaks[c].sort()
        rmss[c].sort()
        p2 = peaks[c][-2]
        if p2 == 0:
            raise SilentTrackError
        N = int(0.2 * len(peaks[c]))
        if N == 0:
            raise TooShortError
        r = math.sqrt(sum(i**2 for i in rmss[c][-N:]) / N)
        dr = -to_db(r / p2)
        drs.append(dr)
    if not floats:
        fdr = round(sum(drs) / len(drs))
    else:
        fdr = sum(drs) / len(drs)
    return fdr
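get_dr() above relies on a few module-level names the snippet does not show. Plausible stand-ins, which are assumptions rather than the original definitions:

import math

NORM = 32768.0  # full-scale magnitude of a signed 16-bit sample

def to_db(ratio):
    # amplitude ratio expressed in decibels
    return 20.0 * math.log10(ratio)

class SilentTrackError(Exception):
    """Second-highest peak was zero, so no DR value can be computed."""

class TooShortError(Exception):
    """Fewer than five 3-second blocks, so the top-20% RMS window is empty."""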
def callback(in_data, frame_count, time_info, status):
    if self.state == self.RECORDING:
        self._wavefile.writeframes(in_data)
    self._time_info = time_info
    lsample = audioop.tomono(in_data, self._sample_width, 1, 0)
    rsample = audioop.tomono(in_data, self._sample_width, 0, 1)
    l_max = audioop.max(lsample, self._sample_width) / self._int_max
    r_max = audioop.max(rsample, self._sample_width) / self._int_max
    self.levelUpdated.emit([l_max, r_max])
    return (in_data, pyaudio.paContinue)
def recv():
    global client, inp, etc, trig_this_time, trig_last_time, sin
    if aout_norns:
        # get audio (with 16 bit signed format)
        data_l = 32767 * inp[0].get_array()
        data_r = 32767 * inp[1].get_array()
    else:
        # get audio from alsa
        l, data = inp.read()
        if l > 0:
            ldata = audioop.tomono(data, 2, 1, 0)
            rdata = audioop.tomono(data, 2, 0, 1)
    peak = 0
    # try :
    for i in range(0, 100):
        avg_l = 0
        avg_r = 0
        for j in range(3):
            if aout_norns:
                avg_l += data_l[3 * i + j]
                avg_r += data_r[3 * i + j]
            else:
                if l > 0:
                    avg_l += audioop.getsample(ldata, 2, (i * 3) + j)
                    avg_r += audioop.getsample(rdata, 2, (i * 3) + j)
        # except :
        #     pass
        avg_l = avg_l / 3
        avg_r = avg_r / 3
        avg = (avg_l + avg_r) / 2
        # scale it
        avg = int(avg * etc.audio_scale)
        avg_l = int(avg_l * etc.audio_scale)
        avg_r = int(avg_r * etc.audio_scale)
        if avg > 20000:
            trig_this_time = time.time()
            if (trig_this_time - trig_last_time) > .05:
                if etc.audio_trig_enable:
                    etc.audio_trig = True
                trig_last_time = trig_this_time
        if avg > peak:
            etc.audio_peak = avg
            peak = avg
        # if the trigger button is held
        if etc.trig_button:
            etc.audio_in[i] = sin[i]
            etc.audio_left[i] = sin[i]
            etc.audio_right[i] = sin[i]
        else:
            etc.audio_in[i] = avg
            etc.audio_left[i] = avg_l
            etc.audio_right[i] = avg_r
def pan(slice, pan_pos=0.5, amp=1.0):
    amps = pantamp(pan_pos)
    lslice = audioop.tomono(slice, audio_params[1], 1, 0)
    lslice = audioop.tostereo(lslice, audio_params[1], amps[0], 0)
    rslice = audioop.tomono(slice, audio_params[1], 0, 1)
    rslice = audioop.tostereo(rslice, audio_params[1], 0, amps[1])
    slice = audioop.add(lslice, rslice, audio_params[1])
    return audioop.mul(slice, audio_params[1], amp)
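pan() above depends on two names from its module: audio_params (a wave-style params tuple, so audio_params[1] is the sample width) and pantamp(). The constant-power pan law below is one plausible pantamp(), an assumption rather than the original implementation:

import math

def pantamp(pan_pos):
    # pan_pos 0.0 = hard left, 1.0 = hard right, equal power at 0.5
    return (math.cos(pan_pos * math.pi / 2),
            math.sin(pan_pos * math.pi / 2))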
def test_tomono(self):
    for w in 1, 2, 4:
        data1 = datas[w]
        data2 = bytearray(2 * len(data1))
        for k in range(w):
            data2[k::2 * w] = data1[k::w]
        self.assertEqual(audioop.tomono(data2, w, 1, 0), data1)
        self.assertEqual(audioop.tomono(data2, w, 0, 1), b'\0' * len(data1))
        for k in range(w):
            data2[k + w::2 * w] = data1[k::w]
        self.assertEqual(audioop.tomono(data2, w, 0.5, 0.5), data1)
def test_tomono(self):
    # Python 2 variant of the test above: str() converts the bytearray to
    # the str type that audioop accepted on Python 2.
    for w in 1, 2, 4:
        data1 = datas[w]
        data2 = bytearray(2 * len(data1))
        for k in range(w):
            data2[k::2 * w] = data1[k::w]
        self.assertEqual(audioop.tomono(str(data2), w, 1, 0), data1)
        self.assertEqual(audioop.tomono(str(data2), w, 0, 1), b'\0' * len(data1))
        for k in range(w):
            data2[k + w::2 * w] = data1[k::w]
        self.assertEqual(audioop.tomono(str(data2), w, 0.5, 0.5), data1)
def db_level(data, samplewidth=2, rms_mode=False):
    maxvalue = 2 ** (8 * samplewidth - 1)
    left_frames = audioop.tomono(data, samplewidth, 1, 0)
    right_frames = audioop.tomono(data, samplewidth, 0, 1)
    if rms_mode:
        peak_left = (audioop.rms(left_frames, samplewidth) + 1) / maxvalue
        peak_right = (audioop.rms(right_frames, samplewidth) + 1) / maxvalue
    else:
        peak_left = (audioop.max(left_frames, samplewidth) + 1) / maxvalue
        peak_right = (audioop.max(right_frames, samplewidth) + 1) / maxvalue
    return peak_left * 1000, peak_right * 1000
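A short driver for db_level() above, assuming a 16-bit stereo WAV; the file name is illustrative only:

import wave

with wave.open("stereo.wav", "rb") as wf:
    block = wf.readframes(wf.getframerate())  # roughly one second of audio
    left, right = db_level(block, samplewidth=wf.getsampwidth())
    print("levels:", left, right)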
def _convert_data(self, data: bytes, to_depth: int, to_channels: int,
                  to_rate: int, to_unsigned: bool = False) -> bytes:
    """Convert audio data."""
    out_width = to_depth // 8

    if self._from_float:
        ldata = audioop.tomono(data, self._width, 1, 0)
        rdata = audioop.tomono(data, self._width, 0, 1)
        converted = []
        for mono_data in (ldata, rdata):
            float_array = array('f', mono_data)
            out_array = array('i' if self._out_depth > 16 else 'h')
            for i in float_array:
                # clamp to [-1.0, 1.0] before scaling to the 16-bit range
                if i > 1.0:
                    i = 1.0
                elif i < -1.0:
                    i = -1.0
                out_array.append(round(i * 32767.0))
            converted.append(out_array.tobytes())
        # collect the converted fragments; rebinding the loop variable
        # alone would have discarded them
        ldata, rdata = converted
        ldata = audioop.tostereo(ldata, self._width, 1, 0)
        rdata = audioop.tostereo(rdata, self._width, 0, 1)
        data = audioop.add(ldata, rdata, self._width)

    if self._to_alaw:
        data = audioop.lin2alaw(data, self._width)

    if self._depth != to_depth:
        data = audioop.lin2lin(data, self._width, out_width)

    if self._unsigned != to_unsigned:
        data = audioop.bias(data, out_width, 128)

    # Make it stereo
    if self._channels < to_channels:
        data = audioop.tostereo(data, out_width, 1, 1)
    # Make it mono
    elif self._channels > to_channels:
        data = audioop.tomono(data, out_width, 1, 1)

    # Convert the sample rate of the data to the requested rate.
    if self._rate != to_rate and data:
        data, self._state = audioop.ratecv(data, out_width, to_channels,
                                           self._rate, to_rate, self._state)

    return data
def get_sound():
    data = stream.read(frames)
    lsample = audioop.tomono(data, 2, 1, 0)
    rsample = audioop.tomono(data, 2, 0, 1)
    # np.float was removed in NumPy 1.24; plain float is equivalent
    temp_l = np.frombuffer(lsample, np.int16).astype(float)
    temp_r = np.frombuffer(rsample, np.int16).astype(float)
    lrms = np.sqrt((temp_l * temp_l).sum() / len(temp_l))
    rrms = np.sqrt((temp_r * temp_r).sum() / len(temp_r))
    return (lrms, rrms)
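get_sound() above reads from module-level PyAudio objects (stream, frames) and expects numpy imported as np. A minimal setup sketch, with device parameters that are assumptions:

import pyaudio

frames = 1024  # block size used by get_sound()
pa = pyaudio.PyAudio()
stream = pa.open(format=pyaudio.paInt16, channels=2, rate=44100,
                 input=True, frames_per_buffer=frames)

left_rms, right_rms = get_sound()
print(left_rms, right_rms)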
def recv():
    global inp, etc, trig_this_time, trig_last_time, sin
    # get audio
    l, data = inp.read()
    peak = 0
    while l:
        try:
            ldata = audioop.tomono(data, 2, 1, 0)
            rdata = audioop.tomono(data, 2, 0, 1)
            for i in range(0, 100):
                avg_l = 0
                avg_r = 0
                idx = i * 3
                avg_l += audioop.getsample(ldata, 2, idx)
                avg_r += audioop.getsample(rdata, 2, idx)
                idx += 1
                avg_l += audioop.getsample(ldata, 2, idx)
                avg_r += audioop.getsample(rdata, 2, idx)
                idx += 1
                avg_l += audioop.getsample(ldata, 2, idx)
                avg_r += audioop.getsample(rdata, 2, idx)
                avg_l = avg_l / 3
                avg_r = avg_r / 3
                avg = (avg_l + avg_r) / 2
                if avg > 20000:
                    trig_this_time = time.time()
                    if (trig_this_time - trig_last_time) > .05:
                        etc.audio_trig = True
                        trig_last_time = trig_this_time
                if avg > peak:
                    etc.audio_peak = avg
                    peak = avg
                # if the trigger button is held
                if etc.trig_button:
                    etc.audio_in[i] = sin[i]
                    etc.audio_left[i] = sin[i]
                    etc.audio_right[i] = sin[i]
                else:
                    etc.audio_in[i] = avg
                    etc.audio_left[i] = avg_l
                    etc.audio_right[i] = avg_r
        except:
            pass
        l, data = inp.read()
def convert_to(self, data: bytes, to_depth: int, to_channels: int,
               to_rate: int, to_unsigned: bool = False) -> bytes:
    """Convert audio data."""
    dest_width = to_depth // 8
    print(to_depth, self._depth)

    if self._depth != to_depth:
        if self._depth == 8:
            data = audioop.bias(data, 1, 128)
        data = audioop.lin2lin(data, self._width, dest_width)
        if to_depth == 8:
            # was `audioop(data, 1, 128)`, which is not callable; bias() is meant
            data = audioop.bias(data, 1, 128)

    if self._unsigned != to_unsigned:
        data = audioop.bias(data, dest_width, 128)

    # Make it stereo
    if self._channels < to_channels:
        data = audioop.tostereo(data, dest_width, 1, 1)
    # Make it mono
    elif self._channels > to_channels:
        data = audioop.tomono(data, dest_width, 1, 1)
    # print(dest_width)

    # Convert the sample rate of the data to the requested rate.
    if self._rate != to_rate and data:
        data, self._state = audioop.ratecv(data, dest_width, to_channels,
                                           self._rate, to_rate, self._state, 2, 1)

    return data
def read(self, size=-1):
    buffer = self.wav_reader.readframes(self.wav_reader.getnframes() if size == -1 else size)
    if self.wav_reader.getnchannels() != 1:  # stereo audio
        buffer = audioop.tomono(buffer, self.wav_reader.getsampwidth(), 1, 1)  # convert stereo audio data to mono
    return buffer
def raw_read(self):
    """Return some amount of data as a raw audio string"""
    buf = self.source.raw_read()
    if buf is None:
        self.eof = True
        return None
    # Convert channels as needed
    if self.set_channels and self.source.channels() != self.set_channels:
        if self.set_channels == 1:
            buf = audioop.tomono(buf, self.source.raw_width(), .5, .5)
        else:
            buf = audioop.tostereo(buf, self.source.raw_width(), 1, 1)
    # Convert sampling rate as needed
    if self.set_sampling_rate and self.source.sampling_rate() != self.set_sampling_rate:
        (buf, self.ratecv_state) = audioop.ratecv(buf, self.source.raw_width(),
                                                  self.channels(),
                                                  self.source.sampling_rate(),
                                                  self.set_sampling_rate,
                                                  self.ratecv_state)
    if self.set_raw_width and self.source.raw_width() != self.set_raw_width:
        if self.source.raw_width() == 1 and self.source.has_unsigned_singles():
            buf = audioop.bias(buf, 1, -128)
        buf = audioop.lin2lin(buf, self.source.raw_width(), self.set_raw_width)
        if self.set_raw_width == 1 and self.source.has_unsigned_singles():
            buf = audioop.bias(buf, 1, 128)
    return buf
def openwavfile(self, filename):
    # open wave file
    try:
        infile = wave.open(filename, "r")
    except wave.Error as e:
        dialog = gtk.MessageDialog(self.window,
                                   gtk.DIALOG_DESTROY_WITH_PARENT,
                                   gtk.MESSAGE_ERROR,
                                   gtk.BUTTONS_CLOSE,
                                   "Error: %s" % e)
        dialog.run()
        dialog.destroy()
        return
    # get raw frame data (binary string of integers)
    frames = infile.readframes(infile.getnframes())
    # mix down to mono if it's stereo
    if infile.getnchannels() > 1:
        frames = audioop.tomono(frames, infile.getsampwidth(), 0.5, 0.5)
    # get list of integer samples
    samples = binary_unsigned_integers_to_list(frames, infile.getsampwidth())
    # trigger the waveform setup etc
    self.newaudio(Audio(samples, infile.getframerate(), infile.getsampwidth(), filename))
def to_mono(self):
    'Convert stereo audio to mono'
    if self.params.nchannels == 1:
        return self
    return self.__class__(self.params,
                          audioop.tomono(self.frames, self.params.sampwidth, 0.5, 0.5),
                          nchannels=1)
def coerce_lin(source_aiff, template_obj):
    '''Read data from source, and convert it to match template's params.'''
    import audioop
    frag = source_aiff.read_lin()
    Ss = source_aiff.stream
    St = template_obj.stream
    # Sample width
    if Ss.getsampwidth() != St.getsampwidth():
        print('coerce sampwidth %i -> %i' % (Ss.getsampwidth(), St.getsampwidth()))
        frag = audioop.lin2lin(frag, Ss.getsampwidth(), St.getsampwidth())
    width = St.getsampwidth()
    # Channels
    if Ss.getnchannels() != St.getnchannels():
        print('coerce nchannels %i -> %i' % (Ss.getnchannels(), St.getnchannels()))
        if Ss.getnchannels() == 2 and St.getnchannels() == 1:
            frag = audioop.tomono(frag, width, 0.5, 0.5)
        elif Ss.getnchannels() == 1 and St.getnchannels() == 2:
            frag = audioop.tostereo(frag, width, 1.0, 1.0)
        else:
            print("Err: can't match channels")
    # Frame rate
    if Ss.getframerate() != St.getframerate():
        print('coerce framerate %i -> %i' % (Ss.getframerate(), St.getframerate()))
        frag, state = audioop.ratecv(frag, width, St.getnchannels(),
                                     Ss.getframerate(),  # in rate
                                     St.getframerate(),  # out rate
                                     None, 2, 1)
    return frag
def __init__(self, fname, newrate=0):
    """Initializes an audio file from an uncompressed wavefile on disk.
    The file is converted to mono, and if newrate is positive, then the rate is converted"""
    import wave, audioop
    try:  # see if we have numpy
        from numpy import array
        self.numpy = 1
    except ImportError:
        self.numpy = 0
    # read data
    f = wave.open(fname, "rb")
    nchans, w, fps, nframes, comptype, compname = f.getparams()
    print("Read audio file %s with %d chans, %d width, %d fps and %d frames" %
          (fname, nchans, w, fps, nframes))
    self.width, self.fps = w, fps
    self.dat = f.readframes(nframes)
    print("  Original data length was %d" % (len(self.dat)))
    # convert to mono and (optionally) convert the rate
    self.dat = audioop.tomono(self.dat, w, 0.5, 0.5)
    print("  After mono, data length is %d" % (len(self.dat)))
    if newrate > 0:
        self.dat, junk = audioop.ratecv(self.dat, w, 1, fps, newrate, None)
        self.fps = newrate
        print("  Converted to new rate %s, and data length is now %d" % (self.fps, len(self.dat)))
    # now extract the data into a simple array
    from audioop import getsample
    self.dat = [abs(getsample(self.dat, w, i)) for i in range(len(self.dat) // w)]
    print("  Final data length is now of length %s" % (len(self.dat),))
    if self.numpy:
        self.dat = array(self.dat)
def downsampleWav(data, dst, inrate=sampleRate, outrate=16000, inchannels=1, outchannels=1):
    try:
        s_write = wave.open(dst, 'wb')
    except:
        print('Failed to open files!')
        return False
    try:
        # ratecv() returns (fragment, state); unpack it once so the mono
        # conversion and the write below both operate on the raw bytes
        converted, _ = audioop.ratecv(data, 2, inchannels, inrate, outrate, None)
        if outchannels == 1 and inchannels != 1:
            converted = audioop.tomono(converted, 2, 1, 0)
    except:
        print('Failed to downsample wav')
        return False
    try:
        s_write.setparams((outchannels, 2, outrate, 0, 'NONE', 'Uncompressed'))
        s_write.writeframes(converted)
    except Exception as e:
        print(e)
        print('Failed to write wav')
        return False
    try:
        s_write.close()
    except:
        print('Failed to close wav files')
        return False
    return True
def calc_volume(wav, vol_rate=DEFAULT_VOL_RATE):
    '''
    Compute volume data as the root mean square (RMS) of the samples.
    '''
    w = wav.getsampwidth()
    rate = wav.getframerate()
    buf = wav.readframes(wav.getnframes())
    if wav.getnchannels() == 2:
        buf = audioop.tomono(buf, w, 0.5, 0.5)
    vol_nframes = int(wav.getnframes() * vol_rate / rate)
    step = len(buf) // vol_nframes
    step = step + (step % w)  # keep the window a multiple of the sample width
    vol = []
    for i in range(vol_nframes):
        sec = float(i) / vol_rate
        wav_f = int(sec * rate)
        st = wav_f * w
        ed = st + step
        rms = audioop.rms(buf[st:ed], w)
        vol.append(rms)
    return vol
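calc_volume() above takes an open wave.Wave_read object; DEFAULT_VOL_RATE is defined elsewhere in its module. Example call with an explicit rate (the file name is a placeholder):

import wave

with wave.open("speech.wav", "rb") as wav:
    volume = calc_volume(wav, vol_rate=10)  # ten RMS values per second of audio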
def normalize(self) -> 'Sample':
    """
    Normalize the sample, meaning: convert it to the default samplerate, sample width and number of channels.
    When mixing samples, they should all have the same properties, and this method is ideal to make sure of that.
    """
    if self.__locked:
        raise RuntimeError("cannot modify a locked sample")
    self.resample(params.norm_samplerate)
    if self.samplewidth != params.norm_samplewidth:
        # Convert to desired sample size.
        self.__frames = audioop.lin2lin(self.__frames, self.samplewidth, params.norm_samplewidth)
        self.__samplewidth = params.norm_samplewidth
    if params.norm_nchannels not in (1, 2):
        raise ValueError("norm_nchannels has invalid value, can only be 1 or 2")
    if self.nchannels == 1 and params.norm_nchannels == 2:
        # convert to stereo
        self.__frames = audioop.tostereo(self.__frames, self.samplewidth, 1, 1)
        self.__nchannels = 2
    elif self.nchannels == 2 and params.norm_nchannels == 1:
        # convert to mono
        self.__frames = audioop.tomono(self.__frames, self.__samplewidth, 1, 1)
        self.__nchannels = 1
    return self
def encode(self, frame: Frame, force_keyframe: bool = False) -> Tuple[List[bytes], int]:
    assert isinstance(frame, AudioFrame)
    assert frame.format.name == "s16"
    assert frame.layout.name in ["mono", "stereo"]

    channels = len(frame.layout.channels)
    data = bytes(frame.planes[0])
    timestamp = frame.pts

    # resample at 8 kHz
    if frame.sample_rate != SAMPLE_RATE:
        data, self.rate_state = audioop.ratecv(
            data,
            SAMPLE_WIDTH,
            channels,
            frame.sample_rate,
            SAMPLE_RATE,
            self.rate_state,
        )
        timestamp = (timestamp * SAMPLE_RATE) // frame.sample_rate

    # convert to mono
    if channels == 2:
        data = audioop.tomono(data, SAMPLE_WIDTH, 1, 1)

    data = self._convert(data, SAMPLE_WIDTH)
    return [data], timestamp
def splitStereoAudio(path, fn, outputPath=None):
    if outputPath is None:
        outputPath = join(path, "split_audio")
    if not os.path.exists(outputPath):
        os.mkdir(outputPath)
    name = os.path.splitext(fn)[0]
    fnFullPath = join(path, fn)
    leftOutputFN = join(outputPath, "%s_L.wav" % name)
    rightOutputFN = join(outputPath, "%s_R.wav" % name)
    audiofile = wave.open(fnFullPath, "r")
    params = audiofile.getparams()
    sampwidth = params[1]
    nframes = params[3]
    audioFrames = audiofile.readframes(nframes)
    for leftFactor, rightFactor, outputFN in ((1, 0, leftOutputFN),
                                              (0, 1, rightOutputFN)):
        monoAudioFrames = audioop.tomono(audioFrames, sampwidth, leftFactor, rightFactor)
        params = tuple([1, ] + list(params[1:]))
        outputAudiofile = wave.open(outputFN, "w")
        outputAudiofile.setparams(params)
        outputAudiofile.writeframes(monoAudioFrames)
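Example call for splitStereoAudio() above, with placeholder paths:

# produces split_audio/interview_L.wav and split_audio/interview_R.wav
splitStereoAudio("/home/user/recordings", "interview.wav")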
def read(self, size=-1):
    # @noter: 崔冰
    # @description: adjust the byte order and sample width of the audio data before processing
    buffer = self.audio_reader.readframes(
        self.audio_reader.getnframes() if size == -1 else size)
    if not isinstance(buffer, bytes):
        buffer = b""  # workaround for https://bugs.python.org/issue24608

    sample_width = self.audio_reader.getsampwidth()
    if not self.little_endian:  # big-endian data, convert to little-endian
        if hasattr(audioop, "byteswap"):  # ``audioop.byteswap`` was only added in Python 3.4 (Python 3.4+ also supports 24-bit audio)
            buffer = audioop.byteswap(buffer, sample_width)
        else:  # manually reverse the bytes of each sample, which is slower
            buffer = buffer[sample_width - 1::-1] + b"".join(
                buffer[i + sample_width:i:-1]
                for i in range(sample_width - 1, len(buffer), sample_width))

    # workaround for https://bugs.python.org/issue12866
    if self.samples_24_bit_pretending_to_be_32_bit:  # samples must be converted from 24-bit to 32-bit before ``audioop`` can process them
        buffer = b"".join(
            b"\x00" + buffer[i:i + sample_width]
            for i in range(0, len(buffer), sample_width)
        )  # since we're little-endian, prepend a zero byte to each 24-bit sample to get a 32-bit sample
        sample_width = 4  # treat the buffer as 32-bit audio now that we've converted it from 24-bit

    if self.audio_reader.getnchannels() != 1:  # stereo audio
        buffer = audioop.tomono(buffer, sample_width, 1, 1)  # convert stereo audio data to mono
    return buffer
def to_mono(self):
    if self.channels == 1:
        return self
    elif self.channels == 2:
        return Audio(channels=1, width=self.width, rate=self.rate,
                     data=audioop.tomono(self.data, self.width, 0.5, 0.5))
    else:
        raise ValueError(f"Can't convert audio with channels={self.channels}")
def read(self, size=-1):
    buffer = self.audio_reader.readframes(
        self.audio_reader.getnframes() if size == -1 else size)
    if not isinstance(buffer, bytes):
        buffer = b""  # workaround for https://bugs.python.org/issue24608

    sample_width = self.audio_reader.getsampwidth()
    if not self.little_endian:  # big endian format, convert to little endian on the fly
        if hasattr(audioop, "byteswap"):  # ``audioop.byteswap`` was only added in Python 3.4 (incidentally, that also means that we don't need to worry about 24-bit audio being unsupported, since Python 3.4+ always has that functionality)
            buffer = audioop.byteswap(buffer, sample_width)
        else:  # manually reverse the bytes of each sample, which is slower but works well enough as a fallback
            buffer = buffer[sample_width - 1::-1] + b"".join(
                buffer[i + sample_width:i:-1]
                for i in range(sample_width - 1, len(buffer), sample_width))

    # workaround for https://bugs.python.org/issue12866
    if self.samples_24_bit_pretending_to_be_32_bit:  # we need to convert samples from 24-bit to 32-bit before we can process them with ``audioop`` functions
        buffer = b"".join(
            b"\x00" + buffer[i:i + sample_width]  # must be a bytes literal, not "\x00", or the join fails on Python 3
            for i in range(0, len(buffer), sample_width)
        )  # since we're in little endian, we prepend a zero byte to each 24-bit sample to get a 32-bit sample
        sample_width = 4  # the buffer now holds 32-bit samples (see the variant of this method earlier in this section)
    if self.audio_reader.getnchannels() != 1:  # stereo audio
        buffer = audioop.tomono(buffer, sample_width, 1, 1)  # convert stereo audio data to mono
    return buffer
def downsampleWav(src, dst, inrate=44100, outrate=22050, inchannels=1, outchannels=1):
    if not os.path.exists(src):
        print('Source not found!')
        return False

    s_read = wave.open(src, 'r')
    s_write = wave.open(dst, 'w')

    n_frames = s_read.getnframes()
    data = s_read.readframes(n_frames)

    try:
        # ratecv() returns (fragment, state); unpack it so writeframes()
        # below always receives raw bytes
        converted, _ = audioop.ratecv(data, 2, inchannels, inrate, outrate, None)
        if outchannels == 1 and inchannels != 1:
            converted = audioop.tomono(converted, 2, 1, 0)
    except:
        print('Failed to downsample wav')
        return False

    try:
        s_write.setparams((outchannels, 2, outrate, 0, 'NONE', 'Uncompressed'))
        s_write.writeframes(converted)
    except:
        print('Failed to write wav')
        return False

    try:
        s_read.close()
        s_write.close()
    except:
        print('Failed to close wav files')
        return False

    return True
def calculate_volume(sphfile, leftch, rightch, SIR):
    # compute volume level from sph header.
    # Useful to compute the signal-to-interference
    # level of stereo sph files.
    with open(sphfile, 'rb') as s:  # binary mode so read() yields bytes
        raw = s.read()
    s1_bytes1 = audioop.tomono(raw, 2, leftch, rightch)
    s2_bytes1 = audioop.tomono(raw, 2, rightch, leftch)
    s1_bytes = s1_bytes1[1024:]  # skip past the SPH header
    s2_bytes = s2_bytes1[1024:]
    e1 = audioop.rms(s1_bytes, 2) * 1.0  # make float by multiplying by 1.0
    e2 = audioop.rms(s2_bytes, 2) * 1.0
    print(e1, e2)
    vol = math.exp(-1.0 * float(SIR) / 10) * e1 / e2
    return vol
def write(self, audio):
    if self._user_resample:
        # The user can also specify to have ULAW encoded source audio
        # converted to linear encoding upon being written.
        if self._user_resample.ulaw2lin:
            # Possibly skip downsampling if this was triggered, as
            # while ULAW encoded audio can be sampled at rates other
            # than 8KHz, since this is telephony related, it's unlikely.
            audio = audioop.ulaw2lin(audio, 2)
        # If the audio isn't already sampled at 8KHz,
        # then it needs to be downsampled first
        if self._user_resample.rate != 8000:
            audio, self._user_resample.ratecv_state = audioop.ratecv(
                audio,
                2,
                self._user_resample.channels,
                self._user_resample.rate,
                8000,
                self._user_resample.ratecv_state,
            )
        # If the audio isn't already in mono, then
        # it needs to be downmixed as well
        if self._user_resample.channels == 2:
            audio = audioop.tomono(audio, 2, 1, 1)
    self._tx_q.put(audio)
def _play(self, data, rate=16000, channels=1, width=2, spectrum=True):
    stream = self.pyaudio_instance.open(
        format=self.pyaudio_instance.get_format_from_width(width),
        channels=channels,
        rate=rate,
        output=True,
        # output_device_index=1,
        frames_per_buffer=CHUNK_SIZE,
    )
    if isinstance(data, types.GeneratorType):
        for d in data:
            if self.stop_event.is_set():
                break
            stream.write(d)
            if spectrum:
                if channels == 2:
                    d = audioop.tomono(d, 2, 0.5, 0.5)
                self.queue.put(d)
    else:
        stream.write(data)
    stream.close()
def getLevel(in_data, leftChannel=True):
    normalize = 32767.0
    leftFactor = 1 if leftChannel else 0
    rightFactor = 1 if not leftChannel else 0
    data = audioop.tomono(in_data, 2, leftFactor, rightFactor)
    amplitude = audioop.max(data, 2) / normalize
    # 1e-40 guards against log10(0) on a silent block
    level = int(41 + (20 * (math.log10(amplitude + 1e-40))))
    return level
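getLevel() above maps the peak of one 16-bit stereo block to a rough meter value (41 at full scale). An example feeding it blocks from a WAV file instead of a live stream; the file name is a placeholder:

import wave

with wave.open("capture.wav", "rb") as wf:
    block = wf.readframes(1024)
    print("L:", getLevel(block, leftChannel=True),
          "R:", getLevel(block, leftChannel=False))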
def _push_up_some_data(self):
    if self.readdev is None:
        return
    (l, data) = self.readdev.read()
    if self.readchannels == 2:
        data = audioop.tomono(data, 2, 1, 1)
    if self.encoder and data:
        self.encoder.handle_audio(data)
def findfit(scratch_frag, final_frag, sound_file):
    '''Calculates the offset (in seconds) between scratch_frag & final_frag.
    Both fragments are assumed to contain the same, loud "clapper" event.
    The SoundFile object is used for common stream parameters.'''
    import audioop
    nchannels = sound_file.stream.getnchannels()
    framerate = sound_file.stream.getframerate()
    width = sound_file.stream.getsampwidth()
    assert width == 2
    # Simplify the sound streams to make it quicker to find a match.
    # Left channel only.
    if nchannels > 1:
        scratch_frag_ = audioop.tomono(scratch_frag, width, 1, 0)
        final_frag_ = audioop.tomono(final_frag, width, 1, 0)
    else:
        scratch_frag_ = scratch_frag
        final_frag_ = final_frag
    nchannels_ = 1
    # Downsample to 8000/sec
    framerate_ = 8000
    scratch_frag_, state = audioop.ratecv(scratch_frag_, width, nchannels_,
                                          framerate, framerate_, None)
    final_frag_, state = audioop.ratecv(final_frag_, width, nchannels_,
                                        framerate, framerate_, None)
    bytes_per_second_ = nchannels_ * framerate_ * width
    # Find the clapper in final
    length_samples = int(0.001 * framerate * nchannels_)  # 0.001 sec window
    final_off_samples = audioop.findmax(final_frag_, length_samples)
    # Search for a 2 second 'needle' centred on where we found the 'clapper'
    needle_bytes = 2 * bytes_per_second_
    b0 = max(0, final_off_samples * width - int(needle_bytes / 2))
    print('"clapper" at final:', 1.0 * b0 / bytes_per_second_, 'sec')
    b1 = b0 + needle_bytes
    final_clapper_frag = final_frag_[b0:b1]
    scratch_off_samples, factor = audioop.findfit(scratch_frag_, final_clapper_frag)
    scratch_off_bytes = scratch_off_samples * width
    print('match at scratch:', 1.0 * scratch_off_bytes / bytes_per_second_,
          'sec', ' factor =', factor)
    # Calculate the offset (shift) between the two fragments.
    shift_sec = (scratch_off_bytes - b0) * 1.0 / bytes_per_second_
    print('shift =', shift_sec, 'seconds')
    return shift_sec
def testtomono(data):
    if verbose:
        print('tomono')
    data2 = b''
    for d in data[0]:  # data[0] is assumed to be a bytes object, so d is an int
        data2 += bytes([d, d])  # duplicate each byte into left and right channels
    if audioop.tomono(data2, 1, 0.5, 0.5) != data[0]:
        return 0
    return 1
def read_audio(filename, stereo=True):
    """reads a stereo audio file but returns a mono buffer"""
    wfile = wave.open(filename, 'r')
    buf = wfile.readframes(wfile.getnframes())
    # below, parameters are (buffer, width, lfactor, rfactor)
    if stereo:
        return audioop.tomono(buf, 2, 1, 0)
    else:
        return buf
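Example call for read_audio() above; with stereo=True the left channel is kept (lfactor=1, rfactor=0), so the returned buffer is half the length of the raw stereo frames. The file name is a placeholder:

mono_buf = read_audio("clip.wav", stereo=True)
print(len(mono_buf), "bytes of 16-bit mono samples")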
def _push_up_some_data(self):
    from audioop import tomono
    try:
        data = self.dev.read(320 * self._channels)
    except IOError:
        return None
    if self._channels == 2:
        data = tomono(data, 2, 1, 1)
    if self.encoder and data:
        self.encoder.handle_audio(data)
def mono(self, left_factor=1.0, right_factor=1.0):
    """Make the sample mono (1-channel) applying the given left/right channel factors when downmixing"""
    assert not self.__locked
    if self.__nchannels == 1:
        return self
    if self.__nchannels == 2:
        self.__frames = audioop.tomono(self.__frames, self.__samplewidth,
                                       left_factor, right_factor)
        self.__nchannels = 1
        return self
    raise ValueError("sample must be stereo or mono already")
def split_channels(adc_data):
    """
    Args:
        adc_data: Bunch with the following fields:
            - data (binary string or list of binary strings): raw ADC data
            - sample_width (int): number of bytes per sample

    Returns:
        A Bunch with fields:
            - voltage (binary string): raw ADC data
            - current (binary string): raw ADC data
            - sample_width (int): number of bytes per sample
    """
    data = Bunch()
    data.voltage = audioop.tomono(adc_data.data, adc_data.sample_width, 1, 0)
    data.current = audioop.tomono(adc_data.data, adc_data.sample_width, 0, 1)
    data.sample_width = adc_data.sample_width
    return data
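A runnable sketch for split_channels() above. The real project defines Bunch elsewhere; this stand-in just mirrors a simple attribute bag:

import audioop

class Bunch:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

mono = b"\x00\x01" * 100                       # fake 16-bit samples
interleaved = audioop.tostereo(mono, 2, 1, 1)  # duplicate into both channels
adc = Bunch(data=interleaved, sample_width=2)

channels = split_channels(adc)
assert channels.voltage == channels.current == mono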
def extractSubwav(fn, outputFN, startT, endT, singleChannelFlag, soxPath=None):
    if soxPath is None:
        soxPath = "sox"  # Assumes it is in the user's path
    path, fnNoPath = os.path.split(fn)
    resampledAudioPath = join(path, "resampledAudio")
    resampledFN = join(resampledAudioPath, fnNoPath)

    audiofile = wave.open(fn, "r")
    params = audiofile.getparams()
    nchannels = params[0]
    sampwidth = params[1]
    framerate = params[2]
    comptype = params[4]
    compname = params[5]

    # If you are not using the default Julius training model, you might
    # need to change the value here.  Results will fail if the sampling
    # frequency is different.
    if framerate != DEFAULT_FRAMERATE:
        if not os.path.exists(resampledFN):
            utils.makeDir(resampledAudioPath)
            sr = str(DEFAULT_FRAMERATE)
            soxCmd = "%s %s -r %s %s rate -v 96k" % (soxPath, fn, sr, resampledFN)
            os.system(soxCmd)
        if not os.path.exists(resampledFN):
            raise IncompatibleSampleFrequencyError(framerate, DEFAULT_FRAMERATE)
        audiofile = wave.open(resampledFN, "r")
        params = audiofile.getparams()
        nchannels = params[0]
        sampwidth = params[1]
        framerate = params[2]
        comptype = params[4]
        compname = params[5]

    # Extract the audio frames
    audiofile.setpos(int(framerate * startT))
    audioFrames = audiofile.readframes(int(framerate * (endT - startT)))

    # Convert to single channel if needed
    if singleChannelFlag is True and nchannels > 1:
        audioFrames = audioop.tomono(audioFrames, sampwidth, 1, 0)
        nchannels = 1

    outParams = [nchannels, sampwidth, framerate, len(audioFrames),
                 comptype, compname]
    outWave = wave.open(outputFN, "w")
    outWave.setparams(outParams)
    outWave.writeframes(audioFrames)
def read(self, buf, source_channels):
    source_sample_width = pyaudio.get_sample_size(pyaudio.paInt16) * source_channels
    audio = buf[3:]
    try:
        # sometimes the data received is incomplete so reusing state
        # data from ratecv() sometimes results in errors
        (audio, _) = audioop.ratecv(audio, source_sample_width, source_channels,
                                    48000, self.listener.sample_rate, None)
        audio = audioop.tomono(audio, self.listener.sample_width, 0.5, 0.5)
        self.listener.read(audio)
    except audioop.error as e:
        logger.warn("Error preparing sample", exc_info=True)
def convert_wave_data(f_rate, frame_count, sample_width, channels, data):
    """Convert wave sample data into pleo format"""
    if channels == 2:
        data = audioop.tomono(data, sample_width, 1, 1)
    data = audioop.mul(data, sample_width, 0.98)
    data = audioop.ratecv(data, sample_width, 1, f_rate, 11025, None, 4, 4)[0]
    if sample_width == 1:
        data = audioop.bias(data, 1, -128)
        data = audioop.lin2lin(data, 1, 2)
        data = audioop.mul(data, 2, (1.0 / 256))
    data = audioop.lin2adpcm(data, 2, None)[0]
    return (11025, frame_count, sample_width, 1, data)
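convert_wave_data() above expects raw frame data plus the wave header fields. A driver pulling those from a WAV file; the file name is a placeholder:

import wave

with wave.open("sound.wav", "rb") as wf:
    rate, count, width, chans, adpcm = convert_wave_data(
        wf.getframerate(), wf.getnframes(), wf.getsampwidth(),
        wf.getnchannels(), wf.readframes(wf.getnframes()))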
def localize():
    # Returns an error that specifies which direction to move the head to
    # (the sigmoid minus 0.5, so strictly between -0.5 and 0.5)
    try:
        # Start an infinite loop that gets and analyzes audio data
        alpha = .05  # The width of the sigmoid. Gotta tweak it a little.
        # Average difference
        avgDiff = 0
        for i in range(0, 100):
            l, data = audioInput.read()
            if l > 0:
                lchan = audioop.tomono(data, 2, 1, 0)
                rchan = audioop.tomono(data, 2, 0, 1)
                lmax = audioop.max(lchan, 2)
                rmax = audioop.max(rchan, 2)
                # lowpass filter
                if lmax < 70 and rmax < 70:
                    continue
                # calculate the difference in intensity. Positive difference
                # means the source is further to the left
                diff = lmax - rmax
                avgDiff = avgDiff + diff
            time.sleep(.001)  # audio refresh rate
        avgDiff = avgDiff / 1000
        print(avgDiff)
        # Logistic sigmoid function!
        z = math.exp(-1 * alpha * avgDiff)
        error = (1 / (1 + z)) - 0.5
        return error
    except Exception as e:
        print(e)
def calculate_max_min(self):
    self.Sound.write(".temp.wav")
    S = wave.open(".temp.wav", "rb")
    Length = S.getnframes()
    BufferLen = Length // self.FragmentFactor  # readframes() needs an int
    Stream = S.readframes(BufferLen)
    while len(Stream):
        Stream = audioop.lin2lin(Stream, 2, 1)      # 16-bit -> 8-bit samples
        Stream = audioop.tomono(Stream, 1, 1, -1)   # left-minus-right difference signal
        min_val, max_val = audioop.minmax(Stream, 1)
        self.Min_List.append(min_val)
        self.Max_List.append(max_val)
        Stream = S.readframes(BufferLen)
    S.close()
    os.remove(".temp.wav")