def get_swipe(dev='/dev/audio'): audio = ossaudiodev.open(dev, 'r') audio.setparameters(ossaudiodev.AFMT_S16_LE, 1, 44100) baselines = deque([2**15] * 4) bias = 0 while 1: data, power = get_chunk(audio, bias) baseline = sum(baselines) / len(baselines) * THRESHOLD_FACTOR print power, baseline, power / (baseline or 1) chunks = [] while power > baseline: print power, baseline, power / (baseline or 1), '*' chunks.append(data) data, power = get_chunk(audio, bias) if len(chunks) > 1: data = old_data + ''.join(chunks) + data while audioop.maxpp(data[:3000], 2) < baseline / 2: data = data[1000:] while audioop.maxpp(data[-3000:], 2) < baseline / 2: data = data[:-1000] return audioop.bias(data, 2, -audioop.avg(data, 2)) old_data = data bias = -audioop.avg(data, 2) baselines.popleft() baselines.append(power)
def get_swipe(audio):
    """Block until a complete card swipe is captured from *audio*.

    Maintains a rolling noise floor; once a burst of loud chunks ends,
    trims quiet edges and returns the samples with DC bias removed.
    """
    print("READY")
    noise_floor = deque([2 ** 15] * 4)
    dc_offset = 0
    previous = b""
    while True:
        data, power = get_chunk(audio, dc_offset)
        baseline = sum(noise_floor) / len(noise_floor) * THRESHOLD_FACTOR
        logging.debug((power, baseline, power / (baseline or 1)))
        captured = []
        # Keep reading while the signal stays above the noise floor.
        while power > baseline:
            logging.debug((power, baseline, power / (baseline or 1), "*"))
            captured.append(data)
            data, power = get_chunk(audio, dc_offset)
        if len(captured) > 1:
            # Include the chunk just before the burst, then trim both ends
            # until the peak-to-peak level looks like real signal.
            data = previous + b"".join(captured) + data
            while audioop.maxpp(data[:3000], 2) < baseline / 2:
                data = data[1000:]
            while audioop.maxpp(data[-3000:], 2) < baseline / 2:
                data = data[:-1000]
            return audioop.bias(data, 2, -audioop.avg(data, 2))
        previous = data
        dc_offset = -audioop.avg(data, 2)
        noise_floor.popleft()
        noise_floor.append(power)
def transcribe():
    """Stream ~4s of mic audio (or until silence) to the Whisper websocket.

    Side effects: plays a ding, sends start/binary/stop frames over
    WS_WHISPER, then passes the transcription to command_handler().
    """
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True,
                    frames_per_buffer=CHUNK)
    print("INFO: Starting Transcription..")  # fixed typo "Transcrition"
    WS_WHISPER.send("start")
    subprocess.call(["aplay", "-q", "/home/pi/snowboy/resources/ding.wav"])
    # Listen for four seconds or until threshold is no longer breached.
    t_end = time.time() + 4
    # Fix: deque maxlen must be an int; RATE / CHUNK is a float on Python 3.
    slid_win = deque(maxlen=int(SILENCE_LIMIT * (RATE / CHUNK)))
    read = stream.read(CHUNK)
    slid_win.append(math.sqrt(abs(audioop.avg(read, 4))))
    while time.time() < t_end or sum([x > THRESHOLD for x in slid_win]) > 0:
        read = stream.read(CHUNK)
        slid_win.append(math.sqrt(abs(audioop.avg(read, 4))))
        WS_WHISPER.send_binary(read)
    # One extra packet of trailing silence greatly improves accuracy.
    read = stream.read(CHUNK)
    WS_WHISPER.send_binary(read)
    WS_WHISPER.send("stop")
    # Wait to receive the text response from Whisper.
    command_handler(WS_WHISPER.recv())
    stream.close()
    p.terminate()
def test_bias(self):
    """bias() should shift every sample by the given amount.

    Note: this test assumes that avg() works.
    """
    for width, fragment in zip((1, 2, 4), data):
        shifted = audioop.bias(fragment, width, 100)
        self.assertEqual(audioop.avg(shifted, width), 101)
def testavg(data):
    """Return 1 if audioop.avg() yields 1 for all three sample widths, else 0.

    Fixes: the removed Python-2 ``<>`` operator replaced with ``!=``;
    ``print 'avg'`` rewritten as a call so the code parses on Python 3.
    """
    if verbose:
        print('avg')
    if audioop.avg(data[0], 1) != 1 or \
       audioop.avg(data[1], 2) != 1 or \
       audioop.avg(data[2], 4) != 1:
        return 0
    return 1
def testavg(data):
    """Return 1 if audioop.avg() yields 1 for all three sample widths, else 0.

    Fix: ``print 'avg'`` was Python-2-only syntax; rewritten as a call,
    which behaves identically on both Python 2 and 3 for a single argument.
    """
    if verbose:
        print('avg')
    if audioop.avg(data[0], 1) != 1 or \
       audioop.avg(data[1], 2) != 1 or \
       audioop.avg(data[2], 4) != 1:
        return 0
    return 1
def testbias(data): # Note: this test assumes that avg() works d1 = audioop.bias(data[0], 1, 100) d2 = audioop.bias(data[1], 2, 100) d4 = audioop.bias(data[2], 4, 100) if audioop.avg(d1, 1) <> 101 or \ audioop.avg(d2, 2) <> 101 or \ audioop.avg(d4, 4) <> 101: return 0 return 1
def testbias(data):
    """Return 1 if bias()+100 raises avg() from 1 to 101 for all widths, else 0.

    Note: this test assumes that avg() works.
    Fix: ``print 'bias'`` was Python-2-only syntax; rewritten as a call.
    """
    if verbose:
        print('bias')
    d1 = audioop.bias(data[0], 1, 100)
    d2 = audioop.bias(data[1], 2, 100)
    d4 = audioop.bias(data[2], 4, 100)
    if audioop.avg(d1, 1) != 101 or \
       audioop.avg(d2, 2) != 101 or \
       audioop.avg(d4, 4) != 101:
        return 0
    return 1
def test_avg(self):
    """avg(): 0 for empty fragments, truncated mean otherwise, no overflow."""
    for w in (1, 2, 3, 4):
        for empty in (b"", bytearray(), memoryview(b"")):
            self.assertEqual(audioop.avg(empty, w), 0)
        p = packs[w]
        expectations = [
            (p(5), 5),
            (p(5, 8), 6),
            (p(5, -8), -2),
            (p(maxvalues[w], maxvalues[w]), maxvalues[w]),
            (p(minvalues[w], minvalues[w]), minvalues[w]),
        ]
        for fragment, expected in expectations:
            self.assertEqual(audioop.avg(fragment, w), expected)
    # Large 32-bit values must not overflow during summation.
    self.assertEqual(audioop.avg(packs[4](0x50000000, 0x70000000), 4),
                     0x60000000)
    self.assertEqual(audioop.avg(packs[4](-0x50000000, -0x70000000), 4),
                     -0x60000000)
def audio_int(num_samples=50):
    """ Gets average audio intensity of your mic sound. You can use it to get
        average intensities while you're talking and/or silent. The average
        is the avg of the 20% largest intensities recorded.

        Fix: guard the top-20% count so num_samples < 5 no longer raises
        ZeroDivisionError (int(num_samples * 0.2) would be 0).
    """
    print("Getting intensity values from mic.")
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    values = [
        math.sqrt(abs(audioop.avg(stream.read(CHUNK), 4)))
        for _ in range(num_samples)
    ]
    values = sorted(values, reverse=True)
    top = max(1, int(num_samples * 0.2))
    r = sum(values[:top]) / top
    print(" Finished ")
    print(" Average audio intensity is ", r)
    stream.close()
    p.terminate()
    return r
def loop(self, stream, threshold=700):
    """Read frames from *stream* until one utterance is isolated.

    Waits for the RMS-like level to cross *threshold* (speech start),
    then for Listener.NUM_POST_FRAMES quiet frames (speech end), and
    hands the frames plus the word boundaries to self.isolate().

    Generalization: the previously hard-coded threshold (700) is now a
    keyword parameter with the same default, so callers are unaffected.
    """
    is_speaking = False
    num_silent_frames = 0
    word_start_index = None
    word_end_index = None
    frames = []
    i = 0
    while True:
        frame = stream.read(Listener.CHUNK)
        frames.append(frame)
        number = math.sqrt(abs(audioop.avg(frame, 4)))
        if is_speaking:
            if number < threshold:
                # Speech is quiet.
                num_silent_frames += 1
                if num_silent_frames >= Listener.NUM_POST_FRAMES:
                    # Speech has stopped.
                    is_speaking = False
                    word_end_index = i
                    break
            else:
                # Speech is continuing.
                num_silent_frames = 0
        elif number >= threshold:
            # Speech has started.
            is_speaking = True
            word_start_index = i
        i += 1
    self.isolate(frames, word_start_index, word_end_index)
def get_idle_intensity():
    """Sample the mic and return the average of the QUIETEST 20% of chunks.

    Fix: the code sorted intensities in descending order and then averaged
    the first 20% — i.e. the LOUDEST chunks — contradicting both the
    function name and the original comments. It now sorts ascending so the
    lowest intensities come first.
    """
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    # Number of samples to be taken
    samples = 50
    # Obtains and stores frequency intensities
    values = []
    for i in range(samples):
        values.append(math.sqrt(abs(audioop.avg(stream.read(CHUNK), 4))))
    # Sorts ascending so that the lowest intensities are first
    values = sorted(values)
    # idle intensity is the average of the lowest 20% of intensities
    idle_int = sum(values[:int(samples * 0.2)]) / int(samples * 0.2)
    # closes input stream
    stream.close()
    p.terminate()
    return idle_int
def audio_int(num_samples=50):
    """ Gets average audio intensity of your mic sound. You can use it to get
        average intensities while you're talking and/or silent. The average
        is the avg of the 20% largest intensities recorded.

        Fix: the stream was read exactly once before the comprehension, so
        every one of the num_samples "samples" was the same value and the
        average was meaningless. The read now happens once per sample.
    """
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    input_device_index=2,
                    frames_per_buffer=CHUNK)
    values = [
        math.sqrt(abs(audioop.avg(stream.read(CHUNK), 4)))
        for _ in range(num_samples)
    ]
    values = sorted(values, reverse=True)
    r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
    time.sleep(.1)
    stream.close()
    p.terminate()
    return r
def setup_mic(self, num_samples=10):
    """Calibrate self.THRESHOLD from ambient mic intensity.

    Records *num_samples* chunks, averages the loudest 20% of their
    square-root intensities, and sets the threshold 100 above that.
    """
    rospy.loginfo("Getting intensity values from mic.")
    p = pyaudio.PyAudio()
    stream = p.open(format=self.FORMAT,
                    channels=self.CHANNELS,
                    rate=self.RECORD_RATE,
                    input=True,
                    frames_per_buffer=self.CHUNK)
    intensities = []
    for _ in range(num_samples):
        chunk = stream.read(self.CHUNK)
        intensities.append(math.sqrt(abs(audioop.avg(chunk, 4))))
    intensities.sort(reverse=True)
    top = int(num_samples * 0.2)
    r = sum(intensities[:top]) / top
    rospy.loginfo(" Finished ")
    rospy.loginfo(" Average audio intensity is " + str(r))
    stream.close()
    p.terminate()
    self.THRESHOLD = r + 100
    rospy.loginfo('Threshold:' + str(self.THRESHOLD))
def gerar_matriz(diretorio, nome_famoso, min_silence_len=100):
    """Build a feature matrix from a WAV file split on silence.

    Each non-silent chunk yields [name, [max amplitude, rms, audioop avg,
    dBFS, max dBFS]].

    :param diretorio: path of the WAV file to analyse
    :param nome_famoso: label attached to each row
    :param min_silence_len: minimum silence length (ms) used for splitting
    :return: list of [label, features] rows

    Fix: replaced the ``for chunk in enumerate(...): c = chunk[1]``
    anti-idiom with direct iteration (the index was never used).
    """
    silence_thresh = -30
    sound_file = AudioSegment.from_wav(diretorio)
    audio_chunks = split_on_silence(sound_file, min_silence_len,
                                    silence_thresh)
    matriz = []
    for c in audio_chunks:
        linha = [
            c.max,                                    # peak amplitude
            c.rms,                                    # mean intensity
            audioop.avg(c.raw_data, c.sample_width),  # signed average
            c.dBFS,
            c.max_dBFS,
        ]
        matriz.append([nome_famoso, linha])
    return matriz
def listen_command(stream, threshold=THRESHOLD):
    """Record one phrase from *stream* and return its chunks as a list.

    Waits until the sliding intensity window exceeds *threshold*, records
    until silence returns, and prepends PREV_AUDIO seconds of pre-roll.

    Fix: the *threshold* parameter was ignored — the body compared against
    the global THRESHOLD, so callers could never override it.
    """
    print("* Listening mic. ")
    rel = RATE / CHUNK
    slid_win = deque(maxlen=int(SILENCE_LIMIT * rel))
    prev_audio = deque(maxlen=int(PREV_AUDIO * rel))
    started = False
    result = []
    while True:
        cur_data = stream.read(CHUNK)
        slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
        if sum(x > threshold for x in slid_win) > 0:
            if not started:
                print("Starting record of phrase")
                started = True
                result = list(prev_audio)
            result.append(cur_data)
        elif started is True:
            print("Finished")
            break
        else:
            prev_audio.append(cur_data)
    print("* Done recording")
    return result
def audio_int(self, num_samples=50):
    """ Gets average audio intensity of your mic sound. You can use it to get
        average intensities while you're talking and/or silent. The average
        is the avg of the 20% largest intensities recorded.
    """
    import math
    import audioop
    print("Getting intensity values from mic.")
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=16000,
                    input=True,
                    frames_per_buffer=1024)
    readings = []
    for _ in range(num_samples):
        readings.append(math.sqrt(abs(audioop.avg(stream.read(1024), 4))))
    readings.sort(reverse=True)
    top = int(num_samples * 0.2)
    r = sum(readings[:top]) / top
    print(" Finished ")
    print(" Average audio intensity is ", r)
    stream.close()
    p.terminate()
    return r
def auto_threshold(samples=50, avgintensities=0.2, padding=100):
    """Sample the mic and compute a silence threshold.

    Averages the loudest *avgintensities* fraction of *samples* chunk
    intensities and adds *padding*.

    Fix: the result was assigned to a LOCAL named THRESHOLD and then
    thrown away — the function had no observable effect beyond the debug
    print. It now returns the computed threshold (previously it
    implicitly returned None, so callers ignoring the result still work).
    """
    if __debug__:
        print("Auto-thresholding...")
    # start a stream.
    #
    # TODO: if we are to wrap these functions in a class, maybe
    # we should just create one pyaudio stream and open it in the
    # constructor.
    p = pyaudio.PyAudio()
    stream = p.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK
    )
    # Get a number of chunks from the stream as determined by the samples
    # arg, and calculate intensity.
    intensities = [math.sqrt(abs(audioop.avg(stream.read(CHUNK), 4)))
                   for x in range(samples)]
    # sort the list from greatest to least.
    intensities = sorted(intensities, reverse=True)
    # get the first avgintensities percent values from the list.
    threshold = sum(
        intensities[:int(samples * avgintensities)]
    ) / int(samples * avgintensities) + padding
    # clean up
    stream.close()
    p.terminate()
    if __debug__:
        print("Threshold: ", threshold)
    return threshold
def setup_mic(self, num_samples=50):
    """ Gets average audio intensity of mic sound.
        The average is the avg of the .2 of the largest intensities
        recorded, with a floor of 3500.

        Fix: Python-2-only print statements rewritten as calls that
        behave the same on Python 2 and 3.
    """
    print("Getting intensity values from mic.")
    p = pyaudio.PyAudio()
    stream = p.open(format=self.FORMAT,
                    channels=self.CHANNELS,
                    rate=self.RATE,
                    input=True,
                    frames_per_buffer=self.CHUNK)
    values = [math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
              for x in range(num_samples)]
    values = sorted(values, reverse=True)
    r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
    print(" Finished ")
    print(" Average audio intensity is " + str(r))
    stream.close()
    p.terminate()
    # Enforce a minimum threshold for very quiet environments.
    if r < 3000:
        self.THRESHOLD = 3500
    else:
        self.THRESHOLD = r
def calibrate_threshold(stream):
    """Re-calibrate the global THRESHOLD from 5 seconds of mic input.

    Args:
        stream: an open audio input stream with a ``read(chunk)`` method.

    Fixes: Python-2-only ``print`` statements and ``xrange`` replaced with
    portable equivalents (same behavior on Python 2).
    """
    global THRESHOLD
    print('* re-calibrating threshold (5s) *')
    old = THRESHOLD
    chunk = stream_config['frames_per_buffer']
    rate = stream_config['rate']
    frame_avgs = []
    for _ in range(0, int(rate / chunk * 5)):
        data = stream.read(chunk)
        frame_avgs.append(math.sqrt(abs(audioop.avg(data, 4))))
    avg = None
    if frame_avgs:
        avg = sum(frame_avgs) / len(frame_avgs)
    if not avg:
        print('[unable to re-calibrate - threshold remaining at {}]'.format(
            old))
    else:
        THRESHOLD = avg + THRESHOLD_OFFSET
        print('[successfully re-calibrated threshold (was {}, now is {})]'.format(
            old, THRESHOLD))
def setup_mic(self, num_samples=50):
    """Find an input device with the right channel count and measure the
    average of the loudest 20% of chunk intensities.

    Raises:
        Exception: when no input device matches self.CHANNELS.

    Fixes: removed the unused ``name`` local; Python-2-only print
    statements rewritten as portable calls.
    """
    print("Getting intensity values from mic.")
    device_index = None
    for i in range(self.pyaudio_instance.get_device_count()):
        dev = self.pyaudio_instance.get_device_info_by_index(i)
        if dev['maxInputChannels'] == self.CHANNELS:
            device_index = i
            break
    if device_index is None:
        raise Exception(
            'can not find input device with {} channel(s)'.format(
                self.CHANNELS))
    p = pyaudio.PyAudio()
    stream = p.open(
        input=True,
        format=self.FORMAT,
        channels=self.CHANNELS,
        rate=self.RATE,
        frames_per_buffer=self.CHUNK,
        input_device_index=device_index,
    )
    values = [
        math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
        for x in range(num_samples)
    ]
    values = sorted(values, reverse=True)
    r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
    print(" Finished getting intensity values from mic")
    stream.close()
    p.terminate()
    return r
def background_thread():
    """Monitor mic loudness and emit on/off animation events over socketio.

    Fixes:
    - ``status`` was reset to None on EVERY loop iteration, so the
      edge-detection / MIN_ANIMATION debounce never worked; it is now
      initialized once before the loop.
    - deque maxlen must be an int (SILENCE_LIMIT * RATE / CHUNK is a
      float on Python 3).
    """
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    rel = RATE / CHUNK
    slid_win = deque(maxlen=int(SILENCE_LIMIT * rel))
    avg_count = 4
    ts = time.time()
    status = None  # last emitted state: 1 = loud, 0 = quiet
    while True:
        cur_data = stream.read(CHUNK)
        slid_win.append(math.sqrt(abs(audioop.avg(cur_data, avg_count))))
        s = sum([round(x) for x in slid_win])
        nstatus = 1 if s > THRESHOLD else 0
        print(round(s))
        # Emit on a state change; quiet transitions are rate-limited so an
        # animation plays for at least MIN_ANIMATION seconds.
        if nstatus != status and (nstatus == 1
                                  or time.time() - ts >= MIN_ANIMATION):
            status = nstatus
            ts = time.time()
            socketio.emit('response', {
                'data': status == 1,
                'level': s,
                'count': status
            }, namespace='/skelly')
    stream.close()  # unreachable; kept for symmetry if the loop gains a break
    p.terminate()
def trimAllSilence(filename):
    """Drop quiet chunks from a WAV file and write ``<name>_trimmed.wav``.

    A 300-frame chunk is kept only when its audioop.avg (width 4 =
    2 channels x 16-bit) exceeds 160000.

    Fixes:
    - the first chunk read was discarded unexamined (it was immediately
      overwritten at the top of the loop); every chunk is now tested.
    - removed the unused ``silence`` local; close the input wave file
      and terminate the PyAudio instance.
    """
    p = pyaudio.PyAudio()
    wf = wave.open(filename, 'rb')
    chunk = 300
    frames = []
    data = wf.readframes(chunk)
    while len(data) > 0:
        # adapted from https://docs.python.org/2/library/audioop.html#audioop.avg
        if audioop.avg(data, 4) > 160000:
            frames.append(data)
        data = wf.readframes(chunk)
    wf.close()
    outfile = wave.open(filename[:-4] + '_trimmed.wav', 'wb')
    outfile.setnchannels(2)
    outfile.setsampwidth(p.get_sample_size(pyaudio.paInt16))
    outfile.setframerate(44100)
    outfile.writeframes(b''.join(frames))
    outfile.close()
    p.terminate()
    return filename[:-4] + '_trimmed.wav'
def audio_int(num_samples=50):
    """ Gets average audio intensity of your mic sound. You can use it to get
        average intensities while you're talking and/or silent. The average
        is the avg of the 20% largest intensities recorded.

        Fix: Python-2-only print statements rewritten as portable calls.
    """
    print("Getting intensity values from mic.")
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input_device_index=2,
                    input=True,
                    frames_per_buffer=CHUNK)
    values = [math.sqrt(abs(audioop.avg(stream.read(CHUNK), 4)))
              for x in range(num_samples)]
    values = sorted(values, reverse=True)
    r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
    print(" Finished ")
    print(" Average audio intensity is " + str(r))
    stream.close()
    p.terminate()
    return r
def setup_mic(self, num_samples=50):
    """Calibrate self.THRESHOLD from the mic's ambient loudness.

    Averages the loudest 20% of chunk intensities; quiet rooms get a
    fixed floor of 3500, otherwise the measurement plus 100.
    """
    print("Getting intensity values from mic.")
    p = pyaudio.PyAudio()
    stream = p.open(format=self.FORMAT,
                    channels=self.CHANNELS,
                    rate=self.RATE,
                    input=True,
                    frames_per_buffer=self.CHUNK)
    readings = []
    for _ in range(num_samples):
        chunk = stream.read(self.CHUNK)
        readings.append(math.sqrt(abs(audioop.avg(chunk, 4))))
    readings.sort(reverse=True)
    top = int(num_samples * 0.2)
    r = sum(readings[:top]) / top
    print(" Finished ")
    print(" Average audio intensity is ", r)
    stream.close()
    p.terminate()
    self.THRESHOLD = 3500 if r < 3000 else r + 100
def listen_for_command(self, send=True):
    """Consume chunks from self.frames until one command is captured.

    While the sliding window stays above self.threshold, chunks are
    accumulated; when it falls back to silence the pre-roll plus the
    command is joined. If *send* is true the message is framed and sent
    via self._send, otherwise the raw bytes are returned.

    Fix: the first deque's maxlen lacked the int() conversion that the
    second one has — a TypeError if self.rel * self.silence_limit is a
    float (deque maxlen must be an int).
    """
    audio2send = []
    check_thresh = collections.deque(
        maxlen=int(self.rel * self.silence_limit))
    prev_audio = collections.deque(
        maxlen=int(self.rel * self.prev_audio_limit))
    started = False
    while True:
        # NOTE(review): busy-wait on the producer thread; burns CPU —
        # a blocking queue would be preferable, kept as-is for behavior.
        while len(self.frames) == 0:
            pass
        current_audio = self.frames.pop()
        check_thresh.append(np.sqrt(np.abs(audioop.avg(current_audio, 4))))
        val = sum([i > self.threshold for i in check_thresh])
        print(val, end='\r')
        if val > 0:
            audio2send.append(current_audio)
            started = True
        elif started:
            msg = b"".join(list(prev_audio) + audio2send)
            if send:
                msg = b"Command " + msg
                self._send(msg)
                self._send(b"End")
                break
            else:
                return msg
        else:
            prev_audio.append(current_audio)
def auto_threshold(self, samples=100, avgintensities=0.2, padding=10):
    """Set self.THRESHOLD from the loudest *avgintensities* fraction of
    *samples* mic chunk intensities, plus *padding*.

    Fix: removed a leftover ``print(intensities)`` debug statement (it
    dumped the full 100-element list to stdout on every calibration) and
    a commented-out dead line; logging.debug remains the reporting path.
    """
    logging.debug("Auto-thresholding...")
    stream = self.p.open(format=self.FORMAT,
                         channels=self.CHANNELS,
                         rate=self.RATE,
                         input=True,
                         frames_per_buffer=self.CHUNK)
    # Get a number of chunks from the stream as determined by the samples
    # arg, and calculate intensity.
    intensities = [
        math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
        for x in range(samples)
    ]
    # sort the list from greatest to least.
    intensities = sorted(intensities, reverse=True)
    # get the first avgintensities percent values from the list.
    self.THRESHOLD = sum(intensities[:int(samples * avgintensities)]
                         ) / int(samples * avgintensities) + padding
    # clean up
    stream.close()
    logging.debug("Threshold: {}".format(self.THRESHOLD))
def setup_mic(self, num_samples=50):
    """ Gets average audio intensity of your mic sound. You can use it to get
        average intensities while you're talking and/or silent. The average
        is the avg of the .2 of the largest intensities recorded.

        FOR COMPUTER MIC: MIN threshold is around 3000.
        FOR USB MIC: MIN threshold is around 350.

        Fixes: Python-2-only print statements rewritten as portable
        calls; the redundant bare ``print(r)`` (r was printed three
        times) removed.
    """
    print("Getting intensity values from mic.")
    p = pyaudio.PyAudio()
    stream = p.open(format=self.FORMAT,
                    channels=self.CHANNELS,
                    rate=self.RATE,
                    input=True,
                    frames_per_buffer=self.CHUNK)
    values = [math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
              for x in range(num_samples)]
    values = sorted(values, reverse=True)
    r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
    print(" Finished ")
    print(" Average audio intensity is " + str(r))
    stream.close()
    p.terminate()
    print("THRESHOLD IS: " + str(r))
    # Enforce the USB-mic minimum threshold.
    if r < 350:
        self.THRESHOLD = 350
    else:
        self.THRESHOLD = r + 100
def silence_analysys(self):
    """Consume mic chunks from self.audioQueue, detect phrases, and stream
    each phrase to a remote host over a connection.Client.

    Runs until a None sentinel is read from the queue (posted by the main
    thread on keyboard interrupt). Optionally saves each phrase locally
    when self.save is set.

    NOTE(review): reconstructed from collapsed source — the placement of
    the per-chunk send/save relative to the "not started" branch follows
    the streaming intent (send every loud chunk); confirm against the
    original repository.
    """
    self.log.info("* Listening mic. ")
    cur_data = ''  # current chunk of audio data
    rel = self.RATE / self.CHUNK
    slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
    # Prepend audio from self.PREV_AUDIO secs before noise was detected
    prev_audio = deque(maxlen=self.PREV_AUDIO * rel)
    started = False
    audio2send = []
    while True:
        cur_data = self.audioQueue.get()
        # Checks for keyboard interruption on main thread (None sentinel)
        if cur_data == None:
            break
        slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
        # Mic is not silent
        if (sum([x > self.THRESHOLD for x in slid_win]) > 0):
            # New recording has started
            if (not started):
                self.log.info("Starting record of phrase")
                # VM's IP and port
                self.s = connection.Client('192.168.0.98', 5007)
                self.s.connect()
                started = True
                # Flush the pre-roll buffer to the remote end first.
                self.s.send_message(''.join(prev_audio))
            self.s.send_message(cur_data)
            if self.save:
                audio2send.append(cur_data)
        # Recording was happening and mic became silent
        elif (started is True):
            # Save time of command issuing
            inittime = timeit.default_timer()
            self.timeQueue.put(inittime)
            self.log.info("Finished")
            self.s.destroy()
            if self.save:
                self.save_speech(list(prev_audio) + audio2send)
            audio2send = []
            # Reset all
            started = False
            slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
            prev_audio = deque(maxlen=0.5 * rel)
            self.log.info("Listening ...")
        # Mic is silent
        else:
            prev_audio.append(cur_data)
def pre_processing_function(self):
    """Pull raw chunks from self.pre_processing_q, segment them into
    phrases by silence detection, and push each complete phrase
    (pre-roll + speech) onto self.processing_q.

    Runs forever; state is reset after each delivered phrase.

    NOTE(review): unlike the sibling implementations, prev_audio is an
    unbounded string accumulator (no deque maxlen), so memory grows
    while the mic stays silent — confirm this is intended.
    """
    cur_data = ''
    rel = self.RATE / self.CHUNK
    slid_win = deque(maxlen=(self.SILENCE_LIMIT * rel))
    audio2send = ''
    #slid_win.append(self.THRESHOLD)
    prev_audio = ''
    started = False
    while True:
        cur_data = self.pre_processing_q.get()
        slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
        #print ('s-'+str(slid_win[-1]))
        # Any window entry above threshold means speech is present.
        if (sum([x > self.THRESHOLD for x in slid_win]) > 0):
            if (not started):
                print("Starting record of phrase")
                started = True
            audio2send += cur_data
        # Window fully quiet again after a phrase: deliver and reset.
        elif (started is True):
            print("Finished")
            self.processing_q.put(prev_audio + audio2send)
            started = False
            slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
            prev_audio = ''
            audio2send = ''
        # Still silent: keep accumulating pre-roll.
        else:
            prev_audio += cur_data
def audio_int(self, num_samples=25, offset=1000):
    """Measure ambient mic loudness and set self.THRESHOLD.

    The measurement is the mean of the loudest 20% of *num_samples*
    chunk intensities; the threshold is that value plus *offset*.
    Returns the raw measurement.
    """
    self.log.info("Getting intensity values from mic.")
    p = pyaudio.PyAudio()
    self.log.info("RATE == " + str(self.RATE))
    stream = p.open(format=self.FORMAT,
                    channels=self.CHANNELS,
                    rate=self.RATE,
                    input=True,
                    frames_per_buffer=self.CHUNK)
    readings = []
    for _ in range(num_samples):
        chunk = stream.read(self.CHUNK)
        readings.append(math.sqrt(abs(audioop.avg(chunk, 4))))
    readings.sort(reverse=True)
    top = int(num_samples * 0.2)
    r = sum(readings[:top]) / top
    self.log.info(" Finished ")
    self.log.info(" Average audio intensity is " + str(r))
    stream.close()
    p.terminate()
    self.THRESHOLD = r + offset
    return r
def setup_mic(self, num_samples=10):
    """Calibrate self.THRESHOLD from ambient mic loudness.

    Takes *num_samples* chunks, averages the loudest 20% of their
    intensities, and sets the threshold 100 above that average.
    """
    loginfo("Getting intensity values from mic.")
    p = pyaudio.PyAudio()
    stream = p.open(format=self.FORMAT,
                    channels=self.CHANNELS,
                    rate=self.RECORD_RATE,
                    input=True,
                    frames_per_buffer=self.CHUNK,)
    readings = []
    for _ in range(num_samples):
        chunk = stream.read(self.CHUNK)
        readings.append(math.sqrt(abs(audioop.avg(chunk, 4))))
    readings.sort(reverse=True)
    top = int(num_samples * 0.2)
    r = sum(readings[:top]) / top
    loginfo(" Finished ")
    loginfo(" Average audio intensity is " + str(r))
    stream.close()
    p.terminate()
    self.THRESHOLD = r + 100
    loginfo('Threshold:' + str(self.THRESHOLD))
def listen_for_speech(path, threshold=THRESHOLD, num_phrases=-1):
    """ Listens to Microphone, extracts phrases from it and saves them.
        A "phrase" is sound surrounded by silence (according to threshold).
        num_phrases controls how many phrases to process before finishing
        the listening process (-1 for infinite).

        Fixes: the *threshold* parameter was ignored (the global
        THRESHOLD was compared instead); Python-2-only print statements
        rewritten as portable calls.
    """
    # Open stream
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    print("* Listening mic. ")
    audio2send = []
    rel = RATE / CHUNK
    slid_win = deque(maxlen=SILENCE_LIMIT * rel)
    # Prepend audio from 0.5 seconds before noise was detected
    prev_audio = deque(maxlen=PREV_AUDIO * rel)
    started = False
    n = num_phrases
    response = []
    while (num_phrases == -1 or n > 0):
        cur_data = stream.read(CHUNK)
        slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
        if sum(x > threshold for x in slid_win) > 0:
            if not started:
                print("Starting record of phrase")
                started = True
            audio2send.append(cur_data)
        elif started is True:
            print("Finished")
            # The limit was reached, finish capture and deliver.
            filename = save_speech(path, list(prev_audio) + audio2send, p)
            print("Saved to " + filename)
            # Reset all
            started = False
            slid_win = deque(maxlen=SILENCE_LIMIT * rel)
            prev_audio = deque(maxlen=0.5 * rel)
            audio2send = []
            n -= 1
            print("Listening ...")
        else:
            prev_audio.append(cur_data)
    print("* Done recording")
    stream.close()
    p.terminate()
    return response
def run(self):
    """ Listens to Microphone, extracts phrases from it and calls
        pocketsphinx to decode the sound (Python 2 code).

        Loops forever: records a phrase (speech surrounded by silence),
        saves it to a temp WAV, decodes it, prints the result, then
        resets for the next phrase.
    """
    self.setup_mic()
    # Open stream
    p = pyaudio.PyAudio()
    stream = p.open(format=self.FORMAT,
                    channels=self.CHANNELS,
                    rate=self.RATE,
                    input=True,
                    frames_per_buffer=self.CHUNK)
    print "* Mic set up and listening. "
    audio2send = []
    cur_data = ''  # current chunk of audio data
    rel = self.RATE / self.CHUNK
    # Sliding window of recent chunk intensities for silence detection.
    slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
    # Prepend audio from 0.5 seconds before noise was detected
    prev_audio = deque(maxlen=self.PREV_AUDIO * rel)
    started = False
    while True:
        cur_data = stream.read(self.CHUNK)
        slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
        # Any window entry above threshold means speech is present.
        if sum([x > self.THRESHOLD for x in slid_win]) > 0:
            if started == False:
                print "Starting recording of phrase"
                started = True
            audio2send.append(cur_data)
        elif started:
            print "Finished recording, decoding phrase"
            filename = self.save_speech(list(prev_audio) + audio2send, p)
            r = self.decode_phrase(filename)
            # dj
            #with open("./output.txt", 'a+') as f:
            #    rr = ' '.join(r)
            #    f.write(rr+'\n')
            print "DETECTED: ", r
            # Removes temp audio file
            os.remove(filename)
            # Reset all
            started = False
            slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
            prev_audio = deque(maxlen=0.5 * rel)
            audio2send = []
            print "Listening ..."
        else:
            prev_audio.append(cur_data)
    # NOTE: unreachable — the while True loop above never breaks.
    print "* Done listening"
    stream.close()
    p.terminate()
def test_avg(self):
    """avg(): empty fragments yield 0; otherwise the truncated mean;
    extreme 32-bit values must not overflow during summation."""
    for w in 1, 2, 3, 4:
        # Empty input in every accepted buffer type.
        self.assertEqual(audioop.avg(b'', w), 0)
        self.assertEqual(audioop.avg(bytearray(), w), 0)
        self.assertEqual(audioop.avg(memoryview(b''), w), 0)
        p = packs[w]
        self.assertEqual(audioop.avg(p(5), w), 5)
        self.assertEqual(audioop.avg(p(5, 8), w), 6)
        # Negative mean truncates toward negative infinity: (5-8)//2 == -2.
        self.assertEqual(audioop.avg(p(5, -8), w), -2)
        self.assertEqual(audioop.avg(p(maxvalues[w], maxvalues[w]), w),
                         maxvalues[w])
        self.assertEqual(audioop.avg(p(minvalues[w], minvalues[w]), w),
                         minvalues[w])
    # Large magnitudes: the intermediate sum must not overflow.
    self.assertEqual(audioop.avg(packs[4](0x50000000, 0x70000000), 4),
                     0x60000000)
    self.assertEqual(audioop.avg(packs[4](-0x50000000, -0x70000000), 4),
                     -0x60000000)
def avg(self):
    """
    Return the average of all the frames.

    @return the average
    """
    result = audioop.avg(self.frames, self.sampwidth)
    return result
def drop_first_last(grades):
    """Average the grades after dropping the first and last entries.

    :param grades: sequence of at least three numeric grades
    :return: mean of the middle grades
    :raises ValueError: if fewer than three grades are given (unpacking)

    Fixes: the body called an undefined helper as ``avg(*middle)``; the
    mean is now computed inline. The original (Chinese) docstring claimed
    the highest and lowest grades are dropped, but the code drops the
    FIRST and LAST positions — the docstring now matches the code.
    """
    first, *middle, last = grades
    return sum(middle) / len(middle)
def listen_for_speech(threshold=THRESHOLD, num_phrases=1):
    """ Listens to Microphone, extracts phrases from it and sends it to
        Google's TTS service and returns response. a "phrase" is sound
        surrounded by silence (according to threshold). num_phrases controls
        how many phrases to process before finishing the listening process
        (-1 for infinite).

        Fixes: the *threshold* parameter was ignored (global THRESHOLD
        compared instead); ``r`` could be unbound (NameError) when
        returned if no phrase completed; removed the unused ``response``
        local.
    """
    # Open stream
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    print("* Listening mic. ")
    audio2send = []
    rel = int(RATE / CHUNK)
    slid_win = deque(maxlen=SILENCE_LIMIT * rel)
    # Prepend audio from 0.5 seconds before noise was detected
    prev_audio = deque(maxlen=int(PREV_AUDIO * rel))
    started = False
    n = num_phrases
    r = None  # last recognition result; None if no phrase was captured
    while (num_phrases == -1 or n > 0):
        cur_data = stream.read(CHUNK)
        slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
        if sum(x > threshold for x in slid_win) > 0:
            if not started:
                print("Starting record of phrase")
                started = True
            audio2send.append(cur_data)
        elif started is True:
            print("Finished")
            # The limit was reached, finish capture and deliver.
            filename = save_speech(list(prev_audio) + audio2send, p)
            # Send file to Google and get response
            r = stt_google_wav(filename)
            # Reset all
            started = False
            slid_win = deque(maxlen=SILENCE_LIMIT * rel)
            prev_audio = deque(maxlen=int(0.5 * rel))
            audio2send = []
            n -= 1
        else:
            prev_audio.append(cur_data)
    print("* Done recording")
    stream.close()
    p.terminate()
    return r
def listen_for_speech():
    """ Records sound from microphone until silence is found and saves it
        as WAV, then returns 0.

        Fixes: SILENCE_LIMIT was assigned twice with the same value (the
        duplicate is removed); the trailing cleanup after the infinite
        loop was unreachable (the loop returns internally) and is gone;
        Python-2-only print statements rewritten as portable calls.
    """
    # config
    chunk = 512
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 48000
    THRESHOLD = 180    # intensity below this counts as silence
    SILENCE_LIMIT = 2  # seconds of pure silence that end the recording
    # open stream
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=chunk)
    print("* listening. CTRL+C to finish.")
    all_m = []
    rel = RATE / chunk
    slid_win = deque(maxlen=SILENCE_LIMIT * rel)
    started = False
    os.system('mplayer -ao alsa:device=hw=0.0 /home/pi/miri/sounds/miri_start.mp3 &')
    while True:
        data = stream.read(chunk)
        slid_win.append(abs(audioop.avg(data, 2)))
        if True in [x > THRESHOLD for x in slid_win]:
            if not started:
                print("starting record")
                started = True
            all_m.append(data)
        elif started == True:
            print("finished")
            # the limit was reached, finish capture and deliver
            filename = save_speech(all_m, p)
            stream.close()
            p.terminate()
            return 0
def listen_for_speech(): """ Does speech recognition using Google's speech recognition service. Records sound from microphone until silence is found and save it as WAV and then converts it to FLAC. Finally, the file is sent to Google and the result is returned. """ #config chunk = 1024 rate = 16000 threshold = 180 #The threshold intensity that defines silence signal silence_limit = 3 #Silence limit in seconds which stop the recording ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p) c_error_handler = ERROR_HANDLER_FUNC(py_error_handler) asound = cdll.LoadLibrary('/usr/lib32/libasound.so.2') asound.snd_lib_error_set_handler(c_error_handler) #open stream pya = pyaudio.PyAudio() stream = pya.open(format=pyaudio.paInt16, channels=1, rate=rate, input=True, frames_per_buffer=chunk) #print("* listening. CTRL+C to finish.") all_m = [] data = '' rel = int(rate/chunk) slid_win = deque(maxlen=silence_limit*rel) started = False while True: data = stream.read(chunk) slid_win.append(abs(audioop.avg(data, 2))) if True in [x > threshold for x in slid_win]: if not started: print("starting record") started = True all_m.append(data) elif started: print("finished") #the limit was reached, finish capture and deliver filename = save_speech(all_m, pya) stt_google_wav(filename) #reset all started = False slid_win = deque(maxlen=silence_limit*rel) all_m = [] print("listening ...") #print("* done recording") stream.close() pya.terminate()
def listen_for_speech(): """ Does speech recognition using Google's speech recognition service. Records sound from microphone until silence is found and save it as WAV and then converts it to FLAC. Finally, the file is sent to Google and the result is returned. """ #config chunk = 1024 FORMAT = pyaudio.paInt16 CHANNELS = 1 RATE = 16000 THRESHOLD = 250 #The threshold intensity that defines silence signal (lower than). SILENCE_LIMIT = 3 #Silence limit in seconds. The max ammount of seconds where only silence is recorded. When this time passes the recording finishes and the file is delivered. #open stream p = pyaudio.PyAudio() stream = p.open(format = FORMAT, channels = CHANNELS, rate = RATE, input = True, frames_per_buffer = chunk) all_m = [] data = '' rel = RATE/chunk slid_win = deque(maxlen=SILENCE_LIMIT*rel) started = False print "listening ..." while (True): data = stream.read(chunk) slid_win.append (abs(audioop.avg(data, 2))) # print slid_win[-1] # print last sample if(True in [ x>THRESHOLD for x in slid_win]): if(not started): print "start recording ..." started = True all_m.append(data) elif (started==True): print "... finish recording" #the timeout limit was reached, finish capture and deliver filename = save_speech(all_m,p) stt_google_wav(filename) #reset all started = False slid_win = deque(maxlen=SILENCE_LIMIT*rel) all_m= [] print "listening ..." print "* done recording" stream.close() p.terminate()
def listen_for_speech():
    """Record mic audio until silence and send it to Google STT.

    Installs a no-op ALSA error handler to silence stderr noise, then
    records CHUNK-sized blocks; once any block in the sliding window
    exceeds THRESHOLD, recording starts, and when the window has been
    silent for SILENCE_LIMIT seconds the take is saved via save_speech()
    and transcribed via stt_google_wav().
    """
    #config
    chunk = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    THRESHOLD = 180    # intensity below which a block counts as silence
    SILENCE_LIMIT = 3  # seconds of continuous silence that end a take

    # Silence ALSA's chatty stderr output.
    # NOTE(review): hard-coded 32-bit library path — confirm on target system.
    ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)
    c_error_handler = ERROR_HANDLER_FUNC(py_error_handler)
    asound = cdll.LoadLibrary('/usr/lib32/libasound.so.2')
    asound.snd_lib_error_set_handler(c_error_handler)

    #open stream
    p = pyaudio.PyAudio()
    stream = p.open(format = FORMAT,
                    channels = CHANNELS,
                    rate = RATE,
                    input = True,
                    frames_per_buffer = chunk)
    #print("* listening. CTRL+C to finish.")
    all_m = []
    # BUG FIX: removed a redundant second `SILENCE_LIMIT = 3` assignment and
    # an unused `data = ''` initialization.
    rel = int(RATE/chunk)  # blocks per second
    slid_win = deque(maxlen=SILENCE_LIMIT*rel)
    started = False
    while True:
        data = stream.read(chunk)
        slid_win.append(abs(audioop.avg(data, 2)))
        if any(x > THRESHOLD for x in slid_win):
            if not started:
                print("starting record")
                started = True
            all_m.append(data)
        elif started:
            print("finished")
            #the limit was reached, finish capture and deliver
            filename = save_speech(all_m,p)
            stt_google_wav(filename)
            #reset all
            started = False
            slid_win = deque(maxlen=SILENCE_LIMIT*rel)
            all_m= []
            print("listening ...")
    # NOTE(review): unreachable — the loop above never breaks.
    #print("* done recording")
    stream.close()
    p.terminate()
def listen_for_speech(): """ Does speech recognition using Google's speech recognition service. Records sound from microphone until silence is found and save it as WAV and then converts it to FLAC. Finally, the file is sent to Google and the result is returned. """ # Open stream audio = pyaudio.PyAudio() stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK) print "* listening. CTRL+C to finish." samples = [] chunks_per_second = RATE / CHUNK # 2s buffer for checking sound is louder than threshold silence_buffer = deque(maxlen=SILENCE_LIMIT * chunks_per_second) # Buffer used to append data before detection samples_buffer = deque(maxlen=SILENCE_LIMIT * RATE) started = False while (True): data = stream.read(CHUNK) silence_buffer.append(abs(audioop.avg(data, 2))) samples_buffer.extend(data) if (True in [x > THRESHOLD for x in silence_buffer]): if (not started): print "starting record" started = True samples.extend(samples_buffer) samples_buffer.clear() else: samples.extend(data) elif (started == True): print "finished" # The limit was reached, finish capture and deliver stream.stop_stream() submit_samples(samples, audio) # Reset all stream.start_stream() started = False silence_buffer.clear() samples = [] print "done" break print "* done recording" stream.close() audio.terminate()
def run(self): """ Listens to Microphone, extracts phrases from it and calls pocketsphinx to decode the sound """ self.setup_mic() #Open stream p = pyaudio.PyAudio() stream = p.open(format=self.FORMAT, channels=self.CHANNELS, rate=self.RATE, input=True, frames_per_buffer=self.CHUNK) print "* Mic set up and listening. " audio2send = [] cur_data = '' # current chunk of audio data rel = self.RATE/self.CHUNK slid_win = deque(maxlen=self.SILENCE_LIMIT * rel) #Prepend audio from 0.5 seconds before noise was detected prev_audio = deque(maxlen=self.PREV_AUDIO * rel) started = False while True: cur_data = stream.read(self.CHUNK) slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4)))) if sum([x > self.THRESHOLD for x in slid_win]) > 0: if started == False: print "Starting recording of phrase" started = True audio2send.append(cur_data) elif started: print "Finished recording, decoding phrase" filename = self.save_speech(list(prev_audio) + audio2send, p) r = self.decode_phrase(filename) print "DETECTED: ", r # Removes temp audio file os.remove(filename) # Reset all started = False slid_win = deque(maxlen=self.SILENCE_LIMIT * rel) prev_audio = deque(maxlen=0.5 * rel) audio2send = [] print "Listening ..." else: prev_audio.append(cur_data) print "* Done listening" stream.close() p.terminate()
def avg(fragment, sampwidth):
    """Compute the mean of all samples in a fragment.

    @param fragment (string) input frames.
    @param sampwidth (int) sample width of the frames.
    @return the average
    """
    mean_value = audioop.avg(fragment, sampwidth)
    return mean_value
def listen_for_speech(threshold=THRESHOLD, num_phrases=-1): #Open stream p = pyaudio.PyAudio() stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK) print "* Listening mic. " audio2send = [] cur_data = '' # current chunk of audio data rel = RATE/CHUNK slid_win = deque(maxlen=SILENCE_LIMIT * rel) #Prepend audio from 0.5 seconds before noise was detected prev_audio = deque(maxlen=PREV_AUDIO * rel) started = False n = num_phrases response = [] while (num_phrases == -1 or n > 0): cur_data = stream.read(CHUNK) slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4)))) #print slid_win[-1] if(sum([x > THRESHOLD for x in slid_win]) > 0): if(not started): print "Starting record of phrase" started = True audio2send.append(cur_data) elif (started is True): print "Finished" # The limit was reached, finish capture and deliver. filename = save_speech(list(prev_audio) + audio2send, p) if num_phrases == -1: print "Uploading" #, r # Reset all started = False slid_win = deque(maxlen=SILENCE_LIMIT * rel) prev_audio = deque(maxlen=0.5 * rel) audio2send = [] n -= 1 print "Listening ..." else: prev_audio.append(cur_data) print "* Done recording" stream.close() p.terminate() return response
def listen(self, iterations):
    """Listen for speech windows and save each one to a file.

    Runs for *iterations* recording windows (-1 = run forever).  A window
    starts when a sample in the sliding buffer exceeds the adaptive
    threshold and ends after silence_limit seconds below it; the window,
    prefixed with prev_audio_time seconds of pre-roll, is written out via
    save_speech_window() and then deleted after processing.

    :param iterations: number of windows to capture; -1 loops forever.
    :return: the (currently never-populated) response list.
    """
    #Get a handle on pyaudio
    pyaudio_handle = pyaudio.PyAudio()
    stream = pyaudio_handle.open(format=self.audio_format,
                                 channels=self.audio_channels,
                                 rate=self.audio_rate,
                                 input=True,
                                 frames_per_buffer=self.audio_chunk_size)
    audio_data_out = []
    curr_chunk = ''
    # Chunks per second; int() keeps the deque maxlens integral even under
    # Python 3 true division or fractional config values.
    window_size = int(self.audio_rate / self.audio_chunk_size)
    audio_window = deque(maxlen = int(self.silence_limit * window_size))
    prev_audio = deque(maxlen = int(self.prev_audio_time * window_size))
    num = iterations
    response = []
    recording = False
    while (iterations == -1 or num > 0):
        curr_chunk = stream.read(self.audio_chunk_size)
        sample = math.sqrt(abs(audioop.avg(curr_chunk, 4)))
        audio_window.append(sample)
        # Adapt the silence threshold to the ambient level.
        self.update_threshold(sample)
        if (sum( [x > self.threshold for x in audio_window]) > 0):
            if (not recording):
                self.logger.logDebug("Starting recording window")
                recording = True
            audio_data_out.append(curr_chunk)
        elif (recording):
            self.logger.logDebug("Finished recording window")
            filename = self.save_speech_window(list(prev_audio) + audio_data_out, pyaudio_handle)
            self.logger.logDebug("saved speech file: " + filename)
            #Perform STT operation here
            # Reset before going into the next recording window
            os.remove(filename)
            recording = False
            audio_window = deque(maxlen = int(self.silence_limit * window_size))
            prev_audio = deque(maxlen = int(self.prev_audio_time * window_size))
            audio_data_out = []
            num = num - 1
        else:
            prev_audio.append(curr_chunk)
    # BUG FIX: the stream and PyAudio handle were leaked on exit, and the
    # `response` list was built but never returned.
    stream.close()
    pyaudio_handle.terminate()
    return response
def mix_signals(audio_data, bit_width):
    # Mix several audio byte-streams into one list of per-index averages.
    # NOTE(review): assumes Python 2 byte strings — data[index] yields a
    # 1-char str here; under Python 3 indexing bytes returns int and the
    # concatenation below would raise TypeError. Confirm interpreter.
    _data = []
    # Iterate up to the length of the longest input stream.
    size = max(len(data) for data in audio_data)
    for index in range(size):
        samples = ''
        for data in audio_data:
            try:
                samples += data[index]
            except IndexError:
                # Shorter streams simply contribute nothing at this index.
                continue
        # Average the collected bytes as bit_width-wide samples.
        # NOTE(review): each stream contributes one byte per index, so for
        # bit_width > 1 this mixes half-samples — verify this is intended.
        _data.append(audioop.avg(samples, bit_width))
    return _data
def record(threshold=THRESHOLD, silence=SILENCE_LIMIT):
    """ Listens to Microphone, records voice until phrase ends. A "phrase"
    is sound surrounded by silence (according to threshold).

    :param int threshold: Intensity value that defines silence. Lower than
                          threshold is silence.
    :param silence: Max ammount of seconds where only silence is recorded.
                    When this time passes the recording finishes.
    """
    # Open stream
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    print ("* Listening mic. ")
    frames = []
    cur_data = ''
    # Chunks per second; floor division keeps the deque maxlens integral on
    # Python 3 (plain / would yield a float, an invalid maxlen).
    rel = RATE // CHUNK
    window = deque(maxlen=silence * rel)
    prev_audio = deque(maxlen=int(PREV_AUDIO * rel))
    start = False
    exit_loop = 0
    while (exit_loop != 1):
        cur_data = stream.read(CHUNK)
        window.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
        # BUG FIX: the comparison used the global THRESHOLD, silently
        # ignoring the `threshold` parameter.
        if(sum([x > threshold for x in window]) > 0):
            if(not start):
                print ("recording..")
                start = True
            frames.append(cur_data)
        elif start is True:
            print ("Finished")
            save_audio(list(prev_audio) + frames, p)
            start = False
            window = deque(maxlen=silence * rel)
            # BUG FIX: the reset used a hard-coded float 0.5 * rel — both
            # inconsistent with PREV_AUDIO and an invalid non-int maxlen.
            prev_audio = deque(maxlen=int(PREV_AUDIO * rel))
            frames = []
            exit_loop = 1
        else:
            prev_audio.append(cur_data)
    print ("Done recording")
    stream.close()
    p.terminate()
def test_avg(self):
    """audioop.avg: empty input, exact and truncating means, and extremes."""
    for width in (1, 2, 4):
        # An empty fragment averages to zero.
        self.assertEqual(audioop.avg(b'', width), 0)
        pack = packs[width]
        self.assertEqual(audioop.avg(pack(5), width), 5)
        # Non-exact means truncate toward negative infinity.
        self.assertEqual(audioop.avg(pack(5, 8), width), 6)
        self.assertEqual(audioop.avg(pack(5, -8), width), -2)
        # Extremes must not overflow.
        self.assertEqual(audioop.avg(pack(maxvalues[width], maxvalues[width]), width),
                         maxvalues[width])
        self.assertEqual(audioop.avg(pack(minvalues[width], minvalues[width]), width),
                         minvalues[width])
    # 32-bit means stay exact even at large magnitudes.
    self.assertEqual(audioop.avg(packs[4](0x50000000, 0x70000000), 4), 0x60000000)
    self.assertEqual(audioop.avg(packs[4](-0x50000000, -0x70000000), 4), -0x60000000)
def listen_for_speech():
    """Record mic audio until silence, transcribe it, and run the command.

    Uses GoogleSpeech.stt() for transcription; a non-empty result is
    passed to runCommand().  The stream is reopened and listening resumes
    after each take.
    """
    stream = initStream()
    print "* listening. CTRL+C to finish."
    all_m = []
    data = ''
    #SILENCE_LIMIT = 2
    rel = vConfig.RATE/vConfig.INPUT_FRAMES_PER_BLOCK  # blocks per second
    slid_win = deque(maxlen=vConfig.SILENCE_LIMIT*rel)
    started = False
    while (True):
        data = stream.read(vConfig.INPUT_FRAMES_PER_BLOCK)
        # Mean absolute amplitude of the latest block.
        slid_win.append (abs(audioop.avg(data, 2)))
        if(True in [ x>vConfig.THRESHOLD for x in slid_win]):
            if(not started):
                print "starting record"
                started = True
            all_m.append(data)
        elif (started==True):
            print "finished"
            #the limit was reached, finish capture and deliver
            # NOTE(review): 'p' is never defined in this function —
            # save_speech() presumably expects a PyAudio handle, so this
            # line would raise NameError at runtime. Confirm and fix.
            filename = save_speech(all_m,p)
            print filename
            textString = GoogleSpeech.stt(filename, vConfig.RATE)
            if ( textString != '' ):
                #os.system( "say " + str(textString) )
                print "Initiating Configuration Lookup"
                #cmd = vConfig.getConfig( textString )
                #if ( cmd is not None ):
                runCommand(textString)
            #reset all
            started = False
            slid_win = deque(maxlen=vConfig.SILENCE_LIMIT*rel)
            all_m= []
            stream = initStream()
            print stream
            print "listening ... again"
    # NOTE(review): unreachable — the loop above never breaks.
    print "* done recording"
    stream.close()
def listen_for_speech():
    """Record mic audio until silence and send it to Google STT.

    Recording starts when any chunk in the sliding window exceeds
    THRESHOLD and stops after SILENCE_LIMIT seconds of silence; the take
    is saved via save_speech() and transcribed via stt_google_wav().
    Ctrl+C delivers whatever has been captured so far, then exits.
    """
    #open stream
    p = pyaudio.PyAudio()
    stream = p.open(format = FORMAT,
                    channels = CHANNELS,
                    rate = RATE,
                    input = True,
                    frames_per_buffer = chunk)
    debug_print("* listening. CTRL+C to finish.")
    all_m = []
    data = ''
    # BUG FIX: floor division keeps the deque maxlen an int on Python 3
    # (plain / would yield a float); identical result for ints on Python 2.
    rel = RATE // chunk
    slid_win = deque(maxlen=SILENCE_LIMIT*rel)
    started = False
    try:
        while (True):
            data = stream.read(chunk)
            slid_win.append(abs(audioop.avg(data, 2)))
            if any(x > THRESHOLD for x in slid_win):
                if not started:
                    debug_print("starting record")
                    started = True
                all_m.append(data)
            elif started:
                debug_print("finished")
                print("***")
                #the limit was reached, finish capture and deliver
                filename = save_speech(all_m,p)
                stt_google_wav(filename)
                #reset all
                started = False
                slid_win = deque(maxlen=SILENCE_LIMIT*rel)
                all_m= []
                debug_print("listening ...")
    except KeyboardInterrupt:
        debug_print("\nuser stopped the recording")
        # Deliver a partial take if one was in progress.
        if started:
            filename = save_speech(all_m,p)
            stt_google_wav(filename)
    debug_print("* done recording")
    stream.close()
    p.terminate()
def get_swipe(): p = pyaudio.PyAudio() stream = p.open(format = FORMAT, channels = CHANNELS, rate = RATE, input = True, frames_per_buffer = CHUNK) baselines = deque([2**15] * 4) bias = 0 while 1: data, power = get_chunk(stream, bias) baseline = sum(baselines) / len(baselines) * THRESHOLD_FACTOR print power, baseline, power / (baseline or 1) chunks = [] while power > baseline: print power, baseline, power / (baseline or 1), '*' chunks.append(data) data, power = get_chunk(stream, bias) if len(chunks) > 1: data = old_data + ''.join(chunks) + data while audioop.maxpp(data[:3000], 2) < baseline / 2: data = data[1000:] while audioop.maxpp(data[-3000:], 2) < baseline / 2: data = data[:-1000] return audioop.bias(data, 2, -audioop.avg(data, 2)) old_data = data bias = -audioop.avg(data, 2) baselines.popleft() baselines.append(power)
def get_utterance(self): """ TODO(): Put a ton of information here """ # Do some initialization window_size = self.silence_time*(self.rate/self.chunk) sliding_window = collections.deque(maxlen=window_size) utterance = None # Setup the pyaudio stream logging.info('Listening for speech input') self.stream = self.pyaudio_handler.open(format=self.bits, channels=self.channels, rate=self.rate, input=True, frames_per_buffer=self.chunk) # Start recording some data started = False recording = True while (recording): # Get a chunk of data from the stream try: data = self.stream.read(self.chunk) except IOError as e: data = '\x00' * self.chunk logging.warning('Probably just a hiccup in the recording: ' + str(e)) # Get average of the last two bytes in the window and keep track of it sliding_window.append(abs(audioop.avg(data, 2))) # If the average of the chunk exceeds the threshold keep the data if(True in [x>self.threshold for x in sliding_window]): if(not started): logging.info('Speech input detected. Recording raw audio') started = True self._all_chunks.append(data) # If the average of the chunk is elif(started): logging.info('Speech input no longer detected') recording = False self._write() utterance = self._get_google_transciption() sliding_window = collections.deque(maxlen=window_size) self._all_chunks = [] self.stream.close() return utterance
def start_recording(output_rate, recording_time, def_id, device_index, input_rate, output_path):
    """Continuously record volume samples and dump them to numbered files.

    Reads the input device in chunks sized so that roughly `output_rate`
    averaged samples are produced per second; each chunk is reduced to
    its average amplitude (audioop.avg) and collected.  Every
    `recording_time` seconds the batch is written via save_samples() to
    "<output_path><def_id>-<timestamp>-<n>.txt".

    :param output_rate: desired averaged samples per second.
    :param recording_time: seconds of samples per output file.
    :param def_id: identifier embedded in the output filename.
    :param device_index: PyAudio input device index, or None for default.
    :param input_rate: capture sample rate, or None for device default.
    :param output_path: output filename prefix, or None for cwd.
    """
    p = pyaudio.PyAudio()

    # Resolve the capture device (None -> system default input).
    if device_index is None:
        device = int(p.get_default_input_device_info()["index"])
    else:
        device = int(device_index)
    device_info = p.get_device_info_by_index(device)

    # Resolve the capture rate (None -> device default).
    if input_rate is None:
        rate = int(device_info["defaultSampleRate"])
    else:
        rate = int(input_rate)

    if output_path is None:
        output_path = ""
    output_path += str(def_id) + "-" + time.strftime("%Y%m%d_%H%M%S") + "-"

    chunk_size = int(rate / int(output_rate))
    # BUG FIX: floor division keeps this an int on Python 3; a float here
    # would make the ndarray shape below invalid.
    chunks_per_recording = int(recording_time) * rate // chunk_size

    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=rate,
                    frames_per_buffer=chunk_size,
                    input=True,
                    input_device_index=device)

    samples = np.ndarray(shape=(chunks_per_recording,), dtype=np.int32)
    i = 0
    n = 0
    while True:
        chunk = stream.read(chunk_size)
        samples[i] = audioop.avg(chunk, 2)
        i += 1
        if i == chunks_per_recording:
            save_samples(samples, output_path + str(n) + ".txt")
            i = 0
            n += 1
            samples = np.ndarray(shape=(chunks_per_recording,), dtype=np.int32)

    # NOTE(review): unreachable — the loop above never breaks; cleanup kept
    # from the original for when an exit condition is added.
    stream.stop_stream()
    stream.close()
    p.terminate()