def to_wav(self, frames):
    """Resample captured 16-bit mono PCM to 16 kHz when needed.

    Args:
        frames: raw PCM data, either one byte string or a list of byte
            strings that are concatenated first.

    Raises:
        audioop.error: if resampling fails for any reason other than
            "not a whole number of frames".

    NOTE(review): the body visible here ends after resampling and returns
    nothing; confirm whether the remainder of the routine was lost.
    """
    if isinstance(frames, list):
        frames = b''.join(frames)
    sample_size = 2
    # Compare by value: ``is not`` tests object identity, which is not
    # guaranteed to hold for two equal integers of this size.
    if self.RATE != 16000:
        # Every STT engine needs 16kHz
        try:
            frames, _ = audioop.ratecv(
                frames, sample_size, 1, self.RATE, 16000, None
            )
        except audioop.error as e:
            if str(e) != "not a whole number of frames":
                raise
            # This means that either the first or the last byte is rubbish.
            # If we delete the wrong byte we will get crap, i.e. loud
            # noise, so keep the alignment with the lower RMS.
            if audioop.rms(frames[0:-1], 2) > audioop.rms(frames[1:], 2):
                frames = frames[1:]
            else:
                frames = frames[0:-1]
            print("Try again")
            # Debug dump of the re-aligned data, kept from the original.
            with open('test.raw', 'wb') as fp:
                fp.write(frames)
            frames, _ = audioop.ratecv(
                frames, sample_size, 1, self.RATE, 16000, None
            )
def __db_level(self, rms_mode=False):
    """
    Return the audio level of the sample in dB as a ``(left, right)`` tuple.

    Each value is ``20*log10((level+1)/full_scale)`` and is clamped at
    -60 dB at the bottom. ``level`` is the RMS of the channel when
    ``rms_mode`` is true, otherwise its maximum absolute sample value.
    A mono sample yields the same value twice; otherwise the first and second
    channel are measured separately via ``audioop.tomono``.
    """
    width = self.__samplewidth
    full_scale = 2**(8*width-1)
    measure = audioop.rms if rms_mode else audioop.max

    def ratio(pcm):
        # +1 keeps the logarithm defined for an all-zero fragment
        return (measure(pcm, width)+1)/full_scale

    if self.nchannels == 1:
        left = right = ratio(self.__frames)
    else:
        left_pcm = audioop.tomono(self.__frames, width, 1, 0)
        right_pcm = audioop.tomono(self.__frames, width, 0, 1)
        left = ratio(left_pcm)
        right = ratio(right_pcm)
    # cut off at the bottom at -60 instead of all the way down to -infinity
    floor_db = -60.0
    return max(20.0*math.log(left, 10), floor_db), max(20.0*math.log(right, 10), floor_db)
def can_play(self):
    """Report whether an analog audio signal is currently present.

    First records one second with ``arecord`` into ``analog.wav`` and
    checks its RMS. If anything in that attempt fails, falls back to
    converting ``analog.dump`` with ``mplayer`` and checking the RMS of
    the last 100 frames of the result.

    Returns:
        True when the measured RMS is above 600, otherwise False. False
        is also returned when both attempts fail.
    """
    try:
        wav = subprocess.Popen('arecord -D plughw:1 -f dat -d 1 analog.wav',
                               shell=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
        wav.wait()
        wav_file = wave.open('analog.wav', 'r')
        try:
            data = wav_file.readframes(wav_file.getnframes())
        finally:
            wav_file.close()
        rms = audioop.rms(data, 2)
        os.remove('analog.wav')
        print(rms)
        if rms > 600:
            return True
        print('no analog')
        return False
    except Exception:
        # Live capture failed; try the dump-file fallback instead.
        try:
            pc = subprocess.Popen(['mplayer', 'analog.dump',
                                   '-ao', 'pcm:fast:file=analog_dump.wav',
                                   '-af', 'format=s16le'],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
            pc.wait()
            wav_file = wave.open('analog_dump.wav', 'r')
            try:
                wav_file.setpos(wav_file.getnframes()-100)
                data = wav_file.readframes(100)
            finally:
                wav_file.close()
            rms = audioop.rms(data, 2)
            self.clean('analog_dump.wav')
            print(rms)
            if rms > 600:
                return True
            print('no analog')
            return False
        except Exception:
            print('no analog')
            # The original returned the undefined name ``false`` here,
            # which raised NameError instead of reporting "no signal".
            return False
def getWAVFileProperties(filePath):
    """Return the duration and per-section RMS values of a WAV file.

    The file is split into ``settings.NUMBER_OF_AUDIO_FILE_BINS`` sections
    of equal frame count; the final section runs to the end of the file so
    the frames lost to truncation are not dropped.

    Args:
        filePath: path of the WAV file to analyse.

    Returns:
        ``(seconds, rmsValues)`` - the duration in seconds and a list with
        one RMS value per section.
    """
    a = wave.open(filePath, 'r')
    try:
        nFrames = a.getnframes()
        framerate = a.getframerate()
        # Take the sample width from the file itself; the original used a
        # name that is never defined in this function.
        sampleWidth = a.getsampwidth()
        seconds = nFrames / float(framerate)

        # get rms value for each section of the audio
        framesPerSection = int(nFrames / float(settings.NUMBER_OF_AUDIO_FILE_BINS))  # note the truncation
        rmsValues = []
        count = 0
        for _ in range(settings.NUMBER_OF_AUDIO_FILE_BINS-1):
            section = a.readframes(framesPerSection)
            count += framesPerSection
            rmsValues.append(audioop.rms(section, sampleWidth))

        # all the truncated time adds up.
        # we therefore read the last audio section to the end of the file,
        # rather than to an integer number of frames.
        section = a.readframes(nFrames - count)
        rmsValues.append(audioop.rms(section, sampleWidth))
    finally:
        a.close()
    return seconds, rmsValues
def listenToSurroundings(threadName):
    """Monitor microphone loudness and trigger voice capture when it is loud.

    Loops forever. When listening is allowed by the ``config`` flags, opens
    a 16 kHz mono input stream and reads chunks until the chunk RMS reaches
    ``volumeThreshold``, visual input starts, or both "still" flags are set.
    Afterwards, if neither ``config.gettingVisualInput`` nor
    ``config.gettingStillAudio`` is set, calls ``getUsersVoice(5)`` and
    passes the result to ``processInput``.

    Any exception is caught, its traceback is printed, and the function
    returns.

    NOTE(review): indentation was reconstructed from a flattened source
    line; confirm the block nesting against the original file.
    """
    try:
        print "Started listening on thread %s" % threadName
        chunk = 1024
        if config.debugging:
            # Calibration aid: print the RMS of ten single chunks at 44.1 kHz.
            rms = []
            for i in range(0,10):
                p = pyaudio.PyAudio()
                stream = p.open(format=pyaudio.paInt16,channels=1,rate=44100,input=True,frames_per_buffer=chunk)
                data = stream.read(chunk)
                rmsTemp = audioop.rms(data,2)
                print rmsTemp
                rms.append(rmsTemp)
                # rmsMean and rmsStd are computed but not used afterwards
                rmsMean = numpy.mean(rms)
                rmsStd = numpy.std(rms)
                print rms
                stream.stop_stream()
                stream.close()
                p.terminate()
        volumeThreshold = 1050 # set after running the previous commands and looking at the output
        print "Volume threshold set at %2.1f" % volumeThreshold
        lastInterupt = datetime.datetime.now()
        while (1):
            if config.gettingStillImages and config.gettingStillAudio:
                # both inputs are "still": do nothing this iteration
                pass
            elif config.gettingVisualInput:
                time.sleep(5)
            else:
                print "Starting listening stream"
                lastInterupt = datetime.datetime.now()
                config.gettingStillAudio = 0
                rmsTemp = 0
                p = pyaudio.PyAudio()
                stream = p.open(format=pyaudio.paInt16,channels=1,rate=16000,input=True,frames_per_buffer=chunk)
                ## listen to surroundings
                while rmsTemp < volumeThreshold and not config.gettingVisualInput:
                    data = stream.read(chunk)
                    rmsTemp = audioop.rms(data,2)
                    timeDifference = datetime.datetime.now() - lastInterupt
                    # flag audio as "still" once it stayed quiet for audioHangout seconds
                    if timeDifference.total_seconds() > config.audioHangout:
                        config.gettingStillAudio = 1
                    if config.gettingStillAudio and config.gettingStillImages:
                        break
                stream.stop_stream()
                stream.close()
                p.terminate()
                if not config.gettingVisualInput and not config.gettingStillAudio:
                    config.timeTimeout = 0 # reset timeout
                    config.gettingVoiceInput = 1
                    output = getUsersVoice(5)
                    processInput(output)
                    config.gettingVoiceInput = 0
    except:
        import traceback
        print traceback.format_exc()
def listen(self, source, timeout=None):
    """Record one phrase from ``source`` and return it as ``AudioData``.

    Waits until a chunk's RMS energy exceeds ``self.energy_threshold``,
    keeping at most ``quiet_duration`` worth of leading quiet chunks, then
    records until more than ``pause_threshold`` worth of consecutive quiet
    chunks is seen or the stream returns no data.

    Raises ``TimeoutError`` when ``timeout`` is set and the phrase has not
    started within that many seconds.
    """
    assert isinstance(source, AudioSource) and source.stream

    # record audio data as raw samples
    captured = collections.deque()
    assert self.pause_threshold >= self.quiet_duration >= 0

    chunk_seconds = (source.CHUNK + 0.0) / source.RATE
    # number of quiet chunks that marks the end of the phrase
    max_pause_chunks = int(math.ceil(self.pause_threshold / chunk_seconds))
    # maximum number of quiet chunks to keep before and after the phrase
    max_quiet_chunks = int(math.ceil(self.quiet_duration / chunk_seconds))

    # phase 1: buffer input until the phrase starts
    waited = 0
    while True:
        waited += chunk_seconds
        if timeout and waited > timeout:
            raise TimeoutError("listening timed out")

        chunk = source.stream.read(source.CHUNK)
        if len(chunk) == 0:
            break  # reached end of the stream
        captured.append(chunk)

        if audioop.rms(chunk, source.SAMPLE_WIDTH) > self.energy_threshold:
            break
        if len(captured) > max_quiet_chunks:
            captured.popleft()  # keep only the needed amount of leading quiet

    # phase 2: record until the phrase ends
    quiet_run = 0
    while True:
        chunk = source.stream.read(source.CHUNK)
        if len(chunk) == 0:
            break  # reached end of the stream
        captured.append(chunk)

        loud = audioop.rms(chunk, source.SAMPLE_WIDTH) > self.energy_threshold
        quiet_run = 0 if loud else quiet_run + 1
        if quiet_run > max_pause_chunks:
            break  # end of the phrase

    # drop the surplus quiet chunks at the end
    for _ in range(max_quiet_chunks, max_pause_chunks):
        captured.pop()
    pcm = b"".join(captured)

    return AudioData(source.RATE, self.samples_to_flac(source, pcm))
def listen(self, file_name = 'output.wav'):
    """Record one phrase from the microphone and save it as a WAV file.

    Waits until a chunk's RMS energy exceeds ``self.energy_threshold``
    (keeping at most ``self.quiet_buffer_count`` leading chunks), records
    until more than ``self.pause_buffer_count`` consecutive quiet chunks
    were seen, trims the surplus trailing quiet chunks and writes the
    result to ``file_name``.

    Args:
        file_name: path of the WAV file to write.
    """
    p = pyaudio.PyAudio()
    try:
        # Query the sample size before PyAudio is terminated.
        sample_width = p.get_sample_size(self.FORMAT)
        stream = p.open(format = self.FORMAT,
                        channels = self.CHANNELS,
                        rate = self.RATE,
                        input = True,
                        frames_per_buffer = self.CHUNK)
        try:
            frames = collections.deque()

            # phase 1: wait for the phrase to start
            while True:
                buf = stream.read(self.CHUNK)
                if len(buf) == 0:
                    break
                frames.append(buf)
                energy = audioop.rms(buf, self.SAMPLE_WIDTH)
                print(energy)
                if energy > self.energy_threshold:
                    break
                if len(frames) > self.quiet_buffer_count:
                    frames.popleft()
            print('Energy is above the threshold')

            # phase 2: record until the pause is long enough
            pause_count = 0
            while True:
                buf = stream.read(self.CHUNK)
                if len(buf) == 0:
                    break
                frames.append(buf)
                energy = audioop.rms(buf, self.SAMPLE_WIDTH)
                print(energy)
                if energy > self.energy_threshold:
                    pause_count = 0
                else:
                    pause_count += 1
                if pause_count > self.pause_buffer_count:
                    break
        finally:
            # Always release the device, even if reading failed.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # remove the extra quiet chunks at the end; the deque can be shorter
    # than expected when the stream ended early
    for _ in range(self.quiet_buffer_count, self.pause_buffer_count):
        if not frames:
            break
        frames.pop()
    frame_data = b"".join(frames)

    wf = wave.open(file_name, 'wb')
    try:
        wf.setnchannels(self.CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(self.RATE)
        wf.writeframes(frame_data)
    finally:
        wf.close()
def test_rms(self):
    """Check ``audioop.rms`` for empty input, ramps, extremes and fixtures."""
    widths = (1, 2, 3, 4)
    for width in widths:
        # empty fragments of every accepted buffer type give 0
        for empty in (b"", bytearray(), memoryview(b"")):
            self.assertEqual(audioop.rms(empty, width), 0)
        pack = packs[width]
        self.assertEqual(audioop.rms(pack(*range(100)), width), 57)
        self.assertAlmostEqual(audioop.rms(pack(maxvalues[width]) * 5, width),
                               maxvalues[width], delta=1)
        self.assertAlmostEqual(audioop.rms(pack(minvalues[width]) * 5, width),
                               -minvalues[width], delta=1)
    # expected RMS of the shared fixture data, per sample width
    fixture_rms = ((1, 77), (2, 20001), (3, 5120523), (4, 1310854152))
    for width, expected in fixture_rms:
        self.assertEqual(audioop.rms(datas[width], width), expected)
def rms(self):
    """
    Return the root mean square of the frames.

    For mono data this is ``audioop.rms`` of the whole buffer. For
    multi-channel data each channel is de-interleaved and measured
    separately, and the mean of the per-channel values is returned as an
    int.
    """
    if self._nchannels == 1:
        return audioop.rms(self._frames, self._sampwidth)

    frame_bytes = self._sampwidth * self._nchannels
    rms_sum = 0
    for channel in range(self._nchannels):
        first = channel * self._sampwidth
        # Slice whole samples and join once: linear time, and it works on
        # Python 3 bytes, where indexing yields ints rather than 1-char
        # strings.
        channel_frames = b"".join(
            self._frames[pos:pos + self._sampwidth]
            for pos in range(first, len(self._frames), frame_bytes)
        )
        rms_sum += audioop.rms(channel_frames, self._sampwidth)
    return int(rms_sum/self._nchannels)
def run(self):
    """Process one audio block and update the clap-detection counters.

    Reads ``self.block`` frames from ``self.stream`` and classifies the
    block as noisy when its RMS exceeds
    ``self.background_level * self.sensitivity``. Calls
    ``self.claps_detected()`` once ``self.block_counter`` reaches
    ``self.pattern_limit``. Returns early (after printing the error) when
    the stream read raises ``IOError``.

    NOTE(review): indentation was reconstructed from a flattened source
    line; confirm the block nesting against the original file.
    """
    try:
        block = self.stream.read(self.block)
    except IOError as e:
        print(e)
        return
    amplitude = audioop.rms(block, 2 )
    if amplitude > self.background_level * self.sensitivity:
        # noisy
        self.noisycount += 1
        print(self.noisycount)
        if self.noisycount > 3 / self.block_time :
            #we've had 3 seconds of noise, maybe background is louder. Recalibrate.
            self.background_level = self.listen_to_background()
            self.noisycount = 0
    else:
        # quiet
        self.quietcount += 1
        if 1 <= self.noisycount <= self.clap_length:
            #we just had a period of noisy blocks which match the length of a clap
            self.clap_counter += 1
            self.block_counter = 0 #reset pattern timer
        # recalibrate the background after a long quiet stretch
        if self.quietcount > 100/self.block_time:
            self.background_level = self.listen_to_background()
            self.quietcount = 0
        self.noisycount = 0
    # block_counter advances once per block while a clap pattern is open
    if self.clap_counter >= 1:
        self.block_counter += 1
    if self.block_counter >= self.pattern_limit:
        self.claps_detected()
        self.clap_counter = 0
        self.block_counter = 0
def CheckNoiseLevel(RATE, CHUNK, stream):
    """Return twice the mean RMS of ``int(RATE / CHUNK * 2)`` chunks.

    Each chunk is read with ``stream.read(CHUNK)`` and measured as 16-bit
    samples; the mean is rounded up before it is doubled.
    """
    chunk_count = int(RATE / CHUNK * 2)
    levels = [audioop.rms(stream.read(CHUNK), 2) for _ in range(0, chunk_count)]
    mean_level = sum(levels) / len(levels)
    return math.ceil(mean_level) * 2
def getSoundData(self):
    """Read one audio block, analyse both channels and send the result.

    Reads 512 frames from ``self.stream`` (which must unpack as 1024
    16-bit values), splits the interleaved bytes into two per-channel
    buffers, stores ``self.mkHz.getHz`` of each buffer and the RMS of the
    whole block in ``self.data`` and sends ``self.data`` as JSON to
    ``(self.IP, self.PORT)`` through ``self.cs``.
    """
    raw = self.stream.read(512)
    samples = unpack("1024h", raw)

    # per-channel sample sums (only used for the averages below)
    left = sum(samples[0::2])
    right = sum(samples[1::2])

    # de-interleave: first 2 bytes of every 4-byte frame go to r,
    # the other 2 bytes go to l
    frame_starts = range(0, len(raw), 4)
    r = b"".join(raw[pos : pos + 2] for pos in frame_starts)
    l = b"".join(raw[pos + 2 : pos + 4] for pos in frame_starts)

    # channel averages; currently not used any further
    left = left / (len(r) / 2)
    right = right / (len(r) / 2)

    lfreq = self.mkHz.getHz(l, self.rate)
    rfreq = self.mkHz.getHz(r, self.rate)
    # self.data['leftAll'] = b64encode(l)
    # self.data['rightAll'] = b64encode(r)
    self.data["leftAll"] = lfreq
    self.data["rightAll"] = rfreq
    self.data["loudness"] = audioop.rms(raw, 2)
    self.cs.sendto(json.dumps(self.data), (self.IP, self.PORT))
def main():
    """Play a WAV file while feeding the player the room's noise level.

    Starts ``play_audio`` on a daemon thread with the file named on the
    command line, then loops: prints the mean of a rolling window of
    microphone RMS values, sends ``mean/MAX`` to the player through a
    queue and adds the RMS of the next microphone block to the window.
    Ctrl-C sends ``CLEANUP`` to the player and releases the audio device.
    """
    if len(sys.argv) < 2:
        print("Usage: %s filename.wav" % sys.argv[0])
        sys.exit(-1)

    audio = pyaudio.PyAudio()
    levels = Queue()
    player = Thread(target=play_audio, args=(sys.argv[1], levels))
    player.daemon = True
    player.start()

    mic = open_mic_stream(audio)

    # rolling window of samples of room noise
    window = deque([1], maxlen=int(WINDOW_SIZE / INPUT_BLOCK_TIME))

    try:
        while True:
            mean = numpy.mean(window)
            print(mean)
            levels.put(mean/MAX)
            window.append(audioop.rms(mic.read(INPUT_FRAMES_PER_BLOCK), 2))
    except KeyboardInterrupt:
        levels.put(CLEANUP)
        mic.stop_stream()
        mic.close()
        audio.terminate()
def calculate_volume(sphfile,leftch,rightch,SIR):
    """Compute a volume factor from the channel energies of a stereo sph file.

    Useful to compute the signal-to-interference level of stereo sph
    files. The file is mixed down twice with ``audioop.tomono`` (once with
    the factors ``leftch, rightch`` and once swapped), the first 1024
    bytes of each mono result are skipped, and the RMS energies ``e1`` and
    ``e2`` of the remainders are printed.

    Args:
        sphfile: path of the sph file (read as 16-bit samples).
        leftch: mix factor applied to the left channel for ``e1``.
        rightch: mix factor applied to the right channel for ``e1``.
        SIR: signal-to-interference ratio; converted with ``float()``.

    Returns:
        ``exp(-SIR/10) * e1 / e2`` as a float.

    Raises:
        ZeroDivisionError: if the second energy is zero.
    """
    # Audio is binary data: text mode corrupts it on some platforms and
    # fails outright on Python 3.
    with open(sphfile, 'rb') as s:
        raw = s.read()
    s1_bytes = audioop.tomono(raw,2,leftch,rightch)[1024:]
    s2_bytes = audioop.tomono(raw,2,rightch,leftch)[1024:]
    e1 = float(audioop.rms(s1_bytes,2))
    e2 = float(audioop.rms(s2_bytes,2))
    print("%s %s" % (e1, e2))
    vol = math.exp(-1.0*float(SIR)/10)*e1/e2
    return vol
def _silent_detection(audio, silent_list, first_silent_done, logger): """Analyse audio chunk to determine if this is a silent return False: the user did NOT speak return None: the user is speaking or we are waiting for it return True: the user had finished to speack """ # Get rms for this chunk audio_rms = audioop.rms(audio, 2) # Detect first silent if first_silent_done is False: logger.debug("Audio level: %s", audio_rms) if audio_rms < THRESHOLD: logger.debug("Waiting for user speaking") silent_list.append(True) else: logger.debug("User is maybe starting to speak") silent_list.append(False) if len([s for s in silent_list if s is False]) > 5: logger.debug("User is starting to speak") silent_list = [] first_silent_done = True if len(silent_list) > FS_NB_CHUNK: logger.debug("The user did NOT speak") return False else: silent_list.append(True if audio_rms < THRESHOLD else False) if len(silent_list) > NB_CHUNK: logger.debug("The user is speaking. Level: %d", audio_rms) silent_list.pop(0) if len(silent_list) == NB_CHUNK and all(silent_list): logger.debug("The user has finished to speak") return True return None
def GetRMSAmplitude(self, time, sampleDur):
    """Return the RMS of ``sampleDur`` seconds of audio starting at ``time``.

    Both values are converted to frame counts with the frame rate of
    ``self.wave_reference`` (rounded to the nearest frame); the reader is
    positioned at the start frame and the frames are read from there.
    """
    reader = self.wave_reference
    first_frame = int(round(time * reader.getframerate()))
    frame_count = int(round(sampleDur * reader.getframerate()))
    reader.setpos(first_frame)
    pcm = reader.readframes(frame_count)
    return audioop.rms(pcm, reader.getsampwidth())
def calc_volume(wav, vol_rate=DEFAULT_VOL_RATE):
    '''
    Compute volume data for an open wave reader.

    The volume is the root mean square (RMS) of a window of samples,
    taken ``vol_rate`` times per second of audio. Two-channel input is
    mixed down to mono first. Returns the list of RMS values.
    '''
    width = wav.getsampwidth()
    rate = wav.getframerate()
    pcm = wav.readframes(wav.getnframes())
    if wav.getnchannels() == 2:
        pcm = audioop.tomono(pcm, width, 0.5, 0.5)

    vol_nframes = wav.getnframes() * vol_rate / rate
    span = len(pcm) / vol_nframes
    span = span + (span % width)

    def window_rms(index):
        # byte offset of the frame at which this window starts
        begin = int(float(index) / vol_rate * rate) * width
        return audioop.rms(pcm[begin: begin + span], width)

    return [window_rms(index) for index in range(vol_nframes)]
def pushData(self, unit_data):
    """Feed one unit of audio into the silence-based segmenter.

    Units whose RMS is below ``self.SICLENCE_THRESH`` count as silence:
    while inside a segment they are still written, and after
    ``self.SILENCE_UNIT_MAX`` of them the segment is closed and
    ``self.nextWav()`` is called. A louder unit re-opens the segment if
    needed (writing the buffered ``self.pre_silences`` first) and is
    written to ``self.wav``. Every unit is appended to
    ``self.pre_silences``.

    NOTE(review): ``self.pre_silences.pop()`` removes the newest entry
    (the one just appended), not the oldest; confirm this is intended.
    NOTE(review): indentation was reconstructed from a flattened source
    line; confirm the block nesting against the original file.
    """
    rms = audioop.rms(unit_data, 2)
    logging.info("rms=%f" % rms)
    if rms < self.SICLENCE_THRESH:
        if self.segout == False:
            self.wav.writeframes(unit_data)
            self.silence_unit_cnt = self.silence_unit_cnt + 1
            if self.silence_unit_cnt >= self.SILENCE_UNIT_MAX:
                #when in a segment, and enough of silence,
                #make a new segment
                self.segout = True
                # oldfn is assigned but not used in this method
                oldfn = self.wavfn; # recognize with it
                self.nextWav()
    else:
        if self.segout == True:
            #goes in
            self.segout = False
            #push pre silences in
            for d in self.pre_silences:
                self.wav.writeframes(d)
        self.wav.writeframes(unit_data)
    self.pre_silences.append(unit_data)
    if len(self.pre_silences) >= self.SILENCE_UNIT_MAX:
        self.pre_silences.pop()
def listen(stream, queue):
    """Read audio from ``stream``, resample it and push it onto ``q``.

    While ``exit`` is false: starts the stream, reads ``size`` blocks of
    ``frame`` frames, resamples each block by the ratio 16000/48000 and
    puts it on ``q``, printing the block's RMS; then stops the stream and
    waits for ``q`` to be drained. An ``IOError`` ends the loop. Finally
    the stream is closed and ``pyaud`` is terminated.

    NOTE(review): the ``queue`` parameter is never used; the body uses the
    name ``q`` instead. ``q``, ``exit``, ``size``, ``frame`` and ``pyaud``
    are presumably module-level names - verify.
    NOTE(review): indentation was reconstructed from a flattened source
    line; confirm the block nesting against the original file.
    """
    try:
        while not exit:
            stream.start_stream()
            print('Listening...')
            for i in range(0,size):
                data = stream.read(frame)
                ar = numpy.fromstring(data, dtype=numpy.int16)
                data2 = resample(ar, (16000./48000.), 'linear')
                q.put(data2)
                # samps = numpy.fromstring(data, dtype=numpy.int16)
                # print (samps, q.qsize())
                rms = audioop.rms(data, 2)
                print rms
            stream.stop_stream()
            if exit:
                sys.exit()
            # block until every queued item has been marked done
            q.join()
    except IOError:
        print('ERROR!!!!')
        pass
    stream.stop_stream()
    stream.close()
    pyaud.terminate()
    print "----------------------------------------------------------------------------------------------------------------"
def adjust_for_ambient_noise(self, source, duration = 1):
    """
    Calibrate ``self.energy_threshold`` against the ambient noise of ``source``.

    ``source`` must be an opened ``AudioSource``. Chunks are read for at
    most ``duration`` seconds; after each chunk the threshold is moved
    towards ``energy * self.dynamic_energy_ratio`` using a weighted
    average whose damping is scaled by the chunk length. This version
    does not stop early; it always runs for the full ``duration``.
    """
    assert isinstance(source, AudioSource), "Source must be an audio source"
    assert source.stream is not None, "Audio source must be opened before recording - see documentation for `AudioSource`"
    assert self.pause_threshold >= self.non_speaking_duration >= 0

    chunk_seconds = (source.CHUNK + 0.0) / source.SAMPLE_RATE
    listened = 0
    listened += chunk_seconds

    # adjust the threshold chunk by chunk until the time budget is used up
    while not listened > duration:
        chunk = source.stream.read(source.CHUNK)
        level = audioop.rms(chunk, source.SAMPLE_WIDTH)  # energy of the audio signal

        # asymmetric weighted average; the exponent accounts for
        # different chunk sizes and rates
        damping = self.dynamic_energy_adjustment_damping ** chunk_seconds
        goal = level * self.dynamic_energy_ratio
        self.energy_threshold = self.energy_threshold * damping + goal * (1 - damping)

        listened += chunk_seconds
def amplitude_testing():
    """Show a live RMS level meter for the input device until Ctrl-C.

    Opens an input stream with the module-level audio settings, then for
    every chunk computes the RMS, counts chunks louder than 200 and prints
    a bar of ``min(MAX_LINE, rms/SLOPE)`` ticks via ``print_debug``.
    After Ctrl-C the stream is closed and the number of loud chunks and
    the time they represent are printed.

    NOTE(review): indentation was reconstructed from a flattened source
    line; confirm the block nesting against the original file.
    """
    audio = pyaudio.PyAudio()
    stream = audio.open(format = FORMAT, channels = CHANNELS, rate = RATE, input = True, frames_per_buffer = CHUNK, input_device_index = DEVICE)
    print '=> testing...'
    # for i in xrange(SEC * RATE / CHUNK ):
    #     data = stream.read(CHUNK)
    #     print_debug(' | rms: '+str(audioop.rms(data, 2)))
    #     time.sleep(0.01)
    # utter counts chunks whose RMS exceeds thres
    utter,thres = 0,200
    while True:
        try:
            data = stream.read(CHUNK)
            # DEPTH is in bits; audioop expects the sample width in bytes
            rms = audioop.rms(data, DEPTH / 8)
            if rms>thres:
                utter+=1
            progress = min(MAX_LINE,rms/SLOPE)
            print_debug('['+'|'*progress+' '*(MAX_LINE-progress)+'] rms:'+str(rms))
            time.sleep(0.01)
        except KeyboardInterrupt:
            print ; break
    stream.stop_stream()
    stream.close()
    audio.terminate()
    print 'number of utter: %d' % utter
    print 'utter: %g sec' % (float(utter*CHUNK)/RATE)
    print '=> end'
def collect(bits=50):
    """Record ``bits`` audio chunks and return one 0/1 value per chunk.

    Each value is the parity of the chunk's RMS: 0 when the RMS is even,
    1 when it is odd. Audio is read from the default input device
    (16-bit, 2 channels, 48 kHz, 1024 frames per chunk).
    """
    chunk_frames = 1024
    audio = pyaudio.PyAudio()
    mic = audio.open(format=pyaudio.paInt16,
                     channels=2,
                     rate=48000,
                     input=True,
                     frames_per_buffer=chunk_frames)

    print("* recording")

    def parity_bit():
        # root mean square of one chunk, reduced to its parity
        level = int(audioop.rms(mic.read(chunk_frames), 2))
        if level % 2 == 0:
            return 0
        return 1

    frames = [parity_bit() for _ in range(0, bits)]

    print("* done recording")

    mic.stop_stream()
    mic.close()
    audio.terminate()
    return frames
def run(self):
    """Worker loop: report loud, high-pitched chunks from this device.

    Reads chunks from the stream opened for ``self.myDev`` until the
    global ``killswitch`` is set. When a chunk's RMS exceeds
    ``RMS_THRESHOLD`` and its pitch exceeds ``PITCH_THRESHOLD[0]`` the
    values are printed and passed to ``hit_add`` while ``mutex`` is held.
    An ``IOError`` while recording sets ``killswitch``.

    NOTE(review): indentation was reconstructed from a flattened source
    line; in particular confirm whether ``hit_add`` belongs inside the
    ``with mutex`` block.
    """
    global killswitch
    print("Worker thread for %s online " % self.myName)
    print("Device description: " + str(self.myDev))
    stream = get_stream(self.myDev)
    while not killswitch:
        try:
            data = stream.read(CHUNK)
            rms = audioop.rms(data, 2)
            pitch = find_pitch(data, self.myDev["rate"])
            # current time in milliseconds
            timestamp = int(round(time.time()*1000))
            # pitch2 = max_frequency(data, self.myDev["rate"])
            if rms > RMS_THRESHOLD \
                    and pitch > PITCH_THRESHOLD[0]:  # and pitch < PITCH_THRESHOLD[1]:
                with mutex:
                    print("\nName: %s" % self.myName)
                    print("RMS: %d" % rms)
                    print("Pitch: %d" % pitch)
                    print("TS: %s" % (timestamp % 1000))
                    # print("Pitch2: %d" % pitch2)
                    hit_add(rms, pitch, timestamp, self.myName)
                    # push_data_to_server(client, rms, pitch, timestamp, self.myName)
        except IOError as e:
            print( "Error recording: %s" % (e) )
            killswitch = True
def testMaxAudioWithBaselineShift(self):
    """Check the energy threshold after feeding 30 seconds of mock audio.

    Injects a low-energy pattern 100 times and a higher-energy pattern
    once into a ``MockSource``, runs the recognizer's
    ``calc_energy``/``adjust_threshold`` over 30 seconds worth of
    buffers and asserts that the resulting threshold is within 1 of
    1.5 times the RMS of the higher-energy pattern.

    NOTE(review): indentation was reconstructed from a flattened source
    line; confirm that ``higher_base`` is injected once, after the loop.
    """
    low_base = b"".join(["\x10\x00\x01\x00"] * 100)
    higher_base = b"".join(["\x01\x00\x00\x01"] * 100)
    source = MockSource()
    for i in range(100):
        source.stream.inject(low_base)
    source.stream.inject(higher_base)
    recognizer = ResponsiveRecognizer(None)
    sec_per_buffer = float(source.CHUNK) / (source.SAMPLE_RATE * source.SAMPLE_WIDTH)
    test_seconds = 30.0
    while test_seconds > 0:
        test_seconds -= sec_per_buffer
        data = source.stream.read(source.CHUNK)
        energy = recognizer.calc_energy(data, source.SAMPLE_WIDTH)
        recognizer.adjust_threshold(energy, sec_per_buffer)
    higher_base_energy = audioop.rms(higher_base, source.SAMPLE_WIDTH)
    # after recalibration (because of max audio length) new threshold
    # should be >= 1.5 * higher_base_energy
    delta_below_threshold = ( recognizer.energy_threshold - higher_base_energy)
    min_delta = higher_base_energy * .5
    assert abs(delta_below_threshold - min_delta) < 1
def check_silence(self, buf):
    """Track loudness of ``buf`` and drive the append/stop state machine.

    A buffer whose RMS exceeds ``self.THRESHOLD`` switches append mode on
    (first replaying the buffered quiet chunks through
    ``self.prepare.prepare``); quieter buffers are counted and kept in a
    buffer of at most 3 entries. The buffer is written to ``self.out``
    when that is open, passed to ``self.prepare.prepare`` while in append
    mode, and ``self.stop`` is called when one of the silence/time limits
    below is reached.

    NOTE(review): indentation was reconstructed from a flattened source
    line; confirm the block nesting against the original file.
    """
    volume = audioop.rms(buf, 2)
    if (volume > self.THRESHOLD):
        if (self.append == False):
            if (self.debug):
                print ('starting append mode')
            self.silence_timer = time.time()
            self.timer = time.time()
            # replay the quiet chunks that preceded the sound
            for sbuf in self.silence_buffer:
                self.prepare.prepare(sbuf, volume)
            self.silence_buffer = [ ]
        self.append = True
        self.silence_counter = 0
    else:
        self.silence_counter += 1
        self.silence_buffer.append(buf)
        # keep only the 3 most recent quiet chunks
        if (len(self.silence_buffer) > 3):
            del self.silence_buffer[0]
    if (self.out != None and self.out.closed != True):
        self.out.write(buf)
    if (self.append == True):
        self.prepare.prepare(buf, volume)
    if (self.append == True and self.silence_timer > 0 and self.silence_timer + processor.MAX_SLILENCE_AFTER_START < time.time() and self.live == True and self.endless_loop == False):
        self.stop("stop append mode because of silence")
    if (self.append == True and self.timer + processor.MAX_TIME < time.time() and self.live == True):
        self.stop("stop append mode because time is up")
    if (self.append == True and self.live == True and self.endless_loop == True and self.silence_counter > 300):
        self.append = False
        self.stop("endless loop silence detected")
def visual(self):
    """
    Show an image chosen by the current input volume until ESC is pressed.

    Repeatedly reads audio from ``self.input``; when data is available the
    RMS volume is printed, mapped through ``self.remap_interval`` to an
    index into ``self.images`` and that image is drawn at position 0,0.
    Pressing ESC shuts pygame down and returns.
    """
    while 1:
        # l is length, data is the captured audio
        l,data = self.input.read()
        # if l is 0, no audio data
        if l:
            # root mean square to avoid sign errors
            vol = audioop.rms(data,2)
            # prints volume levels
            print(vol)
            # loads image to screen at position 0,0
            remap = int(self.remap_interval(vol))
            self.screen.blit(self.images[remap],(0,0))
            # updates screen
            pygame.display.flip()
        # exits when ESC button is pressed
        for event in pygame.event.get():
            if event.type == pygame.KEYDOWN and event.key == pygame.K_ESCAPE:
                pygame.quit()
                # Leave the loop: pygame must not be used after quit().
                return
def processAudio(self, fileIn, offset):
    """Compute one loudness value per scene frame for a WAV file.

    ``self.audioFileValues`` is reset to ``offset - 1`` leading zeros.
    For an uncompressed WAV file, one value (RMS of the frame's samples
    divided by 100, as int) is appended per scene frame, where a scene
    frame spans ``framerate / self.sceneFrameRate`` audio frames.

    Args:
        fileIn: path of the WAV file, or the string ``"blank"``.
        offset: one more than the number of zero entries to prepend.

    Returns:
        0 when ``fileIn`` is ``"blank"``, otherwise
        ``self.audioFileValues``.
    """
    self.audioFileValues = [0 for _ in range(1, offset)]
    if fileIn == "blank":
        return 0

    waveFile = wave.open(fileIn, 'rb')
    try:
        if waveFile.getcomptype() != "NONE":
            print("ERROR: UNSUPPORTED COMPRESSION TYPE")
            return self.audioFileValues
        width = waveFile.getsampwidth()
        # audio frames per scene frame; kept integral so that range()
        # and readframes() also work under Python 3 true division
        spf = int(waveFile.getframerate() / self.sceneFrameRate)
        for _ in range(1, waveFile.getnframes() // spf):
            rawdata = waveFile.readframes(spf)
            current_avg = audioop.rms(rawdata, width)
            self.audioFileValues.append(int(current_avg/100))
    finally:
        # the original never closed the file
        waveFile.close()
    return self.audioFileValues
def levels(sound):
    """Return ``(rms, peak, decay)`` level pairs for a 16-bit sound.

    ``rms`` is the RMS and ``peak`` the average peak-to-peak value of
    ``sound.tostring()``, both divided by 32768; each is reported twice
    (one entry per channel slot). The third pair is always ``[0, 0]``.
    """
    pcm = sound.tostring()
    full_scale = 32768.0  # signed 16-bit
    rms = audioop.rms(pcm, 2) / full_scale
    peak = audioop.avgpp(pcm, 2) / full_scale
    return ([rms] * 2, [peak] * 2, [0, 0])
def listen():
    """Wait for two taps in a row and call ``b()`` on the second one.

    Reads chunks from the module-level ``stream`` until one is louder
    than ``TAP_LIMIT``, sleeps ``TAP_GAP`` seconds, then listens for up
    to ``VAR_SECONDS`` for a second loud chunk. On success ``b()`` is
    called and the function returns; otherwise it starts over.
    """
    def heard_tap():
        return audioop.rms(stream.read(CHUNK), 2) > TAP_LIMIT

    while True:
        # wait for the first bang
        if not heard_tap():
            continue
        print("BANG")
        # and after TAP_GAP seconds
        time.sleep(TAP_GAP)
        print("NOW")
        # within VAR_SECONDS
        for _ in range(0, int(RATE / CHUNK * VAR_SECONDS)):
            # you hear another bang
            if heard_tap():
                # do something
                b()
                return
        print("missed it")
def fx_noise_cancel(chunk_p, tres_p):
    """Silence ``chunk_p`` when its normalised RMS is below ``tres_p``.

    The chunk must be ``2 * CHUNK`` bytes long; otherwise a message is
    printed and the chunk is returned unchanged. The RMS is divided by
    2**15 before it is compared with ``tres_p``.
    """
    expected_size = 2 * CHUNK
    actual_size = len(chunk_p)
    if actual_size != expected_size:
        print('[echo] chunk size is not %d but %d' % (expected_size, actual_size))
        return chunk_p

    level = audioop.rms(chunk_p, 2) / float(math.pow(2, 15))
    if level >= tres_p:
        return chunk_p
    # multiply every sample by 0, i.e. mute the chunk
    return audioop.mul(chunk_p, 2, 0)
def record_cmd(self, filename, duration):
    """Record a spoken command to a WAV file and return the raw chunks.

    Audio is captured from ``self.audioInStream`` in batches of
    ``SAMPLERATE / SAMPLESIZE`` reads. A batch counts as active when more
    than 10 of its chunks have an RMS above ``SpeechServices.THRESHOLD``.
    Recording continues while ``activesecs < duration`` or
    ``inactivesecs < 1``; the collected data is then written to
    ``filename`` inside ``self.workingDirectory``.

    NOTE(review): ``frames`` is filled but never read.
    NOTE(review): indentation was reconstructed from a flattened source
    line; confirm the block nesting against the original file.
    """
    filedata = []
    activesecs = 0
    inactivesecs = 0
    self.audioInStream.start_stream()
    wf = wave.open(join(self.workingDirectory, filename), 'wb')
    wf.setnchannels(SpeechServices.CHANNELS)
    wf.setsampwidth(self.audio.get_sample_size(SpeechServices.FORMAT))
    wf.setframerate(SpeechServices.SAMPLERATE)
    while activesecs < duration or inactivesecs < 1:
        rmsdata = []
        frames = []
        for i in range( 0, int(SpeechServices.SAMPLERATE / SpeechServices.SAMPLESIZE)):
            sample = self.audioInStream.read(SpeechServices.SAMPLESIZE)
            frames.append(sample)
            filedata.append(sample)
            rmsdata.append(audioop.rms(sample, 2))
        # more than 10 loud chunks in this batch counts as activity
        if sum(i > SpeechServices.THRESHOLD for i in rmsdata) > 10:
            print "activity detected..."
            activesecs += 1
            inactivesecs = 0
        else:
            # a second inactive batch in a row discards what was recorded
            if inactivesecs > 1:
                activesecs = 0
                inactivesecs = 0
                filedata = []
            inactivesecs += 1
    print "Writing audio data to file..."
    wf.writeframes(b''.join(filedata))
    wf.close()
    self.audioInStream.stop_stream()
    return filedata
def stream():
    """Send non-silent microphone chunks over UDP until Ctrl-C.

    Starts ``udp_listener`` on a daemon thread with a fresh UDP socket,
    opens a duplex PyAudio stream and sends every chunk whose RMS is at
    least ``RMS_SILENCE`` to ``(UDP_IP, UDP_PORT)``. Ctrl-C stops the loop
    and releases the audio device.
    """
    import socket

    sock = socket.socket(
        socket.AF_INET,  # Internet
        socket.SOCK_DGRAM)  # UDP

    listener = threading.Thread(target=udp_listener, args=(sock, ))
    listener.daemon = True
    listener.start()

    audio = pyaudio.PyAudio()
    mic = audio.open(format=FORMAT,
                     channels=CHANNELS,
                     rate=RATE,
                     input=True,
                     output=True,
                     frames_per_buffer=CHUNK)
    print("\n" * 30)
    print("Streaming, press ctrl+c to stop recording")

    while True:
        try:
            chunk = mic.read(CHUNK)
            # skip chunks that are quieter than the silence level
            if audioop.rms(chunk, 2) >= RMS_SILENCE:
                sock.sendto(chunk, (UDP_IP, UDP_PORT))
        except KeyboardInterrupt:
            print("STOPPING")
            break

    mic.stop_stream()
    mic.close()
    audio.terminate()
    return
def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals
    """
    Perform voice activity detection on a given audio file.

    The file is read in chunks of ``frame_width`` frames. A chunk counts
    as silence when its energy is at or below the threshold returned by
    ``percentile(energies, 0.2)``. Consecutive non-silent chunks form a
    region, which is closed at the next silent chunk or once it is at
    least ``max_region_size`` seconds long.

    Args:
        filename: path of the WAV file to analyse.
        frame_width: number of frames per analysed chunk.
        min_region_size: regions shorter than this (seconds) are dropped.
        max_region_size: regions are split at this length (seconds).

    Returns:
        A list of ``(start, end)`` tuples in seconds.
    """
    reader = wave.open(filename)
    try:
        sample_width = reader.getsampwidth()
        rate = reader.getframerate()
        n_channels = reader.getnchannels()
        chunk_duration = float(frame_width) / rate

        n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))
        energies = []
        for _ in range(n_chunks):
            chunk = reader.readframes(frame_width)
            # NOTE(review): the width passed to rms is sample width times
            # channel count, i.e. one whole frame is treated as a single
            # sample; kept as in the original - confirm for multi-channel.
            energies.append(audioop.rms(chunk, sample_width * n_channels))
    finally:
        reader.close()

    threshold = percentile(energies, 0.2)

    elapsed_time = 0
    regions = []
    region_start = None

    for energy in energies:
        is_silence = energy <= threshold
        # Test against None explicitly: a region that starts at 0.0 is
        # falsy and was treated as "no region" by the original.
        in_region = region_start is not None
        max_exceeded = in_region and elapsed_time - region_start >= max_region_size

        if in_region and (max_exceeded or is_silence):
            if elapsed_time - region_start >= min_region_size:
                regions.append((region_start, elapsed_time))
            region_start = None
        elif not in_region and not is_silence:
            region_start = elapsed_time
        elapsed_time += chunk_duration
    return regions
def adjust_for_ambient_noise(self, source, duration=1):
    """
    Adjusts the energy threshold dynamically using audio from ``source`` (an ``AudioSource`` instance) to account for ambient noise.

    Intended to calibrate the energy threshold with the ambient energy level. Should be used on periods of audio without speech - will stop early if any speech is detected (a chunk whose energy exceeds the current threshold).

    The ``duration`` parameter is the maximum number of seconds that it will dynamically adjust the threshold for before returning. This value should be at least 0.5 in order to get a representative sample of the ambient noise.

    NOTE(review): indentation was reconstructed from a flattened source
    line; confirm whether the final ``print`` runs once per chunk or once
    after the loop.
    """
    assert isinstance(source, AudioSource), "Source must be an audio source"
    seconds_per_buffer = (source.CHUNK + 0.0) / source.RATE
    elapsed_time = 0
    # adjust energy threshold until a phrase starts
    while True:
        elapsed_time += seconds_per_buffer
        if elapsed_time > duration:
            break
        buffer = source.stream.read(source.CHUNK)
        # check if the audio input has stopped being quiet
        energy = audioop.rms( buffer, source.SAMPLE_WIDTH) # energy of the audio signal
        if energy > self.energy_threshold:
            break
        # dynamically adjust the energy threshold using asymmetric weighted average
        damping = self.dynamic_energy_adjustment_damping**seconds_per_buffer # account for different chunk sizes and rates
        target_energy = energy * self.dynamic_energy_ratio
        self.energy_threshold = self.energy_threshold * damping + target_energy * ( 1 - damping)
    print "threshold"
def music_visualizer(file_extension, volume_precision, x_size=350, y_size=350):
    """Creates music visualizer using pygame, pyalsa, and functions from recursive art.

    Loads ``volume_precision`` images named
    ``file_extension + str(i) + '.png'`` and, for every captured audio
    period, shows the image whose index is the loudness remapped from
    500..5000 to 1..``volume_precision``. Runs until the window is closed.

    Args:
        file_extension: common prefix of the image file names.
        volume_precision: number of images, i.e. of loudness steps.
        x_size: window width in pixels.
        y_size: window height in pixels.
    """
    # audio setup
    inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, 0)
    inp.setchannels(1)
    inp.setrate(16000)
    inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
    inp.setperiodsize(160)

    # generate frames - uncomment to generate a whole new set of images to pull from
    # generate_movie(file_extension, x_size, y_size, volume_precision)

    # pygame load list of screens to choose from
    surface_list = []
    for i in range(volume_precision):
        filename = file_extension + str(i) + '.png'
        surface_list.append(pygame.image.load(filename))

    # start visualizer
    pygame.init()
    size = (x_size, y_size)
    screen = pygame.display.set_mode(size)
    running = True
    while running:
        for event in pygame.event.get():
            if event.type == QUIT:
                running = False
        l, data = inp.read()
        if l:
            loudness = audioop.rms(data, 2)
            frame = int(remap_interval(loudness, 500, 5000, 1, volume_precision))
            try:
                current_surface = surface_list[frame]
            except IndexError:
                # Fall back to the last image. The original used the fixed
                # index 49, which itself fails for fewer than 50 images.
                current_surface = surface_list[-1]
            screen.blit(current_surface, (0, 0))
            pygame.display.update()
        time.sleep(.001)
def audioIn(self, in_data, frame_count, time_info, flag):
    """Audio callback: analyse one input buffer and update the feature state.

    Interprets ``in_data`` as single-precision floats, updates
    ``self.onset``, the running ``self.meanVol`` and ``self.silent``,
    and - when not silent - the bark bands, the low-pass onset value and
    a normalised ``self.rms``. Returns the audio data together with
    ``pyaudio.paContinue``.

    NOTE(review): ``audioop.rms(audio_data, 2)`` reads the float buffer as
    16-bit samples; confirm that this is intended.
    NOTE(review): indentation was reconstructed from a flattened source
    line; confirm which statements belong to ``if not self.silent``.
    """
    audio_data = np.frombuffer(in_data, dtype=np.single)
    self.onset = -1*self.onsetdetection(audio_data)
    # running mean of the onset value over volLength updates
    self.meanVol -= self.meanVol/self.volLength
    self.meanVol += self.onset/self.volLength
    if self.onset > 65:
        self.silent = True
    else:
        self.silent = False
    #no need to run analysis if nothing is playing
    if not self.silent:
        spec = self.spectrum(self.w(audio_data))
        self.barkbands = self.bark(spec)
        self.lowonset =-1*self.onsetdetection(self.lowpass(audio_data))
        if self.lowonset > self.onsetmax:
            self.onsetmax = self.lowonset
        else:
            self.lowonset = self.onsetmax - self.lowonset
            self.lowonset = self.lowonset / self.onsetmax
        # self.loud = self.loudness(audio_data)
        # if self.loud > self.maxloud:
        #     self.maxloud -= self.maxloud/20
        #     self.maxloud += self.loud / 20
        # self.loud = self.loud / self.maxloud
        self.rms = audioop.rms(audio_data, 2)
        if self.rms > self.maxrms:
            self.maxrms = self.rms
        # map the top 20% of the range (relative to the maximum) onto 0..1
        self.rms = max(((self.rms/self.maxrms)-0.8)*5, 0)
    return (audio_data, pyaudio.paContinue)
def main():
    """Adjust the output volume in response to microphone loudness.

    Opens an input stream, derives ``THRESHOLD`` as the measured baseline
    plus 1200 and, while the global ``boolean`` is true, reads a chunk
    every 0.1 s. When the chunk RMS exceeds the threshold
    ``adjustVolume`` is called; when ``resetOriginalVol`` returns True the
    volume is stepped from ``currentVol`` towards the original volume in
    steps of 10 via ``osascript``.

    NOTE(review): ``currentVol`` is not defined in this function;
    presumably a module-level name - verify.
    NOTE(review): indentation was reconstructed from a flattened source
    line; confirm the block nesting against the original file.
    """
    global boolean
    global stream
    ORIGINAL_VOLUME = outputVolume()
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
    BASELINE = determineBaseline(stream)
    print(BASELINE)
    #THRESHOLD = BASELINE * 4
    THRESHOLD = 1200 + BASELINE
    print("Button clicked.")
    #for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    while boolean == True:
        data = stream.read(CHUNK, False)
        rms = audioop.rms(data, 2) # here's where you calculate the volume
        time.sleep(.1)
        print(rms)
        if rms > THRESHOLD:
            adjustVolume(ORIGINAL_VOLUME, rms, THRESHOLD)
        if resetOriginalVol(rms, THRESHOLD) == True:
            for i in range(int(currentVol), int(ORIGINAL_VOLUME), 10):
                osascript.osascript("set volume output volume " + str(i))
    stream.stop_stream()
    stream.close()
    p.terminate()
def fx_echo(chunk_p):
    """Mix delayed, attenuated copies of earlier chunks into ``chunk_p``.

    Every processed chunk is stored in the global ``loop_frames`` together
    with the global counter ``time`` at which it arrived. A stored chunk
    whose age is a non-zero multiple of ``delay`` is scaled by ``intense``
    and either dropped (RMS below 1) or added to the current chunk. The
    counter is incremented once per call.

    The chunk is returned unchanged (after printing a message) when its
    size is not ``2 * CHUNK`` bytes or when ``setup`` is false.

    NOTE(review): the global named ``time`` is used as an integer
    counter here, not as the ``time`` module.
    NOTE(review): indentation was reconstructed from a flattened source
    line; confirm the block nesting against the original file.
    """
    global time
    global loop_frames
    global loop_times
    global delay
    global intense
    global setup
    if len(chunk_p) != 2 * CHUNK:
        print('[echo] chunk size is not %d but %d' % (2 * CHUNK, len(chunk_p)))
        return chunk_p
    if not setup:
        print('[echo] varibales are not set')
        return chunk_p
    #save data
    loop_times.append(time)
    loop_frames.append(chunk_p)
    #count value
    i = 0
    while i < len(loop_frames):
        if (time - loop_times[i]) % delay == 0 and time != loop_times[i]:
            loop_frames[i] = audioop.mul(loop_frames[i], 2, intense)
            if audioop.rms(loop_frames[i], 2) < 1:
                del loop_frames[i]
                del loop_times[i]
                # step back so the element that moved into slot i is not skipped
                i = i - 1
            else:
                chunk_p = audioop.add(chunk_p, loop_frames[i], 2)
        i = i + 1
    del i
    time = time + 1
    return chunk_p
def listen(mic_id, t, m, l):
    """Publish microphone loudness and report loudness peaks, forever.

    Reads 4096-frame blocks from input device ``mic_id``. The RMS of
    every block is stored in ``m.value`` while ``l`` is held. A timestamp
    is sent through ``t`` whenever the mean of the three middle values of
    the last five blocks is greater than both outer values and greater
    than 100. A 0 is sent through ``t`` once before listening starts.
    """
    block_frames = 4096
    mic = audio.open(format=pyaudio.paInt16,
                     rate=44100,
                     channels=1,
                     input_device_index=mic_id,
                     input=True,
                     frames_per_buffer=block_frames)
    recent = [0, 0, 0, 0, 0]
    threshold = 100
    t.send(0)
    while True:
        data = mic.read(block_frames, exception_on_overflow=False)
        rms = audioop.rms(data, 2)
        # slide the five-block window by one
        recent = recent[1:] + [rms]
        l.acquire()
        m.value = rms
        l.release()
        middle = (recent[1] + recent[2] + recent[3]) / 3.0
        if middle > max(recent[0], recent[4], threshold):
            t.send(time.time())
def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6):
    """Return ``(start, end)`` times, in seconds, of non-silent regions.

    The WAV file is read in chunks of ``frame_width`` frames and the RMS of
    each chunk is computed.  A chunk counts as silence when its energy is at
    or below ``percentile(energies, 0.2)``.  A region is closed when silence
    is met or it has lasted ``max_region_size`` seconds, and is kept only if
    it lasted at least ``min_region_size`` seconds.

    The reader is always closed, and the total duration is computed with
    true division so Python 2 no longer truncates it to whole seconds.
    """
    reader = wave.open(filename)
    try:
        sample_width = reader.getsampwidth()
        rate = reader.getframerate()
        n_channels = reader.getnchannels()
        chunk_duration = float(frame_width) / rate
        total_duration = reader.getnframes() / float(rate)
        n_chunks = int(total_duration / chunk_duration)

        # NOTE(review): the width passed to audioop is sample_width *
        # n_channels, i.e. a whole frame is treated as one sample -- confirm
        # this is intended.
        energies = [
            audioop.rms(reader.readframes(frame_width), sample_width * n_channels)
            for _ in range(n_chunks)
        ]
    finally:
        reader.close()

    threshold = percentile(energies, 0.2)

    elapsed_time = 0
    regions = []
    region_start = None

    # NOTE(review): indentation was lost in the source; the reset of
    # ``region_start`` is assumed to sit inside the minimum-size check.
    # NOTE(review): ``region_start`` is tested by truthiness, so a region
    # starting at time 0 is treated like "no region" -- confirm whether
    # that is intended before changing it.
    for energy in energies:
        is_silence = energy <= threshold
        max_exceeded = region_start and elapsed_time - region_start >= max_region_size

        if (max_exceeded or is_silence) and region_start:
            if elapsed_time - region_start >= min_region_size:
                regions.append((region_start, elapsed_time))
                region_start = None
        elif (not region_start) and (not is_silence):
            region_start = elapsed_time

        elapsed_time += chunk_duration

    return regions
def record_db():
    """Record one chunk from input device index 1 and return its level in dB.

    The level is ``20 * log10(rms)`` where ``rms`` is computed by
    ``audioop.rms`` directly on the raw 16-bit PCM bytes.  Digital silence
    (``rms == 0``) yields ``-inf`` instead of a NumPy divide-by-zero
    warning.

    Changes from the previous implementation:
    * the RMS used to be computed on the byte image of a float64 array
      interpreted as 16-bit samples, which is not the signal level;
    * all negative samples were zeroed beforehand, discarding half of the
      waveform;
    * ``np.float`` no longer exists in current NumPy releases;
    * the stream and PyAudio instance are now released on error too.
    """
    p = pyaudio.PyAudio()
    try:
        # Explicitly opens device index 1 (not the system default device).
        stream = p.open(format=pyaudio.paInt16,
                        channels=2,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK,
                        input_device_index=1)
        try:
            raw = stream.read(CHUNK)
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    rms = audioop.rms(raw, 2)
    if rms == 0:
        return float("-inf")
    return 20 * np.log10(rms)
def listen(self, with_filter=False, print_rms=False):
    """Block until the input level rises above ``self.THRESHOLD``.

    Opens the stream via ``self.open_stream()`` and reads chunks of
    ``self.CHUNK`` from ``self.stream`` until one exceeds the threshold.

    :param with_filter: when true, the chunk is passed through
        ``self.filter_stream`` and the level is taken from ``self.rms``;
        otherwise ``ao.rms`` is applied to the raw chunk with width 1.
    :param print_rms: when true, print every measured level.
    :returns: ``None`` once a chunk above the threshold has been read.
    """
    self.open_stream()
    print("listening now...")

    while True:
        chunk = self.stream.read(self.CHUNK)

        if with_filter:
            # The filter path previously referenced an undefined name
            # (``data``) and raised NameError.
            # NOTE(review): the raw chunk is passed to filter_stream here;
            # confirm that is the input format filter_stream expects.
            filtered = self.filter_stream(chunk)
            rms_value = self.rms(tuple(filtered), bytestream=False)
        else:
            rms_value = ao.rms(chunk, 1)

        if print_rms:
            print(rms_value)

        if rms_value > self.THRESHOLD:
            break
def do_test(self):
    """Measure the per-channel microphone RMS and report it as an LED pattern.

    Waits on ``self.key.read_loop()`` for a key event whose keystate is 2,
    records from an 8-channel recorder, accumulates the RMS of the first six
    channels, averages over 10 and marks a channel's LED position as "0"
    when the average falls outside ``parameters["value"] +/-
    parameters["bias"]``.  The resulting pattern is passed to
    ``self.debug`` and ``self.ret`` is returned.
    """
    # NOTE(review): the source had lost its indentation; the nesting below
    # is a reconstruction -- confirm against the original file.
    counter = 0
    self.debug("show")
    led_status = ["1","1","1","1","1","1","1","1","1","1","1","1"]
    # Index into led_status that corresponds to each of the six channels.
    led_location = [10,0,2,4,6,8]
    mic_rms = [0,0,0,0,0,0]
    for event in self.key.read_loop():
        if event.type == ecodes.EV_KEY:
            # NOTE(review): keystate 2 presumably means "key held" -- confirm.
            if categorize(event).keystate == 2:
                time.sleep(4)
                if self.platform == "respeaker v2":
                    os.system("arecord -d 1 -f S16_LE -r 16000 -Dhw:0,0 -c 8 /tmp/aaa.wav")
                with recorder.recorder(16000, 8, 16000 / 16) as mic:
                    for chunk in mic.read_chunks():
                        for i in range(6):
                            # De-interleave channel i out of the 8-channel chunk.
                            data = np.fromstring(chunk, dtype='int16')
                            data = data[i::8].tostring()
                            rms = audioop.rms(data, 2)
                            #rms_db = 20 * np.log10(rms)
                            #print('channel: {} RMS: {} dB'.format(i,rms))
                            # The first chunk (counter == 0) is not accumulated.
                            if counter != 0:
                                mic_rms[i] = mic_rms[i] + rms
                        if counter == 10:
                            break
                        counter = counter + 1
                # Only the first matching key event is handled.
                break
    for i in range(6):
        mic_rms[i] = mic_rms[i] / 10
        print('channel: {} RMS: {} dB'.format(i,mic_rms[i]))
        # Outside the accepted band -> switch this channel's LED flag to "0".
        if self.parameters["value"] - self.parameters["bias"] > mic_rms[i] \
                or self.parameters["value"] + self.parameters["bias"] < mic_rms[i]:
            led_status[led_location[i]] = "0"
    self.debug("".join(led_status))
    return self.ret
def listen(mic, should_stop, shared_mic, lock):
    """Publish the loudness of microphone ``mic`` until asked to stop.

    A threshold is obtained from ``get_threshold``; afterwards every chunk
    whose RMS exceeds it is written to ``shared_mic.value`` while ``lock``
    is held.  The loop ends when ``should_stop.value == 1``.

    The lock, the stream and the PyAudio instance are released even when
    an exception interrupts the loop.
    """
    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(format=pyaudio.paInt16,
                            rate=44100,
                            channels=1,
                            input_device_index=mic,
                            input=True,
                            frames_per_buffer=4096)
        try:
            print('initializing mic ' + str(mic))
            threshold = get_threshold(stream, should_stop)
            print('mic ' + str(mic) + ' threshold acquired')

            while True:
                data = stream.read(4096, exception_on_overflow=False)
                rms = audioop.rms(data, 2)
                if rms > threshold:
                    lock.acquire()
                    try:
                        shared_mic.value = rms
                    finally:
                        lock.release()
                if should_stop.value == 1:
                    break
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    print('\nprocess ' + str(mic) + ' stopped')
def audio_callback(self, indata, outdata, frames: int, time_, status):
    """Send the captured block as a voice packet and fill the playback block.

    Input quieter than ``self.noise_threshold`` is replaced by zero bytes of
    the same length.  The (possibly silenced) audio is encoded, wrapped in a
    ``ClientVoiceFramePacket``, pickled and sent unless the client is
    closing.  ``outdata`` is filled with decoded samples from the voice
    buffer, or with zero bytes when no samples are available or the client
    is muted.
    """
    # NOTE(review): indentation was lost in the source; the frame counter is
    # assumed to be incremented only when a packet is actually sent.
    level = audioop.rms(indata, consts.BYTES_PER_SAMPLE)

    if level < self.noise_threshold:
        audio = bytes(len(indata))
    else:
        audio = bytes(indata)
        if self.sent_frames_count > self.release_frame:
            self.release_frame = self.sent_frames_count + self.release_frame_duration

    packet = packets.ClientVoiceFramePacket(
        frameId=time(),
        clientId=self.client_id,
        voiceFrame=self.encoder.encode(audio),
    )
    payload = pickle.dumps(packet, protocol=consts.PICKLE_PROTOCOL)

    if not self.closing:
        self.voice_socket.sendto(payload, self.voice_addr)
        self.sent_frames_count += 1

    samples = self.voice_buffer.get_samples()
    if samples is None or self.muted is not False:
        outdata[:] = bytes(len(outdata))
    else:
        outdata[:] = self.encoder.decode(samples)
def calc_energy(sound_chunk, sample_width):
    """Return the RMS energy of ``sound_chunk``.

    ``sample_width`` is the size of one sample in bytes, as expected by
    ``audioop.rms``.
    """
    energy = audioop.rms(sound_chunk, sample_width)
    return energy
# Basic properties of the opened WAV file.
frames = wav.getnframes()
rate = wav.getframerate()
width = wav.getsampwidth()
numSeconds = frames / float(rate)

# threshold for detecting speech. white noise seems to fall below this threshold
THRESHOLD = math.pow(300, 3)
CHUNK_SIZE = 1024

# Per-chunk RMS, both raw and cubed (the cube matches the scale of THRESHOLD).
original_rms_vals = []
rms_vals = []
# Floor division keeps the chunk count an integer on Python 3 as well;
# ``range(frames / CHUNK_SIZE)`` raises TypeError there.
for chunk_num in range(frames // CHUNK_SIZE):
    sample = wav.readframes(CHUNK_SIZE)
    rms = audioop.rms(sample, width)
    original_rms_vals.append(rms)
    rms_vals.append(math.pow(rms, 3))

longest_start = 0
longest_end = 0
cur_len = 0
cur_start = 0
segment_count = 0

# np_rms_vals = np.array(original_rms_vals)
# std_dev = np.std(np_rms_vals)
time_strings = []
def listen(self, source, timeout=None):
    """
    Records a single phrase from ``source`` (an ``AudioSource`` instance) into an ``AudioData`` instance, which it returns.

    This is done by waiting until the audio has an energy above ``recognizer_instance.energy_threshold`` (the user has started speaking), and then recording until it encounters ``recognizer_instance.pause_threshold`` seconds of silence or there is no more audio input. The ending silence is not included.

    The ``timeout`` parameter is the maximum number of seconds that it will wait for a phrase to start before giving up and raising ``TimeoutError``. If ``None`` (or any other falsy value), it will wait indefinitely.
    """
    # NOTE(review): the source had lost its indentation; the nesting below
    # is a reconstruction -- confirm against the original file.
    assert isinstance(source, AudioSource), "Source must be an audio source"

    # record audio data as raw samples
    frames = collections.deque()
    assert self.pause_threshold >= self.quiet_duration >= 0
    seconds_per_buffer = (source.CHUNK + 0.0) / source.RATE
    pause_buffer_count = int(math.ceil(self.pause_threshold / seconds_per_buffer))  # number of buffers of quiet audio before the phrase is complete
    quiet_buffer_count = int(math.ceil(self.quiet_duration / seconds_per_buffer))  # maximum number of buffers of quiet audio to retain before and after
    elapsed_time = 0

    # store audio input until the phrase starts
    while True:
        elapsed_time += seconds_per_buffer
        if timeout and elapsed_time > timeout:  # handle timeout if specified
            raise TimeoutError("listening timed out")

        buffer = source.stream.read(source.CHUNK)
        if len(buffer) == 0: break  # reached end of the stream
        frames.append(buffer)

        # check if the audio input has stopped being quiet
        energy = audioop.rms(buffer, source.SAMPLE_WIDTH)  # energy of the audio signal
        if energy > self.energy_threshold: break

        # dynamically adjust the energy threshold using an asymmetric weighted average
        if self.dynamic_energy_threshold:
            damping = self.dynamic_energy_adjustment_damping**seconds_per_buffer  # account for different chunk sizes and rates
            target_energy = energy * self.dynamic_energy_ratio
            self.energy_threshold = self.energy_threshold * damping + target_energy * (1 - damping)

        if len(frames) > quiet_buffer_count:  # ensure we only keep the needed amount of quiet buffers
            frames.popleft()

    # read audio input until the phrase ends
    pause_count = 0
    while True:
        buffer = source.stream.read(source.CHUNK)
        if len(buffer) == 0: break  # reached end of the stream
        frames.append(buffer)

        # check if the audio input has gone quiet for longer than the pause threshold
        energy = audioop.rms(buffer, source.SAMPLE_WIDTH)  # energy of the audio signal
        if energy > self.energy_threshold:
            pause_count = 0
        else:
            pause_count += 1
        if pause_count > pause_buffer_count:  # end of the phrase
            break

    # obtain frame data
    for i in range(quiet_buffer_count, pause_count): frames.pop()  # remove extra quiet frames at the end
    frame_data = b"".join(list(frames))

    return AudioData(source.RATE, self.samples_to_flac(source, frame_data))
def find_speech_regions(filename, frame_width=4096, silent_percentile=0.2, min_region_size=0.5, max_region_size=6, silent_frame_cut=2, percentile_interval=30):
    """
    Perform voice activity detection on a given audio file.

    The file is read in chunks of ``frame_width`` frames and an RMS energy
    is computed per chunk.  Each chunk is compared against a local silence
    threshold: the ``silent_percentile`` percentile of the energies in a
    window of about ``percentile_interval`` seconds centred on the chunk
    (edges are padded with the first/last energy).  A region is closed after
    ``silent_frame_cut`` consecutive silent chunks or once it reaches
    ``max_region_size`` seconds, and kept if it lasted at least
    ``min_region_size`` seconds.  Reported regions are widened by one chunk
    duration on each side.

    Returns a list of ``(start, end)`` tuples in seconds.  The wave reader
    is always closed.
    """
    reader = wave.open(filename)
    try:
        sample_width = reader.getsampwidth()
        rate = reader.getframerate()
        n_channels = reader.getnchannels()
        chunk_duration = float(frame_width) / rate
        n_chunks = int(math.ceil(reader.getnframes() * 1.0 / frame_width))

        # NOTE(review): a whole frame (sample_width * n_channels bytes) is
        # passed to audioop as one sample -- confirm this is intended.
        energies = [
            audioop.rms(reader.readframes(frame_width), sample_width * n_channels)
            for _ in range(n_chunks)
        ]
    finally:
        reader.close()

    # Local silence threshold per chunk; indices outside the recording are
    # clamped to the first/last chunk.
    n_samples_30_sec = int(percentile_interval / chunk_duration)
    last_index = len(energies) - 1
    new_eng = []
    for i in range(len(energies)):
        vals = [
            energies[min(max(i + j, 0), last_index)]
            for j in range(-n_samples_30_sec // 2, n_samples_30_sec // 2)
        ]
        new_eng.append(percentile(vals, silent_percentile))

    elapsed_time = 0
    regions = []
    region_start = None
    silent_frames = 0
    first_silence = -1

    # NOTE(review): indentation was lost in the source; the nesting below is
    # a reconstruction -- confirm.  ``region_start`` is tested by
    # truthiness, so a start time of 0 is treated like "no region".
    for energy, local_threshold in zip(energies, new_eng):
        is_silence = energy <= local_threshold
        max_exceeded = region_start and elapsed_time - region_start >= max_region_size

        if is_silence:
            if silent_frames == 0:
                first_silence = elapsed_time
            silent_frames += 1
        else:
            silent_frames = 0

        if not is_silence or max_exceeded:
            first_silence = elapsed_time

        if (max_exceeded or silent_frames >= silent_frame_cut) and region_start:
            if elapsed_time - region_start >= min_region_size:
                regions.append((region_start - chunk_duration, first_silence + chunk_duration))
                region_start = None
        elif (not region_start) and (not is_silence):
            region_start = elapsed_time
            silent_frames = 0

        elapsed_time += chunk_duration

    return regions
data32 = data32.astype(np.float32, order='C') / 32768.0 # aubio #samples = np.fromstring(data32, dtype=aubio.float_type) samples = np.frombuffer(data32, dtype=aubio.float_type) pitch = pDetection(samples)[0] # Compute the energy (volume) of the # current frame. #volume = (np.sum(samples**2)/len(samples))*1000 # Format the volume output so that at most # it has six decimal numbers. #volume = "{:.6f}".format(volume) rms = audioop.rms(data, 2) # here's where you calculate the volume # difference between current volume and last volume diff = rms - lastRms color = "red" percentage = rms / 100 # checks if |diff| exceeds the difference_threshold if (abs(diff) >= DIFFERENCE_THRESHOLD): # sets the brightness value bri = int(BRI_MODIFIER * percentage) if (bri < 0): bri = 0
def listen(self, source, timeout=None):
    """
    Records a single phrase from ``source`` (an ``AudioSource`` instance) into an ``AudioData`` instance, which it returns.

    This is done by waiting until the audio has an energy above ``recognizer_instance.energy_threshold`` (the user has started speaking), and then recording until it encounters ``recognizer_instance.pause_threshold`` seconds of non-speaking or there is no more audio input. The ending silence is not included.

    The ``timeout`` parameter is the maximum number of seconds that it will wait for a phrase to start before giving up and throwing an ``speech_recognition.WaitTimeoutError`` exception. If ``timeout`` is ``None``, it will wait indefinitely.

    When the stream ends (an empty buffer is read), whatever was captured so far is returned, even if it is shorter than ``recognizer_instance.phrase_threshold``; previously that case retried forever on an exhausted stream.
    """
    assert isinstance(source, AudioSource), "Source must be an audio source"
    assert self.pause_threshold >= self.non_speaking_duration >= 0

    seconds_per_buffer = (source.CHUNK + 0.0) / source.SAMPLE_RATE
    pause_buffer_count = int(math.ceil(self.pause_threshold / seconds_per_buffer))  # number of buffers of non-speaking audio before the phrase is complete
    phrase_buffer_count = int(math.ceil(self.phrase_threshold / seconds_per_buffer))  # minimum number of buffers of speaking audio before we consider the speaking audio a phrase
    non_speaking_buffer_count = int(math.ceil(self.non_speaking_duration / seconds_per_buffer))  # maximum number of buffers of non-speaking audio to retain before and after

    # read audio input for phrases until there is a phrase that is long enough
    elapsed_time = 0  # number of seconds of audio read
    while True:
        frames = collections.deque()

        # store audio input until the phrase starts
        while True:
            elapsed_time += seconds_per_buffer
            if timeout and elapsed_time > timeout:  # handle timeout if specified
                raise WaitTimeoutError("listening timed out")

            buffer = source.stream.read(source.CHUNK)
            if len(buffer) == 0: break  # reached end of the stream
            frames.append(buffer)
            if len(frames) > non_speaking_buffer_count:  # ensure we only keep the needed amount of non-speaking buffers
                frames.popleft()

            # detect whether speaking has started on audio input
            energy = audioop.rms(buffer, source.SAMPLE_WIDTH)  # energy of the audio signal
            if energy > self.energy_threshold: break

            # dynamically adjust the energy threshold using an asymmetric weighted average
            if self.dynamic_energy_threshold:
                damping = self.dynamic_energy_adjustment_damping**seconds_per_buffer  # account for different chunk sizes and rates
                target_energy = energy * self.dynamic_energy_ratio
                self.energy_threshold = self.energy_threshold * damping + target_energy * (1 - damping)

        # read audio input until the phrase ends
        pause_count, phrase_count = 0, 0
        while True:
            elapsed_time += seconds_per_buffer

            buffer = source.stream.read(source.CHUNK)
            if len(buffer) == 0: break  # reached end of the stream
            frames.append(buffer)
            phrase_count += 1

            # check if speaking has stopped for longer than the pause threshold on the audio input
            energy = audioop.rms(buffer, source.SAMPLE_WIDTH)  # energy of the audio signal
            if energy > self.energy_threshold:
                pause_count = 0
            else:
                pause_count += 1
            if pause_count > pause_buffer_count:  # end of the phrase
                break

        # check how long the detected phrase is, and retry listening if the phrase is too short
        phrase_count -= pause_count
        # An empty buffer means the stream is exhausted: retrying would only
        # read empty buffers again, so stop even if the phrase is short.
        if phrase_count >= phrase_buffer_count or len(buffer) == 0:
            break  # phrase is long enough or end of stream, stop listening

    # obtain frame data
    for i in range(pause_count - non_speaking_buffer_count): frames.pop()  # remove extra non-speaking frames at the end
    frame_data = b"".join(list(frames))

    return AudioData(frame_data, source.SAMPLE_RATE, source.SAMPLE_WIDTH, source.CHANNELS)
def monitorAudioStream(self):
    """Wait for a loud sound, record it and save the recording as a WAV file.

    The input stream is monitored chunk by chunk.  The first chunk louder
    than the start threshold triggers recording; later chunks louder than
    the "additional time" threshold are appended and may extend the
    remaining chunk budget by 5, while quieter chunks decrement it.
    Recording ends once the budget drops below zero.  The file name is
    derived from the current time and stored in
    ``self.wav_output_filename``.

    Read errors restart the stream and monitoring continues; only
    ``Exception`` subclasses are handled that way, so Ctrl+C
    (``KeyboardInterrupt``) and ``SystemExit`` can now stop the loop.
    """
    # NOTE(review): the source had lost its indentation; the nesting below
    # is a reconstruction -- confirm against the original file.
    form_1 = pyaudio.paInt32  # 32-bit resolution
    numberOfSecondsToExtend = 3  #Number of seconds that the recording will continue if it hears a lound sound.
    chans = 1  # 1 channel
    samp_rate = 44100  # 44.1kHz sampling rate
    chunk = 4096 * 3  # number of frames per buffer
    record_secs = 3  # seconds to record over a specific decibel rating.

    # Device index and thresholds are tuned per operating system.
    # NOTE(review): on any other OS only a message is printed and the
    # stream set-up below fails with NameError on ``dev_index``.
    operating_system = platform.system()
    if (operating_system == "Darwin"):
        dev_index = 0
        threshhold_to_start_recording = 61
        threshhold_to_add_additional_time = 56  #If sound above this threshhold is detected, it will add more time to the stream.
        threshhold_of_amplifacation = 10
    elif (operating_system == "Linux"):
        dev_index = 2
        threshhold_to_start_recording = 28
        threshhold_to_add_additional_time = 24  #If sound above this threshhold is detected, it will add more time to the stream.
        threshhold_of_amplifacation = 20
    else:
        print("os is unknown")

    audio = pyaudio.PyAudio()  # create pyaudio instantiation

    # create pyaudio stream
    stream = audio.open(format = form_1,rate = samp_rate,channels = chans, \
                        input_device_index = dev_index,input = True, \
                        frames_per_buffer=chunk)
    print("Monitoring")
    frames = []
    chunkToRecord = 20
    barkTriggered = False

    # loop through stream and append audio chunks to frame array
    while True:
        try:
            data = stream.read(chunk, exception_on_overflow=False)
            # NOTE(review): the stream is 32-bit but the RMS is taken with a
            # sample width of 1; the thresholds above are tuned to that.
            loudness = audioop.rms(data, 1)
            print(loudness)
            #First if that is triggered by loud noise.
            if ((loudness > threshhold_to_start_recording) and (barkTriggered == False)):
                frames.append(data)
                barkTriggered = True
                print("TRIGGERED")
            #else if that only is activated when another loud noise is detected.
            elif ((loudness > threshhold_to_add_additional_time) and (barkTriggered)):
                if (chunkToRecord < 20):
                    print("adding time")
                    chunkToRecord = chunkToRecord + 5
                frames.append(data)
                if (chunkToRecord < 0):
                    break
            elif (barkTriggered):
                if (barkTriggered == True):
                    chunkToRecord = chunkToRecord - (1)
                    if (chunkToRecord < 0):
                        break
                print(chunkToRecord)
        except Exception:
            # Best effort: restart the stream and keep monitoring.
            stream.stop_stream()
            stream.start_stream()
            continue

    self.wav_output_filename = datetime.datetime.now().strftime(
        "%I:%M:%S:%p") + ".wav"
    print("finished recording")
    print(self.wav_output_filename)

    # stop the stream, close it, and terminate the pyaudio instantiation
    stream.stop_stream()
    stream.close()
    audio.terminate()

    # save the audio frames as .wav file
    wavefile = wave.open(self.wav_output_filename, 'wb')
    wavefile.setnchannels(chans)
    wavefile.setsampwidth(audio.get_sample_size(form_1))
    wavefile.setframerate(samp_rate)
    wavefile.writeframes(b''.join(frames))
    wavefile.close()
def run(self) -> None:
    """Drive a Hue light's brightness from the audio peak level.

    Does nothing when no light is configured.  Otherwise loops until an
    ``Exit`` event arrives:

    * drains ``self.events`` -- ``StartedPlaying`` stores the light's
      current state and activates the effect, ``StoppedPlaying`` deactivates
      it and restores the stored brightness;
    * drains ``self.pcm_in`` keeping only the newest sample;
    * while active, maps the sample's peak (relative to the largest peak
      seen so far, starting at 3000) to a 0-255 brightness and sends it to
      the bridge;
    * sleeps 0.1 s.
    """
    # NOTE(review): the source had lost its indentation; the nesting below
    # is a reconstruction -- confirm against the original file.
    if not self.light_id:
        logger.warning("No light identified, not starting Hue")
        return

    logger.debug("Starting Hue")
    max_peak = 3000
    audio = None
    stopping = False

    while not stopping:
        try:
            # Non-blocking get: queue.Empty ends the drain.
            while event := self.events.get(False):
                if isinstance(event, StartedPlaying):
                    try:
                        self.light_state = hue_response(
                            requests.get(
                                f"http://{self.host}/api/{self.username}/lights/{self.light_id}"
                            ))
                        logger.debug("Stored light state")
                    except HueError as e:
                        logger.warning(
                            "Error loading current light state: %s", e)
                    self.active = True
                elif isinstance(event, StoppedPlaying):
                    self.active = False
                    original_brightness = self.light_state.get(
                        "state", {}).get("bri")
                    if original_brightness is not None:
                        try:
                            hue_response(
                                requests.put(
                                    f"http://{self.host}/api/{self.username}/lights/{self.light_id}/state",
                                    json={"bri": original_brightness},
                                ))
                            logger.info(
                                "Restored %s to previous brightness",
                                self.light)
                        except HueError as e:
                            logger.warning(
                                "Error restoring light brightness: %s", e)
                elif isinstance(event, Exit):
                    stopping = True
        except queue.Empty:
            ...

        if stopping:
            break

        try:
            # Keep only the most recent sample.
            while sample := self.pcm_in.get(False):
                audio = sample
        except queue.Empty:
            ...

        if audio and self.active:
            # NOTE(review): the channel count is passed where audioop
            # expects the sample width in bytes -- confirm.
            peak = audioop.max(audio.raw, audio.channels)
            max_peak = max(peak, max_peak)
            brightness = int(peak / max_peak * 255)
            logger.debug(f"Brightness: {brightness}")
            requests.put(
                f"http://{self.host}/api/{self.username}/lights/{self.light_id}/state",
                json={
                    "bri": brightness,
                    "transitiontime": 1
                },
            )

        time.sleep(0.1)

    logger.info("Hue stopped")
def _snr(self, frames): rms = audioop.rms(b''.join(frames), int(self._input_bits/8)) if rms > 0 and self._threshold > 0: return 20.0 * math.log(rms/self._threshold, 10) else: return 0
def _in():
    """Yield one array of audio frames per detected phrase.

    Reads chunks from a ``sounddevice.Stream`` until ``oa.core.finished`` is
    set.  A phrase starts when a chunk's RMS exceeds the (optionally
    self-adjusting) energy threshold and ends after more than
    ``pause_threshold`` seconds of quieter chunks or when
    ``phrase_time_limit`` is exceeded.  Phrases shorter than
    ``phrase_threshold`` are discarded and listening restarts.  Raises
    ``Exception`` when a configured ``timeout`` elapses.
    """
    # NOTE(review): the source had lost its indentation; the nesting below
    # is a reconstruction -- confirm against the original file.
    _config = DEFAULT_CONFIG.copy()

    seconds_per_buffer = _config.get("chunk") / _config.get("sample_rate")
    pause_buffer_count = math.ceil(
        _config.get("pause_threshold") / seconds_per_buffer)  # Number of buffers of non-speaking audio during a phrase before the phrase should be considered complete.
    phrase_buffer_count = math.ceil(
        _config.get("phrase_threshold") / seconds_per_buffer
    )  # Minimum number of buffers of speaking audio before we consider the speaking audio a phrase.
    non_speaking_buffer_count = math.ceil(
        _config.get("non_speaking_duration") / seconds_per_buffer
    )  # Maximum number of buffers of non-speaking audio to retain before and after a phrase.

    stream = sounddevice.Stream(samplerate=_config.get("sample_rate"),
                                channels=_config.get("channels"),
                                dtype='int16')
    with stream:
        while not oa.core.finished.is_set():
            elapsed_time = 0  # Number of seconds of audio read
            buf = b""  # An empty buffer means that the stream has ended and there is no data left to read.
            while not oa.core.finished.is_set():
                frames = collections.deque()

                # Store audio input until the phrase starts
                while not oa.core.finished.is_set():
                    # Handle waiting too long for phrase by raising an exception
                    elapsed_time += seconds_per_buffer
                    if _config.get("timeout"
                                   ) and elapsed_time > _config.get("timeout"):
                        raise Exception(
                            "Listening timed out while waiting for phrase to start."
                        )

                    # stream.read returns a tuple; only its first element is used here.
                    buf = stream.read(_config.get("chunk"))[0]
                    frames.append(buf)
                    if len(frames) > non_speaking_buffer_count:
                        # Ensure we only keep the required amount of non-speaking buffers.
                        frames.popleft()

                    # Detect whether speaking has started on audio input.
                    energy = audioop.rms(buf, _config.get(
                        "sample_width"))  # Energy of the audio signal.
                    if energy > _config.get("energy_threshold"):
                        break

                    # Dynamically adjust the energy threshold using asymmetric weighted average.
                    if _config.get("dynamic_energy_threshold"):
                        damping = _config.get(
                            "dynamic_energy_adjustment_damping"
                        )**seconds_per_buffer  # Account for different chunk sizes and rates.
                        target_energy = energy * _config.get(
                            "dynamic_energy_ratio")
                        _config["energy_threshold"] = _config.get(
                            "energy_threshold") * damping + target_energy * (
                                1 - damping)

                # Read audio input until the phrase ends.
                pause_count, phrase_count = 0, 0
                phrase_start_time = elapsed_time
                while not oa.core.finished.is_set():
                    # Handle phrase being too long by cutting off the audio.
                    elapsed_time += seconds_per_buffer
                    if _config.get(
                            "phrase_time_limit"
                    ) and elapsed_time - phrase_start_time > _config.get(
                            "phrase_time_limit"):
                        break

                    buf = stream.read(_config.get("chunk"))[0]
                    frames.append(buf)
                    phrase_count += 1

                    # Check if speaking has stopped for longer than the pause threshold on the audio input.
                    energy = audioop.rms(
                        buf, _config.get("sample_width")
                    )  # unit energy of the audio signal within the buffer.
                    if energy > _config.get("energy_threshold"):
                        pause_count = 0
                    else:
                        pause_count += 1
                    if pause_count > pause_buffer_count:  # End of the phrase.
                        break

                # Check how long the detected phrase is and retry listening if the phrase is too short.
                phrase_count -= pause_count  # Exclude the buffers of the trailing pause.
                if phrase_count >= phrase_buffer_count or len(buf) == 0:
                    break  # Phrase is long enough or we've reached the end of the stream, so stop listening.

            # Obtain frame data.
            for _ in range(pause_count - non_speaking_buffer_count):
                frames.pop()  # Remove extra non-speaking frames at the end.
            frame_data = numpy.concatenate(frames)
            yield frame_data
def wait_for_keyword(self, keyword=None):
    """Listen on the input device until the keyword has been recognised.

    Two daemon worker threads run ``self.check_for_keyword`` on recordings
    placed in a queue.  Incoming frames are kept in a 30-frame ring buffer.
    Recording starts when a frame's SNR reaches 10 dB (seeded with the last
    10 buffered frames) and stops, handing the frames to the workers, once
    more than 20 frames were recorded and the SNR of the last 10 is at most
    3 dB or 60 frames were collected.  While idle with a full ring buffer,
    ``self._threshold`` is updated from the buffered background noise.

    :param keyword: keyword to wait for; defaults to ``self._keyword``.
    :returns: ``self._transcribed`` once a worker has set the event.
    """
    # NOTE(review): the source had lost its indentation; the nesting below
    # is a reconstruction -- confirm against the original file.
    if not keyword:
        keyword = self._keyword
    frame_queue = queue.Queue()
    keyword_uttered = threading.Event()

    # FIXME: not configurable yet
    num_worker_threads = 2

    for _ in range(num_worker_threads):
        t = threading.Thread(target=self.check_for_keyword,
                             args=(frame_queue, keyword_uttered, keyword))
        t.daemon = True
        t.start()

    frames = collections.deque([], 30)
    recording = False
    recording_frames = []
    self._logger.info("Waiting for keyword '%s'...", keyword)
    for frame in self._input_device.record(self._input_chunksize,
                                           self._input_bits,
                                           self._input_channels,
                                           self._input_rate):
        if keyword_uttered.is_set():
            if self._logger.isEnabledFor(logging.DEBUG):
                self._logger.info("Keyword %s has been uttered", keyword)
            else:
                print(">> %r" % self._transcribed)
            return self._transcribed
        frames.append(frame)
        if not recording:
            snr = self._snr([frame])
            if snr >= 10:  # 10dB
                # Loudness is higher than normal, start recording and use
                # the last 10 frames to start
                self._logger.debug("Started recording on device '%s'",
                                   self._input_device.slug)
                self._logger.debug("Triggered on SNR of %sdB", snr)
                recording = True
                recording_frames = list(frames)[-10:]
            elif len(frames) >= frames.maxlen:
                # Threshold SNR not reached. Update threshold with
                # background noise.  The frames are bytes, so they must be
                # joined with a bytes separator (a str separator raises
                # TypeError on Python 3).
                self._threshold = float(audioop.rms(b"".join(frames), 2))
        else:
            # We're recording
            recording_frames.append(frame)
            if len(recording_frames) > 20:
                # If we recorded at least 20 frames, check if we're below
                # threshold again
                last_snr = self._snr(recording_frames[-10:])
                self._logger.debug(
                    "Recording's SNR dB: %f", last_snr)
                if last_snr <= 3 or len(recording_frames) >= 60:
                    # The loudness of the sound is not at least as high as
                    # the the threshold, or we've been waiting too long
                    # we'll stop recording now
                    recording = False
                    self._logger.debug("Recorded %d frames",
                                       len(recording_frames))
                    frame_queue.put(tuple(recording_frames))
                    self._threshold = float(
                        audioop.rms(b"".join(frames), 2))
def rms(self):
    """Return the RMS of ``self._data`` using ``self.sample_width`` bytes per sample."""
    raw = self._data
    width = self.sample_width
    return audioop.rms(raw, width)
def getScore(self, data):
    """Return one third of the RMS of ``data``, read as 16-bit samples."""
    level = audioop.rms(data, 2)
    return level / 3
input=True, output=False, frames_per_buffer=CHUNK)

# NOTE(review): this snippet starts in the middle of a call whose head is
# not visible, and its indentation was lost; the nesting below is a
# reconstruction -- confirm against the original file.
audio_buffer = []  # cepstra of the chunks that were loud enough
no_sounds = 0      # number of quiet chunks seen since the last reset


def calc_mfcc(audio_buf):
    """Flatten the buffered cepstra by one level and convert them with
    ``make_mfcc.convert_center_mfcc``."""
    mfcc_list = [flatten for inner in audio_buf for flatten in inner]
    return make_mfcc.convert_center_mfcc(mfcc_list)


while stream.is_active():
    data = stream.read(CHUNK)
    rms = audioop.rms(data, 2)
    # Only chunks with an RMS above 60 are analysed.
    if rms > 60:
        sig = np.frombuffer(data, dtype="int16")
        ceps, mspec, spec = mfcc(sig)
        audio_buffer.append(ceps)
    else:
        no_sounds += 1
        if no_sounds > 240:
            no_sounds = 0
    # Classify once exactly 80 chunks have been collected.
    if len(audio_buffer) == 80:
        c_mfcc = calc_mfcc(audio_buffer)
        y_pred = rforest_clf.predict([c_mfcc])
        label = dataset[y_pred[0]]['label']
        print label
def arduino_soundlight(p, device=0):
    """Drive the AmbX lights from the audio captured on ``device``.

    Opens a mono 16-bit input stream at the device's default sample rate,
    sets all ``lights`` to white and then loops: each chunk is split into
    bass/mid/treble levels by ``calculate_levels`` and mapped to an RGB
    colour, scaled by the chunk's RMS.  When the scaled RMS is below 8 the
    lights are switched off.  The loop ends on ``KeyboardInterrupt`` or
    ``IndexError``; the stream and ``p`` are closed and the lights are
    switched off afterwards.

    :param p: an already created PyAudio instance (terminated on exit).
    :param device: input device index.

    ``print`` is called in function form so the code runs on Python 2 and
    Python 3, and a chunk that failed to read is skipped instead of being
    processed with stale (or, on the first iteration, undefined) data.
    """
    # NOTE(review): the source had lost its indentation; the nesting below
    # is a reconstruction -- confirm against the original file.
    print("choosing device: " + str(device) + ': ' + pyaudio.PyAudio(
    ).get_device_info_by_index(device)['name'])

    chunk = 2**12  # Change if too fast/slow, never less than 2**11
    scale = 10  # Change if too dim/bright
    exponent = 1  # Change if too little/too much difference between loud and quiet sounds
    samplerate = int(p.get_device_info_by_index(device)['defaultSampleRate'])
    print("samplerate: %d" % samplerate)

    # Enable stereo mixing in your sound card to make the sound output
    # available as an input; use list_devices() to list all input devices.
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=samplerate,
                    input=True,
                    frames_per_buffer=chunk,
                    input_device_index=device)

    def _shape(level, previous, current_scale):
        # Normalise to 0..1, apply the exponent, scale to 0..255, add the
        # decayed previous value and clamp to the 8-bit range.
        normalised = max(min(level / current_scale, 1.0), 0.0)
        return max(min(int(normalised**exponent * 255 + decay * previous), 255), 0)

    dev = None
    try:
        dev = AMBX(0)
        for light in lights:
            try:
                dev.set_color_rgb8(light, [255, 255, 255])
            except IOError:
                print('USB Error')
                break

        bass_temp = 0
        mid_temp = 0
        treble_temp = 0
        rms_temp = 0
        max_rms = 0

        while True:
            try:
                data = stream.read(chunk)
            except IOError:
                print('Overflow')
                continue  # nothing was read; try the next chunk

            # Do FFT
            [bass, mid, treble] = calculate_levels(data, chunk, samplerate)

            # Get % volume
            rms = audioop.rms(data, 2)
            # NOTE(review): labelled "low pass filter", but rms_temp is never
            # updated, so this is just 5% of the raw RMS.  The limits below
            # appear tuned to that scale, so it is left unchanged.
            rms = rms_temp * 0.95 + 0.05 * rms

            # what kind of volumes come streamed in
            rms_min = 16
            rms_max = 1400
            scale = 5.0 + 20 * (rms - rms_min) / (rms_max - rms_min)

            # if there is no sound input, switch off lights
            if rms < 8:
                for light in lights:
                    try:
                        dev.set_color_rgb8(light, [0, 0, 0])
                    except IOError:
                        print('USB Error')
            else:
                bass = _shape(bass, bass_temp, scale)
                mid = _shape(mid, mid_temp, scale)
                treble = _shape(treble, treble_temp, scale)
                bass_temp = bass
                mid_temp = mid
                treble_temp = treble
                for light in lights:
                    try:
                        dev.set_color_rgb8(light, [bass, mid, treble])
                    except IOError:
                        print('USB Error')
    except IndexError:
        if dev is None:
            print('No AmbX found!')
    except KeyboardInterrupt:
        pass
    finally:
        print("…Stop")
        stream.close()
        p.terminate()
        if dev is not None:
            for light in lights:
                dev.set_color_rgb8(light, [0, 0, 0])