def record():
    pa = PyAudio()
    in_stream = pa.open(format=paInt16, channels=1, rate=SAMPLING_RATE,
                        input=True, frames_per_buffer=BUFFER_SIZE)
    out_stream = pa.open(format=paInt16, channels=1, rate=SAMPLING_RATE,
                         output=True)
    save_count = 0
    save_buffer = []
    save_data = []
    save_count = SAVE_LENGTH
    print 'start recording'
    while save_count > 0:
        string_audio_data = in_stream.read(BUFFER_SIZE)
        audio_data = np.fromstring(string_audio_data, dtype=np.short)
        print type(audio_data)
        save_buffer.append(string_audio_data)
        save_data.append(audio_data)
        save_count = save_count - 1
    #print 'save %s' % (wav.fileName)
    #save_wave_file(wav.fileName, save_buffer)
    save_wave_file("test.wav", save_buffer)
    pa.terminate()
def record(self):
    pa = PyAudio()
    in_stream = pa.open(format=paInt16, channels=1, rate=SAMPLING_RATE,
                        input=True, frames_per_buffer=BUFFER_SIZE)
    save_count = 0
    save_buffer = []
    save_count = SAVE_LENGTH
    print 'start recording'
    while save_count > 0:
        string_audio_data = in_stream.read(BUFFER_SIZE)
        audio_data = np.fromstring(string_audio_data, dtype=np.short)
        save_buffer.append(string_audio_data)
        save_count = save_count - 1
    print 'save %s' % (self.fileName)
    pa.terminate()
    wf = wave.open(self.fileName, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(2)
    wf.setframerate(SAMPLING_RATE)
    wf.writeframes("".join(save_buffer))
    wf.close()
    self.stringAudioData = "".join(save_buffer)
    save_data = np.fromstring(self.stringAudioData, dtype=np.short)
    self.audioData = save_data[10000:10000 + 4608 * 4]
    self.stringAudioData = self.audioData.tostring()
    self.cutAudio = self.audioData
    # self.cut2()
    self.getFeature()
def record(self, time):
    audio = PyAudio()
    stream = audio.open(input_device_index=self.device_index,
                        output_device_index=self.device_index,
                        format=self.format,
                        channels=self.channel,
                        rate=self.rate,
                        input=True,
                        frames_per_buffer=self.chunk)
    print "Recording..."
    frames = []
    for i in range(0, self.rate / self.chunk * time):
        data = stream.read(self.chunk)
        frames.append(data)
    stream.stop_stream()
    print "Recording Complete"
    stream.close()
    audio.terminate()
    write_frames = open_audio(self.file, 'wb')
    write_frames.setnchannels(self.channel)
    write_frames.setsampwidth(audio.get_sample_size(self.format))
    write_frames.setframerate(self.rate)
    write_frames.writeframes(''.join(frames))
    write_frames.close()
    self.convert()
def record():
    pa = PyAudio()
    in_stream = pa.open(format=paInt16, channels=1, rate=SAMPLING_RATE,
                        input=True, frames_per_buffer=NUM_SAMPLES)
    out_stream = pa.open(format=paInt16, channels=1, rate=SAMPLING_RATE,
                         output=True)
    save_count = 0
    save_buffer = []
    save_data = []
    save_count = SAVE_LENGTH
    while save_count > 0:
        string_audio_data = in_stream.read(NUM_SAMPLES)
        audio_data = np.fromstring(string_audio_data, dtype=np.short)
        save_buffer.append(string_audio_data)
        save_data.append(audio_data)
        save_count = save_count - 1
    print 'save to test.wav'
    save_wave_file("test.wav", save_buffer)
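# ---------------------------------------------------------------------------
# The record() snippets above call a save_wave_file() helper without defining
# it. Below is a minimal sketch of what it presumably looks like, assuming the
# same mono / 16-bit / SAMPLING_RATE parameters used to open the input stream
# (the layout mirrors the wave-writing code in snippets further below; treat
# it as illustrative, not the original helper):
import wave

def save_wave_file(filename, data):
    wf = wave.open(filename, 'wb')
    wf.setnchannels(1)               # mono, matching the recording stream
    wf.setsampwidth(2)               # paInt16 -> 2 bytes per sample
    wf.setframerate(SAMPLING_RATE)   # assumed module-level constant
    wf.writeframes(b"".join(data))   # data is the list of raw buffers
    wf.close()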
def sine_tone(frequencies, amplitudes, duration, volume=1.0, sample_rate=22050):
    n_samples = int(sample_rate * duration)
    restframes = n_samples % sample_rate
    p = PyAudio()
    stream = p.open(format=p.get_format_from_width(1),  # 8bit
                    channels=1,  # mono
                    rate=sample_rate,
                    output=True)

    def s(t):
        r = 0
        for i in range(0, len(frequencies)):
            r += volume * amplitudes[i] * math.sin(2 * math.pi * frequencies[i] * t / sample_rate)
        return r

    samples = (int(s(t) * 0x7f + 0x80) for t in range(n_samples))
    for buf in zip(*[samples] * sample_rate):  # write several samples at a time
        stream.write(bytes(bytearray(buf)))
    # fill remainder of frameset with silence
    stream.write(b'\x80' * restframes)
    stream.stop_stream()
    stream.close()
    p.terminate()
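# A quick usage sketch for the multi-frequency sine_tone() above. The
# frequencies and weights are arbitrary examples; the amplitudes should sum to
# at most 1.0 so the 8-bit samples do not clip:
sine_tone(frequencies=[440.0, 659.25],   # A4 plus an E5 overtone
          amplitudes=[0.7, 0.3],
          duration=2.0,
          volume=0.9)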
class MsgSender():
    """ Designed to receive the audio data stream. """

    def __init__(self, RecorderConfig):
        self.config = RecorderConfig
        self.pPyAudioObj = PyAudio()
        self.pStream = None

    def handle(self):
        self.pStream = self.pPyAudioObj.open(
            format=self.config['format'],
            channels=self.config['channels'],
            rate=self.config['rate'],
            frames_per_buffer=self.config['frames_per_buffer'],
            input=True)
        pStream = self.pStream
        while True:
            try:
                data = pStream.read(self.config['bufferSize'])
            except Exception, e:
                print 'Cannot read sound stream. Please check Network.client.MsgSender 1.'
                print e
            # lock g_dUserDict
            if Defines.verify.g_pLock.acquire():
                UserDict = deepcopy(Defines.verify.g_dUserDict)
                Defines.verify.g_pLock.release()
            # print 'UserDict: %s' % str(UserDict)
            threading.Thread(target=send2GroupThreaded,
                             args=(UserDict, data)).start()
def Audio_play(filepath):
    ''' Play an audio file. '''
    CHUNK = 1024
    wf = wave.open(filepath, 'rb')
    pa = PyAudio()
    default_output = pa.get_default_host_api_info().get('defaultOutputDevice')
    stream = pa.open(format=pa.get_format_from_width(wf.getsampwidth()),
                     channels=wf.getnchannels(),
                     rate=wf.getframerate(),
                     output=True,
                     output_device_index=default_output)
    NUM = int(wf.getframerate() / CHUNK * 15)
    logging.info(">> START TO PLAY AUDIO")
    while NUM:
        data = wf.readframes(CHUNK)
        if data == "":  # end of file (the original compared against " ", which never matches)
            break
        stream.write(data)
        NUM -= 1
    stream.stop_stream()
    stream.close()
    del data
    pa.terminate()
class pybeeptone:
    def __init__(self, rate=44100):
        self.rate = rate  # was hard-coded to 44100, ignoring the argument
        self.pyaudio = PyAudio()
        self.stream = self.pyaudio.open(
            format=self.pyaudio.get_format_from_width(1),
            channels=1,
            rate=self.rate,
            output=True)

    def play_tone(self, freq=1000, duration=0.3):
        rate = self.rate
        length = int(math.ceil(self.rate * duration))
        data = ''.join(
            [chr(int(math.sin(x / ((rate / freq) / math.pi)) * 127 + 128))
             for x in xrange(length)])
        self.stream.write(data)

    def play_rest(self, duration):
        length = int(math.ceil(self.rate * duration))
        data = ''.join([chr(128) for x in xrange(length)])
        self.stream.write(data)

    def close(self):
        self.stream.stop_stream()
        self.stream.close()
        self.pyaudio.terminate()
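# A short usage sketch for pybeeptone (the tone values are arbitrary):
beeper = pybeeptone()
beeper.play_tone(freq=880, duration=0.2)   # short high beep
beeper.play_rest(0.1)                      # brief silence
beeper.play_tone(freq=440, duration=0.4)   # longer low beep
beeper.close()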
def __init__(self):
    super(VCGame, self).__init__(255, 255, 255, 255, 800, 600)
    # initialize parameters
    # frames_per_buffer
    self.numSamples = 1000
    # voice-level bar
    self.vbar = Sprite('black.png')
    self.vbar.position = 20, 450
    self.vbar.scale_y = 0.1
    self.vbar.image_anchor = 0, 0
    self.add(self.vbar)
    # the Pikachu sprite
    self.pikachu = Pikachu()
    self.add(self.pikachu)
    # CocosNode that holds the floor blocks
    self.floor = cocos.cocosnode.CocosNode()
    self.add(self.floor)
    position = 0, 100
    for i in range(120):
        b = Block(position)
        self.floor.add(b)
        position = b.x + b.width, b.height
    # open the microphone input
    audio = PyAudio()
    SampleRate = int(audio.get_device_info_by_index(0)['defaultSampleRate'])
    self.stream = audio.open(format=paInt16,
                             channels=1,
                             rate=SampleRate,
                             input=True,
                             frames_per_buffer=self.numSamples)
    self.schedule(self.update)
def main():
    # read in some block data from pyaudio
    RATE = 44100
    INPUT_BLOCK_TIME = 0.2
    INPUT_FRAMES_PER_BLOCK = int(RATE * INPUT_BLOCK_TIME)
    pa = PyAudio()
    data = True
    fmt = "%dh" % INPUT_FRAMES_PER_BLOCK
    total_rms = 0
    total_blocks = 0
    while data:
        for dr, subdr, fnames in os.walk(path):
            for filename in fnames:
                try:
                    print filename
                    wf = wave.open("%s/%s" % (path, filename), 'rb')
                    strm = pa.open(format=pa.get_format_from_width(wf.getsampwidth()),
                                   channels=wf.getnchannels(),
                                   rate=wf.getframerate(),
                                   input=True)
                    strm.stop_stream()
                    strm.close()
                    d = wf.readframes(INPUT_FRAMES_PER_BLOCK)
                    d = struct.unpack(fmt, d)
                    wf.close()
                    total_rms += calc_rms(d)
                    total_blocks += 1
                except:
                    #print e
                    print "*** ERROR ***"
        data = False
    avg = total_rms / total_blocks
    print "The average is %f" % avg
def play():
    wavName = 'test.wav'
    print "play %s" % (wavName)
    wf = wave.open(wavName, 'rb')
    pa = PyAudio()
    stream = pa.open(format=pa.get_format_from_width(wf.getsampwidth()),
                     channels=wf.getnchannels(),
                     rate=wf.getframerate(),
                     output=True)
    data = wf.readframes(CHUNK)
    td = threading.Thread(target=startGame)
    td.start()
    while data != '':
        stream.write(data)
        data = wf.readframes(CHUNK)
        audio_data = np.fromstring(data, dtype=np.short)
        print data
    stream.stop_stream()
    stream.close()
    pa.terminate()
def __init__(self):
    self.THRESHOLD = 200
    self.CHUNK_SIZE = 1024
    self.RATE = 22100
    p = PyAudio()
    self.stream = p.open(format=paInt16,
                         channels=1,
                         rate=self.RATE,
                         input=True,
                         output=True,
                         frames_per_buffer=self.CHUNK_SIZE)
def record(self):
    # open the wave input
    pa = PyAudio()
    stream = pa.open(format=paInt16,
                     channels=1,
                     rate=self.getRate(pa),
                     input=True,
                     frames_per_buffer=self.NUM_SAMPLES)
    save_buffer = []
    record_start = False
    record_end = False
    no_record_times = 0
    while 1:
        # read NUM_SAMPLES samples
        string_audio_data = stream.read(self.NUM_SAMPLES)
        if record_start == True:
            save_buffer.append(string_audio_data)
        print max(array('h', string_audio_data))
        if max(array('h', string_audio_data)) > 5000:
            record_start = True
            no_record_times = 0
        else:
            no_record_times += 1
        if record_start == False:
            continue
        if no_record_times > 10:
            break
    stream.close()
    pa.terminate()
    return save_buffer
def rec_audio(stat, filename, queue):
    NUM_SAMPLES = 200
    SAMPLING_RATE = 8000
    pa = PyAudio()
    stream = pa.open(format=paInt16, channels=1, rate=SAMPLING_RATE,
                     input=True, frames_per_buffer=NUM_SAMPLES)
    save_count = 0
    save_buffer = []
    while True:
        signal = queue.get()
        if signal == "audio_start":
            break
    time_start = clock()
    while True:
        string_audio_data = stream.read(NUM_SAMPLES)
        save_buffer.append(string_audio_data)
        if stat.value == 1:
            break
    time_finish = clock()
    wf = wave.open("./temp_frame/" + filename + ".wav", 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(2)
    wf.setframerate(SAMPLING_RATE)
    wf.writeframes("".join(save_buffer))
    wf.close()
    save_buffer = []
    print("audio_start: " + str(time_start))
    print("audio_end: " + str(time_finish))
    print("audio_duration (sec): " + str(time_finish - time_start))  # duration (seconds)
    print("audio_file: ", filename, "saved")
    queue.put("wav_sav_ok")
def sine_tone(frequency, duration, volume=1, sample_rate=22050):
    n_samples = int(sample_rate * duration)
    restframes = n_samples % sample_rate
    p = PyAudio()
    # note: the stream is opened as 16-bit stereo, but the samples generated
    # below are 8-bit-style values (0..255); pairs of them get interpreted as
    # single 16-bit frames
    stream = p.open(format=p.get_format_from_width(2),  # 16 bit
                    channels=2,
                    rate=sample_rate,
                    output=True)
    for i in xrange(0, 10):
        if i % 2 == 0:
            frequency = ZERO_FREQUENCY
        else:
            frequency = ONE_FREQUENCY
        s = lambda t: volume * math.sin(2 * math.pi * frequency * t / sample_rate)
        samples = (int(s(t) * 0x7f + 0x80) for t in xrange(n_samples))
        for buf in izip(*[samples] * sample_rate):  # write several samples at a time
            stream.write(bytes(bytearray(buf)))
        # fill remainder of frameset with silence
        stream.write(b'\x80' * restframes)
    stream.stop_stream()
    stream.close()
    p.terminate()
class Stream(Thread):
    def __init__(self, f, on_terminated):
        self.__active = True
        self.__path = f
        self.__paused = True
        self.on_terminated = on_terminated
        self.__position = 0
        self.__chunks = []
        self.__pyaudio = PyAudio()
        Thread.__init__(self)
        self.start()

    def play(self):
        self.__paused = False

    def seek(self, seconds):
        self.__position = int(seconds * 10)

    def is_playing(self):
        return self.__active and not self.__paused

    def get_position(self):
        return int(self.__position / 10)

    def get_duration(self):
        return int(len(self.__chunks) / 10)

    def pause(self):
        self.__paused = True

    def kill(self):
        self.__active = False

    def __get_stream(self):
        self.__segment = AudioSegment.from_file(self.__path)
        self.__chunks = make_chunks(self.__segment, 100)
        return self.__pyaudio.open(
            format=self.__pyaudio.get_format_from_width(self.__segment.sample_width),
            channels=self.__segment.channels,
            rate=self.__segment.frame_rate,
            output=True)

    def run(self):
        stream = self.__get_stream()
        while self.__position < len(self.__chunks):
            if not self.__active:
                break
            if not self.__paused:
                # noinspection PyProtectedMember
                data = self.__chunks[self.__position]._data
                self.__position += 1
            else:
                free = stream.get_write_available()
                data = chr(0) * free
            stream.write(data)
        stream.stop_stream()
        self.__pyaudio.terminate()
        if self.__active:
            self.on_terminated()
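# A usage sketch for the pydub-backed Stream thread above (the file path and
# callback are illustrative; the class assumes pydub's AudioSegment and
# make_chunks are imported):
def done():
    print('playback finished')

s = Stream('song.mp3', on_terminated=done)  # the thread starts immediately
s.play()       # unpause
s.seek(30)     # jump to 0:30
# ... later ...
s.kill()       # stop the playback thread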
def worker():
    p = PyAudio()
    stream = p.open(format=p.get_format_from_width(2),
                    channels=1,
                    rate=44100,
                    output=True)
    while True:
        self.lock.acquire()
        stream.write(self.wavdata.tostring())
        self.lock.release()
def Audio_record_play(seconds, play, filename):
    '''
    This function both records and plays; to play while recording,
    set play to True.
    Sample rate: 44100. Bit depth: 16.
    '''
    CHUNK = 1024
    CHANNELS = 2
    SAMPLING_RATE = 44100
    FORMAT = paInt16
    NUM = int(SAMPLING_RATE / CHUNK * seconds)
    save_buffer = []
    if play is True:
        source_file = autohandle_directory + '/audio_lib/' + 'source1.wav'
        swf = wave.open(source_file, 'rb')
    # open audio stream
    pa = PyAudio()
    default_input = pa.get_default_host_api_info().get('defaultInputDevice')
    stream = pa.open(format=FORMAT,
                     channels=CHANNELS,
                     rate=SAMPLING_RATE,
                     input=True,
                     output=play,
                     frames_per_buffer=CHUNK,
                     input_device_index=default_input)
    logging.info(">> START TO RECORD AUDIO")
    while NUM:
        save_buffer.append(stream.read(CHUNK))
        NUM -= 1
        if play is True:
            data = swf.readframes(CHUNK)
            stream.write(data)
            if data == "":  # end of the source file (was " ", which never matches)
                break
    # close stream
    stream.stop_stream()
    stream.close()
    pa.terminate()

    # save wav file
    def save_wave_file(filename, data):
        wf_save = wave.open(filename, 'wb')
        wf_save.setnchannels(CHANNELS)
        wf_save.setsampwidth(pa.get_sample_size(FORMAT))
        wf_save.setframerate(SAMPLING_RATE)
        wf_save.writeframes("".join(data))
        wf_save.close()

    save_wave_file(filename, save_buffer)
    del save_buffer[:]
def playWaveData(self, waveData):
    p = PyAudio()
    stream = p.open(format=p.get_format_from_width(1),
                    channels=1,
                    rate=self.bitRate,
                    output=True)
    stream.write(waveData)
    stream.stop_stream()
    stream.close()
    p.terminate()
def record_wave(self):
    # open the wave input
    pa = PyAudio()
    stream = pa.open(format=paInt16,
                     channels=1,
                     rate=self.framerate,
                     input=True,
                     frames_per_buffer=self.NUM_SAMPLES)
    save_buffer = []
    count = 0
    while count < self.TIME * 5:
        string_audio_data = stream.read(self.NUM_SAMPLES)
        audio_data = np.fromstring(string_audio_data, dtype=np.short)
        # count the samples in audio_data above the level threshold
        large_sample_count = np.sum(audio_data > self.LEVEL)
        #print large_sample_count
        #print 'mute_begin' + str(self.mute_begin)
        #print 'mute_end' + str(self.mute_end)
        # if a frame has fewer loud samples than mute_count_limit,
        # treat it as silence
        if large_sample_count < self.mute_count_limit:
            # start the silence counter
            self.mute_begin = 1
        else:
            # sound detected: keep the frame
            save_buffer.append(string_audio_data)
            # clear the silence flag and reset the silence length
            self.mute_begin = 0
            self.mute_end = 1
        count += 1
        # if the silence has lasted long enough, end this recording
        if (self.mute_end - self.mute_begin) > 3:
            # reset the counters and stop reading from the sound card
            self.mute_begin = 0
            self.mute_end = 1
            break
        # while in silence, keep growing the silence length mute_end
        if self.mute_begin:
            self.mute_end += 1
    save_buffer = save_buffer[:]
    if save_buffer:
        if self.file_name_index < 11:
            pass
        else:
            self.file_name_index = 1
        filename = str(self.file_name_index) + '.wav'
        self.save_wave_file(filename=filename, data=save_buffer)
        self.writeQ(queue=self.wav_queue, data=filename)
        self.file_name_index += 1
        save_buffer = []
    # On embedded devices this close() is required; otherwise only the first
    # recording works and later ones fail with a stream overflow error.
    stream.close()
def play(self):
    print "play %s" % (self.fileName)
    pa = PyAudio()
    stream = pa.open(format=paInt16,
                     channels=1,
                     rate=SAMPLING_RATE,
                     output=True,
                     frames_per_buffer=BUFFER_SIZE)
    stream.write(self.stringAudioData)
    # stream.write(self.cutAudio)
    stream.stop_stream()
    stream.close()
    pa.terminate()
def Record(self):
    global CHANNELS
    # open the sound input
    pa = PyAudio()
    stream = pa.open(format=paInt16,
                     channels=CHANNELS,
                     rate=self.sampling_rate,
                     input=True,
                     frames_per_buffer=self.cacheblock_size)
    save_count = 0      # number of sample blocks already saved
    silence_count = 0   # number of consecutive silent blocks
    save_buffer = []    # audio buffer
    try:
        print "start recording"
        while True:
            # record one block of samples
            string_audio_data = stream.read(self.cacheblock_size)
            # convert the raw bytes into an array
            audio_data = np.fromstring(string_audio_data, dtype=np.short)
            # samples above `level` count as valid; count them
            large_sample_count = np.sum(audio_data > self.level)
            print "Peak:", np.max(audio_data), " Sum:", large_sample_count
            # if enough samples are valid, the whole block counts as valid
            if large_sample_count > self.sampling_num:
                # a valid block: increase the save counter
                save_count += 1
            else:
                # after a valid block, a failed one may mean silence
                if save_count > 0:
                    silence_count += 1
            # have the failure limits been exceeded?
            if (save_count <= self.max_save_length) and (silence_count <= self.max_silence_length):
                # keep the block in save_buffer
                save_buffer.append(string_audio_data)
            else:
                # write save_buffer into a WAV file named after the current time
                if len(save_buffer) > 0:
                    self.filename = datetime.now().strftime("%Y-%m-%d_%H_%M_%S") + ".wav"
                    self.__Save_wave_file(self.filename, save_buffer)
                    save_buffer = []
                    print self.filename, "saved"
                break
    except KeyboardInterrupt:
        print "manual exit"
    finally:
        # stop stream
        stream.stop_stream()
        stream.close()
        # close PyAudio
        pa.terminate()
        print "exit recording"
    return self.filename
def main():
    # open the sound input
    pa = PyAudio()
    stream = pa.open(format=paInt16, channels=1, rate=SAMPLING_RATE,
                     input=True, frames_per_buffer=NUM_SAMPLES)
    time = np.arange(0, NUM_SAMPLES) * (1.0 / SAMPLING_RATE)
    delta = 0
    while True:
        print "delta = ", delta
        delta += NUM_SAMPLES * (1.0 / SAMPLING_RATE)
        # read NUM_SAMPLES samples
        string_audio_data = stream.read(NUM_SAMPLES)
        # convert the raw bytes into an array
        audio = np.fromstring(string_audio_data, dtype=np.short)
        curr = []
        for i in range(1, len(time)):
            dy = audio[i] - audio[i - 1]
            if abs(dy) > THRESHOLD:
                # print abs(dy)
                curr.append(delta + time[i])
        blk_sum = 0
        blk_num = 0
        for i in range(1, len(curr)):
            if curr[i] - curr[i - 1] < MAXDELTA:
                blk_sum += curr[i]
                blk_num += 1
            else:
                if blk_num > 60:
                    print "blk::sum, num = ", blk_sum / blk_num, blk_num
                    push(blk_sum / blk_num)
                blk_sum = 0
                blk_num = 0
        if blk_num > 60:
            print "blk::sum, num = ", blk_sum / blk_num, blk_num
            push(blk_sum / blk_num)
        cnt = 0
        for i in range(1, len(event)):
            if event[i] - event[i - 1] < 0.4:
                cnt += 1
            else:
                cnt = 0
            if cnt >= 2:
                thread.start_new_thread(play, ())
                print "-- shuia"
                del event[0:i + 1]
                break
        print event
def recode(self):
    pa = PyAudio()
    stream = pa.open(format=paInt16,
                     channels=self.nchannel,
                     rate=self.SAMPLING_RATE,
                     input=True,
                     frames_per_buffer=self.NUM_SAMPLES)
    save_count = 0
    save_buffer = []
    time_out = self.TIME_OUT
    NO_WORDS = self.NO_WORDS
    while True and NO_WORDS:
        time_out -= 1
        print 'time_out in', time_out
        # read NUM_SAMPLES samples
        string_audio_data = stream.read(self.NUM_SAMPLES)
        # convert the raw bytes into an array
        audio_data = np.fromstring(string_audio_data, dtype=np.short)
        # check whether there is any speech input
        NO_WORDS -= 1
        if np.max(audio_data) > self.UPPER_LEVEL:
            NO_WORDS = self.NO_WORDS
            print 'self.NO_WORDS ', NO_WORDS
            print 'np.max(audio_data) ', np.max(audio_data)
        # count the samples above LOWER_LEVEL
        large_sample_count = np.sum(audio_data > self.LOWER_LEVEL)
        # if the count exceeds COUNT_NUM, save at least SAVE_LENGTH blocks
        if large_sample_count > self.COUNT_NUM:
            save_count = self.SAVE_LENGTH
        else:
            save_count -= 1
        #print 'save_count', save_count
        # store the data to be saved in save_buffer
        if save_count < 0:
            save_count = 0
        elif save_count > 0:
            save_buffer.append(string_audio_data)
        else:
            pass
        # hand the buffered data over once recording has finished
        if len(save_buffer) > 0 and NO_WORDS == 0:
            self.Voice_String = save_buffer
            save_buffer = []
            rospy.loginfo("Recorded a piece of voice successfully!")
            #return self.Voice_String
        elif len(save_buffer) > 0 and time_out == 0:
            self.Voice_String = save_buffer
            save_buffer = []
            rospy.loginfo("Recorded a piece of voice successfully!")
            #return self.Voice_String
        else:
            pass
def pitch_tracking():
    pitches = []
    pa = PyAudio()
    in_stream = pa.open(format=paInt16, channels=1, rate=SAMPLING_RATE,
                        input=True, frames_per_buffer=BUFFER_SIZE)
    out_stream = pa.open(format=paInt16, channels=1, rate=SAMPLING_RATE,
                         output=True)
    while True:
        string_data = in_stream.read(BUFFER_SIZE)
        audio_data = np.fromstring(string_data, dtype=np.short)
        xs = audio_data[:BUFFER_SIZE]
        print xs
        xf = np.fft.rfft(xs) / BUFFER_SIZE
        freqs = np.linspace(0, SAMPLING_RATE / 2, BUFFER_SIZE / 2 + 1)
        xfp = 20 * np.log10(np.clip(np.abs(xf), 1e-20, 1e100))
        idx = np.argmax(xfp)
        pitches.append(idx)
        print freqs[idx]
def record_wav(self, save_file_name):
    pa = PyAudio()
    stream = pa.open(format=paInt16,
                     channels=1,
                     rate=self.framerate,
                     input=True,
                     frames_per_buffer=self.NUM_SAMPLES)
    buf = []
    while self.isRecording:
        audio_data = stream.read(self.NUM_SAMPLES)
        buf.append(audio_data)
    self.save_wav_file(save_file_name, buf)
    stream.close()
def openWav(self):
    chunk = 1024
    wf = wave.open(r"result.wav", 'rb')
    p = PyAudio()
    stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
                    rate=wf.getframerate(),
                    output=True)
    while True:
        data = wf.readframes(chunk)
        if data == "":
            break
        stream.write(data)
    stream.close()
    p.terminate()
class AudioStream(object):
    def __init__(self, sample_rate=44100, channels=1, width=2, chunk=1024,
                 input_device_index=None):
        self.sample_rate = sample_rate
        self.channels = channels
        self.width = width
        self.chunk = chunk
        self.input_device_index = input_device_index

    def __enter__(self):
        self._pa = PyAudio()
        if self.input_device_index is None:
            self.input_device_index = \
                self._pa.get_default_input_device_info()['index']
        self._stream = self._pa.open(
            format=self._pa.get_format_from_width(self.width),
            channels=self.channels,
            rate=self.sample_rate,
            input=True,
            frames_per_buffer=self.chunk,
            input_device_index=self.input_device_index)
        self._stream.start_stream()
        return self

    def read(self):
        ''' On a buffer overflow this returns 0 bytes. '''
        try:
            return self._stream.read(self.chunk)
        except IOError:
            return ''
        except AttributeError:
            raise Exception('Must be used as a context manager.')

    def stream(self):
        try:
            while True:
                bytes = self.read()
                if bytes:
                    self.handle(bytes)
        except (KeyboardInterrupt, SystemExit):
            pass

    def __exit__(self, type, value, traceback):
        self._stream.stop_stream()
        self._stream.close()
        self._pa.terminate()

    def handle(self, bytes):
        pass
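# Since handle() is a no-op hook, the usual way to consume AudioStream is to
# subclass it and override handle(); a minimal sketch (the subclass and its
# print are illustrative only):
class PrintingStream(AudioStream):
    def handle(self, bytes):
        # report how much audio arrived in this chunk
        print('got %d bytes' % len(bytes))

with PrintingStream(sample_rate=16000, chunk=1024) as s:
    s.stream()  # runs until Ctrl-C (KeyboardInterrupt is swallowed)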
def throw_process_loop(self, q: Queue):
    """ A sound loop. """
    p = PyAudio()
    stream = p.open(format=self.formatting,
                    channels=self.channels,
                    rate=self.rate,
                    output=True)
    while q.empty():
        stream.write(self.data)
    # close the stream before terminating PyAudio
    # (the original did it in the opposite order, which is incorrect,
    # and also carried an unused `import sounddevice`)
    stream.close()
    p.terminate()
def play(wave_data):
    chunk_size = BITRATE / 10
    p = PyAudio()
    stream = p.open(format=p.get_format_from_width(1),
                    channels=1,
                    rate=BITRATE,
                    output=True)
    for chunk in itertools.islice(wave_data, chunk_size):
        stream.write(chunk)
    stream.stop_stream()
    stream.close()
    p.terminate()
class Player(QThread):
    """ The player. """

    def __init__(self):
        """ Initialize the player. """
        QThread.__init__(self)
        self.__audio_segment = None  # type: AudioSegment
        self.__start_position = 0    # position to start playing from
        self.audio = PyAudio()       # type: PyAudio
        self.exception_list = []     # type: list
        self.chunks = []             # chunks to play
        self.loaded_length = 0       # length already loaded
        self.chunk_duration = 50 / 1000.0  # chunk size, cut by duration: 0.05 s per chunk
        self.time = 0                # current playback time
        self.volume = 100            # playback volume
        self.is_playing = False      # whether we are playing
        self.is_paused = False       # whether playback is paused

    def __del__(self):
        self.stop()
        if not self.wait(500):
            self.terminate()
            self.wait()

    @property
    def has_exception(self):
        """ :return: whether any exception occurred """
        return len(self.exception_list) > 0

    @property
    def audio_segment(self):
        """ :return: the audio segment object """
        return self.__audio_segment

    @audio_segment.setter
    def audio_segment(self, value):
        """
        Set the audio segment and load chunk data from it, replacing the
        chunks being played. While caching is in progress, every cache
        message (issued after the file is written) must refresh the playing
        chunks; the newest cached data has to be loaded for playback to
        continue without interruption.
        :param value: the audio segment object
        """
        self.__audio_segment = value
        self.setup_chunks_for_cache()

    @property
    def start_position(self):
        """ :return: the position to start playing the music from """
        return self.__start_position

    @start_position.setter
    def start_position(self, value):
        """
        Update the start position, i.e. rebuild the chunks being played.
        :param value: the position to start playing the music from
        """
        self.__start_position = value
        self.setup_chunks_for_start()

    @property
    def is_valid(self):
        """ :return: whether the audio segment has been set and is valid """
        return self.audio_segment is not None

    @property
    def duration(self):
        """
        Total duration of the music.
        :return: duration in seconds
        """
        if not self.is_valid:
            return 0
        return self.audio_segment.duration_seconds

    @property
    def current_time(self):
        """ :return: current playback time """
        if not self.is_playing:
            return 0
        return self.time

    def rms_amplitude(self, time, sample_dur):
        """
        Audio amplitude.
        :param float time: time
        :param sample_dur: sampling duration
        :return:
        """
        if not self.is_valid:
            return None
        return self.audio_segment[time * 1000.0:(time + sample_dur) * 1000.0].rms

    def setup_chunks_for_cache(self):
        """
        Load the chunks to play from the file cache. Since the cached file
        keeps changing, record how much has been loaded so far; when the
        next cache message arrives, continue loading from that position.
        :return: whether loading from the cache succeeded
        """
        if not self.is_valid:
            return False
        start = self.loaded_length
        length = self.duration - self.loaded_length
        self.loaded_length += length
        # build the chunk to play
        play_chunk = self.audio_segment[start * 1000.0: (start + length) * 1000.0] - \
            (60 - (60 * (self.volume / 100.0)))
        self.chunks += make_chunks(play_chunk, self.chunk_duration * 1000)
        return True

    def setup_chunks_for_start(self):
        """
        Rebuild the chunks to play from a given starting point, usually after
        the user drags the progress bar. This can conflict with cache
        loading: the data may not be loaded yet even though, judging from the
        song_info parameters, the length of the music already exists.
        :return:
        """
        if not self.is_valid:
            return False
        length = self.duration - self.start_position
        # build the chunk to play
        play_chunk = self.audio_segment[self.start_position * 1000.0:
                                        (self.start_position + length) * 1000.0] - \
            (60 - (60 * (self.volume / 100.0)))
        self.chunks = make_chunks(play_chunk, self.chunk_duration * 1000)
        return True

    def play(self):
        """ Start playing the music from self.start_position. """
        if not self.is_valid:
            return False
        self.is_playing = True
        self.emit(SIGNAL('before_play()'))  # signal that playback is about to start
        self.time = self.start_position
        audio_stream = self.audio.open(
            format=self.audio.get_format_from_width(self.audio_segment.sample_width),
            channels=self.audio_segment.channels,
            rate=self.audio_segment.frame_rate,
            output=True)
        index = 0
        # for chunk in self.chunks:
        while True:
            if not self.is_playing:
                # playback stopped, leave the loop
                break
            while self.is_paused:
                # paused: block until resumed
                sleep(0.5)
                continue
            if self.time >= self.duration:
                # playback finished, leave the loop
                self.emit(SIGNAL('play_finished()'))  # signal that playback finished
                break
            self.time += self.chunk_duration
            if index < len(self.chunks):
                # write the chunk data to the sound card to play it
                audio_stream.write(self.chunks[index].raw_data)
                index += 1
            # signals emitted from the thread may raise on shutdown;
            # just keep waiting until the thread exits
            try:
                self.emit(SIGNAL('playing()'))  # signal that playback is in progress
            except Exception as e:
                continue
        audio_stream.close()
        self.is_playing = False
        self.emit(SIGNAL('stopped()'))  # signal that playback stopped

    def stop(self):
        """ Stop playing and wait for the thread to finish. """
        self.is_playing = False
        self.start_position = 0

    def pause(self):
        """ Pause playback. """
        self.is_paused = True
        self.emit(SIGNAL('pause()'))  # signal that playback paused

    def proceed(self):
        """ Resume playback, i.e. leave the paused state. """
        self.is_paused = False
        self.emit(SIGNAL('proceed()'))  # signal that playback resumed

    def run(self, *args, **kwargs):
        """ Start playing. """
        self.play()
# Called for every client connecting (after handshake)
def new_client(client, server):
    print("New client connected and was given id %d" % client['id'])


# Called for every client disconnecting
def client_left(client, server):
    print("Client(%d) disconnected" % client['id'])


import base64

p = PyAudio()
stream = p.open(format=paInt16,
                channels=1,
                rate=8000,
                output=True,
                output_device_index=1)


# Called when a client sends a message
def message_received(client, server, message):
    print("Client(%d) said: %s" % (client['id'], message[:5]))
    stream.write(base64.b64decode(message))


PORT = 9001
server = WebsocketServer(PORT, "0.0.0.0")
server.set_fn_new_client(new_client)
server.set_fn_client_left(client_left)
server.set_fn_message_received(message_received)
    rand_counter = np.mod(i * reps + ii, len_rand_seqs)
    random_time_ind = rand_time_arr[rand_counter]
    random_grain = rand_grain_arr[rand_counter]
    random_channel = rand_ch_arr[rand_counter]
    random_pitch = rand_pitch_arr[rand_counter]
    grain_sample[random_time_ind:random_time_ind + GRAIN_SIZE, random_channel] += \
        grain_mat[:, random_grain, random_pitch]
    out_data = struct.pack(
        '%sf' % (2 * WINDOW_SIZE),
        *grain_sample[sample_counter * WINDOW_SIZE:(sample_counter + 1) * WINDOW_SIZE, :]
            .reshape(2 * WINDOW_SIZE))
    i += 1
    return (out_data, paContinue)


# open stream using callback (3)
stream = p.open(format=paFloat32,
                channels=2,
                rate=fs,
                output=True,
                frames_per_buffer=WINDOW_SIZE,
                stream_callback=callback)

# start the stream
stream.start_stream()

while True:
    pass

stream.stop_stream()
stream.close()

# close PyAudio
p.terminate()
class AudioAnalyzer(Thread):
    """ This AudioAnalyzer reads the microphone and finds the frequency of the loudest tone.
        To use it, you also need the ProtectedList class from the file threading_helper.py.
        You need to create an instance of ProtectedList, which acts as a queue, and pass
        this queue to the AudioAnalyzer. Then you can read the values from the queue:

            queue = ProtectedList()
            analyzer = AudioAnalyzer(queue)
            analyzer.start()

            while True:
                freq = queue.get()
                print("loudest frequency:", freq,
                      "nearest note:", analyzer.frequency_to_note_name(freq, 440))
                time.sleep(0.02)
    """

    # settings (tuned for best detection of string instruments like guitar):
    SAMPLING_RATE = 48000  # mac hardware: 44100, 48000, 96000
    CHUNK_SIZE = 1024      # number of samples
    BUFFER_TIMES = 50      # buffer length = CHUNK_SIZE * BUFFER_TIMES
    ZERO_PADDING = 3       # times the buffer length
    NUM_HPS = 3            # Harmonic Product Spectrum

    # overall frequency accuracy (step-size):
    #   SAMPLING_RATE / (CHUNK_SIZE * BUFFER_TIMES * (1 + ZERO_PADDING)) Hz
    # buffer length in seconds:
    #   (CHUNK_SIZE * BUFFER_TIMES) / SAMPLING_RATE sec

    NOTE_NAMES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    def __init__(self, queue, *args, **kwargs):
        Thread.__init__(self, *args, **kwargs)
        self.queue = queue  # queue should be an instance of threading_helper.ProtectedList
        self.buffer = np.zeros(self.CHUNK_SIZE * self.BUFFER_TIMES)
        self.hanning_window = np.hanning(len(self.buffer))
        self.running = False
        try:
            self.audio_object = PyAudio()
            self.stream = self.audio_object.open(format=paInt16,
                                                 channels=1,
                                                 rate=self.SAMPLING_RATE,
                                                 input=True,
                                                 output=False,
                                                 frames_per_buffer=self.CHUNK_SIZE)
        except Exception as e:
            sys.stderr.write('Error: Line {} {} {}\n'.format(
                sys.exc_info()[-1].tb_lineno, type(e).__name__, e))
            return

    @staticmethod
    def frequency_to_number(freq, a4_freq):
        """ converts a frequency to a note number (for example: A4 is 69) """
        if freq == 0:
            sys.stderr.write("Error: No frequency data. "
                             "Program has potentially no access to microphone\n")
            return 0
        return 12 * np.log2(freq / a4_freq) + 69

    @staticmethod
    def number_to_frequency(number, a4_freq):
        """ converts a note number (A4 is 69) back to a frequency """
        return a4_freq * 2.0 ** ((number - 69) / 12.0)

    @staticmethod
    def number_to_note_name(number):
        """ converts a note number to a note name
            (for example: 69 returns 'A', 70 returns 'A#', ...) """
        return AudioAnalyzer.NOTE_NAMES[int(round(number) % 12)]

    @staticmethod
    def frequency_to_note_name(frequency, a4_freq):
        """ converts frequency to note name (for example: 440 returns 'A') """
        number = AudioAnalyzer.frequency_to_number(frequency, a4_freq)
        note_name = AudioAnalyzer.number_to_note_name(number)
        return note_name

    def run(self):
        """ Main function where the microphone buffer gets read and
            the fourier transformation gets applied """
        self.running = True
        while self.running:
            try:
                # read microphone data
                data = self.stream.read(self.CHUNK_SIZE, exception_on_overflow=False)
                data = np.frombuffer(data, dtype=np.int16)

                # append data to audio buffer
                self.buffer[:-self.CHUNK_SIZE] = self.buffer[self.CHUNK_SIZE:]
                self.buffer[-self.CHUNK_SIZE:] = data

                # apply the fourier transformation on the whole buffer
                # (with zero-padding + hanning window)
                magnitude_data = abs(np.fft.fft(np.pad(
                    self.buffer * self.hanning_window,
                    (0, len(self.buffer) * self.ZERO_PADDING),
                    "constant")))
                # only use the first half of the fft output data
                magnitude_data = magnitude_data[:int(len(magnitude_data) / 2)]

                # HPS: multiply data by itself with different scalings
                # (Harmonic Product Spectrum)
                magnitude_data_orig = copy.deepcopy(magnitude_data)
                for i in range(2, self.NUM_HPS + 1, 1):
                    hps_len = int(np.ceil(len(magnitude_data) / i))
                    magnitude_data[:hps_len] *= magnitude_data_orig[::i]  # multiply every i-th element

                # get the corresponding frequency array
                frequencies = np.fft.fftfreq(int((len(magnitude_data) * 2) / 1),
                                             1. / self.SAMPLING_RATE)

                # set magnitude of all frequencies below 60Hz to zero
                for i, freq in enumerate(frequencies):
                    if freq > 60:
                        magnitude_data[:i - 1] = 0
                        break

                # put the frequency of the loudest tone into the queue
                self.queue.put(round(frequencies[np.argmax(magnitude_data)], 2))
            except Exception as e:
                sys.stderr.write('Error: Line {} {} {}\n'.format(
                    sys.exc_info()[-1].tb_lineno, type(e).__name__, e))

        self.stream.stop_stream()
        self.stream.close()
        self.audio_object.terminate()
Fs = 11026                          # sampling frequency
pt_step = math.floor(Fs / 2)
delta_time = 2.5
avrg_time = 10
last_time = 10
alarm_cnt = 0
NUM_SAMPLES = int(Fs * delta_time)  # size of pyAudio's internal cache block
NUM_TEST = 4 * NUM_SAMPLES

# open the sound input
pa = PyAudio()
stream = pa.open(format=paInt16, channels=1, rate=Fs, input=True,
                 frames_per_buffer=NUM_TEST)

save_count = 0
save_buffer = []
eng_step = []
avrg_eng_step = []
alarm_all = []

string_audio_data = stream.read(NUM_TEST)
y1 = np.fromstring(string_audio_data, dtype=np.short)
ny = len(y1)
N_step = int(math.floor(ny / pt_step))
class PreciseRunner(object):
    """
    Wrapper to use Precise. Example:
        >>> def on_act():
        ...     print('Activation!')
        ...
        >>> p = PreciseRunner(PreciseEngine('./precise-engine'), on_activation=on_act)
        >>> p.start()
        >>> from time import sleep; sleep(10)
        >>> p.stop()

    Args:
        engine (Engine): Object containing info on the binary engine
        trigger_level (int): Number of chunk activations needed to trigger on_activation.
                             Higher values add latency but reduce false positives
        sensitivity (float): From 0.0 to 1.0, relates to the network output level
                             required to consider a chunk "active"
        stream (BinaryIO): Binary audio stream to read 16000 Hz 1 channel int16
                           audio from. If not given, the microphone is used
        on_prediction (Callable): callback for every new prediction
        on_activation (Callable): callback for when the wake word is heard
    """

    def __init__(self, engine, trigger_level=3, sensitivity=0.5, stream=None,
                 on_prediction=lambda x: None, on_activation=lambda: None):
        self.engine = engine
        self.trigger_level = trigger_level
        self.sensitivity = sensitivity
        self.stream = stream
        self.on_prediction = on_prediction
        self.on_activation = on_activation
        self.chunk_size = engine.chunk_size
        self.read_divisor = 1
        self.pa = None
        self.thread = None
        self.running = False
        self.is_paused = False
        self.detector = TriggerDetector(self.chunk_size, sensitivity, trigger_level)
        atexit.register(self.stop)

    def _calc_read_divisor(self):
        """
        pyaudio.Stream.read takes samples as n, not bytes,
        so read(n) should be read(n // sample_depth)
        """
        try:
            import pyaudio
            if isinstance(self.stream, pyaudio.Stream):
                return 2
        except ImportError:
            pass
        return 1

    def start(self):
        """Start listening from stream"""
        if self.stream is None:
            from pyaudio import PyAudio, paInt16
            self.pa = PyAudio()
            self.stream = self.pa.open(16000, 1, paInt16, True,
                                       frames_per_buffer=self.chunk_size)

        self.read_divisor = self._calc_read_divisor()

        self.engine.start()
        self.running = True
        self.is_paused = False
        self.thread = Thread(target=self._handle_predictions)
        self.thread.daemon = True
        self.thread.start()

    def stop(self):
        """Stop listening and close stream"""
        if self.thread:
            self.running = False
            if isinstance(self.stream, ReadWriteStream):
                self.stream.write(b'\0' * self.chunk_size)
            self.thread.join()
            self.thread = None

        self.engine.stop()

        if self.pa:
            # stop the stream before terminating PyAudio
            # (the original did this in the opposite order)
            self.stream.stop_stream()
            self.pa.terminate()
            self.stream = self.pa = None

    def pause(self):
        self.is_paused = True

    def play(self):
        self.is_paused = False

    def _handle_predictions(self):
        """Continuously check Precise process output"""
        while self.running:
            chunk = self.stream.read(self.chunk_size // self.read_divisor)
            #print(len(chunk))
            if self.is_paused:
                continue
            prob = self.engine.get_prediction(chunk)
            print('prob: ', prob)
            self.on_prediction(prob)
            if self.detector.update(prob):
                self.on_activation()
def callback(in_data, frame_count, time_info, flag):
    if flag:
        print("Playback Error: %i" % flag)
    played_frames = callback.counter
    callback.counter += frame_count
    limiter.limit(signal[played_frames:callback.counter], threshold)
    return signal[played_frames:callback.counter], paContinue

callback.counter = 0

pa = PyAudio()
stream = pa.open(format=paFloat32,
                 channels=1,
                 rate=fs,
                 frames_per_buffer=block_length,
                 output=True,
                 stream_callback=callback)

while stream.is_active():
    sleep(0.1)
stream.close()
pa.terminate()

############################## Plot results ####################################
plt.figure()
plt.plot(original_signal, color='grey', label='original signal')
plt.plot(signal, color='black', label='limited signal')
plt.legend()
class SoundCircle(object):
    def __init__(self):
        pg.setConfigOptions(antialias=True)
        self.app = QtGui.QApplication(sys.argv)
        self.win = pg.GraphicsWindow(title='SoundCircle')
        self.win.setWindowTitle('SoundCircle')
        self.win.setGeometry(0, 0, 1920, 1080)

        # Adding circles to our dict
        self.circles = {
            SpectrumMovement(61, (10, 100, 10), 98): [None, None],
            #WaveMovement(54, (130, 0, 0), 95): [None, None],
            #WaveMovement(47, (190, 0, 0), 92): [None, None],
            #WaveMovement(40, (250, 0, 0), 89): [None, None],
        }
        # and assigning PlotItems, the actual graphical circles
        for c in self.circles:
            self.circles[c][0] = self.win.addPlot(row=1, col=1, axisItems={})
            self.circles[c][0].hideAxis('left')
            self.circles[c][0].hideAxis('bottom')

        #self.win.showMaximized()
        self.win.showFullScreen()

        # PyAudio setup
        self.FORMAT = paInt16
        self.CHANNELS = 1
        self.RATE = 44100
        self.CHUNK = 1024 * 2
        self.p = PyAudio()
        self.stream = self.p.open(
            format=self.FORMAT,
            channels=self.CHANNELS,
            rate=self.RATE,
            input=True,
            output=False,
            frames_per_buffer=self.CHUNK,
        )

    def start(self):
        if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
            QtGui.QApplication.instance().exec_()

    def plot_data(self, circ, data_x, data_y):
        if circ.first_draw:
            # The first time we draw the circle, we set its properties
            the_line = self.circles[circ][0].plot(pen=circ.color, width=300)
            self.circles[circ][1] = the_line
            self.circles[circ][0].setYRange(0, 200, padding=0)
            self.circles[circ][0].setXRange(-90, 270, padding=0)
            circ.first_draw = False
        else:
            # Here we just update the data for each circle
            self.circles[circ][1].setData(data_x, data_y)

    def update_all(self):
        in_data = self.stream.read(self.CHUNK)
        for circle in self.circles:
            plot_tuple = circle.update(in_data, self.CHUNK)
            self.plot_data(circle, plot_tuple[0], plot_tuple[1])

    def animation(self):
        timer = QtCore.QTimer()
        timer.timeout.connect(self.update_all)
        timer.start(20)
        self.start()
from pyaudio import PyAudio, paContinue, paFloat32, paInt16
from time import sleep
from bandstop import filter
import numpy as np

pa = PyAudio()

NORM_CONST = 32768.0
D_TYPE = np.int16


def callback(in_data, frame_count, time_info, status):
    audio_data = np.fromstring(in_data, dtype=D_TYPE)
    normalized = [x / NORM_CONST for x in audio_data]
    out = filter(11025, 50, 1000, 1, 7, 'ellip', normalized)
    norm_out = np.array(np.round_(out * NORM_CONST))
    return (norm_out.astype(D_TYPE).tostring(), paContinue)


stream = pa.open(format=paInt16,
                 channels=1,
                 rate=11025,
                 input=True,
                 output=True,
                 frames_per_buffer=4096,
                 stream_callback=callback)

while stream.is_active():
    sleep(0.1)

stream.close()
pa.terminate()
        ax.set_ylim([-args[1], args[1]])
    else:
        ax.set_ylim([-0.9 * ymax, 0.9 * ymax])
    return ax.plot(args[0])


def frames(stream):
    while True:
        yield microphone(stream)


if __name__ == "__main__":
    pa = PyAudio()
    stream = pa.open(format=paInt16,
                     channels=1,
                     rate=RATE,
                     input=True,
                     frames_per_buffer=CHUNK)
    fig = plt.figure()
    ax = plt.Axes(fig, [0.15, 0.1, 0.7, 0.8])
    fig.add_axes(ax)
    microphone = Microphone()
    anim = animation.FuncAnimation(fig, animate, frames=frames(stream),
                                   interval=10, init_func=init)
    plt.show()
    # for i in range(int(20*RATE/CHUNK)):
class PySine(object):
    BITRATE = 96000.

    def __init__(self):
        self.pyaudio = PyAudio()
        self.frames = None
        try:
            self.stream = self.pyaudio.open(
                format=self.pyaudio.get_format_from_width(1),
                channels=1,
                rate=int(self.BITRATE),
                output=True)
        except:
            logger.error("No audio output is available. "
                         "Mocking audio stream to simulate one...")
            # output stream simulation with magicmock
            try:
                from mock import MagicMock
            except:  # python > 3.3
                from unittest.mock import MagicMock
            from time import sleep
            self.stream = MagicMock()

            def write(array):
                duration = len(array) / float(self.BITRATE)
                sleep(duration)

            self.stream.write = write

    def __del__(self):
        self.stream.stop_stream()
        self.stream.close()
        self.pyaudio.terminate()

    def sine(self, frequency=440.0, duration=1.0):
        points = int(self.BITRATE * duration)
        try:
            times = np.linspace(0, duration, points, endpoint=False)
            data = np.array((np.sin(times * frequency * 2 * np.pi) + 1.0) * 127.5,
                            dtype=np.int8).tostring()
        except:  # do it without numpy
            data = ''
            omega = 2.0 * pi * frequency / self.BITRATE
            for i in range(points):
                data += chr(int(127.5 * (1.0 + sin(float(i) * omega))))
        # accumulate the rendered frames so save() can write them later
        # (the original mixed None, append() and join() here and could not run)
        if not self.frames:
            self.frames = data
        else:
            self.frames += data
        self.stream.write(data)

    def save(self, filename):
        wf = wave.open(filename, 'wb')
        wf.setnchannels(1)
        wf.setsampwidth(1)
        wf.setframerate(self.BITRATE)
        wf.writeframes(self.frames)
        wf.close()
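# A usage sketch for PySine (the tone values and file name are arbitrary):
synth = PySine()
synth.sine(frequency=440.0, duration=1.0)   # play and buffer one second of A4
synth.sine(frequency=523.25, duration=0.5)  # then half a second of C5
synth.save('tones.wav')                     # write everything played so far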
class AudioDevice(QtCore.QObject):
    def __init__(self, logger):
        QtCore.QObject.__init__(self)
        self.logger = logger
        self.duo_input = False
        self.logger.push("Initializing PyAudio")
        self.pa = PyAudio()

        # look for devices
        self.input_devices = self.get_input_devices()
        self.output_devices = self.get_output_devices()

        for device in self.input_devices:
            self.logger.push("Opening the stream")
            self.stream = self.open_stream(device)
            self.device = device
            self.logger.push("Trying to read from input device %d" % device)
            if self.try_input_stream(self.stream):
                self.logger.push("Success")
                break
            else:
                self.logger.push("Fail")

        self.first_channel = 0
        nchannels = self.get_current_device_nchannels()
        if nchannels == 1:
            self.second_channel = 0
        else:
            self.second_channel = 1

        # counter for the number of input buffer overflows
        self.xruns = 0

    # method
    def get_readable_devices_list(self):
        devices_list = []
        default_device_index = self.get_default_input_device()
        for device in self.input_devices:
            dev_info = self.pa.get_device_info_by_index(device)
            api = self.pa.get_host_api_info_by_index(dev_info['hostApi'])['name']
            if device is default_device_index:
                extra_info = ' (system default)'
            else:
                extra_info = ''
            nchannels = self.pa.get_device_info_by_index(device)['maxInputChannels']
            desc = "%s (%d channels) (%s) %s" % (dev_info['name'], nchannels,
                                                 api, extra_info)
            devices_list += [desc]
        return devices_list

    # method
    def get_readable_output_devices_list(self):
        devices_list = []
        default_device_index = self.get_default_output_device()
        for device in self.output_devices:
            dev_info = self.pa.get_device_info_by_index(device)
            api = self.pa.get_host_api_info_by_index(dev_info['hostApi'])['name']
            if device is default_device_index:
                extra_info = ' (system default)'
            else:
                extra_info = ''
            nchannels = self.pa.get_device_info_by_index(device)['maxOutputChannels']
            desc = "%s (%d channels) (%s) %s" % (dev_info['name'], nchannels,
                                                 api, extra_info)
            devices_list += [desc]
        return devices_list

    # method
    def get_default_input_device(self):
        return self.pa.get_default_input_device_info()['index']

    # method
    def get_default_output_device(self):
        return self.pa.get_default_output_device_info()['index']

    # method
    def get_device_count(self):
        # FIXME only input devices should be chosen, not all of them!
        return self.pa.get_device_count()

    # method
    # returns a list of input devices index, starting with the system default
    def get_input_devices(self):
        device_count = self.get_device_count()
        default_input_device = self.get_default_input_device()

        device_range = range(0, device_count)
        # start by the default input device
        device_range.remove(default_input_device)
        device_range = [default_input_device] + device_range

        # select only the input devices by looking at the number of input channels
        input_devices = []
        for device in device_range:
            n_input_channels = self.pa.get_device_info_by_index(device)['maxInputChannels']
            if n_input_channels > 0:
                input_devices += [device]
        return input_devices

    # method
    # returns a list of output devices index, starting with the system default
    def get_output_devices(self):
        device_count = self.get_device_count()
        default_output_device = self.get_default_output_device()

        device_range = range(0, device_count)
        # start by the default input device
        device_range.remove(default_output_device)
        device_range = [default_output_device] + device_range

        # select only the output devices by looking at the number of output channels
        output_devices = []
        for device in device_range:
            n_output_channels = self.pa.get_device_info_by_index(device)['maxOutputChannels']
            if n_output_channels > 0:
                output_devices += [device]
        return output_devices

    # method
    def select_input_device(self, device):
        # save current stream in case we need to restore it
        previous_stream = self.stream
        previous_device = self.device

        self.stream = self.open_stream(device)
        self.device = device

        self.logger.push("Trying to read from input device #%d" % (device))
        if self.try_input_stream(self.stream):
            self.logger.push("Success")
            previous_stream.close()
            success = True
            self.first_channel = 0
            nchannels = self.get_current_device_nchannels()
            if nchannels == 1:
                self.second_channel = 0
            else:
                self.second_channel = 1
        else:
            self.logger.push("Fail")
            self.stream.close()
            self.stream = previous_stream
            self.device = previous_device
            success = False

        return success, self.device

    # method
    def select_first_channel(self, index):
        self.first_channel = index
        success = True
        return success, self.first_channel

    # method
    def select_second_channel(self, index):
        self.second_channel = index
        success = True
        return success, self.second_channel

    # method
    def open_stream(self, device):
        ''' by default we open the device stream with all the channels
            (interleaved in the data buffer) '''
        maxInputChannels = self.pa.get_device_info_by_index(device)['maxInputChannels']
        stream = self.pa.open(format=paInt32,
                              channels=maxInputChannels,
                              rate=SAMPLING_RATE,
                              input=True,
                              frames_per_buffer=FRAMES_PER_BUFFER,
                              input_device_index=device)
        return stream

    # method
    # return the index of the current input device in the input devices list
    # (not the same as the PortAudio index, since the latter is the index
    # in the list of *all* devices, not only input ones)
    def get_readable_current_device(self):
        i = 0
        for device in self.input_devices:
            if device == self.device:
                break
            else:
                i += 1
        return i

    # method
    def get_readable_current_channels(self):
        dev_info = self.pa.get_device_info_by_index(self.device)
        nchannels = dev_info['maxInputChannels']
        if nchannels == 2:
            channels = ['L', 'R']
        else:
            channels = []
            for channel in range(0, dev_info['maxInputChannels']):
                channels += ["%d" % channel]
        return channels

    # method
    def get_current_first_channel(self):
        return self.first_channel

    # method
    def get_current_second_channel(self):
        return self.second_channel

    # method
    def get_current_device_nchannels(self):
        return self.pa.get_device_info_by_index(self.device)['maxInputChannels']

    # method
    # return True on success
    def try_input_stream(self, stream):
        n_try = 0
        while (stream.get_read_available() < FRAMES_PER_BUFFER
               and n_try < 1000000):
            n_try += 1

        if n_try == 1000000:
            return False
        else:
            lat_ms = 1000 * stream.get_input_latency()
            self.logger.push("Device claims %d ms latency" % (lat_ms))
            return True

    # try to update the audio buffer
    # return the number of chunks retrieved, and the time elapsed
    def update(self, ringbuffer):
        t = QtCore.QTime()
        t.start()

        channel = self.get_current_first_channel()
        nchannels = self.get_current_device_nchannels()
        if self.duo_input:
            channel_2 = self.get_current_second_channel()

        chunks = 0
        available = self.stream.get_read_available()
        available = int(floor(available / FRAMES_PER_BUFFER))

        for _ in range(0, available):
            try:
                rawdata = self.stream.read(FRAMES_PER_BUFFER)
            except IOError as inst:
                # FIXME specialize this exception handling code
                # to treat overflow errors particularly
                self.xruns += 1
                print "Caught an IOError on stream read.", inst
                break
            intdata_all_channels = fromstring(rawdata, int32)

            int32info = iinfo(int32)
            norm_coeff = max(abs(int32info.min), int32info.max)
            floatdata_all_channels = (intdata_all_channels.astype(float64)
                                      / float(norm_coeff))

            floatdata1 = floatdata_all_channels[channel::nchannels]
            if self.duo_input:
                floatdata2 = floatdata_all_channels[channel_2::nchannels]
                floatdata = vstack((floatdata1, floatdata2))
            else:
                floatdata = floatdata1
                floatdata.shape = (1, FRAMES_PER_BUFFER)

            # update the circular buffer
            ringbuffer.push(floatdata)
            chunks += 1

        return (chunks, t.elapsed(), chunks * FRAMES_PER_BUFFER)

    def set_single_input(self):
        self.duo_input = False

    def set_duo_input(self):
        self.duo_input = True

    # returns the stream time in seconds
    def get_stream_time(self):
        return self.stream.get_time()
class MicrophoneStream(object):
    """Opens a recording stream as a generator yielding the audio chunks."""

    def __init__(self, rate, chunk):
        self._rate = rate
        self._chunk = chunk
        # Create a thread-safe buffer of audio data
        self._buff = Queue()
        self.closed = True

    def __enter__(self):
        self._audio_interface = PyAudio()
        self._audio_stream = self._audio_interface.open(
            format=paInt16,
            # The API currently only supports 1-channel (mono) audio
            # https://goo.gl/z757pE
            channels=1,
            rate=self._rate,
            input=True,
            frames_per_buffer=self._chunk,
            # Run the audio stream asynchronously to fill the buffer object.
            # This is necessary so that the input device's buffer doesn't
            # overflow while the calling thread makes network requests, etc.
            stream_callback=self._fill_buffer,
        )
        self.closed = False
        return self

    def __exit__(self, type, value, traceback):
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self.closed = True
        # Signal the generator to terminate so that the client's
        # streaming_recognize method will not block the process termination.
        self._buff.put(None)
        self._audio_interface.terminate()

    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
        """Continuously collect data from the audio stream, into the buffer."""
        self._buff.put(in_data)
        return None, paContinue

    def generator(self):
        while not self.closed:
            # Use a blocking get() to ensure there's at least one chunk of
            # data, and stop iteration if the chunk is None, indicating the
            # end of the audio stream.
            chunk = self._buff.get()
            if chunk is None:
                return
            data = [chunk]

            # Now consume whatever other data's still buffered.
            while True:
                try:
                    chunk = self._buff.get(block=False)
                    if chunk is None:
                        return
                    data.append(chunk)
                except Empty:
                    break

            yield b''.join(data)
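# A usage sketch for MicrophoneStream. The 100 ms chunk follows the rate/10
# convention common in streaming-recognition samples; the loop body is
# illustrative only:
RATE = 16000
CHUNK = int(RATE / 10)  # 100 ms

with MicrophoneStream(RATE, CHUNK) as mic:
    for chunk in mic.generator():
        print('captured %d bytes' % len(chunk))
        break  # stop after the first chunk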
    wf.setsampwidth(2)
    wf.setframerate(SAMPLING_RATE)
    wf.writeframes("".join(data))
    wf.close()


NUM_SAMPLES = 2000
SAMPLING_RATE = 8000
LEVEL = 1500
COUNT_NUM = 20
SAVE_LENGTH = 8

pa = PyAudio()
stream = pa.open(format=paInt16, channels=1, rate=SAMPLING_RATE,
                 input=True, frames_per_buffer=NUM_SAMPLES)

save_count = 0
save_buffer = []

while True:
    string_audio_data = stream.read(NUM_SAMPLES)
    audio_data = np.fromstring(string_audio_data, dtype=np.short)
    large_sample_count = np.sum(audio_data > LEVEL)
    print np.max(audio_data)
    if large_sample_count > COUNT_NUM:
        save_count = SAVE_LENGTH
class AudioClient(Thread):
    """Client class for sending audio data to the server.

    Keeps running as a thread until self.sock is deleted.
    """

    def __init__(self, ip, port):
        super(AudioClient, self).__init__()
        self.setDaemon(True)
        self.address = (ip, port)
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.recoder = PyAudio()
        self.audio_stream = None
        # parameters for recording
        self.chunk = config.VOICE_CHUNK
        self.format = config.VOICE_FORMAT
        self.channels = config.VOICE_CHANNELS
        self.rate = config.VOICE_RATE
        self.max_record_seconds = config.MAX_RECORD_SECONDS * 50
        self.is_alive = True

    def __del__(self):
        self.sock.close()
        if self.audio_stream is not None:
            self.audio_stream.stop_stream()
            self.audio_stream.close()
        self.recoder.terminate()

    def run(self):
        while True:
            try:
                self.sock.connect(self.address)
                break
            except ConnectionRefusedError:
                time.sleep(1)
        self.audio_stream = self.recoder.open(format=self.format,
                                              channels=self.channels,
                                              rate=self.rate,
                                              input=True,
                                              frames_per_buffer=self.chunk)
        while self.audio_stream.is_active() and self.is_alive:
            audio_frames = []
            # begin recording
            time_count = self.max_record_seconds
            while True:
                audio_data = self.audio_stream.read(self.chunk)
                audio_frames.append(audio_data)
                time_count -= 1
                if time_count == 0:
                    break
            # send data
            audio_frames = dumps(audio_frames)
            try:
                self.sock.sendall(
                    struct.pack('L', len(audio_frames)) + audio_frames)
            except (ConnectionRefusedError, ConnectionAbortedError,
                    ConnectionResetError):
                self.kill()
                return

    def kill(self):
        """Kill the thread."""
        self.is_alive = False
import wave
import sys

from pyaudio import PyAudio

CHUNK = 1024

wf = wave.open(sys.argv[1], 'rb')
p = PyAudio()

stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True)

data = wf.readframes(CHUNK)
while data != '':
    stream.write(data)
    data = wf.readframes(CHUNK)

stream.stop_stream()
stream.close()
p.terminate()
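# Usage note (not part of the original script): run it as, for example,
#   python play_wav.py path/to/file.wav      (the script name is hypothetical)
# The `while data != ''` sentinel only terminates on Python 2, where
# readframes() returns str; on Python 3 it returns bytes, so the comparison
# would need to be against b'' instead.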
import numpy as np
import matplotlib.pyplot as plt
from pyaudio import PyAudio, paContinue, paFloat32
from time import sleep

pa = PyAudio()
counter = 0  # playback position in frames


def callback(in_data, frame_count, time_info, flag):
    global counter  # the original omitted this and crashed on assignment
    if flag:
        print("Playback Error: %i" % flag)
    played_frames = counter
    counter += frame_count
    return signal[played_frames:counter], paContinue


stream = pa.open(format=paFloat32,
                 channels=1,
                 rate=44100,
                 output=True,
                 frames_per_buffer=1024,
                 stream_callback=callback)

while stream.is_active():
    sleep(0.1)

stream.close()
pa.terminate()
class BD_API():
    def __init__(self):
        self.initariateV()

    def initariateV(self):
        self.pa = None
        self.stream = None

    def save_wave_file(self, filename, data):
        '''save the data to a wav file'''
        self.wf = wave.open(filename, 'wb')
        self.wf.setnchannels(channels)    # channel count
        self.wf.setsampwidth(sampwidth)   # sample width in bytes, 1 or 2
        self.wf.setframerate(framerate)   # sampling rate, 8000 or 16000
        self.wf.writeframes(b"".join(data))
        # https://stackoverflow.com/questions/32071536/typeerror-sequence-item-0-expected-str-instance-bytes-found
        self.wf.close()

    def my_record(self):
        self.pa = PyAudio()
        self.stream = self.pa.open(format=paInt16,
                                   channels=1,
                                   rate=framerate,
                                   input=True,
                                   frames_per_buffer=NUM_SAMPLES)
        print("start recording...")
        frames = []
        temp1 = []
        temp2 = 0
        # t1 = time.time()
        while True:
            print('begin ')
            temp2 = temp2 + 1
            for i in range(0, int(framerate / NUM_SAMPLES)):
                data = self.stream.read(NUM_SAMPLES)
                frames.append(data)
            audio_data = np.fromstring(data, dtype=np.short)
            large_sample_count = np.sum(audio_data > 800)
            temp = np.max(audio_data)
            if temp2 > 3:
                if temp < 800:
                    # print("no signal detected")
                    temp1.append(temp)
                    if len(temp1) == 2:
                        print('silent for more than 2 s')
                        break
                elif temp2 >= 60:
                    print("exceeded Baidu's maximum call duration")
                    break
        print("recording finished")
        self.stream.stop_stream()
        self.stream.close()
        self.pa.terminate()
        self.save_wave_file('01.pcm', frames)

    def get_file_content(self, filePath):
        with open(filePath, 'rb') as fp:
            return fp.read()

    def send_request(self, words):
        result = client.synthesis(words, 'zh', 1, {'vol': 5, 'per': 4})
        if not isinstance(result, dict):
            with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f:
                f.write(result)
                tmpfile = f.name
        return tmpfile, result, f

    def say(self, words):
        tmpfile, result, f = self.send_request(words)
        # time.sleep(0.5)
        print("file name is " + tmpfile)
        subprocess.call("play -q %s" % tmpfile, shell=True)


def InitDevice():
    audioDev = pyaudio.PyAudio()
    stream = audioDev.open(format=pyaudio.paInt8,
                           channels=1,
                           rate=16000,
                           output=True)
    return stream


def play(stream, Result):
    stream.write(Result)
shuffle(mood_strs)

root = tk.Tk()
frame = tk.Frame(master=root)
labels = tk.Frame(master=frame)
question_label_1 = tk.Label(master=labels,
                            font=('Arial', 30),
                            text='Is the mood of the excerpt')
question_label_2 = tk.Label(master=labels,
                            font=('Arial', 30),
                            foreground='red',
                            text=mood_strs[0])
question_label_3 = tk.Label(master=labels, font=('Arial', 30), text='?')

audio = PyAudio()
stream = audio.open(format=paInt16, channels=1, rate=44100, output=True)


def button_no_clicked():
    if finished == False and button_no['state'] == tk.NORMAL:
        records[all_excerpts[current_excerpt_index]][mood_strs[current_mood_index]] = 0
        update_index()


def button_maybe_clicked():
    if finished == False and button_maybe['state'] == tk.NORMAL:
        records[all_excerpts[current_excerpt_index]][mood_strs[current_mood_index]] = 1
        update_index()
import math

from pyaudio import PyAudio

BITRATE = 16000      # number of frames per second/frameset
FREQUENCY = 261.63   # Hz, waves per second; 261.63 = C4 note
LENGTH = 0.4         # seconds to play sound

NUMBEROFFRAMES = int(BITRATE * LENGTH)
RESTFRAMES = NUMBEROFFRAMES % BITRATE
WAVEDATA = ''

for x in xrange(NUMBEROFFRAMES):
    WAVEDATA += chr(
        int(math.sin(x / ((BITRATE / FREQUENCY) / math.pi)) * 127 + 128))

# fill remainder of frameset with silence
for x in xrange(RESTFRAMES):
    WAVEDATA += chr(128)

p = PyAudio()
stream = p.open(format=p.get_format_from_width(1),
                channels=1,
                rate=BITRATE,
                output=True)
stream.write(WAVEDATA)
stream.stop_stream()
stream.close()
p.terminate()
def __init__(self):
    # Constructor of VoiceGame2; the cocos/pygame globals (WIDTH, HEIGHT,
    # FONTS, BLACK) and the Flappy/Tube/upTube classes come from the
    # surrounding module.
    super(VoiceGame2, self).__init__(255, 255, 255, 255, WIDTH, HEIGHT)
    pygame.mixer.init()

    self.cloud = cocos.sprite.Sprite('fla.png')
    self.cloud.scale_x = 1.5
    self.cloud.scale_y = 1.83
    self.cloud.position = 300, 240
    self.add(self.cloud)

    self.gameover = None
    self.score = 0  # count score
    self.txt_score = cocos.text.Label(u'Score:0',
                                      font_name=FONTS,
                                      font_size=16,
                                      color=BLACK)
    self.txt_score.position = 510, 240
    self.add(self.txt_score, 99999)

    self.top = '', 0
    self.top_notice = cocos.text.Label(u'',
                                       font_name=FONTS,
                                       font_size=18,
                                       color=BLACK)
    self.top_notice.position = 400, 410
    self.add(self.top_notice, 99999)
    self.name = ''

    # init voice
    self.NUM_SAMPLES = 2048  # PyAudio buffer size
    self.LEVEL = 1500        # sound threshold
    '''self.voicebar = Sprite('black.png', color=(0, 0, 255))
    self.voicebar.position = 20, 450
    self.voicebar.scale_y = 0.1
    self.voicebar.image_anchor = 0, 0
    self.add(self.voicebar)'''

    self.ppx = Flappy(self)
    self.add(self.ppx)

    self.floor2 = cocos.cocosnode.CocosNode()
    self.floor = cocos.cocosnode.CocosNode()
    self.add(self.floor)
    self.add(self.floor2)
    self.last_block = 0, 100
    for i in range(5):
        b = Tube(self)
        u = upTube(self)
        self.floor.add(b)
        self.floor.add(u)
        self.pitch_pic(u)
        pos = b.x + b.width, b.height

    # start capturing sound from the default input device
    pa = PyAudio()
    SAMPLING_RATE = int(
        pa.get_device_info_by_index(0)['defaultSampleRate'])
    self.stream = pa.open(format=paInt16,
                          channels=1,
                          rate=SAMPLING_RATE,
                          input=True,
                          frames_per_buffer=self.NUM_SAMPLES)
    self.stream.stop_stream()

    # pygame exposes music playback as pygame.mixer.music
    pygame.mixer.music.load('intro.wav')
    pygame.mixer.music.play(1)
    self.schedule(self.update)
def monitor():
    # `save_wave_file`, `voice`, `recognizeCommand`, `Run`, `NUM_SAMPLES`
    # and `saveCount` come from the surrounding module.
    global saveCount
    pa = PyAudio()
    stream = pa.open(format=paInt16,
                     channels=1,
                     rate=16000,
                     input=True,
                     frames_per_buffer=NUM_SAMPLES)
    print('Start buffering audio')
    audioBuffer = []
    rec = []
    audioFlag = False
    t = False
    timeFlag = 0
    while True:
        data = stream.read(NUM_SAMPLES, exception_on_overflow=False)
        audioBuffer.append(data)  # raw recording
        audioData = np.frombuffer(data, dtype=np.short)  # array from byte string
        largeSampleCount = np.sum(audioData > 2000)
        temp = np.max(audioData)
        print(temp)
        if temp > 3000 and t == False:  # 3000; adjust for different microphones
            t = 1  # start recording
            print("Speech signal detected, recording started")
            begin = time.time()
            print(temp)
        if t:
            end = time.time()
            if end - begin > 5:
                timeFlag = 1  # end the recording after 5 s
            if largeSampleCount > 20:
                saveCount = 3
            else:
                saveCount -= 1
            if saveCount < 0:
                saveCount = 0
            if saveCount > 0:
                rec.append(data)
            else:
                if len(rec) > 0 or timeFlag:
                    save_wave_file('music/detected_voice.wav', rec)  # save the detected speech
                    # Baidu speech recognition and synthesis
                    voice.identify_synthesize('music/detected_voice.wav')
                    rec = []
                    t = 0
                    timeFlag = 0
                    breakFlag = recognizeCommand('result.txt')  # analyse the command
                    Run.brake(0)
                    Run.gpio_release()
                    if breakFlag == 1:
                        breakFlag = 0
                        break
    stream.stop_stream()
    stream.close()
    pa.terminate()
    print('Waiting for the next wake-up; please say the wake-up word')
    Run.gpio_release()
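# monitor() above only starts saving once the level crosses a threshold,
# which can clip the first syllable. A common refinement (a sketch, not
# part of the original) keeps a short rolling pre-buffer and prepends it
# when the trigger fires; the block size and threshold are assumptions.
from collections import deque
import numpy as np
from pyaudio import PyAudio, paInt16

def capture_with_prebuffer(rate=16000, block=1024, threshold=3000, pre_blocks=8):
    pa = PyAudio()
    stream = pa.open(format=paInt16, channels=1, rate=rate,
                     input=True, frames_per_buffer=block)
    pre = deque(maxlen=pre_blocks)  # rolling history before the trigger
    while True:
        data = stream.read(block, exception_on_overflow=False)
        if np.max(np.frombuffer(data, dtype=np.int16)) > threshold:
            rec = list(pre) + [data]  # include audio from just before the trigger
            break
        pre.append(data)
    stream.stop_stream()
    stream.close()
    pa.terminate()
    return b"".join(rec)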
def recoder_and_decide(unit_time=0.3):
    # `Configure`, `ZCR` and `Recognition` are external helpers from the
    # surrounding project.
    pa = PyAudio()
    config = Configure(unit_time=unit_time)
    times = 0
    binary_str = b""
    pre_str = b""
    temp_binary_str = b""
    print("begin_____________________")
    is_recording, pre_is_recording = False, False
    test_zcr_count = 0
    while True:  # the original bound (`times < 100 or True`) was always true
        stream = pa.open(format=config.FORMAT,
                         channels=config.CHANNELS,
                         rate=config.RATE,
                         input=True,
                         frames_per_buffer=config.CHUNK)
        frames = []
        times += 1
        pre_is_recording = is_recording
        pre_str = temp_binary_str
        stream.start_stream()
        for i in range(0, config.get_count_chunk()):
            data = stream.read(config.CHUNK)
            frames.append(data)
        stream.stop_stream()
        temp_binary_str = b''.join(frames)
        digit_data = np.frombuffer(temp_binary_str, np.int16)
        zcr = ZCR(digit_data)
        zcr_flag, energy_flag = zcr.get_weather_start_record_state()
        is_recording = energy_flag or zcr_flag
        print(zcr_flag, energy_flag)
        if zcr_flag:
            test_zcr_count += 1
        if is_recording:
            if not pre_is_recording:
                # keep the block before the trigger so the onset is not clipped
                binary_str = pre_str
            binary_str += temp_binary_str
            print("Recording, time=%s" % str(len(binary_str) / 32000))
            print("\n__________________________________________")
        elif pre_is_recording:
            # stop recording and start recognizing
            print("test_zcr_count = %d" % test_zcr_count)
            test_zcr_count = 0
            print("Recognizing......")
            sp = Recognition('./969_914.pb', './conv_labels.txt')
            recognized_dict = sp.run(binary_str, unit_time)
            print("Recognition result: {}".format(recognized_dict))
            binary_str = b""
        else:
            print("Finding")
        stream.close()  # a fresh stream is opened on each pass
    # not reached while the loop above runs forever
    pa.terminate()
    return None
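# The ZCR class used above is external; a sketch of the two measures it
# appears to combine -- short-time energy and zero-crossing rate -- for a
# block of int16 samples. The function names and thresholds here are
# assumptions, not the original API.
import numpy as np

def short_time_energy(samples):
    x = samples.astype(np.float64)
    return float(np.mean(x * x))

def zero_crossing_rate(samples):
    signs = np.sign(samples)
    signs[signs == 0] = 1  # treat zeros as positive
    return float(np.mean(signs[1:] != signs[:-1]))

# block = np.frombuffer(temp_binary_str, np.int16)
# speechy = short_time_energy(block) > 1e5 or zero_crossing_rate(block) > 0.1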
class GoogleSpeechRecognition:
    @nobind
    def __init__(self, session=None):
        self._logger = Logger(self.__class__.__name__)
        # initialize internal variables
        self._logger.info("[init ] Initializing service")
        self._subscriber_list = set()
        self._is_robot = (session is not None)
        self._is_running = False
        self._robot_session = session
        self._robot_service_id = None
        self._robot_audio_device = None
        self._computer_audio_interface = None
        self._computer_audio_stream = None
        self._raw_buffer = deque()
        self._input_qty = 0
        self._size_to_filter = 0
        self._filtered_buffer = Queue()
        self._google_rate = 0
        self._google_client = None
        self._google_recognition_config = None
        self._google_response_iterator = None
        # initialize public variables
        self.on_sentence = Signal("(s)")
        # Will raise an exception to stop the service if we are on a virtual robot
        self._raise_if_virtual_robot()
        # Set environment variable for Google credentials...
        environ["GOOGLE_APPLICATION_CREDENTIALS"] = GOOGLE_CRED_FILE

    @nobind
    def __enter__(self):
        self._logger.info("[enter] Connecting service")
        if self._is_robot:
            self._robot_service_id = self._robot_session.registerService(
                self.__class__.__name__, self)
        return self

    @nobind
    def __exit__(self, exc_type, exc_val, exc_tb):
        self._logger.info("[exit ] Disconnecting service")
        self._close_microphones()
        if self._is_robot:
            self._robot_session.unregisterService(self._robot_service_id)

    @nobind
    def __del__(self):
        pass

    @nobind
    def _raise_if_virtual_robot(self):
        if self._is_robot:
            try:
                self._robot_session.service("ALSystem")
                self._logger.info(
                    "[init ] Running on a robot, will use the robot's microphones.")
            except RuntimeError:
                raise RuntimeError(
                    'Running on a virtual robot, this service won\'t work! '
                    'Run it on a real robot or on a computer.')
        else:
            self._logger.info(
                "[init ] Running on a computer, will use the computer's microphone.")

    ####################################
    #                                  #
    #        MANAGE SUBSCRIBERS        #
    #                                  #
    ####################################

    @bind(methodName="subscribe", returnType=String, paramsType=[String])
    def subscribe(self, subscriber_name):
        """Subscribe to speech recognition. This call starts the engine.

        The function returns the subscriber name; you need to provide it
        to unsubscribe.

        :param subscriber_name: str
        :return: str
        """
        # 1) find a unique subscriber name
        final_name = subscriber_name
        while final_name in self._subscriber_list:
            final_name = "{}_{}".format(subscriber_name, str(uuid4())[1:3])
        self._logger.info("[sub  ] New subscriber: {}".format(final_name))
        # 2) add it to the list
        self._subscriber_list.add(final_name)
        # 3) start streaming if not done already
        self._start_streaming()
        # 4) return subscriber name to application
        return final_name

    @bind(methodName="unsubscribe", returnType=Bool, paramsType=[String])
    def unsubscribe(self, subscriber_name):
        """Unsubscribe from speech recognition.

        Provide the subscriber name as returned by the subscribe function.
        Once all subscribers are removed, the streaming stops.
        Returns True or False depending on whether the call was successful.
        :param subscriber_name: str
        :return: boolean
        """
        # 1) remove from subscriber list
        try:
            self._subscriber_list.remove(subscriber_name)
        except KeyError:  # set.remove raises KeyError, not IndexError
            print "Not subscribed"
            return False
        # 2) stop streaming if needed
        if len(self._subscriber_list) == 0:
            self._stop_streaming()
        return True

    @nobind
    def _start_streaming(self):
        if self._is_running:
            return
        self._is_running = True
        self._logger.info("[start] Start streaming")
        async(self._open_microphones)
        async(self._start_google_stream)

    @nobind
    def _stop_streaming(self):
        if not self._is_running:
            return
        self._is_running = False
        self._logger.info("[stop ] Stop streaming")
        async(self._close_microphones)

    ####################################
    #                                  #
    #    GET SOUND FROM MICROPHONES    #
    #                                  #
    ####################################

    @nobind
    def _open_microphones(self):
        if self._is_robot:
            self._logger.info("[open ] Opening robot microphones.")
            self._google_rate = 16000
            self._robot_audio_device = self._robot_session.service(
                "ALAudioDevice")
            # ask for the front microphone signal sampled at 16 kHz;
            # all microphones interleaved at 48 kHz would be
            # setClientPreferences(name, 48000, 0, 0)
            self._robot_audio_device.setClientPreferences(
                self.__class__.__name__, 16000, 1, 0)
            self._robot_audio_device.subscribe(self.__class__.__name__)
        else:
            self._logger.info("[open ] Opening computer microphones.")
            self._google_rate = 16000
            self._computer_audio_interface = PyAudio()
            self._computer_audio_stream = self._computer_audio_interface.open(
                format=paInt16,
                channels=1,
                rate=16000,
                input=True,
                stream_callback=self._computer_callback,
            )
        self._logger.info("[open ] Done!")

    @nobind
    def _close_microphones(self):
        if self._is_robot:
            pass
        else:
            self._logger.info("[close] Closing computer microphones.")
            self._computer_audio_interface.terminate()

    @nobind
    def _computer_callback(self, in_data, frame_count, time_info,
                           status_flags):
        """ Callback used for computer microphone audio buffers """
        self._filtered_buffer.put(in_data)
        return None, paContinue

    # todo: rename this process_remote
    @bind(methodName="processRemote", returnType=Void,
          paramsType=[Int32, Int32, Int32, List(Int32)])
    def processRemote(self, nr_of_channels, nr_of_samples_per_channel,
                      timestamp, input_buffer):
        """ Callback that receives the robot's audio buffers """
        self._filtered_buffer.put(input_buffer)
        return
        # Dead code below: the beam-forming path is bypassed by the
        # early return above but kept for reference.
        self._raw_buffer.extend(input_buffer)
        self._input_qty += nr_of_samples_per_channel
        if self._input_qty > BEAM_FORMING_SAMPLE_DIFF:
            async(self._filter_buffer)
            self._size_to_filter = self._input_qty
            self._input_qty = 0

    @nobind
    def _filter_buffer(self):
        # calculate how many samples we can get
        # (need to keep a rolling buffer for beam forming)
        n_samples = self._size_to_filter - BEAM_FORMING_SAMPLE_DIFF
        # copy those samples into a list, deleting them from the queue
        # except the ones needed for the next beam-forming pass, which
        # are pushed back on the left
        raw_buffer = []
        for i in range(n_samples * BEAM_FORMING_CHANNELS):
            raw_buffer.append(self._raw_buffer.popleft())
        cross_values = []
        for i in range(BEAM_FORMING_SAMPLE_DIFF * BEAM_FORMING_CHANNELS):
            v = self._raw_buffer.popleft()
            raw_buffer.append(v)
            cross_values.append(v)
        cross_values.reverse()
        for i in cross_values:
            self._raw_buffer.appendleft(i)
        # apply the "delay and sum" beam-forming filter
        for i in range(n_samples):
            self._filtered_buffer.put(
                (raw_buffer[i * BEAM_FORMING_CHANNELS] +
                 raw_buffer[i * BEAM_FORMING_CHANNELS + 1] +
                 raw_buffer[(i + BEAM_FORMING_SAMPLE_DIFF) *
                            BEAM_FORMING_CHANNELS + 2] +
                 raw_buffer[(i + BEAM_FORMING_SAMPLE_DIFF) *
                            BEAM_FORMING_CHANNELS + 3]) /
                BEAM_FORMING_CHANNELS)

    ####################################
    #                                  #
    #     STREAM BUFFER TO GOOGLE      #
    #                                  #
    ####################################

    @nobind
    def _generate_next_buffer(self):
        while self._is_running:
            # drain the queue and emit everything gathered so far
            audio_data = [str(self._filtered_buffer.get())]
            while not self._filtered_buffer.empty():
                audio_data.append(str(self._filtered_buffer.get()))
            yield types.StreamingRecognizeRequest(
                audio_content=b''.join(audio_data))

    @nobind
    def _start_google_stream(self):
        self._logger.info("[gstar] Start streaming to Google")
        # Configure Google speech recognition
        self._google_client = speech.SpeechClient()
        self._logger.info("[gstar] Got Google client")
        contexts = [types.SpeechContext(phrases=[])]
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=self._google_rate,
            language_code="en-US",  # BCP-47 tag; "en_US" is not accepted
            max_alternatives=1,
            profanity_filter=False,
            speech_contexts=contexts,
            enable_word_time_offsets=False)
        self._google_recognition_config = types.StreamingRecognitionConfig(
            config=config, single_utterance=False, interim_results=False)
        self._logger.info("[gstar] Google configuration ready")
        self._google_response_iterator = self._google_client.streaming_recognize(
            self._google_recognition_config, self._generate_next_buffer())
        self._logger.info("[gstar] Streaming started!")
        async(self._process_next_response)

    @nobind
    def _process_next_response(self):
        self._logger.info("[gresp] Waiting for next response...")
        if not self._is_running:
            return
        streaming_recognize_response = self._google_response_iterator.next()
        self._logger.info("[gresp] Got a response!")
        if not self._is_running or not streaming_recognize_response:
            return
        if streaming_recognize_response.results:
            for result in streaming_recognize_response.results:
                async(self._process_valid_result, result)
        async(self._process_next_response)

    @nobind
    def _process_valid_result(self, result):
        if result.is_final:
            self._logger.info("[valid] *** New final result ***")
            # there is only one alternative because max_alternatives=1
            alternative = result.alternatives[0]
            self._logger.info(alternative.transcript)
            self._logger.info(alternative.confidence)
            self.on_sentence([alternative.transcript, alternative.confidence])
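# A compact NumPy sketch of the "delay and sum" idea used in
# _filter_buffer above: sum the front channel pair as-is, sum the rear
# pair after a fixed sample delay, and average. The 4-channel layout and
# delay value are assumptions for illustration, not the robot's actual
# geometry.
import numpy as np

def delay_and_sum(interleaved, channels=4, delay=2):
    x = np.asarray(interleaved).reshape(-1, channels)
    n = len(x) - delay
    front = x[:n, 0] + x[:n, 1]                            # no delay
    rear = x[delay:delay + n, 2] + x[delay:delay + n, 3]   # delayed pair
    return (front + rear) / channels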
class AudioRecorder(DIWA_THREAD):
    """
    A thread for capturing audio continuously. It keeps a buffer that can
    be saved to a file. AudioRecorder is written in mixed case by
    convention, even though upper case is otherwise preferred for
    threading types here.

    :param parent: Parent of the thread.
    :type parent: :py:class:`diwacs.GraphicalUserInterface`

    """
    def __init__(self, parent):
        DIWA_THREAD.__init__(self, name='AudioRecorder')
        self.parent = parent
        self.py_audio = PyAudio()
        self.stream = self.open_mic_stream()
        self.buffer = deque(maxlen=diwavars.MAX_LENGTH)

    def stop(self):
        """Stop the audio recorder thread."""
        DIWA_THREAD.stop(self)
        sleep(0.1)
        self.stream.close()

    def find_input_device(self):
        """Find a microphone device."""
        for i in range(self.py_audio.get_device_count()):
            devinfo = self.py_audio.get_device_info_by_index(i)
            for keyword in ['microphone']:
                if keyword in devinfo['name'].lower():
                    return i
        default_device = self.py_audio.get_default_input_device_info()
        if default_device:
            return default_device['index']
        return None

    def open_mic_stream(self):
        """Open the stream object for the microphone."""
        device_index = None
        # uncomment the next line to search for a device:
        # device_index = self.find_input_device()
        stream = self.py_audio.open(
            format=diwavars.FORMAT,
            channels=diwavars.CHANNELS,
            rate=diwavars.RATE,
            input=True,
            input_device_index=device_index,
            frames_per_buffer=diwavars.INPUT_FRAMES_PER_BLOCK)
        return stream

    def run(self):
        """
        Continuously record from the microphone into the buffer.
        The buffer size is bounded by the diwavars.MAX_LENGTH constant,
        so only the most recent data is kept when more arrives than fits.
        """
        while not self._stop.is_set():
            try:
                data = self.stream.read(diwavars.INPUT_FRAMES_PER_BLOCK)
                while len(self.buffer) >= self.buffer.maxlen:
                    element = self.buffer.popleft()
                    del element
                self.buffer.append(data)
            except IOError as excp:
                _logger().exception('Error recording: {0!s}'.format(excp))

    def save(self, event_id, path):
        """Save the buffer to a file."""
        try:
            _logger().debug('Saving audio buffer')
            date_string = datetime.now().strftime('%d%m%Y%H%M')
            filename = '{0}_{1}.wav'.format(event_id, date_string)
            filepath = os.path.join(path, 'Audio')
            if not os.path.exists(filepath):
                os.makedirs(filepath)
            filepath = os.path.join(filepath, filename)
            sample_size = self.py_audio.get_sample_size(diwavars.FORMAT)
            wave_file = wave.open(filepath, 'wb')
            wave_file.setnchannels(diwavars.CHANNELS)
            wave_file.setsampwidth(sample_size)
            wave_file.setframerate(diwavars.RATE)
            wave_file.writeframes(b''.join(self.buffer))
            wave_file.close()
        except Exception:
            _logger().exception('audio save exception')
        self.parent.diwa_state.remove_from_swnp_data('audio')
        # pass the bound method and its argument to CallAfter instead of
        # calling it immediately on this thread
        CallAfter(self.parent.UpdateScreens, update=True)
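# The pattern above -- a deque with maxlen as a rolling audio buffer --
# works standalone too. A minimal sketch (rate, block size and the 5 s
# window are assumptions) that always holds roughly the last five
# seconds of audio:
from collections import deque
from pyaudio import PyAudio, paInt16

RATE, BLOCK = 16000, 1024
pa = PyAudio()
stream = pa.open(format=paInt16, channels=1, rate=RATE,
                 input=True, frames_per_buffer=BLOCK)
ring = deque(maxlen=RATE * 5 // BLOCK)  # ~5 s of blocks; old ones fall off
for _ in range(ring.maxlen * 2):        # read ~10 s; only the last 5 s survive
    ring.append(stream.read(BLOCK))
last_five_seconds = b''.join(ring)
stream.stop_stream()
stream.close()
pa.terminate()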
class Song(Thread):
    def __init__(self, playlist, is_loop=False, *args, **kwargs):
        self.playlist = playlist
        self.play_number = 0
        self.play_count = 0
        self.is_looped = is_loop
        self.is_paused = True
        self.is_stoped = True
        self.is_terminated = True
        self.progress = 0
        self.db = 0
        self.max_db = 0
        self.duration = 0
        self.rate = 0
        self.channels = 0
        self.sample_width = 0
        self.title = ""
        self.artist = ""
        self.album = ""
        self.track = ""
        self.chunk_ms = 100  # 100 ms chunks
        self.chunk_count = 0
        with noalsaerr():
            self.p = PyAudio()
        Thread.__init__(self, *args, **kwargs)
        # daemon=True so the thread ends with the main thread; when using
        # the Song class standalone, set it to False or time.sleep(duration)
        self.daemon = True
        self.pause_condition = Condition(Lock())
        self.stop_condition = Condition(Lock())

    def pause(self):
        # keep the playback position
        self.is_paused = True
        self.pause_condition.acquire()

    def stop(self):
        # keep only the current track number, not the playback position
        self.is_stoped = True
        self.stop_condition.acquire()

    def play(self):
        if not self._started.is_set():  # not started yet
            self.start()
        else:  # restart
            if self.is_stoped:
                self.stop_condition.notify()
                self.stop_condition.release()
            if self.is_paused:
                self.pause_condition.notify()
                self.pause_condition.release()
        self.is_paused = False
        self.is_stoped = False

    def terminate(self):
        self.is_terminated = True

    def skip(self):
        self.stop()
        self.play_count += 1
        self.play()

    def rewind(self):
        self.stop()
        # clamp at the first track (max, not min, so the count never goes
        # negative)
        self.play_count = max(0, self.play_count - 1)
        self.play()

    def loop_on(self):
        self.is_looped = True

    def loop_off(self):
        # even with looping off, playback continues from the current
        # track to the end of the playlist
        self.play_count = self.play_count % len(self.playlist)
        self.is_looped = False

    def volume(self, decibel):
        # this can clip; a different approach may be needed
        self.pause()  # avoid underruns
        self.__set_segment()
        self.seg += decibel
        self.chunks = make_chunks(self.seg, self.chunk_ms)
        self.__set_stream()
        self.play()

    def mute(self):
        pass

    def get_info(self):  # set_info?
        duration = self.seg.duration_seconds  # playing time
        rate = self.seg.frame_rate            # sampling rate
        channels = self.seg.channels          # 1: mono, 2: stereo
        sample_width = self.seg.sample_width  # bytes per sample
        return (duration, rate, channels, sample_width)

    def get_playback_info(self):
        return (self.progress, self.db)

    def __set_segment(self):
        self.play_number = self.play_count % len(self.playlist)
        f = self.playlist[self.play_number]
        tags = FLAC(f)
        self.title = get_attribute(tags, 'title')  # tags.get(key, None) also works
        self.album = get_attribute(tags, 'album')
        self.artist = get_attribute(tags, 'albumartist')
        if self.artist is None:
            self.artist = get_attribute(tags, 'artist')
        self.track = get_attribute(tags, 'tracknumber')
        self.seg = AudioSegment.from_file(f)

    def __set_stream(self):
        # `format` must be a portaudio constant (paInt8=16, paInt16=8,
        # paInt24=4, paInt32=2, paFloat32=1).
        # pydub.AudioSegment.sample_width is the sample size in bytes
        # (1=8bit, 2=16bit, 3=24bit, 4=32bit), but
        # PyAudio.get_format_from_width maps width 4 to paFloat32 and
        # never returns paInt32; with no 32-bit float sources at hand,
        # map the width by hand as a stopgap (arguably a PyAudio issue):
        # https://github.com/jleb/pyaudio/blob/0109cc46cac6a3c404050f4ba11752e51aeb1fda/src/pyaudio.py#L215
        def get_format_from_width(width):
            if width == 1:
                return paInt8    # = 16
            elif width == 2:
                return paInt16   # = 8
            elif width == 3:
                return paInt24   # = 4
            elif width == 4:
                return paInt32   # = 2
            else:
                raise ValueError(f"Invalid width {width}")

        self.stream = self.p.open(
            format=get_format_from_width(self.seg.sample_width),
            channels=self.seg.channels,
            rate=self.seg.frame_rate,
            output=True)

    def __play_song(self):
        self.__set_segment()
        self.chunks = make_chunks(self.seg, self.chunk_ms)
        self.chunk_count = 0
        self.__set_stream()
        self.duration = self.seg.duration_seconds  # playing time
        self.rate = self.seg.frame_rate            # sampling rate
        self.channels = self.seg.channels          # 1: mono, 2: stereo
        self.sample_width = self.seg.sample_width  # bytes (1: 8 bit, 2: 16 bit, ...)
        self.max_db = self.seg.max_dBFS
        while not self.is_stoped:
            if self.chunk_count >= len(self.chunks):
                # played to the end of the track
                self.play_count += 1  # next song
                break
            with self.pause_condition:
                chunk = self.chunks[self.chunk_count]
                data = chunk._data
                self.chunk_count += 1
                self.db = chunk.dBFS
                self.progress = self.chunk_count / len(self.chunks)
                self.stream.write(data)
                while self.is_paused:
                    # otherwise: ALSA lib pcm.c:8526:(snd_pcm_recover) underrun occurred
                    self.stream.stop_stream()
                    self.pause_condition.wait()
                    self.stream.start_stream()  # resume
        self.stream.close()  # terminate the stream

    def run(self):
        # loop over the playlist
        while self.play_count < len(self.playlist) \
                or self.is_looped or not self.is_terminated:
            with self.stop_condition:
                self.__play_song()
                while self.is_stoped:
                    self.stop_condition.wait()
        self.p.terminate()  # terminate the portaudio session
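# The core playback loop above boils down to: slice an AudioSegment into
# short chunks and write each chunk's raw bytes to a PyAudio output
# stream. A minimal sketch of just that loop; "song.flac" is a
# placeholder filename.
from pydub import AudioSegment
from pydub.utils import make_chunks
from pyaudio import PyAudio

seg = AudioSegment.from_file("song.flac")
p = PyAudio()
stream = p.open(format=p.get_format_from_width(seg.sample_width),
                channels=seg.channels, rate=seg.frame_rate, output=True)
for chunk in make_chunks(seg, 100):  # 100 ms per chunk
    stream.write(chunk.raw_data)     # blocking write keeps timing simple
stream.stop_stream()
stream.close()
p.terminate()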
class RTAudio(object):
    def __init__(self,
                 input_device_index,
                 output_device_index,
                 fs=48000,
                 frame_length=1024,
                 channels=1,
                 callback=None):
        self.input_device_index = input_device_index
        self.output_device_index = output_device_index
        self.fs = fs
        self.stream_callback = callback
        self.p = PyAudio()
        self.frame_length = frame_length
        self.channels = channels
        self.do_stop = False  # was `dostop`; the rest of the class uses do_stop
        self.sleeptime = 0.1
        self.frames = 0

    def run(self):
        self.stream_start()
        # run the monitor loop on a background thread
        t = Thread(target=self.stream_run)
        t.start()

    def stop(self):
        self.do_stop = True

    def _callback(self, in_data, frame_count, time_info, status):
        self.frames += 1
        in_data = np.frombuffer(in_data, dtype=np.int16)
        in_data = in_data.astype(np.float32) / 32767
        # subclasses implement __call__ to process a float32 block
        out_data = self(in_data) * 32767
        out_data = out_data.astype(np.int16)
        return out_data.tobytes(), paContinue

    def stream_start(self):
        self.stream = self.p.open(format=paInt16,
                                  channels=self.channels,
                                  rate=self.fs,
                                  input=True,
                                  output=True,
                                  input_device_index=self.input_device_index,
                                  output_device_index=self.output_device_index,
                                  frames_per_buffer=self.frame_length,
                                  stream_callback=self._callback)
        self.stream.start_stream()

    def stream_run(self):
        self.do_stop = False
        while self.stream.is_active() and not self.do_stop:
            time.sleep(self.sleeptime)
        self.stream_stop()

    def stream_stop(self):
        self.stream.stop_stream()
        self.stream.close()
        # self.p.terminate()

    def devices(self):
        devices = []
        for m in range(self.p.get_device_count()):
            dev = self.p.get_device_info_by_index(m)
            devices.append({'name': dev['name'],
                            'inputs': dev['maxInputChannels'],
                            'outputs': dev['maxOutputChannels']})
        return devices
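# RTAudio calls self(in_data) from its stream callback, so a subclass is
# expected to implement __call__. A sketch (the subclass name and gain
# parameter are assumptions) that just scales the input -- a real-time
# pass-through with volume control:
import numpy as np

class GainProcessor(RTAudio):
    def __init__(self, *args, gain=0.5, **kwargs):
        super(GainProcessor, self).__init__(*args, **kwargs)
        self.gain = gain

    def __call__(self, block):
        # block is float32 in [-1, 1]; return the processed block
        return np.clip(block * self.gain, -1.0, 1.0)

# proc = GainProcessor(input_device_index=0, output_device_index=0)
# proc.run()   # starts the duplex stream; proc.stop() ends it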
class MainWindow(QtGui.QMainWindow):
    """
    A Qt QMainWindow that is home to a matplotlib figure and two combo
    boxes. The combo boxes allow the selection of a sound card by API and
    name. The figure shows the waveform of that sound card's audio input.
    """

    def __init__(self, parent=None):
        super(MainWindow, self).__init__(parent)
        # Monkey patch missing methods into PyAudio.
        PyAudio.device_index_to_host_api_device_index = (
            device_index_to_host_api_device_index)
        self.pyaudio = PyAudio()
        # Create the UI widgets.
        central_widget = QtGui.QWidget(self)
        self.setCentralWidget(central_widget)
        main_layout = QtGui.QVBoxLayout(central_widget)
        self.figure = FigureWidget()
        main_layout.addWidget(self.figure)
        horizontal_layout = QtGui.QHBoxLayout()
        main_layout.addLayout(horizontal_layout)
        api_list = QtGui.QComboBox()
        api_list.setModel(APIListModel(self.pyaudio))
        horizontal_layout.addWidget(api_list)
        device_list = QtGui.QComboBox()
        device_list_model = DeviceListModel(self.pyaudio)
        device_list.setModel(device_list_model)
        horizontal_layout.addWidget(device_list)
        # Connect the moving parts.
        api_list.currentIndexChanged.connect(device_list_model.set_api_index)
        api_list.currentIndexChanged.connect(self.change_api_index)
        device_list.currentIndexChanged.connect(self.change_device_index)
        # Tell all widgets to use the default audio device.
        default_api_index = (
            self.pyaudio.get_default_input_device_info()["hostApi"])
        default_device_index = (
            self.pyaudio.device_index_to_host_api_device_index(
                self.pyaudio.get_default_host_api_info()["defaultInputDevice"],
                default_api_index))
        self.api_index = default_api_index
        self.device_index = default_device_index
        self.stream = None
        api_list.setCurrentIndex(default_api_index)
        device_list_model.set_api_index(default_api_index)
        device_list.setCurrentIndex(default_device_index)

    def closeEvent(self, event):
        """ Called by Qt when the program quits. Stops audio processing. """
        self.stream.close()
        # wait for audio processing to clear its buffers
        time.sleep(0.1)

    def change_api_index(self, api_index):
        """ Restarts audio processing with the new API index. """
        self.api_index = api_index
        self.restart_audio()

    def change_device_index(self, device_index):
        """ Restarts audio processing with the new device index. """
        self.device_index = device_index
        self.restart_audio()

    def restart_audio(self):
        """ Restarts audio processing with current API and device indices. """
        device_info = (self.pyaudio.get_device_info_by_host_api_device_index(
            self.api_index, self.device_index))
        self.num_channels = device_info['maxInputChannels']
        if self.stream:
            self.stream.close()
        self.stream = self.pyaudio.open(
            rate=int(device_info['defaultSampleRate']),
            channels=self.num_channels,
            input_device_index=device_info['index'],
            format=paFloat32,
            input=True,
            stream_callback=self.audio_callback)
        self.figure.create_plots(self.num_channels)

    def audio_callback(self, in_data, frame_count, time_info, status_flags):
        """
        Called by pyaudio whenever audio data is available.
        Updates the matplotlib figure.
        """
        data = numpy.frombuffer(in_data, dtype=numpy.float32)
        # de-interleave into (frames, channels); integer division keeps
        # the shape valid on Python 3
        data = numpy.reshape(
            data, (len(data) // self.num_channels, self.num_channels))
        self.figure.draw(data)
        return (None, paContinue)
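# A standalone sketch of the host-API/device bookkeeping the window above
# relies on: list every input device grouped under its host API, using
# only documented PyAudio calls.
from pyaudio import PyAudio

pa = PyAudio()
for api_index in range(pa.get_host_api_count()):
    api = pa.get_host_api_info_by_index(api_index)
    print(api['name'])
    for dev_index in range(api['deviceCount']):
        dev = pa.get_device_info_by_host_api_device_index(api_index, dev_index)
        if dev['maxInputChannels'] > 0:  # inputs only
            print('  [%d] %s' % (dev_index, dev['name']))
pa.terminate()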
# Tail of an input-stream callback: estimate band energy, flash an LED
# (`top_left`) on bass hits, and adapt the threshold over time. The start
# of the callback, which derives `filt_data` from in_data, is omitted in
# this excerpt; CHUNK, HOLD_FRAMES, avg_n, fs and the state variables come
# from the surrounding script.
def callback(in_data, frame_count, time_info, status):
    global on_cnt, bass_avg, bass_thresh
    energy = sum(filt_data**2) / CHUNK
    if energy > bass_thresh:
        top_left.on()
        on_cnt = 1
    elif on_cnt > 0:
        # hold the LED on for HOLD_FRAMES callbacks after a hit
        on_cnt += 1
        if on_cnt >= HOLD_FRAMES:
            on_cnt = 0
    else:
        top_left.off()
    # exponential moving average of the energy drives the threshold
    bass_avg = bass_avg * ((avg_n - 1) / avg_n) + (1 / avg_n) * energy
    bass_thresh = bass_avg * (2 - log2(1 + bass_avg * 40))
    return bytes([]), paContinue

pa = PyAudio()
stream = pa.open(format=paFloat32,
                 channels=1,
                 rate=fs,
                 input=True,
                 input_device_index=2,
                 frames_per_buffer=CHUNK,
                 stream_callback=callback)

while stream.is_active():
    sleep(0.1)

stream.close()
pa.terminate()
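# A related minimal sketch: an input-only callback that prints the RMS
# level of each float32 block in dBFS. Everything here uses documented
# PyAudio calls; the rate and block size are assumptions, and the stream
# runs until interrupted.
import math
import numpy as np
from time import sleep
from pyaudio import PyAudio, paFloat32, paContinue

def level_meter(in_data, frame_count, time_info, status):
    x = np.frombuffer(in_data, dtype=np.float32)
    rms = math.sqrt(float(np.mean(x * x))) + 1e-12  # avoid log10(0)
    print('%6.1f dBFS' % (20 * math.log10(rms)))
    return None, paContinue

pa = PyAudio()
stream = pa.open(format=paFloat32, channels=1, rate=44100, input=True,
                 frames_per_buffer=1024, stream_callback=level_meter)
while stream.is_active():
    sleep(0.1)
stream.close()
pa.terminate()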