def tts(self, text, session_begin_params):
    """Synthesize ``text`` through the QTTS SDK and return the audio file path.

    The audio is written to a named temporary file (kept on disk, since it is
    created with ``delete=False``) whose suffix is ``self.audioFormat``.  When
    the format is ``"mp3"`` the PCM chunks are passed through ``LameEncoder``;
    otherwise they are written as WAV frames.

    :param text: text handed to ``QTTSTextPut``.
    :param session_begin_params: parameter string for ``QTTSSessionBegin``.
    :return: path of the temporary audio file.
    """
    ret = c_int()
    sessionID = self.sdk.QTTSSessionBegin(session_begin_params, byref(ret))
    self._logger.debug('QTTSSessionBegin => sessionID: %s ret: %s' % (sessionID, ret.value))

    # Feed the input text into the session.
    ret = self.sdk.QTTSTextPut(sessionID, text, len(text), None)
    if const.MSP_SUCCESS != ret:
        self._logger.error("QTTSTextPut failed Error code %d.\n" % ret)
    else:
        self._logger.debug("QTTSTextPut SUCCESS=> %s" % ret)

    # Out-parameters filled in by every QTTSAudioGet call.
    audio_len = c_uint()
    synth_status = c_int()
    errorCode = c_int()
    sample_width = pyaudio.get_sample_size(pyaudio.paInt16)
    lame = LameEncoder(const.RATE, const.CHANNEL, sample_width)
    is_mp3 = self.audioFormat == "mp3"

    # The return type never changes, so declare it once instead of on every
    # iteration of the polling loop.
    self.sdk.QTTSAudioGet.restype = POINTER(c_ushort * (1024 * 1024))

    with tempfile.NamedTemporaryFile(suffix='.%s' % self.audioFormat, mode='w+b', delete=False) as f:
        if is_mp3:
            audioFile = open(f.name, "wb+")
        else:
            audioFile = wave.open(f, "wb")
            # Configure channel count, sample width and sample rate.
            audioFile.setnchannels(const.CHANNEL)
            audioFile.setsampwidth(sample_width)
            audioFile.setframerate(const.RATE)

        try:
            self._logger.debug('QTTSAudioGet => ')
            while True:
                audio_data = self.sdk.QTTSAudioGet(
                    sessionID, byref(audio_len), byref(synth_status), byref(errorCode))
                self._logger.debug('QTTSAudioGet => audio_len: %s synth_status: %s errorCode: %s' % (audio_len, synth_status, errorCode))
                if audio_data:
                    data = string_at(audio_data, audio_len.value)
                    if is_mp3:
                        audioFile.write(lame.encode(data))
                    else:
                        # Write the raw PCM bytes as frames of the wav file.
                        audioFile.writeframes(data)
                if synth_status.value == const.MSP_TTS_FLAG_DATA_END or errorCode.value != const.MSP_SUCCESS:
                    break
                time.sleep(0.1)

            if is_mp3:
                # Drain whatever the encoder still buffers.
                audioFile.write(lame.flush())
        finally:
            # Release the output file and the SDK session even when
            # synthesis or encoding fails half-way.
            audioFile.close()
            ret = self.sdk.QTTSSessionEnd(sessionID, "Normal")
            self._logger.debug('QTTSSessionEnd => ret: %s' % ret)

        f.seek(0)
        return f.name
def __init__(self, recognizer, agent):
    """Store the collaborators and start a keyphrase-configured Decoder.

    :param recognizer: kept on ``self.recognizer``.
    :param agent: kept on ``self.agent``.
    """
    super(Listener, self).__init__()

    self.recognizer, self.agent = recognizer, agent
    self.queue = Queue.Queue()
    self.running = True

    # Audio format of the samples this listener works with.
    self.sample_rate = 16000
    self.sample_width = pyaudio.get_sample_size(pyaudio.paInt16) * 1
    self.channels = 1

    config = Decoder.default_config()
    string_options = (
        ('-hmm', path.join(PS_MODEL_DIR, 'en-us/en-us')),
        ('-lm', path.join(PS_MODEL_DIR, 'en-us/en-us.lm.dmp')),
        ('-dict', path.join(DATA_DIR, 'pocketsphinx/model/en-us/victoria-en-us.dict')),
        ('-logfn', 'NUL'),
        ('-keyphrase', KEY_PHRASE),
    )
    for option, value in string_options:
        config.set_string(option, value)
    config.set_float('-samprate', self.sample_rate)
    config.set_float('-kws_threshold', 1e-40)

    decoder = Decoder(config)
    self.decoder = decoder
    decoder.start_utt()

    # Bookkeeping; -1 is the initial value of both markers.
    self.frames = collections.deque()
    self.listening = -1
    self.silence_start = -1
    self.last_logged_hyp = None
def __init__(self, on_audio: FunctionType, pa_instance: pyaudio.PyAudio = None, **kwargs):
    """
    Create a new Listener object.

    ``on_audio`` is stored as the capture callback.  ``chunk``, ``threshold``
    and ``timeout`` are consumed from ``kwargs``; the remaining keyword
    arguments (with defaults for ``format``, ``channels`` and ``rate``) are
    forwarded to ``open`` on the PyAudio connection together with
    ``input=True``.
    """
    # Fill in stream defaults the caller did not supply.
    stream_defaults = (
        ('format', pyaudio.paInt16),
        ('channels', 1),
        ('rate', 44100),
    )
    for option, fallback in stream_defaults:
        kwargs.setdefault(option, fallback)

    # Listener-only options must not reach the stream constructor.
    self.chunk = kwargs.pop('chunk', 1024)
    self.threshold = kwargs.pop('threshold', 10.0)
    self.timeout = kwargs.pop('timeout', 1.0)
    self.sample_width = pyaudio.get_sample_size(kwargs['format'])

    # Callback invoked when audio is captured.
    self.on_audio = on_audio

    # What is left of kwargs describes the stream.
    self.stream_args = kwargs

    # Reuse the supplied PyAudio connection, or create our own.
    self.connection = pa_instance if pa_instance is not None else pyaudio.PyAudio()
    self.stream = self.connection.open(input=True, **kwargs)
def __init__(self):
    """Set the fixed capture parameters and create the NaoMicStream."""
    sample_format = pyaudio.paInt16  # 16-bit int sampling
    self.format = sample_format
    # Size of each sample, as reported by PyAudio for the format above.
    self.SAMPLE_WIDTH = pyaudio.get_sample_size(sample_format)
    # Sampling rate in Hertz and number of frames stored in each buffer.
    self.SAMPLE_RATE, self.CHUNK = 16000, 1024
    self.stream = NaoMicStream()
def sigToWav(data, stream, out):
    """Write the recorded chunks in ``data`` to ``out`` as a WAV file.

    :param data: iterable of byte chunks; they are concatenated in order.
    :param stream: object whose ``_channels``, ``_format`` and ``_rate``
        attributes describe the audio.
    :param out: target passed straight to ``wave.open``.
    """
    waveFile = wave.open(out, 'wb')
    try:
        waveFile.setnchannels(stream._channels)
        waveFile.setsampwidth(pyaudio.get_sample_size(stream._format))
        waveFile.setframerate(stream._rate)
        waveFile.writeframes(b''.join(data))
    finally:
        # Close the writer even if a parameter or frame write fails.
        waveFile.close()
def __init__(self, recognizer, agent):
    """Keep the recognizer/agent pair and prepare a started Decoder.

    :param recognizer: stored as ``self.recognizer``.
    :param agent: stored as ``self.agent``.
    """
    super(Listener, self).__init__()
    self.recognizer = recognizer
    self.agent = agent
    self.queue = Queue.Queue()
    self.running = True

    # Sample rate (Hz), bytes per sample and channel count.
    self.sample_rate = 16000
    self.sample_width = pyaudio.get_sample_size(pyaudio.paInt16) * 1
    self.channels = 1

    config = Decoder.default_config()
    acoustic_model = path.join(PS_MODEL_DIR, 'en-us/en-us')
    config.set_string('-hmm', acoustic_model)
    language_model = path.join(PS_MODEL_DIR, 'en-us/en-us.lm.dmp')
    config.set_string('-lm', language_model)
    dictionary = path.join(
        DATA_DIR, 'pocketsphinx/model/en-us/victoria-en-us.dict')
    config.set_string('-dict', dictionary)
    config.set_string('-logfn', 'NUL')
    config.set_string('-keyphrase', KEY_PHRASE)
    config.set_float('-samprate', self.sample_rate)
    config.set_float('-kws_threshold', 1e-40)

    self.decoder = Decoder(config)
    self.decoder.start_utt()

    # Runtime state, all starting from "nothing seen yet" values.
    self.frames = collections.deque()
    self.listening = self.silence_start = -1
    self.last_logged_hyp = None
def __init__(self, filename, channels, format, rate):
    """Open ``filename`` for WAV writing and configure its header fields.

    :param filename: target handed to ``wave.open`` in ``'wb'`` mode.
    :param channels: channel count for the file.
    :param format: PyAudio sample format; converted to a byte width.
    :param rate: frame rate for the file.
    """
    super(WaveFile, self).__init__()
    self._frames = []

    writer = wave.open(filename, 'wb')
    self._wf = writer
    sample_width = pyaudio.get_sample_size(format)
    writer.setnchannels(channels)
    writer.setsampwidth(sample_width)
    writer.setframerate(rate)
def on_command(self, data):
    """
    Perform actions after voice input was recorded.

    The recording is currently just saved to a timestamped WAV file in the
    working directory, after which the capture buffers are reset.

    :param data: The voice input data; the chunks are concatenated with
        ``b''.join`` before being written.
    """
    self.recording_state = False

    # Simulate action
    time.sleep(5)

    if self.raspi_mode:
        self.light.processing()

    # TODO: For now, save the file
    filename = 'testapp' + datetime.now().strftime(
        '%Y-%m-%d_%H-%M-%S') + '.wav'
    wf = wave.open(filename, 'wb')
    try:
        wf.setnchannels(self.channels)
        wf.setsampwidth(
            pyaudio.get_sample_size(pyaudio.get_format_from_width(self.width)))
        wf.setframerate(self.sample_rate)
        wf.writeframes(b''.join(data))
    finally:
        # Do not leak the file handle when writing fails.
        wf.close()

    # Reset the capture state for the next command.
    self.recording = []
    self.data = np.zeros(self.feed_samples, dtype=self.format)
    # NOTE(review): if self.queue is a queue.Queue, empty() only reports
    # whether it is empty and does not clear it -- confirm the intent.
    self.queue.empty()
    self.recording_state = False

    if self.raspi_mode:
        self.light.off()
def __init__(self, wrapped_stream, format, muted=False):
    """Wrap ``wrapped_stream`` and prepare a one-sample buffer of zero bytes.

    :param wrapped_stream: the stream being wrapped; must not be ``None``.
    :param format: PyAudio sample format used to size the zero buffer.
    :param muted: initial value of ``self.muted``.
    """
    assert wrapped_stream is not None
    self.wrapped_stream = wrapped_stream
    self.muted = muted

    width = pyaudio.get_sample_size(format)
    self.SAMPLE_WIDTH = width
    # One sample's worth of zero bytes.
    self.muted_buffer = b'\x00' * width
def __init__(self):
    """Initialise capture constants, rolling buffers and the FFT bin table."""
    # Capture parameters.
    self.CHUNK = 1024
    self.RATE = 16000
    self.FORMAT = pyaudio.paInt16
    self.DTYPE = 'Int16'
    self.CHANNELS = 1
    self.RUN_SECONDS = 1000
    self.sampwidth = pyaudio.get_sample_size(self.FORMAT)

    self.mic = False
    self.wf = None

    # Bounded histories.
    self.deque_time = deque(maxlen=20)
    self.deque_mean = deque(maxlen=3)
    self.deque_freq = deque(maxlen=3)

    # FFT bin frequencies for one chunk, plus the subset that is >= 0.
    bins = np.fft.fftfreq(self.CHUNK, 1.0 / self.RATE)
    non_negative = np.where(bins >= 0)
    self.posneg_frequencies = bins
    self.freqs_indices = non_negative
    self.freqs = bins[non_negative]

    self.t0 = time.time()
    self.ichunk = 0

    if RPI:
        self.led = Lights()
        self.led.start()
def record(self, participant='0', session='0', trial=0):
    """Record from the input stream until ``self.RECORDON`` becomes false.

    The captured chunks are written to
    ``<participant>_<session>_<trial>.wav`` in the working directory.

    :param participant: first component of the output file name.
    :param session: second component of the output file name.
        TODO: eliminate this session requirement
    :param trial: third component of the output file name.
    :return: ``None``.
    """
    # Recording flag; the loop below runs until something else (the
    # 'finish' method, per the original comment) sets it to False.
    self.RECORDON = True

    # open audio stream
    self.STREAM = self.pad.open(format=FORMAT,
                                channels=CHANNELS,
                                rate=RATE,
                                input=True,
                                frames_per_buffer=CHUNK)
    self.FRAMES = []
    try:
        while self.RECORDON:
            self.SOUND = self.STREAM.read(CHUNK)
            self.FRAMES.append(self.SOUND)
    finally:
        # close stream and terminate PyAudio object, also when a read fails
        self.STREAM.stop_stream()
        self.STREAM.close()
        self.pad.terminate()

    # define file name
    WAVE_OUTPUT_FILENAME = str(participant) + '_' + str(
        session) + '_' + str(trial) + '.wav'

    # write sound to disk
    waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    try:
        waveFile.setnchannels(CHANNELS)
        waveFile.setsampwidth(pyaudio.get_sample_size(FORMAT))
        waveFile.setframerate(RATE)
        waveFile.writeframes(b''.join(self.FRAMES))
    finally:
        waveFile.close()
    return None
def __init__(self, byte_data, sample_rate, bit_width, channels, dtype = None):
    """
    Validate the audio description and store it together with the raw data.

    byte_data: A byte string containing the raw data.
    sample_rate: must be positive.
    bit_width: sample width in bytes; must be a positive integer.
    channels: 1 (mono) or 2 (stereo).
    dtype: optional; when omitted it is derived from the bit width.

    Raises ValueError when `dtype` fails `_validate_dtype`.
    """
    assert isinstance(bit_width, (int, long)) and bit_width > 0, \
        "`bit_width` must be positive integer."
    # Round-trip the width through PyAudio's format table.
    width_format = pyaudio.get_format_from_width(bit_width)
    bit_width = pyaudio.get_sample_size(width_format)

    assert isinstance(channels, int) and channels in [1, 2], \
        "`channels` can be either 1(mono) or 2(stereo)."
    assert sample_rate > 0, "`sample_rate` must be positive."

    self.__bit_width = bit_width
    self.__channels = channels
    self.__sample_rate = sample_rate
    self.__byte_data = byte_data  # a byte string

    chosen_dtype = self._get_dtype_by_bit_width() if dtype is None else dtype
    if not self._validate_dtype(chosen_dtype):
        raise ValueError("`dtype` is not compatible with the `bit_width`.")
    self.__dtype = chosen_dtype

    self.format = pyaudio.get_format_from_width(self.BIT_WIDTH)
async def listen(websocket, _):
    """Collect audio chunks from ``websocket`` and transcribe on request.

    Every received message is buffered as audio, except the literal message
    ``'transcribe'``, which triggers recognition of the buffered chunks and
    sends the response back over the socket.  A ``'transcribe'`` request
    arriving while nothing is buffered is ignored; previously it was appended
    to the audio buffer as if it were audio data.

    :param websocket: connection providing ``recv()`` and ``send()``.
    :param _: unused second handler argument.
    """
    print('Connected..')
    if settings.DEBUG:
        print('Debug activated')

    frame_data = []
    stt = speechtotext.SpeechToText()

    while True:
        chunk = await websocket.recv()

        if chunk != 'transcribe':
            frame_data.append(chunk)
            continue

        if not frame_data:
            # Nothing recorded yet: drop the command rather than treating
            # the command string as an audio chunk.
            continue

        audio_data = stt.process_audio(frame_data)

        if settings.AUDIO_STORAGE:
            filename = f'{settings.AUDIO_FOLDER_PATH}/{uuid.uuid4()}.wav'
            with wave.open(filename, 'wb') as f:
                f.setnchannels(settings.CHANNELS)
                f.setsampwidth(pyaudio.get_sample_size(settings.FORMAT))
                f.setframerate(settings.RATE)
                f.writeframes(audio_data.frame_data)

        try:
            transcription = stt.recognize(audio_data)
            r = response(transcription)
        except speechtotext.RecognitionException as e:
            print('Error recognizing: {}'.format(str(e)))
            r = response(None, error=str(e))

        frame_data = []
        await websocket.send(r)
class Recorder:
    """Read 16-bit samples from a named input device as numpy arrays."""

    FORMAT = pyaudio.paInt16
    CHUNK = 1024
    # Bytes per sample for the format above.
    SWIDTH = pyaudio.get_sample_size(pyaudio.paInt16)

    def __init__(self, input_name='Microphone', channel_id=1):
        """Open the input stream for the device called ``input_name``."""
        self.__create_stream(input_name, channel_id)

    def __create_stream(self, input_name, channel_id):
        """Open the capture stream on the shared PyAudio instance ``p``."""
        device_index = get_index_by_name(input_name)
        self.__stream = p.open(format=Recorder.FORMAT,
                               channels=channel_id,
                               rate=settings.sampling_rate,
                               input=True,
                               input_device_index=device_index,
                               frames_per_buffer=Recorder.CHUNK)

    def __read_samples(self):
        """Read one recording chunk and unpack it into an array of shorts."""
        raw_data = self.__stream.read(settings.recording_chunk_size,
                                      exception_on_overflow=False)
        sample_count = len(raw_data) / self.SWIDTH
        return np.array(wave.struct.unpack("%dh" % sample_count, raw_data))

    def record(self):
        """Return the next chunk of samples, retrying once after an OSError."""
        nbits = self.__stream.get_read_available()
        try:
            return self.__read_samples()
        # TODO catch proper exception
        except OSError:
            print(('skipping audio', nbits))
            return self.__read_samples()
def __init__(self, device_index=None, sample_rate=16000, chunk_size=1024):
    """Validate the arguments and store the capture configuration.

    :param device_index: ``None`` or an integer index within the range
        PyAudio reports for available devices.
    :param sample_rate: positive integer sampling rate in Hertz.
    :param chunk_size: positive integer number of frames per buffer.
    :raises AssertionError: when an argument fails validation.
    """
    assert device_index is None or isinstance(
        device_index, int), "Device index must be None or an integer"
    if device_index is not None:  # ensure device index is in range
        audio = pyaudio.PyAudio()
        try:
            count = audio.get_device_count()  # obtain device count
        finally:
            # Release the probe instance even if the query fails.
            audio.terminate()
        assert 0 <= device_index < count, "Device index out of range"
    assert isinstance(
        sample_rate, int
    ) and sample_rate > 0, "Sample rate must be a positive integer"
    assert isinstance(
        chunk_size, int
    ) and chunk_size > 0, "Chunk size must be a positive integer"

    self.device_index = device_index
    self.format = pyaudio.paInt16  # 16-bit int sampling
    self.SAMPLE_WIDTH = pyaudio.get_sample_size(
        self.format)  # size of each sample
    self.SAMPLE_RATE = sample_rate  # sampling rate in Hertz
    self.CHANNELS = 1  # mono audio
    self.CHUNK = chunk_size  # number of frames stored in each buffer

    self.audio = None
    self.stream = None
def activeListenToAllOptions(self, THRESHOLD=None, LISTEN=True,
                             MUSIC=False):
    """
    Record a command and return what the active STT engine transcribes.

    Recording stops once the rolling average score drops below 80% of the
    threshold, or after 12 seconds at the latest.
    """
    rate, chunk, listen_seconds = 16000, 1024, 12

    # Fall back to the measured threshold when none was provided.
    threshold = self.fetchThreshold() if THRESHOLD is None else THRESHOLD

    self.speaker.play(jasperpath.data('audio', 'beep_hi.wav'))

    # prepare recording stream
    stream = self._audio.open(format=pyaudio.paInt16,
                              channels=1,
                              rate=rate,
                              input=True,
                              frames_per_buffer=chunk)

    frames = []
    # Rolling window of recent scores; a longer window results in a
    # longer pause after the command before recording stops.
    window = [threshold * 1.2] * 30
    # TODO: 0.8 should not be a MAGIC NUMBER!
    cutoff = threshold * 0.8

    for _ in range(int(rate / chunk * listen_seconds)):
        data = stream.read(chunk)
        frames.append(data)

        window = window[1:] + [self.getScore(data)]
        if sum(window) / float(len(window)) < cutoff:
            break

    self.speaker.play(jasperpath.data('audio', 'beep_lo.wav'))

    # save the audio data
    stream.stop_stream()
    stream.close()

    with tempfile.SpooledTemporaryFile(mode='w+b') as f:
        wav_fp = wave.open(f, 'wb')
        wav_fp.setnchannels(1)
        wav_fp.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
        wav_fp.setframerate(rate)
        wav_fp.writeframes(b''.join(frames))
        wav_fp.close()
        f.seek(0)
        return self.active_stt_engine.transcribe(f)
def __init__(self, file_handler, to_text=True, stop_event=None):
    """Store the collaborators and create a speech recognizer.

    :param file_handler: kept on ``self.file_handler``.
    :param to_text: kept on ``self.to_text``.
    :param stop_event: kept on ``self.stop_event``.
    """
    self.file_handler, self.to_text, self.stop_event = (
        file_handler, to_text, stop_event)
    # Bytes per sample of the configured stream format.
    stream_format = settings.STREAM_AUDIO_FORMAT
    self.sample_width = pyaudio.get_sample_size(stream_format)
    self.recognizer = sr.Recognizer()
def __init__(self, device_index, sample_rate, bit_width, chunk_size = 8092, channels = 1):
    """Validate the device and audio parameters and store them.

    :param device_index: integer index of an input-capable device.
    :param sample_rate: integer in ``(0, defaultSampleRate]`` of the device.
    :param bit_width: sample width handed to
        ``pyaudio.get_format_from_width``.
    :param chunk_size: integer chunk size.
    :param channels: 1 for mono audio, 2 for stereo.
    :raises AssertionError: when an argument fails validation.
    :raises DeviceTypeError: when the device has no input channels.
    """
    ## Checking the device_index is valid or not.
    assert isinstance(device_index, (int, long)), "Device index must be an integer."
    audio = pyaudio.PyAudio()
    try:
        device_count = audio.get_device_count()
    finally:
        # The probe instance is only needed for the count; release it on
        # every path instead of leaking it when validation fails.
        audio.terminate()
    # The message previously referenced an undefined name (`count`), turning
    # the intended AssertionError into a NameError.
    assert 0 <= device_index < device_count, "`device_index` out of range: {} out of {}".format(device_index, device_count)

    self.__device_index = device_index

    if not self.device_info["maxInputChannels"] > 0:
        raise DeviceTypeError("Can not source from a non-input device.")

    self.__format = pyaudio.get_format_from_width(bit_width)
    self.__bit_width = pyaudio.get_sample_size(self.FORMAT)

    assert isinstance(sample_rate, (int, long)), "`sample_rate` must be integer."
    max_sample_rate = self.device_info["defaultSampleRate"]
    assert 0 < sample_rate <= max_sample_rate, "`sample_rate` out of range: {} out of {}".format(sample_rate, max_sample_rate)
    self.__sample_rate = sample_rate

    assert isinstance(chunk_size, (int, long)), "`chunk_size` must be integer."
    self.__chunk_size = chunk_size

    assert channels in [1, 2], '`channels` can be either 1 or 2. 1 for mono audio, 2 for stereo.'
    self.__channels = channels

    # audio resource and streams.
    self.__audio = None
    self.__input_stream = None
def __init__(self, path_prefix):
    """Prepare the frame template, colour table and audio output settings.

    :param path_prefix: passed to ``settings`` to look up the selected
        audio device index.
    :raises Exception: when the selected device has fewer output channels
        than ``NUM_AUDIO_CHANNELS``.
    """
    assert len(self.COLOR_GRADIENT_WHEEL) == self.FRAME_HEIGHT, \
        "Need exactly {} colors in 'COLOR_GRADIENT_WHEEL'".format(self.FRAME_HEIGHT)

    # Convert hex string (for easy programmer modification) to bytearrays in
    # 'COLOR_GRADIENT_WHEEL'.  The list is modified in place, so entries may
    # already have been converted by an earlier instance; only entries that
    # are still strings are converted, because bytes.fromhex() rejects
    # anything else.
    for i, color_str in enumerate(self.COLOR_GRADIENT_WHEEL):
        if isinstance(color_str, str):
            self.COLOR_GRADIENT_WHEEL[i] = np.frombuffer(
                bytes.fromhex(color_str), dtype=np.uint8)

    self.template = np.zeros(
        (self.FRAME_HEIGHT, self.FRAME_WIDTH, self.NUM_COLOR_CHANNELS),
        dtype=np.uint8)

    self.pyaudio = PyAudio()
    audio_device_index = settings(
        path_prefix).get_selected_audio_device_index()
    self.audio_device_info = self.pyaudio.get_device_info_by_index(
        audio_device_index)
    if self.audio_device_info[
            'maxOutputChannels'] < self.NUM_AUDIO_CHANNELS:
        raise Exception("Audio output device should be at least stereo.")

    self.format = pyaudio.paInt16
    self.sample_size = pyaudio.get_sample_size(self.format)
    self.stream = None
    # One buffer's worth of zero bytes.
    self.raw_audio_frames = b'\x00' * (self.NUM_AUDIO_CHANNELS *
                                       self.NUM_AUDIO_FRAMES_PER_BUFFER *
                                       self.sample_size)
def activeListenToAllOptions(self, THRESHOLD=None, LISTEN=True,
                             MUSIC=False):
    """
    Record a command and return the active STT engine's transcription.

    Recording stops once the rolling average score drops below 80% of the
    threshold, or after 12 seconds at the latest.  ``MUSIC`` selects
    ``TranscriptionMode.MUSIC`` instead of ``TranscriptionMode.NORMAL``.
    """
    RATE = 16000
    CHUNK = 1024
    LISTEN_TIME = 12

    # check if no threshold provided
    if THRESHOLD is None:
        THRESHOLD = self.fetchThreshold()

    self.speaker.play(jasperpath.data('audio', 'beep_hi.wav'))

    # prepare recording stream
    stream = self._audio.open(format=pyaudio.paInt16,
                              channels=1,
                              rate=RATE,
                              input=True,
                              frames_per_buffer=CHUNK)
    try:
        frames = []
        # increasing the range
        # results in longer pause after command generation
        lastN = [THRESHOLD * 1.2 for i in range(30)]

        # int() keeps the bound an integer when `/` is true division.
        for i in range(0, int(RATE / CHUNK * LISTEN_TIME)):
            data = stream.read(CHUNK)
            frames.append(data)
            score = self.getScore(data)

            lastN.pop(0)
            lastN.append(score)

            average = sum(lastN) / float(len(lastN))

            # TODO: 0.8 should not be a MAGIC NUMBER!
            if average < THRESHOLD * 0.8:
                break

        self.speaker.play(jasperpath.data('audio', 'beep_lo.wav'))
    finally:
        # Release the input stream even if reading or scoring fails.
        stream.stop_stream()
        stream.close()

    # save the audio data
    with tempfile.SpooledTemporaryFile(mode='w+b') as f:
        wav_fp = wave.open(f, 'wb')
        wav_fp.setnchannels(1)
        wav_fp.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
        wav_fp.setframerate(RATE)
        # The chunks are byte strings, so join them with a bytes separator.
        wav_fp.writeframes(b''.join(frames))
        wav_fp.close()
        f.seek(0)
        mode = TranscriptionMode.MUSIC if MUSIC else TranscriptionMode.NORMAL
        transcribed = self.active_stt_engine.transcribe(f, mode=mode)
    return transcribed
def __init__(self, rt):
    """Open the microphone stream and start the wake word engine.

    All tunables are read from ``self.config``.

    :param rt: forwarded to the parent initialiser and ``WakeWordService``.
    """
    super().__init__(rt)
    cfg = self.config

    # Stream parameters.
    self.chunk_size = cfg['chunk_size']
    self.format = pyaudio.paInt16
    self.sample_width = pyaudio.get_sample_size(self.format)
    self.sample_rate = cfg['sample_rate']
    self.channels = cfg['channels']

    self.p = pyaudio.PyAudio()
    self.stream = self.p.open(format=self.format,
                              channels=self.channels,
                              rate=self.sample_rate,
                              input=True,
                              frames_per_buffer=self.chunk_size)

    # Detection tunables.
    self.talking_volume_ratio = cfg['talking_volume_ratio']
    self.required_integral = cfg['required_noise_integral']
    self.max_di_dt = cfg['max_di_dt']
    self.noise_max_out_sec = cfg['noise_max_out_sec']
    self.recording_timeout = cfg['recording_timeout']

    # For convenience: duration of one chunk in seconds.
    chunk_sec = self.chunk_size / self.sample_rate
    self.energy_weight = 1.0 - pow(1.0 - cfg['ambient_adjust_speed'],
                                   chunk_sec)
    self.chunk_sec = chunk_sec

    # Running state.
    self.av_energy = None
    self.integral = 0
    self.noise_level = 0
    self._intercept = None
    self._has_activated = False

    self.engine = WakeWordService(
        rt, self.on_activation)  # type: WakeWordEnginePlugin
    self.engine.startup()
def predict_file(dec, pyaudio, path, frames, args, rate=16000, format=pyaudio.paInt16, save=False):
    """Write ``frames`` to a mono WAV file at ``path`` and run the predictor.

    :param dec: object providing ``predict_file`` and the ``return*`` helpers.
    :param pyaudio: object providing ``get_sample_size`` (shadows the module
        inside this function).
    :param path: location of the WAV file.
    :param frames: audio bytes, written as-is.
    :param args: options object; ``feat_mode``, ``feat_dim``, ``three_d`` and
        ``predict_mode`` are read from it.
    :param rate: frame rate of the WAV file.
    :param format: sample format used to derive the sample width.
    :param save: the file is removed after prediction when this equals
        ``False``.
    :return: result of ``returnDiff`` (mode 0), ``returnLabel`` (mode 1) or
        ``returnClassDist`` (any other mode).
    """
    wf = wave.open(path, 'wb')
    try:
        wf.setnchannels(1)
        wf.setsampwidth(pyaudio.get_sample_size(format))
        wf.setframerate(rate)
        # `frames` is written directly; the original notes that joining a
        # list of chunks only applied to the pulseaudio setup.
        wf.writeframes(frames)
    finally:
        # Close the writer even when writing fails.
        wf.close()

    results = dec.predict_file(path, feat_mode=args.feat_mode, feat_dim=args.feat_dim, three_d=args.three_d)

    # Equality (not truthiness) is kept so values such as None still keep
    # the file, exactly as before.
    if save == False:  # noqa: E712
        os.remove(path)

    if args.predict_mode == 0:
        task_outputs = dec.returnDiff(results)
    elif args.predict_mode == 1:
        task_outputs = dec.returnLabel(results)
    else:
        task_outputs = dec.returnClassDist(results)
    return task_outputs
def save(self):
    """Write the buffered frames to ``self._name`` as a WAV file."""
    wf = wave.open(self._name, 'wb')
    try:
        wf.setnchannels(self._channels)
        wf.setsampwidth(pyaudio.get_sample_size(self._format))
        wf.setframerate(self._rate)
        wf.writeframes(b''.join(self._frames))
    finally:
        # Close the writer even if a parameter or frame write fails.
        wf.close()
def sample_size(self) -> int:
    """
    Report how many bytes one audio sample occupies.

    :return: Size in bytes, as given by PyAudio for ``self._sample_fmt``
    """
    sample_format = self._sample_fmt
    return pyaudio.get_sample_size(sample_format)
def __init__(self, device_index = None):
    """Store the capture configuration.

    :param device_index: kept on ``self.device_index``.  It used to be
        accepted and silently discarded; it is now stored, matching the
        other initialisers of this shape.
    """
    self.device_index = device_index
    self.format = pyaudio.paInt16  # 16-bit int sampling
    self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.format)  # size of each sample
    self.CHUNK = 1024  # number of frames stored in each buffer

    # Not created here; both start out as None.
    self.audio = None
    self.stream = None
def __init__(self, args, CHUNK = 1024, FORMAT = pyaudio.paInt16, CHANNELS = 1, RATE = 16000, THRESHOLD = 2500, SILENCE_LIMIT = 2, PREV_AUDIO = 1):
    """
    Initialization method for class AudioUtils.
    Defines the constants needed throughout the program.

    Arguments:
        args -- options object; ``log_level`` and ``verbose`` configure the logger

    Keyword Arguments:
        CHUNK {number} -- CHUNKS of bytes to read each time from mic (default: {1024})
        FORMAT -- PyAudio sample format (default: {pyaudio.paInt16})
        CHANNELS {number} -- channel count (default: {1})
        RATE {number} -- sampling rate (default: {16000})
        THRESHOLD {number} -- The threshold intensity that defines silence
                  and noise signal (an int. lower than THRESHOLD is silence) (default: {2500})
        SILENCE_LIMIT {number} -- Silence limit in seconds. The max amount of seconds where
                   only silence is recorded. When this time passes the
                   recording finishes and the file is delivered. (default: {2})
        PREV_AUDIO {number} -- Previous audio (in seconds) to prepend. When noise
                  is detected, how much of previously recorded audio is
                  prepended. This helps to prevent chopping the beginning
                  of the phrase. (default: {1})
    """
    vc_logging.init_logger(level = args.log_level, verbose = args.verbose)
    self.log = logging.getLogger("vc_logger")

    # Bytes per sample for the chosen format.
    self.WIDTH = pyaudio.get_sample_size(FORMAT)

    # Keep every constructor argument on the instance under the same name.
    (self.CHUNK, self.FORMAT, self.CHANNELS, self.RATE) = (CHUNK, FORMAT, CHANNELS, RATE)
    (self.THRESHOLD, self.SILENCE_LIMIT, self.PREV_AUDIO) = (THRESHOLD, SILENCE_LIMIT, PREV_AUDIO)

    self.audioQueue = Queue()
def readMic(utteranceToneQ, utteranceSpeechQ, audioInputDevice):
    """Record utterances from a microphone and publish them on two queues.

    Runs until an error occurs or the process is interrupted.  Every
    utterance returned by ``getUtterance`` is saved as ``mic_<n>.wav`` in the
    ``test`` directory next to this file and put on both queues.

    :param utteranceToneQ: queue receiving every utterance.
    :param utteranceSpeechQ: queue receiving every utterance.
    :param audioInputDevice: device identifier (usually ``hw:2,0``) resolved
        through ``get_ip_device_index``.
    """
    # setup
    DEVICE_IP_HW = audioInputDevice  # this usually is hw:2,0
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    CHUNK = 4096
    CHECK_SILENCE_SECONDS = 1
    UTTERANCE_SECONDS = 5
    OUTPUT_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test")

    p = None
    stream = None
    try:
        with noalsaerr():
            p = pyaudio.PyAudio()  # start the PyAudio class

        # open stream with this device
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input_device_index=get_ip_device_index(
                            p, DEVICE_IP_HW),
                        input=True,
                        frames_per_buffer=CHUNK)

        THRESHOLD = 20000  # set for testing
        print("________________________________________")
        print("RECORDER -> Threshold : " + str(THRESHOLD))
        print("________________________________________")

        utteranceCount = 0
        while True:
            utteranceData = getUtterance(stream, RATE, CHUNK, THRESHOLD,
                                         CHECK_SILENCE_SECONDS,
                                         UTTERANCE_SECONDS)

            # set up the wav container to store the recorded utterance
            wavFile = wave.open(
                os.path.join(OUTPUT_DIR,
                             "mic_" + str(utteranceCount) + ".wav"), "w")
            try:
                wavFile.setnchannels(CHANNELS)
                wavFile.setsampwidth(pyaudio.get_sample_size(FORMAT))
                wavFile.setframerate(RATE)
                wavFile.writeframes(utteranceData)
            finally:
                wavFile.close()

            utteranceToneQ.put(utteranceData)
            utteranceSpeechQ.put(utteranceData)
            utteranceCount += 1
    except KeyboardInterrupt:
        # Interrupts still end the recorder quietly, as before.
        pass
    except Exception as exc:
        # The recorder stays best-effort (no exception escapes), but the
        # reason it stopped is no longer hidden.
        print("RECORDER -> stopped: " + repr(exc))
    finally:
        # Release the audio resources that were actually acquired.
        if stream is not None:
            stream.close()
        if p is not None:
            p.terminate()
def pyaudio_scb(self, rate, fmt=pyaudio.paInt16):
    """Build a stream callback that packs samples from ``self.data``.

    :param rate: sample rate forwarded to ``self.data``.
    :param fmt: PyAudio sample format; its byte size selects the struct
        format code and the scale factor.
    :return: callback ``(data, frames, time, status)`` returning the packed
        samples.
    """
    sample_bytes = pyaudio.get_sample_size(fmt)
    full_scale = (1 << (8 * sample_bytes)) - 1
    # Struct format code indexed by sample size in bytes; '!' fills the
    # sizes without an entry.
    # NOTE(review): 'h' sits at index 1 and 'i' at index 2, although struct's
    # standard sizes for those codes are 2 and 4 bytes -- confirm this
    # mapping is intended.
    struct_code = ('!', 'h', 'i', '!', 'l', '!', '!', '!', 'q')[sample_bytes]

    def __callback(data, frames, time, status,
                   self=self, rate=rate, maxint=full_scale, dtype=struct_code):
        # NOTE(review): only the packed bytes are returned; verify against
        # the PyAudio callback contract in use.
        samples = self.data(frames, self.freq, rate)
        scaled = [maxint * int(sample) for sample in samples]
        return struct.pack(dtype * frames, *scaled)

    return __callback
def getAudio():
    """Listen to the microphone forever and hand recordings to a recognizer.

    Chunks are read continuously.  When a quiet chunk follows louder audio,
    the chunks collected so far are saved as ``downloads/test<N>.wav`` and
    ``recognizeAudio`` is started on that file in a new thread.
    """
    # set properties of audio
    CHUNK = 2**11
    RATE = 44100

    # open stream
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    listening = False
    frames = []
    try:
        while True:
            raw = stream.read(CHUNK)
            # frombuffer replaces the deprecated np.fromstring for binary
            # input.
            data = np.frombuffer(raw, dtype=np.int16)
            peak = np.average(np.abs(data)) * 2

            # check if silent (the 600 cut-off needs tuning)
            if peak < 600:
                if listening:
                    # Save the audio to disk
                    filename = 'downloads/test' + str(random.randint(
                        0, 10000)) + '.wav'
                    wavefile = wave.open(filename, 'wb')
                    try:
                        wavefile.setnchannels(1)
                        wavefile.setsampwidth(
                            pyaudio.get_sample_size(pyaudio.paInt16))
                        wavefile.setframerate(RATE)
                        wavefile.writeframes(b''.join(frames))
                    finally:
                        # The original referenced `wavefile.close` without
                        # calling it, so the file was never closed before
                        # the recognizer thread started.
                        wavefile.close()
                    frames = []
                    listening = False

                    # Recognize the saved file in a new thread
                    t = Thread(target=recognizeAudio, args=(filename, ))
                    t.start()
            else:
                listening = True

            # NOTE(review): the chunk is kept on every iteration, quiet or
            # not -- confirm against the original indentation.
            frames.append(raw)
    finally:
        # Previously unreachable after the endless loop; now runs when the
        # loop is left through an exception or interrupt.
        stream.stop_stream()
        stream.close()
        p.terminate()
def save_audio(self, fp, frames):
    """Write ``frames`` as mono audio to ``fp`` via ``open_audio``.

    :param fp: target passed to ``open_audio`` in ``'wb'`` mode.
    :param frames: iterable of byte chunks, concatenated in order.
    """
    sample_width = pyaudio.get_sample_size(self.format)
    f = open_audio(fp, 'wb')
    try:
        f.setsampwidth(sample_width)
        f.setframerate(self.rate)
        f.setnchannels(1)
        # A bytes separator works for byte chunks on Python 2 and 3; a text
        # separator fails on Python 3.
        f.writeframes(b''.join(frames))
    finally:
        # Close the writer even when a write fails.
        f.close()
def write_wav(frames, file_name, rate=44100, channels=1):
    """Write 16-bit audio chunks to ``file_name`` as a WAV file.

    :param frames: iterable of byte chunks, concatenated in order.
    :param file_name: target passed to ``wave.open`` in ``'wb'`` mode.
    :param rate: frame rate; defaults to the previously hard-coded 44100.
    :param channels: channel count; defaults to the previously hard-coded 1.
    """
    waveFile1 = wave.open(file_name, 'wb')
    try:
        waveFile1.setnchannels(channels)
        waveFile1.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
        waveFile1.setframerate(rate)
        waveFile1.writeframes(b''.join(frames))
    finally:
        # Close the writer even if a parameter or frame write fails.
        waveFile1.close()
def save_file(self):
    """Write ``self.wave_data_after`` to ``self.output_file_name`` as WAV.

    Channel count, sample format and frame rate come from a fresh
    ``Configure`` instance.
    """
    c = Configure()
    wf = wave.open(self.output_file_name, 'wb')
    try:
        wf.setnchannels(c.CHANNELS)
        wf.setsampwidth(pyaudio.get_sample_size(c.FORMAT))
        wf.setframerate(c.RATE)
        wf.writeframes(b''.join(self.wave_data_after))
    finally:
        # Close the writer even if a parameter or frame write fails.
        wf.close()
def __callback(self, in_data, frame_count, time_info, status_flags):
    """Stream callback: buffer the recorded bytes and update the counters.

    :param in_data: recorded data if input=True; else None
    :param frame_count: number of frames
    :param time_info: dictionary
    :param status_flags: PaCallbackFlags
    :return: ``(None, pyaudio.paContinue)``
    """
    written = self.buffer.write_chunk(in_data, len(in_data))

    # Bytes per frame = channels * bytes per sample.
    # NOTE(review): `/` is true division on Python 3, which makes the counter
    # a float -- confirm whether integer frames were intended.
    frame_bytes = self.channels * pyaudio.get_sample_size(self.format)
    self.dropped_samples += frame_count - (written / frame_bytes)
    self.rec_bytes += written

    return None, pyaudio.paContinue
def __init__(self):
    """Open an input stream and create the matching PCM-to-WAVE packer."""
    pa = pyaudio.PyAudio()
    self.pa = pa

    # Stream parameters are read from FORMAT, CHANNELS, RATE and CHUNK.
    stream_options = dict(format=self.FORMAT,
                          channels=self.CHANNELS,
                          rate=self.RATE,
                          input=True,
                          frames_per_buffer=self.CHUNK)
    self.stream = pa.open(**stream_options)

    sample_width = pyaudio.get_sample_size(self.FORMAT)
    self.packer = Pcm2Wave(self.RATE, sample_width, self.CHANNELS)
def __init__(self):
    """Create the PyAudio instance and open a mono 16-bit input stream."""
    self._sound = pyaudio.PyAudio()

    sample_format = pyaudio.paInt16
    self.format = sample_format
    self.SAMPLE_WIDTH = pyaudio.get_sample_size(sample_format)

    # Rate and buffer size are read from SAMPLE_RATE and CHUNK.
    stream_options = dict(rate=self.SAMPLE_RATE,
                          channels=1,
                          format=sample_format,
                          input=True,
                          frames_per_buffer=self.CHUNK)
    self.stream = self._sound.open(**stream_options)
def write_chunks(path, frames, rate=44100, channels=2, format=pyaudio.paInt16):
    """
    write audio to hard disk

    :param path: target passed to ``wave.open`` in ``"wb"`` mode.
    :param frames: iterable of byte chunks, concatenated in order.
    :param rate: frame rate of the file.
    :param channels: channel count of the file.
    :param format: PyAudio sample format used to derive the sample width.
    """
    wf = wave.open(path, "wb")
    try:
        wf.setnchannels(channels)
        wf.setsampwidth(pyaudio.get_sample_size(format))
        wf.setframerate(rate)
        wf.writeframes(b"".join(frames))
    finally:
        # Close the writer even if a parameter or frame write fails.
        wf.close()
def ActiveListening(this):
    """Record a short command and pass it to the transcriber.

    Records for at most 5 seconds, stopping early once the rolling average
    score falls below the cut-off, then writes the audio to a temporary WAV
    file and calls ``transcriber.TranscribeAudiofile``.

    :param this: forwarded unchanged to ``TranscribeAudiofile``.
    """
    textout.SystemPrint("Started to listen actively")

    RATE = 16000
    CHUNK = 1024

    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    try:
        frames = []
        # changing array length will determine if average will change
        # faster or not
        lastN = [144 * 1.2 for i in range(15)]

        # RATE / CHUNK chunks per second, for 5 seconds.
        for i in range(0, int(RATE / CHUNK * 5)):
            data = stream.read(CHUNK)
            frames.append(data)
            score = getScore(data)
            lastN.pop(0)
            lastN.append(score)
            average = sum(lastN) / float(len(lastN))
            print(str(average))
            if average < 144 - 20:
                textout.SystemPrint("Listening stopped, below threshold.")
                break
        else:
            # Only a loop that ran to completion is a timeout; this used to
            # be logged after a threshold break as well.
            textout.SystemPrint("Listening Timeout!")
    finally:
        # Release the audio resources even if reading or scoring fails.
        stream.stop_stream()
        stream.close()
        p.terminate()

    with tempfile.NamedTemporaryFile(mode='w+b') as f:
        wav_fp = wave.open(f, 'wb')
        wav_fp.setnchannels(1)
        wav_fp.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
        wav_fp.setframerate(RATE)
        wav_fp.writeframes(b''.join(frames))
        wav_fp.close()
        f.seek(0)
        transcriber.TranscribeAudiofile(this, f)

    textout.SystemPrint("Stopped listening actively")
def __init__(self, device_index = None):
    """Record the device choice and the fixed capture parameters.

    :param device_index: stored on ``self.device_index``.
    """
    self.device_index = device_index

    sample_format = pyaudio.paInt16  # 16-bit int sampling
    self.format = sample_format
    self.SAMPLE_WIDTH = pyaudio.get_sample_size(sample_format)

    # Sampling rate in Hertz, mono audio, frames stored in each buffer.
    self.RATE, self.CHANNELS, self.CHUNK = 16000, 1, 1024

    # Neither is created here.
    self.audio = self.stream = None
def write_wave(arrs,
               filepath,
               sample_width=pyaudio.get_sample_size(PYAUDIO_FORMAT),
               channels=CHANNELS,
               sample_rate=SAMPLE_RATE):
    """Write ``arrs`` to ``filepath`` as a WAV file.

    :param arrs: input handed to ``_structpack``; every item it yields is
        written as frames.
    :param filepath: target passed to ``wave.open`` in ``'w'`` mode.
    :param sample_width: bytes per sample.
    :param channels: channel count.
    :param sample_rate: frame rate.
    """
    f = wave.open(filepath, 'w')
    try:
        f.setnchannels(channels)
        f.setsampwidth(sample_width)
        f.setframerate(sample_rate)
        for arr in _structpack(arrs):
            f.writeframes(arr)
    finally:
        # Close the writer even if packing or writing fails.
        f.close()
def __init__(self, device_index=None):
    """Set up the capture defaults for the chosen device.

    :param device_index: stored on ``self.device_index``.
    """
    # Nothing is opened yet.
    self.audio = None
    self.stream = None

    self.device_index = device_index

    # 16-bit integer samples; the width is whatever PyAudio reports for it.
    self.format = pyaudio.paInt16
    self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.format)

    self.RATE = 16000      # sampling rate in Hertz
    self.CHANNELS = 1      # mono audio
    self.CHUNK = 1024      # number of frames stored in each buffer
def __init__(self, device_index = None):
    """Store the device index and the 44.1 kHz mono capture parameters.

    :param device_index: stored on ``self.device_index``.
    """
    self.device_index = device_index

    sample_format = pyaudio.paInt16
    self.format = sample_format
    self.SAMPLE_WIDTH = pyaudio.get_sample_size(sample_format)

    self.RATE = 44100
    self.CHANNELS = 1
    # 2205 frames is 1/20 of the 44100 frames read per second.
    self.CHUNK = 2205

    self.audio, self.stream = None, None
def passiveListen(self, persona):
    """Listen for a disturbance and check whether ``persona`` was said.

    Listens for up to 10 seconds for a chunk scoring above the threshold.
    If one is found, recording continues briefly and the audio is passed to
    ``self.passive_transcribe``.

    :param persona: phrase searched for in the transcription.
    :return: ``(None, None)`` when no disturbance was detected,
        ``(THRESHOLD, persona)`` when the persona was heard, otherwise
        ``(False, transcribed)``.
    """
    rate = self.PASSIVE_RATE
    chunk = self.PASSIVE_CHUNK
    LISTEN_TIME = 10
    THRESHOLD = self.THRESHOLD_MULTIPLIER * self.fetchThreshold()

    stream = self._audio.open(format=pyaudio.paInt16,
                              channels=1,
                              rate=rate,
                              input=True,
                              frames_per_buffer=chunk)
    try:
        frames = []
        didDetect = False

        # int() keeps the bound an integer when `/` is true division.
        for i in range(0, int(rate / chunk * LISTEN_TIME)):
            data = stream.read(chunk)
            frames.append(data)
            score = self.getScore(data)
            if score > THRESHOLD:
                didDetect = True
                break

        # no use continuing if no flag raised
        if not didDetect:
            print("No disturbance detected")
            return (None, None)

        # cutoff any recording before this disturbance was detected
        frames = frames[-20:]

        # otherwise, let's keep recording for few seconds and save the file
        DELAY_MULTIPLIER = 1.5
        # The multiplier is a float, so the bound must be truncated before
        # it reaches range().
        for i in range(0, int(rate / chunk * DELAY_MULTIPLIER)):
            data = stream.read(chunk)
            frames.append(data)
    finally:
        # The stream used to be closed only on the "nothing detected" path;
        # close it on every path.
        stream.stop_stream()
        stream.close()

    with tempfile.NamedTemporaryFile(mode='w+b') as f:
        wav_fp = wave.open(f, 'wb')
        wav_fp.setnchannels(1)
        wav_fp.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
        wav_fp.setframerate(rate)
        # The chunks are byte strings, so join them with a bytes separator.
        wav_fp.writeframes(b''.join(frames))
        wav_fp.close()
        f.seek(0)
        # check if PERSONA was said
        transcribed = self.passive_transcribe(f)

    if any(persona in phrase for phrase in transcribed):
        return (THRESHOLD, persona)

    return (False, transcribed)
def activeListenToAllOptions(self, THRESHOLD=None, LISTEN=True,
                             MUSIC=False):
    """
    Record a command and return what the active STT engine transcribes.

    If mpc reports music as playing, it is paused while recording and
    resumed afterwards.  Chunks are taken from ``self.queue`` until the
    rolling average score drops below 80% of the threshold, or until the
    listen time is used up.
    """
    # check if no threshold provided
    if THRESHOLD is None:
        THRESHOLD = self.fetchThreshold()

    playing = 'playing' in check_output(['mpc', '-h', '[email protected]']).split('\n')[1]
    if playing:
        call(['mpc', '-h', '[email protected]', 'pause'])

    try:
        self.say(random.choice(BEFORE))

        frames = []
        # increasing the range
        # results in longer pause after command
        # generation
        lastN = [THRESHOLD * 1.2 for i in range(30)]

        # int() keeps the bound an integer when `/` is true division.
        for i in range(0, int(RATE / CHUNK * LISTEN_TIME)):
            data = self.queue.get()
            frames.append(data)
            score = self.getScore(data)

            lastN.pop(0)
            lastN.append(score)

            average = sum(lastN) / float(len(lastN))

            # TODO: 0.8 should not be a MAGIC NUMBER!
            if average < THRESHOLD * 0.8:
                break

        self.say(random.choice(AFTER))
    finally:
        # Resume the music even when recording fails, so an error does not
        # leave playback paused.
        if playing:
            call(['mpc', '-h', '[email protected]', 'play'])

    # save the audio data
    with tempfile.SpooledTemporaryFile(mode='w+b') as f:
        wav_fp = wave.open(f, 'wb')
        wav_fp.setnchannels(1)
        wav_fp.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
        wav_fp.setframerate(RATE)
        # The chunks are byte strings, so join them with a bytes separator.
        wav_fp.writeframes(b''.join(frames))
        wav_fp.close()
        f.seek(0)
        return self.active_stt_engine.transcribe(f)
def __init__(self, device_index = None):
    """Validate ``device_index`` and set the fixed capture parameters.

    :param device_index: ``None`` or an integer.
    :raises AssertionError: when ``device_index`` is neither.
    """
    index_is_valid = device_index is None or isinstance(device_index, int)
    assert index_is_valid, "Device index must be None or an integer"
    self.device_index = device_index

    sample_format = pyaudio.paInt16  # 16-bit int sampling
    self.format = sample_format
    self.SAMPLE_WIDTH = pyaudio.get_sample_size(sample_format)  # size of each sample

    self.RATE = 16000  # sampling rate in Hertz
    self.CHANNELS = 1  # mono audio
    self.CHUNK = 1024  # number of frames stored in each buffer

    # Neither is created here.
    self.audio = None
    self.stream = None
def __init__(self, hmm_type=1, vad_threshold=3.5, pl_window=10, wip=1e-4, silprob=0.3, bestpath=True, remove_dc=True, do_keyphrase=False, keyphrase="NAVSA", kws_threshold=1e-4):
    """Configure a Decoder and reset the recording state.

    :param hmm_type: 0 selects the system-wide en-us model, 1 the bundled
        ``model/cmusphinx-en-us-5.2``; any other value sets no ``-hmm``.
    :param vad_threshold: value for ``-vad_threshold``.
    :param pl_window: value for ``-pl_window``.
    :param wip: value for ``-wip``.
    :param silprob: value for ``-silprob``.
    :param bestpath: value for ``-bestpath``.
    :param remove_dc: maps to ``-remove_dc`` ``'yes'``/``'no'``.
    :param do_keyphrase: search for ``keyphrase`` instead of using the
        language model.
    :param keyphrase: phrase for ``-keyphrase``.
    :param kws_threshold: value for ``-kws_threshold``.
    """
    # Audio constants.
    self.CHUNK = 1024
    self.RATE = 16000
    self.FORMAT = pyaudio.paInt16
    self.CHANNELS = 1
    self.RUN_SECONDS = 1000
    self.sampwidth = pyaudio.get_sample_size(self.FORMAT)
    self.do_trigger = False

    # Create a decoder with certain model
    config = self.config = Decoder.default_config()

    hmm_path = None
    if hmm_type == 0:
        hmm_path = '/usr/local/share/pocketsphinx/model/en-us/en-us'
    elif hmm_type == 1:
        hmm_path = 'model/cmusphinx-en-us-5.2'
    if hmm_path is not None:
        config.set_string('-hmm', hmm_path)

    config.set_string('-dict', 'model/7705.dic')

    if do_keyphrase:
        config.set_string('-keyphrase', keyphrase)
        config.set_float('-kws_threshold', kws_threshold)
    else:
        config.set_string('-lm', 'model/7705.lm')

    config.set_string('-logfn', '/dev/null')
    config.set_string('-debug', '1')

    # http://cmusphinx.sourceforge.net/wiki/pocketsphinxhandhelds
    config.set_boolean('-bestpath', bestpath)            # default is true
    config.set_float('-vad_threshold', vad_threshold)    # default is 2
    config.set_float("-pl_window", pl_window)            # default is 5, range is 0 to 10
    # The two notes below were swapped in the original comments.
    config.set_float('-wip', wip)                        # word insertion penalty (default 0.65)
    config.set_float('-silprob', silprob)                # silence word transition probability (default 0.005)
    config.set_string('-remove_dc', 'yes' if remove_dc else 'no')

    self.decoder = Decoder(config)

    # Bounded histories.
    self.deque_time = deque(maxlen=20)
    self.deque_mean = deque(maxlen=50)

    # Recording state.
    self.mic = False
    self.wf = None
    self.vad = False
    self.rec_trigger = False
    self.rec_frames = []
    self.sec_since_kw = 999.9
    self.sec_since_vad = 999.9
def read(self, buf, source_channels):
    """Resample an incoming buffer and forward it to the listener.

    The first three bytes of ``buf`` are skipped, the remainder is converted
    from 48000 Hz to ``self.listener.sample_rate``, mixed down to mono with
    equal channel weights, and passed to ``self.listener.read``. Any
    ``audioop.error`` raised along the way is logged and swallowed so a bad
    packet does not stop the stream.

    :param buf: raw buffer; bytes from offset 3 onward are treated as
        16-bit PCM (presumably a 3-byte header precedes them -- confirm
        against the caller).
    :param source_channels: number of interleaved channels in ``buf``.
    """
    # audioop expects the width of ONE sample in bytes; the channel count is
    # passed separately. Multiplying the two (as was done before) makes
    # ratecv treat 16-bit stereo as 32-bit samples and corrupts the audio.
    source_sample_width = pyaudio.get_sample_size(pyaudio.paInt16)
    audio = buf[3:]
    try:
        # sometimes the data received is incomplete so reusing state
        # data from ratecv() sometimes results in errors
        (audio, _) = audioop.ratecv(audio, source_sample_width,
                                    source_channels, 48000,
                                    self.listener.sample_rate, None)
        # NOTE(review): tomono is applied unconditionally, which assumes the
        # input is always stereo -- confirm source_channels is never 1.
        audio = audioop.tomono(audio, self.listener.sample_width, 0.5, 0.5)
        self.listener.read(audio)
    except audioop.error:
        logger.warn("Error preparing sample", exc_info=True)
def __init__(self):
    """Set capture constants and derive buffer counts for pause detection.

    ``pause_buffer_count`` and ``quiet_buffer_count`` are the number of
    ``CHUNK``-sized buffers (rounded up) that span ``pause_threshold`` and
    ``quiet_duration`` seconds respectively.
    """
    self.CHUNK = 1024
    self.FORMAT = pyaudio.paInt16
    self.CHANNELS = 2
    self.RATE = 44100
    self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.FORMAT)

    self.energy_threshold = 1500
    self.pause_threshold = 0.8
    self.quiet_duration = 0.5

    # Force true division: with two ints Python 2 would floor this to 0 and
    # the divisions below would raise ZeroDivisionError.
    self.seconds_per_buffer = float(self.CHUNK) / self.RATE
    self.pause_buffer_count = math.ceil(self.pause_threshold / self.seconds_per_buffer)
    self.quiet_buffer_count = math.ceil(self.quiet_duration / self.seconds_per_buffer)
def __init__(self, device_index=None):
    """Describe a 16-bit, mono, 16 kHz audio source without opening it.

    When an index is given, a temporary ``PyAudio`` instance is created only
    to read the device count for a range check, and is terminated again
    before this method returns.

    :param device_index: ``None`` or an integer device index.
    :raises AssertionError: if ``device_index`` is not ``None``/int or is
        outside ``[0, device count)``.
    """
    assert device_index is None or isinstance(device_index, int), "Device index must be None or an integer"
    if device_index is not None:  # ensure device index is in range
        audio = pyaudio.PyAudio()
        try:
            count = audio.get_device_count()  # obtain device count
        finally:
            # Always release the temporary PortAudio handle, even when the
            # device query fails.
            audio.terminate()
        assert 0 <= device_index < count, "Device index out of range"
    self.device_index = device_index
    self.format = pyaudio.paInt16  # 16-bit int sampling
    self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.format)
    self.RATE = 16000  # sampling rate in Hertz
    self.CHANNELS = 1  # mono audio
    self.CHUNK = 1024  # number of frames stored in each buffer

    # Nothing is opened yet.
    self.audio = None
    self.stream = None
def __init__(self, device_index=None, sample_rate=16000, chunk_size=1024):
    """Describe a 16-bit audio source without opening it.

    When an index is given, a temporary ``PyAudio`` instance is created only
    to read the device count for a range check, and is terminated again
    before this method returns.

    :param device_index: ``None`` or an integer device index.
    :param sample_rate: sampling rate in Hertz; must be a positive int.
    :param chunk_size: frames per buffer; must be a positive int.
    :raises AssertionError: if any argument fails the checks above or the
        index is outside ``[0, device count)``.
    """
    assert device_index is None or isinstance(device_index, int), "Device index must be None or an integer"
    if device_index is not None:  # ensure device index is in range
        audio = pyaudio.PyAudio()
        try:
            count = audio.get_device_count()  # obtain device count
        finally:
            # Always release the temporary PortAudio handle, even when the
            # device query fails.
            audio.terminate()
        assert 0 <= device_index < count, "Device index out of range"
    assert isinstance(sample_rate, int) and sample_rate > 0, "Sample rate must be a positive integer"
    assert isinstance(chunk_size, int) and chunk_size > 0, "Chunk size must be a positive integer"
    self.device_index = device_index
    self.format = pyaudio.paInt16  # 16-bit int sampling
    self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.format)  # size of each sample
    self.SAMPLE_RATE = sample_rate  # sampling rate in Hertz
    self.CHUNK = chunk_size  # number of frames stored in each buffer

    # Nothing is opened yet.
    self.audio = None
    self.stream = None
def _start_wave_file(self):
    '''
    Begin a new continuous-recording wave file in the output directory.

    The file is named ``c_<UTC timestamp>.wav`` and is left open for writing
    in ``self._current_wave_file``, configured with this object's channel
    count, sample format width and sample rate.
    :return:
    '''
    # Build the target path from the current UTC time.
    stamp = datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
    file_name = "c_{0}.wav".format(stamp)
    target = os.path.join(self.output_directory, file_name)

    # Open the writer and stamp the header parameters on it.
    writer = wave.open(target, mode='wb')
    self._current_wave_file = writer
    writer.setnchannels(self._num_channels)
    writer.setsampwidth(pyaudio.get_sample_size(self._sample_format))
    writer.setframerate(self._sample_rate)

    self.log.info("Starting continuous recording: " + file_name)
def __init__(self, length, n_channels=1):
    """
    Allocate a zeroed float32 buffer and reset the read/write state.

    :param length: length of the buffer in samples, per channel; the backing
                   array holds ``length * n_channels`` values.
    :param n_channels: Number of channels present in the audio samples.
    """
    self._n_channels = n_channels
    self._length = length * self._n_channels  # Total length in samples
    self._sample_size = pyaudio.get_sample_size(pyaudio.paFloat32)
    self._sample_format = self._format[pyaudio.paFloat32]

    # Initialize state variables
    self._size = 0
    self._write_start = 0
    self._read_start = 0

    # Instantiate buffer
    self._buffer = np.zeros(self._length, dtype=np.float32)
    # Parenthesised form prints the same text on Python 2 and is valid
    # syntax on Python 3 (the bare print statement is not).
    print(self._buffer.shape)

    # Setup blocking interface events
    self._setup_events()
def store_data(self, indata):
    """Queue one chunk and flush a full file's worth of chunks to disk.

    ``indata`` is appended to ``self.frames``. Once at least
    ``self.chunks_per_file`` chunks are queued, the first
    ``chunks_per_file`` of them are written to a wave file named
    ``self.directory + str(self.file_num)``, ``file_num`` is incremented and
    the written chunks are dropped from the queue.

    :param indata: one chunk of raw audio bytes.
    """
    print('store_data(): len(frames): ' + str(len(self.frames)))
    self.frames.append(indata)
    if len(self.frames) < self.chunks_per_file:
        return

    # TODO: Move file I/O to an async thread
    wave_file = wave.open(self.directory + str(self.file_num), 'wb')
    try:
        wave_file.setnchannels(self.channels)
        wave_file.setsampwidth(pyaudio.get_sample_size(self.audio_format))
        wave_file.setframerate(self.rate)
        wave_file.writeframes(b''.join(self.frames[:self.chunks_per_file]))
    finally:
        # Close the file even if a write fails, so the handle is not leaked.
        wave_file.close()
    self.file_num += 1

    # move frame
    # TODO: Use a circular buffer or other appropriate data structure,
    # to avoid doing this move in the callback thread.
    self.frames = self.frames[self.chunks_per_file:]
def __init__(self, fmt=pa.paInt16, rate=44100):
    """Record the sample format and rate and pick the matching numpy dtype.

    :param fmt: a PyAudio sample format constant.
    :param rate: sample rate, stored as ``self.rate``.
    :raises SampleFormatNotSuportedException: for ``paInt24`` and for any
        format not handled below.
    """
    self.fmt = fmt
    self.rate = rate

    # NOTE(review): for 16-bit samples this yields 8191, not the full-scale
    # 32767 (which would be 256**size / 2 - 1) -- confirm whether the reduced
    # amplitude is intentional headroom.
    self.amp = 1 if fmt == pa.paFloat32 else 128**pa.get_sample_size(fmt) / 2 - 1

    if fmt == pa.paFloat32:
        self.dtype = np.float32  # pylint: disable=E1101
    elif fmt == pa.paInt32:
        # numpy has no ``Int32`` attribute; the scalar type is ``int32``.
        self.dtype = np.int32  # pylint: disable=E1101
    elif fmt == pa.paInt24:
        raise SampleFormatNotSuportedException('paInt24')
    elif fmt == pa.paInt16:
        self.dtype = np.int16
    elif fmt == pa.paInt8:
        self.dtype = np.int8
    elif fmt == pa.paUInt8:
        self.dtype = np.uint8
    else:
        raise SampleFormatNotSuportedException('paCustomFormat')

    self.reset()
    self.data = b''
def trigger_recording(self, filename=None):
    '''
    Write the current contents of the ring buffer to a wave file in the
    output directory.

    :param filename: If specified, this filename will be used to save the
                     triggered recording. If not, a filename will be
                     generated by appending the UTC timestamp to t_.
    :return:
    '''
    # figure out the new filename, if not specified
    if filename is None:
        now = datetime.utcnow()
        filename = "t_{0}.wav".format(now.strftime("%Y%m%dT%H%M%SZ"))
    file_path = os.path.join(self.output_directory, filename)

    triggered_wave_file = wave.open(file_path, mode='wb')
    try:
        triggered_wave_file.setnchannels(self._num_channels)
        triggered_wave_file.setsampwidth(pyaudio.get_sample_size(self._sample_format))
        triggered_wave_file.setframerate(self._sample_rate)
        # Join as bytes: identical on Python 2, and required on Python 3
        # where audio chunks are bytes and ''.join would raise TypeError.
        triggered_wave_file.writeframes(b''.join(self._ring_buffer))
    finally:
        # Close the file even if a write fails, so the handle is not leaked.
        triggered_wave_file.close()
    self.log.info("Wrote triggered file: " + filename)
def __init__(self):
    """
    Create the logger and the PyAudio handle, then set the default
    recording format and energy/pause thresholds.

    Takes no arguments.
    """
    log = logging.getLogger(__name__)
    self._logger = log

    log.info("Initializing PyAudio. ALSA/Jack error messages "
             "that pop up during this process are normal and "
             "can usually be safely ignored.")
    self._audio = pyaudio.PyAudio()
    log.info("Initialization of PyAudio completed.")

    # Sample format and its width in bytes.
    self.format = pyaudio.paInt16
    self.SAMPLE_WIDTH = pyaudio.get_sample_size(self.format)

    # minimum audio energy to consider for recording
    self.energy_threshold = 300
    self.dynamic_energy_threshold = True
    self.dynamic_energy_adjustment_damping = 0.15
    self.dynamic_energy_ratio = 1.5

    # seconds of non-speaking audio before a phrase is considered complete
    self.pause_threshold = 0.8
    # minimum seconds of speaking audio before we consider the speaking
    # audio a phrase - values below this are ignored (for filtering out
    # clicks and pops)
    self.phrase_threshold = 0.3
    # seconds of non-speaking audio to keep on both sides of the recording
    self.non_speaking_duration = 0.5
from __future__ import print_function, division import pyaudio, os, sys, time, inspect FILE_PATH = os.path.dirname(inspect.getfile(inspect.currentframe())) ########################################################################## # SAMPLING PARAMETERS FRAME_RATE = 44100 # The sound card sampling rate in Hz DOWNSAMPLED_RATE = 16000 # Hz (MIT REDD uses 15kHz but 16kHz is a standard # rate and so increases compatibility) RECORD_SECONDS = 1 # Seconds to record per queue item N_CHANNELS = 2 # one for voltage, one for current FRAMES_PER_BUFFER = 1024 SAMPLE_FORMAT = pyaudio.paInt16 SAMPLE_WIDTH = pyaudio.get_sample_size(SAMPLE_FORMAT) N_READS_PER_QUEUE_ITEM = int(round(FRAME_RATE / FRAMES_PER_BUFFER * RECORD_SECONDS)) ########################################################################## # MAINS PARAMETERS MAINS_HZ = 50 SAMPLES_PER_MAINS_CYCLE = FRAME_RATE / MAINS_HZ PHASE_DIFF_TOLERANCE = SAMPLES_PER_MAINS_CYCLE / 4 SAMPLES_PER_DEGREE = SAMPLES_PER_MAINS_CYCLE / 360 ########################################################################## # PORT TO BROADCAST MEASURES ON BROADCAST_PORT = 5556 ##########################################################################
import threading
import math

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from pa_tools.audiohelper import AudioHelper
from pa_tools.audiobuffer import AudioBuffer
from pa_tools.stftmanager import StftManager
from pa_tools.audiolocalizer import AudioLocalizer
from pa_tools.distributionlocalizer import DistributionLocalizer

# Setup constants

# Sample representation: 32-bit float for PyAudio and the matching numpy type.
SAMPLE_TYPE = pyaudio.paFloat32
DATA_TYPE = np.float32
SAMPLE_SIZE = pyaudio.get_sample_size(SAMPLE_TYPE)

# Stream geometry.
SAMPLE_RATE = 44100
FRAMES_PER_BUF = 4096  # For 44100 Fs, be careful going over 4096, loud sounds may occur...
NUM_CHANNELS_IN = 7
NUM_CHANNELS_OUT = 2

# STFT sizes, all derived from the buffer size.
# NOTE(review): the "/ 2" results are ints only under Python 2 integer
# division; under true division they become floats -- confirm interpreter.
FFT_LENGTH = FRAMES_PER_BUF
WINDOW_LENGTH = FFT_LENGTH
HOP_LENGTH = WINDOW_LENGTH / 2

# Angular grid resolution.
N_THETA = 20
N_PHI = N_THETA / 2

# Feature switches.
PLOT_CARTES = False
PLOT_POLAR = True
EXTERNAL_PLOT = False
PLAY_AUDIO = True

TIMEOUT = 1

# Setup mics
def passiveListen(self, PERSONA):
    """
    Listens for PERSONA in everyday sound. Times out after LISTEN_TIME, so
    needs to be restarted.

    The method first samples THRESHOLD_TIME seconds of audio to derive a
    score threshold, then listens for up to LISTEN_TIME seconds for a chunk
    whose score exceeds it. If one is found, a little more audio is
    recorded, written to a temporary wave file and handed to
    ``self.passive_stt_engine.transcribe``.

    Returns:
        (None, None)          -- no chunk exceeded the threshold
        (THRESHOLD, PERSONA)  -- PERSONA appears in the transcribed intent
        (False, transcribed)  -- something was transcribed, without PERSONA
        (False, '')           -- the transcription result was empty
    """
    THRESHOLD_MULTIPLIER = 1.8
    RATE = 16000
    CHUNK = 1024

    # number of seconds to allow to establish threshold
    THRESHOLD_TIME = 1

    # number of seconds to listen before forcing restart
    LISTEN_TIME = 10

    # number of seconds to keep recording after a disturbance
    DELAY_MULTIPLIER = 1

    # Whole chunks per second; floor division keeps range() bounds integral
    # on Python 3 and matches Python 2 integer division.
    chunks_per_second = RATE // CHUNK

    # prepare recording stream
    stream = self._audio.open(format=pyaudio.paInt16,
                              channels=1,
                              rate=RATE,
                              input=True,
                              frames_per_buffer=CHUNK)

    # The stream is always stopped and closed, whether we return early,
    # finish recording, or a read fails.
    try:
        # stores the lastN score values
        # NOTE(review): the window is seeded with 0..29 and only
        # chunks_per_second * THRESHOLD_TIME entries get replaced, so seed
        # values remain in the average -- confirm this is intended.
        lastN = list(range(30))

        # calculate the long run average, and thereby the proper threshold
        for _ in range(chunks_per_second * THRESHOLD_TIME):
            data = stream.read(CHUNK)

            # save this data point as a score
            lastN.pop(0)
            lastN.append(self.getScore(data))

        average = sum(lastN) / len(lastN)

        # this will be the benchmark to cause a disturbance over!
        THRESHOLD = average * THRESHOLD_MULTIPLIER

        # stores the audio data
        frames = []

        # flag raised when sound disturbance detected
        didDetect = False

        # start passively listening for disturbance above threshold
        for _ in range(chunks_per_second * LISTEN_TIME):
            data = stream.read(CHUNK)
            frames.append(data)

            if self.getScore(data) > THRESHOLD:
                didDetect = True
                break

        # no use continuing if no flag raised
        if not didDetect:
            print("No disturbance detected")
            return (None, None)

        # cutoff any recording before this disturbance was detected
        frames = frames[-20:]

        # otherwise, let's keep recording for few seconds and save the file
        for _ in range(chunks_per_second * DELAY_MULTIPLIER):
            frames.append(stream.read(CHUNK))
    finally:
        stream.stop_stream()
        stream.close()

    # save the audio data
    with tempfile.NamedTemporaryFile(mode='w+b') as f:
        wav_fp = wave.open(f, 'wb')
        wav_fp.setnchannels(1)
        wav_fp.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
        wav_fp.setframerate(RATE)
        # Chunks are byte strings; b''.join is identical on Python 2 and
        # required on Python 3.
        wav_fp.writeframes(b''.join(frames))
        wav_fp.close()
        f.seek(0)

        # check if PERSONA was said
        result = self.passive_stt_engine.transcribe(f)
        if result['_text'] and result['outcomes'][0]:
            transcribed = [result['outcomes'][0]['intent'].upper()]
            if any(PERSONA in phrase for phrase in transcribed):
                return (THRESHOLD, PERSONA)
            return (False, transcribed)

    return (False, '')