def speech_recog(self, model):
    """Decode audio from ``self.stream_in`` until some text is recognised.

    A decoder is configured with a fixed en-us acoustic model plus the
    ``<model>.lm`` / ``<model>.dict`` pair located under ``MODELDIR``.
    Chunks produced by ``stream.generator()`` are fed to it one by one.

    :param model: base name (without extension) of the language model and
        dictionary files under ``MODELDIR``.
    :returns: the accumulated hypothesis text as soon as it is longer than
        one character, or whatever was accumulated (possibly ``''``) when
        the audio generator is exhausted first.
    """
    config = Decoder.default_config()
    config.set_string('-hmm', '/usr/local/share/pocketsphinx/model/en-us/en-us')
    config.set_int('-ds', 2)
    config.set_int('-topn', 3)
    config.set_int('-maxwpf', 5)
    config.set_string('-lm', MODELDIR + model + '.lm')
    config.set_string('-dict', MODELDIR + model + '.dict')
    decoder = Decoder(config)

    recog_text = ''
    decoder.start_utt()
    try:
        with self.stream_in as stream:
            for content in stream.generator():
                decoder.process_raw(content, False, False)
                # Query the hypothesis once per chunk instead of three times.
                hyp = decoder.hyp()
                if hyp and hyp.hypstr != '':
                    recog_text += hyp.hypstr
                    if len(recog_text) > 1:
                        logging.info("recog text: %s", recog_text)
                        return recog_text
    finally:
        # Close the utterance on every exit path, including the case where
        # the generator ends before anything was recognised.
        decoder.end_utt()
    return recog_text
def main():
    """Transcribe microphone audio continuously and print each utterance.

    The acoustic model, language model and dictionary are looked up in the
    ``model`` directory one level above this file (names come from the
    module-level ``HMM``, ``LM`` and ``DIC`` constants).  Audio is read in
    ``BUFFER``-frame chunks; a ``.`` is written to stdout for every chunk
    the decoder classifies as speech, and the hypothesis is printed each
    time the decoder transitions from speech to silence.
    """
    model_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), '..', 'model')

    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(model_dir, HMM))
    config.set_string('-lm', os.path.join(model_dir, LM))
    config.set_string('-dict', os.path.join(model_dir, DIC))
    config.set_string('-logfn', os.devnull)
    decoder = Decoder(config)

    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                        input=True, frames_per_buffer=BUFFER)
        try:
            stream.start_stream()
            in_speech_bf = True
            decoder.start_utt()
            while True:
                buf = stream.read(BUFFER)
                if not buf:
                    break
                decoder.process_raw(buf, False, False)
                in_speech = decoder.get_in_speech()
                if in_speech:
                    sys.stdout.write('.')
                    sys.stdout.flush()
                if in_speech == in_speech_bf:
                    continue
                in_speech_bf = in_speech
                if in_speech_bf:
                    # Silence -> speech: keep feeding the same utterance.
                    continue
                # Speech -> silence: the utterance is complete.
                decoder.end_utt()
                hyp = decoder.hyp()
                if hyp is not None and hyp.hypstr != '':
                    print('You said:', hyp.hypstr)
                decoder.start_utt()
            decoder.end_utt()
            # hyp() is None when nothing was decoded; do not dereference it.
            hyp = decoder.hyp()
            print('An Error occured:', hyp.hypstr if hyp is not None else '')
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()
def process_stream(self, stream, callback):
    """Continuously decode an audio stream, calling ``callback`` on text.

    Audio is read from ``stream`` in 1024-byte requests and fed to a
    decoder built from ``self.config``.  Whenever the decoder reports a
    hypothesis, the utterance is closed, ``callback`` receives the
    hypothesis string, and a new utterance is started.

    :param stream: object with a ``read(size)`` method returning raw audio.
    :param callback: callable invoked with the recognised string.

    The loop ends when ``read`` returns an empty buffer (end of stream);
    for a source that never returns empty data it runs indefinitely.
    """
    decoder = Decoder(self.config)
    decoder.start_utt()
    while True:
        buf = stream.read(1024)
        if not buf:
            # End of stream: stop instead of feeding empty buffers forever.
            break
        decoder.process_raw(buf, False, False)
        hyp = decoder.hyp()
        if hyp is not None and hyp.hypstr is not None:
            decoder.end_utt()
            # Re-query after end_utt, as the original did.
            final = decoder.hyp()
            if final is not None:
                callback(final.hypstr)
            decoder.start_utt()
    decoder.end_utt()
class PocketSphinxEngine(Engine):
    """Keyword-spotting engine that wraps a PocketSphinx ``Decoder``."""

    def __init__(self, keyword, sensitivity):
        """Configure the decoder for ``keyword`` and open an utterance.

        :param keyword: phrase to spot; ``'snowboy'`` is rewritten to
            ``'snow boy'`` before being handed to the decoder.
        :param sensitivity: exponent ``s`` used to set the keyword
            threshold to ``10 ** -s``.
        """
        phrase = 'snow boy' if keyword == 'snowboy' else keyword

        config = Decoder.default_config()
        string_options = (
            ('-logfn', '/dev/null'),
            ('-hmm', os.path.join(get_model_path(), 'en-us')),
            ('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict')),
            ('-keyphrase', phrase),
        )
        for option, value in string_options:
            config.set_string(option, value)
        config.set_float('-kws_threshold', 10 ** -sensitivity)

        self._decoder = Decoder(config)
        self._decoder.start_utt()

    def process(self, pcm):
        """Feed one frame of int16 samples to the decoder.

        :param pcm: numpy array whose dtype must be ``np.int16``.
        :returns: the decoder's hypothesis object (falsy when nothing was
            detected).  On detection the utterance is restarted.
        """
        assert pcm.dtype == np.int16

        decoder = self._decoder
        decoder.process_raw(pcm.tobytes(), False, False)
        hypothesis = decoder.hyp()
        if not hypothesis:
            return hypothesis

        decoder.end_utt()
        decoder.start_utt()
        return hypothesis

    def release(self):
        """Close the utterance opened by the constructor or ``process``."""
        self._decoder.end_utt()

    def __str__(self):
        return 'PocketSphinx'
class LocalRecognizer(object):
    """Key-phrase spotter built on a PocketSphinx ``Decoder``."""

    def __init__(self, sample_rate=16000, lang="en-us", key_phrase="mycroft"):
        """Store the settings and build the decoder.

        :param sample_rate: value passed to the decoder as ``-samprate``.
        :param lang: sub-directory of ``BASEDIR/model`` holding the models.
        :param key_phrase: phrase to look for in hypotheses.
        """
        self.lang = lang
        self.key_phrase = key_phrase
        self.sample_rate = sample_rate
        self.configure()

    def configure(self):
        """Create ``self.decoder`` in key-phrase mode from current settings."""
        model_dir = os.path.join(BASEDIR, 'model', self.lang)
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(model_dir, 'hmm'))
        config.set_string('-dict', os.path.join(model_dir, 'mycroft-en-us.dict'))
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', 1e-45)
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', os.devnull)
        self.decoder = Decoder(config)

    def transcribe(self, byte_data, metrics=None):
        """Decode ``byte_data`` as one utterance and return the hypothesis.

        :param byte_data: raw audio passed to ``Decoder.process_raw``.
        :param metrics: optional object with a ``timer(name, seconds)``
            method; when truthy it receives the decoding time.
        :returns: the result of ``decoder.hyp()`` (may be ``None``).
        """
        start = time.time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        return self.decoder.hyp()

    def _has_key_phrase(self, hypothesis):
        """Return True when ``hypothesis`` contains the key phrase."""
        # Both sides are lower-cased: the original lower-cased only the
        # hypothesis, so a key phrase with capitals could never match.
        return bool(hypothesis) and (
            self.key_phrase.lower() in hypothesis.hypstr.lower())

    def is_recognized(self, byte_data, metrics=None):
        """Decode ``byte_data`` and report whether the key phrase was heard."""
        return self._has_key_phrase(self.transcribe(byte_data, metrics))

    def contains(self, hypothesis):
        """Report whether ``hypothesis`` (may be ``None``) has the key phrase."""
        return self._has_key_phrase(hypothesis)
class LocalRecognizer(object):
    """Wake-word spotter built on a PocketSphinx ``Decoder``."""

    def __init__(self, sample_rate=16000, lang="en-us", key_phrase="mycroft"):
        """Store the settings and build the decoder.

        :param sample_rate: value passed to the decoder as ``-samprate``.
        :param lang: sub-directory of ``BASEDIR/model`` holding the models.
        :param key_phrase: wake word to look for in hypotheses.
        """
        self.lang = lang
        self.key_phrase = key_phrase
        self.sample_rate = sample_rate
        self.configure()

    def configure(self):
        """Create ``self.decoder`` in key-phrase mode from current settings."""
        model_dir = os.path.join(BASEDIR, 'model', self.lang)
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(model_dir, 'hmm'))
        config.set_string('-dict', os.path.join(model_dir, 'mycroft-en-us.dict'))
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', 1e-45)
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', os.devnull)
        self.decoder = Decoder(config)

    def transcribe(self, byte_data, metrics=None):
        """Decode ``byte_data`` as one utterance and return the hypothesis.

        :param byte_data: raw audio passed to ``Decoder.process_raw``.
        :param metrics: optional object with a ``timer(name, seconds)``
            method; when truthy it receives the decoding time.
        :returns: the result of ``decoder.hyp()`` (may be ``None``).
        """
        start = time.time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        return self.decoder.hyp()

    def _has_key_phrase(self, hypothesis):
        """Return True when ``hypothesis`` contains the key phrase."""
        # Both sides are lower-cased: the original lower-cased only the
        # hypothesis, so a key phrase with capitals could never match.
        return bool(hypothesis) and (
            self.key_phrase.lower() in hypothesis.hypstr.lower())

    def is_recognized(self, byte_data, metrics=None):
        """Decode ``byte_data`` and report whether the wake word was heard."""
        return self._has_key_phrase(self.transcribe(byte_data, metrics))

    def found_wake_word(self, hypothesis):
        """Report whether ``hypothesis`` (may be ``None``) has the wake word."""
        return self._has_key_phrase(hypothesis)
def process_file(self, audiofile):
    """Decode a raw audio file and return the recognised text.

    The file is fed to a decoder built from ``self.config`` in 1024-byte
    chunks as a single utterance.  Diagnostic information (hypothesis,
    scores, per-word segment scores and the mean acoustic score of the
    non-silence segments) is printed to stdout.

    :param audiofile: path of the audio file to decode.
    :returns: the hypothesis string, or ``None`` when nothing was decoded.
    """
    with open(audiofile, 'rb') as stream:
        decoder = Decoder(self.config)
        decoder.start_utt()
        while True:
            buf = stream.read(1024)
            if not buf:
                break
            decoder.process_raw(buf, False, False)
        decoder.end_utt()

    hyp = decoder.hyp()
    # Single-argument print calls give the same output on Python 2 and 3.
    print("Hyp: %s" % (hyp,))
    if hyp is None:
        return None

    print("Hyp Score %s" % ((hyp.prob, hyp.best_score),))
    average_score = 0
    seg_count = 0
    for seg in decoder.seg():
        if seg.word != "<sil>":
            seg_count += 1
            average_score += seg.ascore
            print(seg.word, seg.ascore, seg.lscore)
    print("hyp: %s" % (hyp.hypstr,))
    # Guard against a hypothesis made only of silence segments, which used
    # to raise ZeroDivisionError.
    if seg_count:
        print(average_score / seg_count)
    return hyp.hypstr
class SphinxDecoder():
    """Record a phrase from the microphone and decode it with a JSGF grammar."""

    def __init__(self):
        """Load the models under ``speech/`` and prepare the microphone.

        The grammar rule ``gr.rule`` from ``speech/gr.gram`` is compiled to
        an FSG, written to ``gr.fsg`` in the working directory, and selected
        as the active search.
        """
        self.MODELDIR = 'speech/'
        self.wav_name = 'media/temp.wav'
        self.raw_name = 'media/temp.raw'

        config = Decoder.default_config()
        config.set_string('-hmm', self.MODELDIR + 'ru_ru/')
        config.set_string('-dict', self.MODELDIR + 'ru.dic')
        self.decoder = Decoder(config)

        jsgf = Jsgf(self.MODELDIR + 'gr.gram')
        rule = jsgf.get_rule('gr.rule')
        fsg = jsgf.build_fsg(rule, self.decoder.get_logmath(), 7.5)
        fsg.writefile('gr.fsg')
        self.decoder.set_fsg('gr', fsg)
        self.decoder.set_search('gr')

        self.rec = Recognizer()
        self.mic = Microphone()

    def wav_to_raw(self):
        """Resample ``self.wav_name`` to 16 kHz and export it as raw audio."""
        audio_file = AudioSegment.from_wav(self.wav_name)
        audio_file = audio_file.set_frame_rate(16000)
        audio_file.export(self.raw_name, format='raw')

    def record_audio(self):
        """Record one phrase to ``self.wav_name`` and convert it to raw.

        A beep is played via ``aplay`` after ambient-noise calibration and
        before listening starts.
        """
        with self.mic as source:
            self.rec.adjust_for_ambient_noise(source)
            system('aplay media/beep.wav')
            audio = self.rec.listen(source)
        with open(self.wav_name, 'wb') as new_audio:
            new_audio.write(audio.get_wav_data())
        self.wav_to_raw()

    def get_from_audio(self):
        """Record a phrase and return the decoded text.

        :returns: the hypothesis string, or ``None`` when the decoder
            produced no hypothesis.
        """
        self.record_audio()
        self.decoder.start_utt()
        # The context manager closes the file even if decoding raises.
        with open(self.raw_name, 'rb') as stream:
            while True:
                buf = stream.read(1024)
                if not buf:
                    break
                self.decoder.process_raw(buf, False, False)
        self.decoder.end_utt()
        # Explicit None check instead of a bare ``except`` that would also
        # hide unrelated errors (and KeyboardInterrupt).
        hyp = self.decoder.hyp()
        return None if hyp is None else hyp.hypstr
def start_listening(self):
    """Stream microphone audio into the decoder and collect phrases.

    A decoder is built from ``self.model_dir`` / ``self.hmm`` / ``self.lm``
    / ``self.dictionary`` and audio is read from the input device
    ``self.input_source_index`` in 1024-frame chunks.  Each time the
    decoder goes from speech to silence, the non-empty hypothesis is
    appended to ``self.all_speech_data``.

    The loop runs while ``self.is_running`` is true, idles while
    ``self.paused`` is true, and stops if a read returns no data.
    ``self.wait_to_resume_lock`` is acquired once before the loop and is
    not released here.
    """
    import time

    config = Decoder.default_config()
    config.set_string('-hmm', path.join(self.model_dir, self.hmm))
    config.set_string('-lm', path.join(self.model_dir, self.lm))
    config.set_string('-dict', path.join(self.model_dir, self.dictionary))
    config.set_string('-logfn', self.logfn)

    # This takes a while
    decoder = Decoder(config)

    p = pyaudio.PyAudio()
    print(self.input_source_index)
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=16000,
                    input=True,
                    input_device_index=self.input_source_index,
                    frames_per_buffer=1024)
    try:
        stream.start_stream()
        in_speech_bf = False
        decoder.start_utt()
        self.wait_to_resume_lock.acquire()
        while self.is_running:
            while self.paused:
                # Sleep briefly rather than spinning at 100% CPU.
                time.sleep(0.01)
            buf = stream.read(1024, exception_on_overflow=False)
            if not buf:
                break
            decoder.process_raw(buf, False, False)
            in_speech = decoder.get_in_speech()
            if in_speech == in_speech_bf:
                continue
            in_speech_bf = in_speech
            if not in_speech_bf:
                decoder.end_utt()
                # hyp() is None when nothing was decoded for the utterance.
                hyp = decoder.hyp()
                if hyp is not None and hyp.hypstr != "":
                    self.all_speech_data.append(hyp.hypstr)
                decoder.start_utt()
        decoder.end_utt()
    finally:
        stream.stop_stream()
        stream.close()
        p.terminate()
def speech_recog(self, model):
    """Read from ``self.stream_in`` until the decoder recognises some text.

    A decoder is configured with a fixed en-us acoustic model plus the
    ``<model>.lm`` / ``<model>.dict`` pair located under ``MODELDIR``.
    ``CHUNK_SIZE`` frames are read per iteration; reads that fail with
    PyAudio's input-overflow error are skipped.

    :param model: base name (without extension) of the language model and
        dictionary files under ``MODELDIR``.
    :returns: the first non-empty hypothesis string.
    :raises IOError: for any read error other than input overflow.
    """
    config = Decoder.default_config()
    config.set_string('-hmm', '/usr/local/share/pocketsphinx/model/en-us/en-us')
    config.set_int('-ds', 2)
    config.set_int('-topn', 3)
    config.set_int('-maxwpf', 5)
    config.set_string('-lm', MODELDIR + model + '.lm')
    config.set_string('-dict', MODELDIR + model + '.dict')
    decoder = Decoder(config)

    decoder.start_utt()
    recog_text = ''
    while len(recog_text) < 1:
        try:
            buf = self.stream_in.read(CHUNK_SIZE)
        except IOError as ex:
            # ``ex.args[1]`` works on Python 2 and 3 (``ex[1]`` does not).
            if len(ex.args) < 2 or ex.args[1] != pyaudio.paInputOverflowed:
                raise
            # The original built a silence buffer here but never fed it to
            # the decoder; the overflowed read is simply skipped.
            logging.info("white noise")
            continue
        logging.info("actual voice")
        decoder.process_raw(buf, False, False)
        # Explicit None check replaces catching AttributeError, which also
        # hid unrelated attribute errors.
        hyp = decoder.hyp()
        if hyp is not None and hyp.hypstr != '':
            recog_text += hyp.hypstr
            print("text: " + hyp.hypstr)
    decoder.end_utt()
    logging.info("recog text: %s", recog_text)
    return recog_text
class LocalRecognizer(object):
    """Wake-word spotter using a generated pronunciation dictionary."""

    def __init__(self, key_phrase, phonemes, threshold, sample_rate=16000,
                 lang="en-us"):
        """Write the dictionary for ``key_phrase`` and build the decoder.

        :param key_phrase: whitespace-separated words of the wake phrase.
        :param phonemes: pronunciations, one ``.``-separated group per word.
        :param threshold: value converted to float for ``-kws_threshold``.
        :param sample_rate: value passed to the decoder as ``-samprate``.
        :param lang: sub-directory of ``BASEDIR/model`` holding the HMM.
        """
        self.lang = lang
        self.key_phrase = key_phrase
        self.sample_rate = sample_rate
        self.threshold = threshold
        self.phonemes = phonemes
        dict_name = self.create_dict(key_phrase, phonemes)
        self.decoder = Decoder(self.create_config(dict_name))

    def create_dict(self, key_phrase, phonemes):
        """Write a ``word phonemes`` dictionary to a temp file.

        Words and phoneme groups are paired positionally; surplus items on
        either side are ignored.

        :returns: path of the created file.
        """
        # NOTE(review): nothing in this class removes the temp file again.
        (fd, file_name) = tempfile.mkstemp()
        words = key_phrase.split()
        phoneme_groups = phonemes.split('.')
        with os.fdopen(fd, 'w') as f:
            for word, phoneme in zip(words, phoneme_groups):
                f.write(word + ' ' + phoneme + '\n')
        return file_name

    def create_config(self, dict_name):
        """Return a key-phrase decoder configuration using ``dict_name``."""
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(BASEDIR, 'model', self.lang,
                                               'hmm'))
        config.set_string('-dict', dict_name)
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float(self.threshold))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', os.devnull)
        return config

    def transcribe(self, byte_data, metrics=None):
        """Decode ``byte_data`` as one utterance and return the hypothesis.

        :param byte_data: raw audio passed to ``Decoder.process_raw``.
        :param metrics: optional object with a ``timer(name, seconds)``
            method; when truthy it receives the decoding time.
        :returns: the result of ``decoder.hyp()`` (may be ``None``).
        """
        start = time.time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        return self.decoder.hyp()

    def _has_key_phrase(self, hypothesis):
        """Return True when ``hypothesis`` contains the key phrase."""
        # Both sides are lower-cased: the original lower-cased only the
        # hypothesis, so a key phrase with capitals could never match.
        return bool(hypothesis) and (
            self.key_phrase.lower() in hypothesis.hypstr.lower())

    def is_recognized(self, byte_data, metrics=None):
        """Decode ``byte_data`` and report whether the wake word was heard."""
        return self._has_key_phrase(self.transcribe(byte_data, metrics))

    def found_wake_word(self, hypothesis):
        """Report whether ``hypothesis`` (may be ``None``) has the wake word."""
        return self._has_key_phrase(hypothesis)
class PocketSphinxEngine(Engine):
    """Keyword detector implemented with PocketSphinx."""

    def __init__(self, keyword, sensitivity):
        """Build the decoder and open the first utterance.

        :param keyword: key phrase handed to the decoder.
        :param sensitivity: value used as the ``-kws_threshold`` setting.
        """
        # Imported lazily so the dependency is only needed by this engine.
        from pocketsphinx import get_model_path
        from pocketsphinx.pocketsphinx import Decoder

        config = Decoder.default_config()
        string_options = (
            ('-logfn', '/dev/null'),
            # US English acoustic model and pronunciation dictionary.
            ('-hmm', os.path.join(get_model_path(), 'en-us')),
            ('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict')),
            ('-keyphrase', keyword),
        )
        for option, value in string_options:
            config.set_string(option, value)
        config.set_float('-kws_threshold', sensitivity)

        self._decoder = Decoder(config)
        self._decoder.start_utt()

    def process(self, pcm):
        """Scale ``pcm`` to int16, decode it and return the hypothesis.

        ``pcm`` is multiplied by the int16 maximum before conversion.
        On detection the utterance is restarted.

        :returns: the decoder's hypothesis object, falsy when nothing was
            detected.
        """
        full_scale = np.iinfo(np.int16).max
        raw = (full_scale * pcm).astype(np.int16).tobytes()

        decoder = self._decoder
        decoder.process_raw(raw, False, False)
        hypothesis = decoder.hyp()
        if not hypothesis:
            return hypothesis

        decoder.end_utt()
        decoder.start_utt()
        return hypothesis

    def release(self):
        """Close the currently open utterance."""
        self._decoder.end_utt()

    def __str__(self):
        return 'PocketSphinx'
class PocketSphinxEngine(Engine):
    """Keyword detector implemented with PocketSphinx."""

    def __init__(self, engine_type, keyword, sensitivity):
        """Initialise the base engine, then build and start the decoder.

        :param engine_type: engine type, forwarded to the base class.
        :param keyword: key phrase handed to the decoder.
        :param sensitivity: value used as the ``-kws_threshold`` setting.
        """
        super().__init__(engine_type, keyword, sensitivity)

        config = Decoder.default_config()
        string_options = (
            ('-logfn', '/dev/null'),
            # US English acoustic model and pronunciation dictionary.
            ('-hmm', os.path.join(get_model_path(), 'en-us')),
            ('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict')),
            ('-keyphrase', keyword),
        )
        for option, value in string_options:
            config.set_string(option, value)
        config.set_float('-kws_threshold', sensitivity)

        self._decoder = Decoder(config)
        self._decoder.start_utt()

    @prepare_pcm
    def process(self, pcm):
        """Decode ``pcm`` and return the hypothesis (falsy if none).

        On detection the utterance is closed and a new one is opened.
        """
        decoder = self._decoder
        decoder.process_raw(pcm, False, False)
        hypothesis = decoder.hyp()
        if not hypothesis:
            return hypothesis

        decoder.end_utt()
        decoder.start_utt()
        return hypothesis

    def release(self):
        """Close the currently open utterance."""
        self._decoder.end_utt()
class SpeechRecognizer(Interpreter):
    """Interpreter that turns audio chunks into text.

    PocketSphinx is always used for speech/silence detection; the text
    comes from PocketSphinx or, when ``sr == "googlespeech"``, from Google
    Speech.
    """

    def __init__(self, name: str, sr: str = "pocketsphinx"):
        """
        :param name: interpreter name, forwarded to the base class.
        :param sr: recognition backend, ``"pocketsphinx"`` or
            ``"googlespeech"``.
        """
        super().__init__(name, True)
        self.logger = self.get_logger()
        self.sr = sr
        # Audio chunks of the utterance currently being spoken.
        self.current_data = []
        self.setup()

    def setup(self) -> None:
        """Read ``RATE``/``CHUNK`` from the environment and set up backends."""
        self.RATE = int(os.getenv("RATE"))
        self.CHUNK = int(os.getenv("CHUNK"))
        self.setup_pocketsphinx()
        if self.sr == "googlespeech":
            self.setup_googlespeech()

    def setup_pocketsphinx(self) -> None:
        """Build the Spanish PocketSphinx decoder and open an utterance."""
        self.logger.info("Setting up PocketSphinx.")
        self.MODELDIR = "resources/model"
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(self.MODELDIR, 'es-es'))
        config.set_string('-lm', os.path.join(self.MODELDIR, 'es-es.lm'))
        config.set_string('-dict', os.path.join(self.MODELDIR, 'es.dict'))
        config.set_string('-logfn', '/dev/null')
        self.decoder = Decoder(config)
        self.prev_buf_is_speech = False
        self.decoder.start_utt()
        self.logger.info("Done setting up PocketSphinx.")

    def setup_googlespeech(self) -> None:
        """Create the Google Speech client and streaming configuration."""
        self.logger.info("Setting up Google Speech.")
        credentials = service_account.Credentials.from_service_account_file(
            'resources/keys/credentials.json')
        config = speech.types.RecognitionConfig(
            encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
            language_code='es-PE',
            sample_rate_hertz=self.RATE,
        )
        self.client = speech.SpeechClient(credentials=credentials)
        self.streaming_config = speech.types.StreamingRecognitionConfig(
            config=config)
        self.logger.info("Done setting up Google Speech.")

    def get_destinations_ID(self, raw_data) -> List[Identifier]:
        """Return a one-element list holding the first destination ID."""
        return [self.destinations_ID[0]]

    def preprocess(self, raw_data):
        """Filtering hook; currently returns ``raw_data`` unchanged."""
        return raw_data

    def query_gs(self):
        """Send the buffered audio to Google Speech.

        :returns: ``(transcript, confidence)`` of the first alternative of
            the first response, or ``(None, None)`` on any error.  The
            audio buffer is emptied afterwards.
        """
        requests = (speech.types.StreamingRecognizeRequest(audio_content=chunk)
                    for chunk in self.current_data)
        responses = self.client.streaming_recognize(
            config=self.streaming_config, requests=requests)
        try:
            response = next(responses)
            best = response.results[0].alternatives[0]
            data = best.transcript
            conf = best.confidence
        except Exception as e:
            self.logger.info(f"{self.name}>> {e}")
            conf = None
            data = None
        self.current_data.clear()
        return data, conf

    def query_ps(self):
        """Read the current PocketSphinx hypothesis.

        :returns: ``(text, best_score)``; ``text`` is ``None`` for an
            empty hypothesis, and both are ``None`` on any error.
        """
        try:
            data = self.decoder.hyp().hypstr
            conf = self.decoder.hyp().best_score
            if data == "":
                data = None
        except Exception as e:
            self.logger.info(f"{self.name}>> {e}")
            conf = None
            data = None
        return data, conf

    def process(self, raw_data) -> Generator:
        """Feed one audio chunk to the decoder.

        Yields ``True`` when an utterance just ended (or a force-stop chunk
        of ``CHUNK * 16`` zero bytes arrived), otherwise ``False``; then
        yields the recognised text (``None`` unless an utterance ended and
        was recognised).
        """
        self.decoder.process_raw(raw_data, False, False)
        cur_buf_is_speech = self.decoder.get_in_speech()
        data = None
        self.logger.info(
            f"prev: {self.prev_buf_is_speech}, current: {cur_buf_is_speech}")

        force_speech = False
        if raw_data == bytes(self.CHUNK * 16):
            force_speech = True
            self.logger.info("RECEIVED FORCE STOP")

        if force_speech or (self.prev_buf_is_speech and not cur_buf_is_speech):
            # No longer in speech -> stop listening and process
            self.logger.info("No longer in speech, yielding True.")
            yield True
            self.decoder.end_utt()
            # Pre-set so an unrecognised ``sr`` value cannot leave ``conf``
            # unbound for the log line below.
            conf = None
            if self.sr == "googlespeech":
                data, conf = self.query_gs()
            elif self.sr == "pocketsphinx":
                data, conf = self.query_ps()
            # query_gs() empties the buffer itself; without this the buffer
            # grew without bound when another backend was selected.
            self.current_data.clear()
            self.logger.info(
                f"{self.name}>> Heard DATA: '{data}' with confidence: {conf}.")
            self.decoder.start_utt()
            self.prev_buf_is_speech = cur_buf_is_speech
        elif cur_buf_is_speech:
            # Entering or still in speech -> keep buffering the audio.
            self.current_data.append(raw_data)
            self.prev_buf_is_speech = cur_buf_is_speech
            yield False
        else:
            self.prev_buf_is_speech = cur_buf_is_speech
            yield False
        yield data
        return

    def pass_msg(self, msg: str) -> None:
        """Set the ``self.e`` event when ``msg`` is ``"RESUME"``."""
        # NOTE(review): ``self.e`` is not created in this class; presumably
        # the base class provides it -- confirm.
        if msg == "RESUME":
            self.e.set()

    def dump_history(self, filename: str, data: List[Any]) -> None:
        """No-op."""
        pass
class PocketsphinxTrigger(BaseTrigger):
    """Voice trigger that spots any phrase from a PocketSphinx ``-kws`` list."""

    type = triggers.TYPES.VOICE

    def __init__(self, config, trigger_callback):
        super(PocketsphinxTrigger, self).__init__(config, trigger_callback, 'pocketsphinx')

        # Set while detection is enabled.
        self._enabled_lock = threading.Event()
        # Set by the worker once it has stopped listening.
        self._disabled_sync_lock = threading.Event()
        self._decoder = None

    def setup(self):
        """Build the decoder from the trigger configuration."""
        ps_config = Decoder.default_config()

        # Acoustic model and dictionary come from the trigger configuration.
        ps_config.set_string('-hmm', os.path.join(get_model_path(), self._tconfig['language']))
        ps_config.set_string('-dict', os.path.join(get_model_path(), self._tconfig['dictionary']))

        # Multiple hotwords: a key-phrase list file is used instead of the
        # single '-keyphrase' / '-kws_threshold' pair.
        ps_config.set_string('-kws', '/opt/AlexaPi/src/keyphrase.list')

        # Hide the VERY verbose logging information when not in debug
        if logging.getLogger('alexapi').getEffectiveLevel() != logging.DEBUG:
            ps_config.set_string('-logfn', '/dev/null')

        self._decoder = Decoder(ps_config)

    def run(self):
        """Start the detection loop in a daemon thread."""
        thread = threading.Thread(target=self.thread, args=())
        thread.daemon = True
        thread.start()

    def thread(self):
        """Worker loop: wait until enabled, listen, fire the callback.

        The callback receives this trigger and the detected phrase.
        """
        while True:
            self._enabled_lock.wait()

            # Enable reading microphone raw data
            inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL, self._config['sound']['input_device'])
            inp.setchannels(1)
            inp.setrate(16000)
            inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
            inp.setperiodsize(1024)

            self._decoder.start_utt()

            triggered = False
            while not triggered:
                if not self._enabled_lock.is_set():
                    break

                # Read from microphone
                _, buf = inp.read()

                # Detect if keyword/trigger word was said
                self._decoder.process_raw(buf, False, False)
                triggered = self._decoder.hyp() is not None

            # To avoid overflows close the microphone connection
            inp.close()

            self._decoder.end_utt()
            self._disabled_sync_lock.set()

            if triggered:
                # Explicit None check instead of a bare ``except``.
                hyp = self._decoder.hyp()
                voice_command = hyp.hypstr if hyp is not None else ""
                self._trigger_callback(self, voice_command)

    def enable(self):
        """Allow the worker to listen."""
        self._enabled_lock.set()
        self._disabled_sync_lock.clear()

    def disable(self):
        """Stop listening and block until the worker has released the mic."""
        self._enabled_lock.clear()
        self._disabled_sync_lock.wait()
stream = open(sys.argv[1], "rb") else: p = pyaudio.PyAudio() stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024) stream.start_stream() print('start...') while True: buf = stream.read(1024) if buf: decoder.process_raw(buf, False, False) else: break hypothesis = decoder.hyp() if hypothesis: print('\nhypothesis: %s, score: %d' % (hypothesis.hypstr, hypothesis.best_score)) print ([(seg.word, seg.prob, seg.start_frame, seg.end_frame) for seg in decoder.seg()]) print ("Detected keyword, restarting search") os.system('mpg123 ' + os.path.join(script_dir, 'hi.mp3')) print('restart...') decoder.end_utt() decoder.start_utt() print('ok') # break stream.close()
class AvaRecognizer(object):
    """Class to add ASR recognition functionality using language model + dictionary
    Publishes recognition output to recognizer/asr_output."""

    def __init__(self):
        """Read the node parameters, then start audio capture and decoding.

        Returns early (after logging an error) when the dictionary,
        language model or acoustic model parameter is missing.  Otherwise
        this call blocks in ``start_recognizer``.
        """
        # Initializing publisher with buffer size of 10 messages
        self.pub_ = rospy.Publisher("recognizer/asr_output", String, queue_size=10)
        # initialize node
        rospy.init_node("ava_recognizer")
        # Call custom function on node shutdown
        rospy.on_shutdown(self.shutdown)

        # Params
        _lm_param = "~lm"      # File containing language model
        _dict_param = "~dict"  # Dictionary
        _hmm_param = "~hmm"    # HMM Model

        # used in process_audio for piecing full utterances
        self.in_speech_bf = False

        # Setting param values
        if rospy.has_param(_dict_param) and rospy.get_param(_dict_param) != ":default":
            self.dict = rospy.get_param(_dict_param)
        else:
            rospy.logerr(
                "No dictionary found. Please add an appropriate dictionary argument."
            )
            return

        if rospy.has_param(_lm_param) and rospy.get_param(_lm_param) != ':default':
            self._use_lm = 1
            self.class_lm = rospy.get_param(_lm_param)
        else:
            rospy.logerr("No lm found. Please add an appropriate lm argument.")
            return

        if rospy.has_param(_hmm_param):
            self.hmm = rospy.get_param(_hmm_param)
            if rospy.get_param(_hmm_param) == ":default":
                if os.path.isdir(
                        "/home/team5/.local/lib/python2.7/site-packages/pocketsphinx/model"
                ):
                    rospy.loginfo("Loading the default acoustic model")
                    self.hmm = "/home/team5/.local/lib/python2.7/site-packages/pocketsphinx/model/en-us"
                    rospy.loginfo("Done loading the default acoustic model")
                else:
                    rospy.logerr("Failed to find default model.")
                    return
        else:
            rospy.logerr(
                "No language model specified. Couldn't find default model.")
            return

        # All params satisfied. Starting recognizer and audio thread
        self._audio_queue = Queue.Queue()
        self._kill_audio = False
        # Daemon thread: nothing in this class ever sets ``_kill_audio``,
        # so a non-daemon capture thread kept the process alive forever.
        audio_thread = threading.Thread(target=self.get_audio)
        audio_thread.daemon = True
        audio_thread.start()

        self.start_recognizer()

    def start_recognizer(self):
        """Function to handle lm or grammar processing of audio."""
        config = Decoder.default_config()
        rospy.loginfo("Done initializing pocketsphinx")

        # Setting configuration of decoder using provided params
        config.set_string('-dict', self.dict)
        config.set_string('-lm', self.class_lm)
        config.set_string('-hmm', self.hmm)
        self.decoder = Decoder(config)

        # Start processing input audio
        self.decoder.start_utt()
        rospy.loginfo("Decoder started successfully")

        # Subscribe to audio topic
        rospy.Subscriber("recognizer/audio_ready", Bool, self.process_audio)
        rospy.spin()

    def process_audio(self, isready):
        """Audio processing based on decoder config.

        Takes one chunk from the audio queue, feeds it to the decoder and,
        on a speech-to-silence transition, publishes the hypothesis.
        """
        assert (isready)
        data = self._audio_queue.get()

        self.decoder.process_raw(data, False, False)
        in_speech = self.decoder.get_in_speech()
        if in_speech == self.in_speech_bf:
            return
        self.in_speech_bf = in_speech
        if not in_speech:
            self.decoder.end_utt()
            hyp = self.decoder.hyp()
            if hyp is not None:
                rospy.loginfo('OUTPUT: \"' + hyp.hypstr + '\"')
                self.pub_.publish(hyp.hypstr)
            self.decoder.start_utt()

    @staticmethod
    def shutdown():
        """This function is executed on node shutdown."""
        # command executed after Ctrl+C is pressed
        rospy.loginfo("Stop AvaRecognizer")
        rospy.sleep(1)

    def get_audio(self):
        """
        Used for audio parsing thread.

        Captures from the ALSA device, queues every period of audio and
        announces it on ``recognizer/audio_ready``.
        """
        # parameters for PCM. view PCMs with 'pactl list sources short'.
        # don't modify me plz.
        device = 'sysdefault:CARD=Audio'
        inp = alsaaudio.PCM(type=alsaaudio.PCM_CAPTURE,
                            mode=alsaaudio.PCM_NORMAL,
                            card=device)
        try:
            inp.setchannels(1)
            inp.setrate(16000)
            inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
            inp.setperiodsize(1024)

            pub = rospy.Publisher('recognizer/audio_ready', Bool, queue_size=10)
            # Also stop when the node shuts down.
            while not (self._kill_audio or rospy.is_shutdown()):
                _, data = inp.read()
                self._audio_queue.put(data)
                pub.publish(True)
        finally:
            inp.close()
        return
def detect(self):
    """Listen on the microphone until one of the trigger phrases is heard.

    One key-phrase decoder per entry of ``self.config['triggers']`` is
    created on first use and cached in ``self.decoders``.  Fires
    ``detection_started`` before listening and ``detection_fullfilled``
    (with the ID of the matching trigger) afterwards.
    """
    # create decoders on the fly
    if not self.decoders:
        self.decoders = []
        # ``items()`` works on Python 2 and 3; ``iteritems()`` does not.
        for trigger_id, phrase in self.config['triggers'].items():
            config = Decoder.default_config()

            # set recognition model to US
            config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
            config.set_string(
                '-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))

            # specify recognition key phrase
            config.set_string('-keyphrase', phrase)
            config.set_float('-kws_threshold', 1e-5)

            # hide the VERY verbose logging information
            config.set_string('-logfn', '/dev/null')

            decoder = Decoder(config)
            decoder.id = trigger_id
            self.decoders.append(decoder)

    events.fire('detection_started')

    # start decoding
    for decoder in self.decoders:
        decoder.start_utt()

    pcm = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL,
                        self.config['device'])
    phrase = None
    try:
        pcm.setchannels(1)
        pcm.setrate(16000)
        pcm.setformat(alsaaudio.PCM_FORMAT_S16_LE)
        pcm.setperiodsize(1024)

        triggered = False
        while not triggered:
            _, audio = pcm.read()
            for decoder in self.decoders:
                decoder.process_raw(audio, False, False)
                triggered = decoder.hyp() is not None
                if triggered:
                    phrase = decoder.id
                    break
    finally:
        # Release the capture device even if reading or decoding raises.
        pcm.close()

    for decoder in self.decoders:
        decoder.end_utt()

    events.fire('detection_fullfilled', id=phrase)
class PocketsphinxTrigger(BaseTrigger):
    """Voice trigger that spots a single key phrase with PocketSphinx."""

    type = triggers.TYPES.VOICE

    def __init__(self, config, trigger_callback):
        super(PocketsphinxTrigger, self).__init__(config, trigger_callback, 'pocketsphinx')

        # Set while detection is enabled.
        self._enabled_lock = threading.Event()
        # Set by the worker once it has stopped listening.
        self._disabled_sync_lock = threading.Event()
        self._decoder = None

    def setup(self):
        """Build the key-phrase decoder from the trigger configuration."""
        ps_config = Decoder.default_config()

        # Set recognition model to US
        ps_config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
        ps_config.set_string(
            '-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))

        # Specify recognition key phrase
        ps_config.set_string('-keyphrase', self._tconfig['phrase'])
        ps_config.set_float('-kws_threshold', float(self._tconfig['threshold']))

        # Hide the VERY verbose logging information when not in debug
        if logging.getLogger('alexapi').getEffectiveLevel() != logging.DEBUG:
            ps_config.set_string('-logfn', '/dev/null')

        self._decoder = Decoder(ps_config)

    def run(self):
        """Start the detection loop in a daemon thread."""
        thread = threading.Thread(target=self.thread, args=())
        # ``daemon`` / ``is_set()`` replace the deprecated camelCase
        # ``setDaemon()`` / ``isSet()`` aliases.
        thread.daemon = True
        thread.start()

    def thread(self):
        """Worker loop: wait until enabled, listen, fire the callback."""
        while True:
            self._enabled_lock.wait()

            # Enable reading microphone raw data
            inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL,
                                self._config['sound']['input_device'])
            inp.setchannels(1)
            inp.setrate(16000)
            inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
            inp.setperiodsize(1024)

            self._decoder.start_utt()

            triggered = False
            while not triggered:
                if not self._enabled_lock.is_set():
                    break

                # Read from microphone
                _, buf = inp.read()

                # Detect if keyword/trigger word was said
                self._decoder.process_raw(buf, False, False)
                triggered = self._decoder.hyp() is not None

            # To avoid overflows close the microphone connection
            inp.close()

            self._decoder.end_utt()
            self._disabled_sync_lock.set()

            if triggered:
                self._trigger_callback(self)

    def enable(self):
        """Allow the worker to listen."""
        self._enabled_lock.set()
        self._disabled_sync_lock.clear()

    def disable(self):
        """Stop listening and block until the worker has released the mic."""
        self._enabled_lock.clear()
        self._disabled_sync_lock.wait()
channels=1, rate=16000, input=True, frames_per_buffer=1024) stream.start_stream() print('start...') while True: buf = stream.read(1024) if buf: decoder.process_raw(buf, False, False) else: break hypothesis = decoder.hyp() if hypothesis: print('\nhypothesis: %s, score: %d' % (hypothesis.hypstr, hypothesis.best_score)) print([(seg.word, seg.prob, seg.start_frame, seg.end_frame) for seg in decoder.seg()]) print("Detected keyword, restarting search") os.system('mpg123 ' + os.path.join(script_dir, 'hi.mp3')) print('restart...') decoder.end_utt() decoder.start_utt() print('ok') # break stream.close()
class PocketKeyword(object):
    """Blocking key-phrase detector using PocketSphinx and PyAudio."""

    AUDIO_CHUNK_SIZE = 1024
    AUDIO_RATE = 16000

    def __init__(self, phrase, threshold, device_index=0):
        """
        :param phrase: key phrase to detect.
        :param threshold: value converted to float for ``-kws_threshold``.
        :param device_index: PyAudio input device index.
        """
        self._decoder = None
        self._pa = None
        # Input stream; opened by _handle_init().
        self._handle = None
        self._device_no = device_index
        self._phrase = phrase
        self._threshold = float(threshold)

        logging.info('Phrase: %s Threshold: %s', phrase, threshold)

        ps_config = Decoder.default_config()
        # Set recognition model to US
        ps_config.set_string('-hmm',
                             os.path.join(get_model_path_keyword(), 'en-us'))
        ps_config.set_string(
            '-dict',
            os.path.join(get_model_path_keyword(), 'cmudict-en-us.dict'))
        # Specify recognition key phrase
        ps_config.set_string('-keyphrase', self._phrase)
        ps_config.set_float('-kws_threshold', self._threshold)
        ps_config.set_string('-logfn', '/dev/null')

        self._decoder = Decoder(ps_config)
        self._pa = pyaudio.PyAudio()

    def _handle_init(self, rate, chunk_size):
        """Open the mono 16-bit input stream on the configured device."""
        self._handle = self._pa.open(input=True,
                                     input_device_index=self._device_no,
                                     format=pyaudio.paInt16,
                                     channels=1,
                                     rate=rate,
                                     frames_per_buffer=chunk_size)

    def _handle_release(self):
        """Stop and close the input stream."""
        self._handle.stop_stream()
        self._handle.close()

    def _handle_read(self, chunk_size):
        """Read ``chunk_size`` frames, ignoring input overflows."""
        return self._handle.read(chunk_size, exception_on_overflow=False)

    def getHypothesys(self):
        """Block until the key phrase is heard and return the hypothesis text."""
        # init microphone
        self._handle_init(self.AUDIO_RATE, self.AUDIO_CHUNK_SIZE)
        self._decoder.start_utt()

        hypothesis = None
        try:
            while hypothesis is None:
                # Read from microphone and process
                data = self._handle_read(self.AUDIO_CHUNK_SIZE)
                self._decoder.process_raw(data, False, False)
                # best guess from CMU Sphinx STT
                hypothesis = self._decoder.hyp()
        finally:
            # Release the microphone and close the utterance even when
            # reading or decoding raises.
            self._handle_release()
            self._decoder.end_utt()

        return hypothesis.hypstr
class PocketsphinxTrigger(VoiceTrigger):
    """Voice trigger that spots a single key phrase with PocketSphinx."""

    name = 'pocketsphinx'

    AUDIO_CHUNK_SIZE = 1024
    AUDIO_RATE = 16000

    _capture = None

    def __init__(self, config, trigger_callback, capture):
        """
        :param config: configuration, forwarded to the base class.
        :param trigger_callback: called with this trigger on detection.
        :param capture: audio source providing ``handle_init``,
            ``handle_read`` and ``handle_release``.
        """
        super(PocketsphinxTrigger, self).__init__(config, trigger_callback)

        self._capture = capture

        # Set while detection is enabled.
        self._enabled_lock = threading.Event()
        # Set by the worker once it has stopped listening.
        self._disabled_sync_lock = threading.Event()
        self._decoder = None
        # The decoder actually used by setup()/thread(); it used to be
        # undefined until setup() ran.
        self._detector = None

    def setup(self):
        """Build the key-phrase decoder from the trigger configuration."""
        ps_config = Decoder.default_config()

        # Set recognition model to US
        ps_config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
        ps_config.set_string(
            '-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))

        # Specify recognition key phrase
        ps_config.set_string('-keyphrase', self._tconfig['phrase'])
        ps_config.set_float('-kws_threshold', float(self._tconfig['threshold']))

        # Hide the VERY verbose logging information when not in debug
        if logging.getLogger('alexapi').getEffectiveLevel() != logging.DEBUG:
            null_path = '/dev/null'
            if platform.system() == 'Windows':
                null_path = 'nul'
            ps_config.set_string('-logfn', null_path)

        self._detector = Decoder(ps_config)

    def thread(self):
        """Worker loop: wait until enabled, listen, fire the callback."""
        while True:
            self._enabled_lock.wait()

            self._capture.handle_init(self.AUDIO_RATE, self.AUDIO_CHUNK_SIZE)
            self._detector.start_utt()

            triggered = False
            while not triggered:
                # ``is_set()`` replaces the deprecated ``isSet()`` alias.
                if not self._enabled_lock.is_set():
                    break

                # Read from microphone
                data = self._capture.handle_read()

                # Detect if keyword/trigger word was said
                self._detector.process_raw(data, False, False)
                triggered = self._detector.hyp() is not None

            self._capture.handle_release()
            self._detector.end_utt()
            self._disabled_sync_lock.set()

            if triggered:
                self._trigger_callback(self)
class PocketsphinxTrigger(BaseTrigger):
    """Multi-hotword voice trigger driven by a PocketSphinx ``-kws`` list.

    When the decoder reports a hypothesis, the trigger callback is invoked
    with the recognised text as the voice command.
    """

    type = triggers.TYPES.VOICE

    AUDIO_CHUNK_SIZE = 1024
    AUDIO_RATE = 16000

    # File handed to the decoder's '-kws' option (multiple hotwords).
    KEYPHRASE_LIST = '/opt/AlexaPi/src/keyphrase.list'

    _capture = None

    def __init__(self, config, trigger_callback, capture):
        """Store the capture device and create the enable/disable events."""
        super(PocketsphinxTrigger, self).__init__(
            config, trigger_callback, 'pocketsphinx')

        self._capture = capture

        self._enabled_lock = threading.Event()
        self._disabled_sync_lock = threading.Event()
        self._decoder = None

    def setup(self):
        """Build the PocketSphinx decoder from the trigger configuration."""
        # PocketSphinx configuration
        ps_config = Decoder.default_config()

        # Acoustic model and dictionary come from the trigger configuration
        ps_config.set_string(
            '-hmm', os.path.join(get_model_path(), self._tconfig['language']))
        ps_config.set_string(
            '-dict',
            os.path.join(get_model_path(), self._tconfig['dictionary']))

        # Multiple hotwords: a keyphrase list is used instead of the single
        # '-keyphrase' / '-kws_threshold' pair.
        ps_config.set_string('-kws', self.KEYPHRASE_LIST)

        # Hide the VERY verbose logging information when not in debug
        if logging.getLogger('alexapi').getEffectiveLevel() != logging.DEBUG:
            null_path = 'nul' if platform.system() == 'Windows' else '/dev/null'
            ps_config.set_string('-logfn', null_path)

        # Process audio chunk by chunk. On keyword detected perform action
        # and restart search
        self._decoder = Decoder(ps_config)

    def run(self):
        """Start ``thread()`` in a daemon thread."""
        thread = threading.Thread(target=self.thread, args=())
        thread.daemon = True
        thread.start()

    def thread(self):
        """Worker loop: wait until enabled, listen, then fire the callback.

        Runs forever. The capture device is released, the utterance ended
        and ``_disabled_sync_lock`` set even when reading or decoding raises.
        """
        while True:
            self._enabled_lock.wait()

            self._capture.handle_init(self.AUDIO_RATE, self.AUDIO_CHUNK_SIZE)
            utterance_open = False
            triggered = False
            try:
                self._decoder.start_utt()
                utterance_open = True

                # Stop as soon as a hypothesis exists or the trigger is
                # disabled.
                while not triggered and self._enabled_lock.is_set():
                    # Read from microphone
                    data = self._capture.handle_read()
                    # Detect if keyword/trigger word was said
                    self._decoder.process_raw(data, False, False)
                    triggered = self._decoder.hyp() is not None
            finally:
                self._capture.handle_release()
                if utterance_open:
                    self._decoder.end_utt()
                self._disabled_sync_lock.set()

            if triggered:
                self._trigger_callback(self, self._read_voice_command())

    def _read_voice_command(self):
        """Return the current hypothesis text, or '' if none is available."""
        # Best effort, as before, but no longer swallows
        # SystemExit/KeyboardInterrupt.
        try:
            hypothesis = self._decoder.hyp()
            return hypothesis.hypstr if hypothesis is not None else ""
        except Exception:  # pylint: disable=broad-except
            logging.getLogger('alexapi').debug(
                "Could not read hypothesis text", exc_info=True)
            return ""

    def enable(self):
        """Allow the worker loop to listen again."""
        self._enabled_lock.set()
        self._disabled_sync_lock.clear()

    def disable(self):
        """Stop listening and block until the worker released the device."""
        self._enabled_lock.clear()
        self._disabled_sync_lock.wait()
class NLUAudio(NLUBase):
    """NLU component driven by audio.

    Listens for a hotword with PocketSphinx; once it is heard, plays a
    confirmation sound, calls ``nlu_audio`` and runs the action named by
    each returned intent.
    """

    def __init__(self, settings, action_queue, tts_queue, logger):
        NLUBase.__init__(self, settings, action_queue, None, tts_queue, logger)
        # Init private attributes
        self._rerun = True
        self._answer_sound_path = "sounds/answer.wav"
        self._decoder = None

        self._config = Decoder.default_config()
        if not self._prepare_decoder():
            self._must_run = False

    def _prepare_decoder(self):
        """Configure and create the decoder.

        Returns:
            True when the decoder was created, False otherwise.

        Raises:
            HotWordError: if the ``pocketsphinx-data`` folder is missing.
        """
        # prepare config
        self._hotword = self._settings['speech']['hotword']
        if not os.path.isdir("pocketsphinx-data"):
            raise HotWordError("Missing pocketsphinx-data folder. "
                               "Please run `make hotword`")

        language_dir = os.path.join("pocketsphinx-data",
                                    self._settings['speech']['language'])
        acoustic_model = os.path.join(language_dir, 'acoustic-model')
        language_model = os.path.join(language_dir, 'language-model.lm.bin')
        pocket_dict = os.path.join(language_dir,
                                   'pronounciation-dictionary.dict')

        self._config.set_string('-logfn', "/dev/null")
        self._config.set_string('-hmm', acoustic_model)
        self._config.set_string('-lm', language_model)
        self._config.set_string('-dict', pocket_dict)
        try:
            self._decoder = Decoder(self._config)
        except RuntimeError:
            self.logger.critical("Error get audio decoder. "
                                 "Hotword not started")
            return False
        self._decoder.set_keyphrase('wakeup', self._hotword)
        self._decoder.set_search('wakeup')
        # Success must be truthy: __init__ tests the return value.
        return True

    def stop(self):
        """Stop process"""
        self._rerun = False
        NLUBase.stop(self)

    def _answering(self):
        """Play the hotword confirmation sound."""
        f_ans = wave.open(self._answer_sound_path, "rb")
        try:
            stream = self._paudio.open(
                format=self._paudio.get_format_from_width(
                    f_ans.getsampwidth()),
                channels=f_ans.getnchannels(),
                rate=f_ans.getframerate(),
                output=True)
            try:
                data = f_ans.readframes(1024)
                while len(data) > 0:
                    stream.write(data)
                    data = f_ans.readframes(1024)
            finally:
                # The output stream used to be left open after every answer.
                stream.stop_stream()
                stream.close()
        finally:
            f_ans.close()

    def _dispatch_intent(self, intent):
        """Run the action for one NLU intent, or report a misunderstanding."""
        value = intent.get("value")
        confidence = intent.get("confidence")
        self.logger.info("Intent: %s", value)
        self.logger.info("Confidence: %s", confidence)
        # TODO log arguments
        if value == "NO_MATCH":
            # I don't understand :/
            self._misunderstand(0, True, True)
            return
        if confidence is None or confidence < 0.8:
            # I'm not sure I understand :/ (a missing confidence counts as 0)
            self._misunderstand(confidence or 0, True, True)
            return
        # Check intent name: it must look like "<action>__<method>"
        parts = value.split("__") if isinstance(value, str) else []
        if len(parts) != 2:
            self.logger.critical("BAD Intent name: %s", value)
            self._misunderstand(0, True, True)
            # Do not fall through: unpacking the parts would raise.
            return
        action, method = parts
        # Run action
        # TODO add parameters from NLU response
        self._run_action(action, method, {}, False, True, True)

    def run(self):
        """Listen for the hotword, then hand over to NLU."""
        self._rerun = True
        self._must_run = True
        if self._decoder is None:
            # Decoder creation failed earlier; there is nothing to listen with.
            self.logger.warning("Disabling NLU audio")
            self._must_run = False
            self._rerun = False
            return
        self.logger.debug("starting listening hotword %s", self._hotword)
        while self._rerun:
            self._rerun = False
            self._paudio = None
            try:
                self._paudio = pyaudio.PyAudio()
                stream = self._paudio.open(format=pyaudio.paInt16, channels=1,
                                           rate=16000, input=True,
                                           frames_per_buffer=1024)
            except OSError:
                self.logger.warning("No audio device found can not listen "
                                    "for NLU")
                self.logger.warning("Disabling NLU audio")
                if self._paudio is not None:
                    self._paudio.terminate()
                self._must_run = False
                self._rerun = False
                return
            try:
                stream.start_stream()
                self._paudio.get_default_input_device_info()
                self._decoder.start_utt()
                try:
                    self._listen(stream)
                finally:
                    self._decoder.end_utt()
            finally:
                # Release audio resources before a possible rerun opens new
                # ones.
                stream.stop_stream()
                stream.close()
                self._paudio.terminate()

    def _listen(self, stream):
        """Feed audio to the decoder until stopped or the hotword is handled."""
        while self._must_run:
            buf = stream.read(1024)
            self._decoder.process_raw(buf, False, False)
            if not self.tts_queue.empty():
                # If tts_queue is not empty, this means the Droid
                # is currently speaking. So we don't want it to listen itself
                # TODO replace this stuff by speaker annulation
                continue
            hypothesis = self._decoder.hyp()
            if not (hypothesis and hypothesis.hypstr == self._hotword):
                continue
            self.logger.debug("Hotword detected")
            self._answering()
            ret = nlu_audio(self._settings, self.logger)
            # GOT ACTIONS
            interpretations = ret.get("nlu_interpretation_results", {}).\
                get("payload", {}).get("interpretations", {})
            # TODO: what about if len(interpretations) > 1 ??
            for interpretation in interpretations:
                intent = interpretation.get("action", {}).get("intent", {})
                self._dispatch_intent(intent)
            # TODO run nlu audio detection
            self._rerun = True
            break
class PocketGrammar(object):
    """Blocking JSGF-grammar recogniser built on a PocketSphinx ``Decoder``.

    The acoustic model, dictionary and grammar are loaded from
    ``model_path`` using the ``HMM``, ``DIC`` and ``GRAMMAR`` names.
    """

    AUDIO_CHUNK_SIZE = 1024
    AUDIO_RATE = 16000
    HMM = 'cmusphinx-5prealpha-en-us-ptm-2.0/'
    DIC = 'dictionary.dic'
    GRAMMAR = 'grammar.jsgf'

    def __init__(self, device_index=0, model_path=None):
        """Configure the decoder and create the PyAudio instance.

        Args:
            device_index: PyAudio input device index.
            model_path: directory containing the model files. Required,
                despite the ``None`` default kept for signature
                compatibility.

        Raises:
            TypeError: if ``model_path`` is ``None``.
        """
        if model_path is None:
            # os.path.join(None, ...) would raise a much less helpful
            # TypeError.
            raise TypeError('model_path is required')

        self._decoder = None
        self._pa = None
        self._handle = None
        self._device_no = device_index
        self._model_path = model_path

        # PocketSphinx configuration
        grammar_file = os.path.join(model_path, self.GRAMMAR)
        logging.info('Grammar file:%s', grammar_file)
        ps_config = Decoder.default_config()

        ps_config.set_string('-hmm', os.path.join(model_path, self.HMM))
        ps_config.set_string('-dict', os.path.join(model_path, self.DIC))
        ps_config.set_string('-jsgf', grammar_file)
        ps_config.set_string('-logfn', '/dev/null')

        self._decoder = Decoder(ps_config)
        self._pa = pyaudio.PyAudio()

    def _handle_init(self, rate, chunk_size):
        """Open the mono 16-bit input stream on the configured device."""
        self._handle = self._pa.open(input=True,
                                     input_device_index=self._device_no,
                                     format=pyaudio.paInt16,
                                     channels=1,
                                     rate=rate,
                                     frames_per_buffer=chunk_size)

    def _handle_release(self):
        """Stop and close the input stream opened by ``_handle_init``."""
        self._handle.stop_stream()
        self._handle.close()

    def _handle_read(self, chunk_size):
        """Read ``chunk_size`` frames, ignoring input overflow."""
        return self._handle.read(chunk_size, exception_on_overflow=False)

    def getHypothesys(self):
        """Block until speech followed by silence yields a hypothesis.

        The microphone is opened for the duration of the call and is
        released (and the utterance ended) even if reading or decoding
        raises.

        Returns:
            The ``hypstr`` of the first non-``None`` hypothesis obtained
            once the decoder has reported speech and is no longer in speech.
        """
        # init microphone
        self._handle_init(self.AUDIO_RATE, self.AUDIO_CHUNK_SIZE)
        utterance_open = False
        try:
            self._decoder.start_utt()
            utterance_open = True

            speech_seen = False
            hypothesis = None
            while hypothesis is None:
                # Read from microphone and process
                data = self._handle_read(self.AUDIO_CHUNK_SIZE)
                self._decoder.process_raw(data, False, False)

                in_speech = self._decoder.get_in_speech()
                # transition from silence to speech
                if in_speech and not speech_seen:
                    speech_seen = True
                    logging.debug("Silence")
                # speech was seen and the decoder is no longer in speech
                if not in_speech and speech_seen:
                    hypothesis = self._decoder.hyp()
        finally:
            # close microphone first, then end the utterance (original order)
            self._handle_release()
            if utterance_open:
                self._decoder.end_utt()

        return hypothesis.hypstr
def main():
    """Match spoken sentences against configured commands and run scripts.

    Reads ``config.yaml`` from the current working directory, listens on
    the default microphone and alternates between two decoder searches:
    ``'keyword'`` while waiting for an invocation phrase and ``'lm'`` while
    an invocation context is active. Scripts named in the configuration are
    launched relative to the current working directory. systemd is notified
    of readiness and pinged on every audio chunk.
    """
    notifier = sdnotify.SystemdNotifier()
    cwd = os.getcwd()

    def launch(script, *args):
        """Start ``script`` (relative to the CWD) with ``args``."""
        return subprocess.Popen([os.path.join(cwd, script)] + list(args))

    # Load config first. safe_load: plain yaml.load() without a Loader can
    # construct arbitrary objects and is rejected by recent PyYAML releases.
    with open(os.path.join(cwd, 'config.yaml'), 'r') as config_file:
        config = yaml.safe_load(config_file)
    interaction_timeout = int(config['interaction_timeout'])

    # Create Decoder config
    pocketsphinx_config = Decoder.default_config()
    pocketsphinx_config.set_string(
        '-hmm', os.path.join(cwd, config['hmm_path']))
    pocketsphinx_config.set_string(
        '-dict', os.path.join(cwd, config['dict_path']))
    pocketsphinx_config.set_string(
        '-featparams', os.path.join(cwd, config['feat_params_path']))
    pocketsphinx_config.set_boolean("-allphone_ci", True)
    # The keyword list and language model are attached below with
    # decoder.set_kws / decoder.set_lm_file rather than '-kws' / '-lm'.

    # Initialize audio
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                    input=True, frames_per_buffer=1024)
    try:
        stream.start_stream()

        # Load invocations and commands
        invocations = config['invocations']

        # Process audio chunk by chunk. On keyword detected perform action
        # and restart search
        decoder = Decoder(pocketsphinx_config)
        logmath = decoder.get_logmath()
        decoder.set_kws('keyword',
                        os.path.join(cwd, config['invocation_path']))
        decoder.set_lm_file('lm', os.path.join(cwd, config['lm_path']))

        invocation_ctx = None
        in_speech_bf = False

        # Run some initialization scripts for terminal displays
        launch(config['init_exec']).communicate()

        decoder.set_search('keyword')
        decoder.start_utt()
        notifier.notify("READY=1")

        interaction_time = None

        while True:
            notifier.notify("WATCHDOG=1")
            buf = stream.read(1024, exception_on_overflow=False)
            if not buf:
                logging.error("Unable to get audio, exiting")
                break
            decoder.process_raw(buf, False, False)

            hyp = decoder.hyp()
            hyp_str = hyp.hypstr.lower().strip() if hyp else None
            now_in_speech = decoder.get_in_speech()

            if now_in_speech != in_speech_bf:
                in_speech_bf = now_in_speech
                if not in_speech_bf:
                    # Speech just ended: close the utterance, act on it,
                    # reopen.
                    decoder.end_utt()
                    if hyp_str:
                        # Confidence is a float, so '%d' always truncated it.
                        logging.info(
                            "Heard: '%s' while being in '%s' context "
                            "(score: %d, confidence: %.4f -> in log scale %d)",
                            hyp_str, invocation_ctx, hyp.best_score,
                            logmath.exp(hyp.prob), hyp.prob)

                        if not invocation_ctx:
                            if hyp_str in invocations:
                                logging.info("Matched invocation: '%s'",
                                             hyp_str)
                                invocation_ctx = hyp_str
                                ctx_cfg = invocations[invocation_ctx]
                                launch(ctx_cfg['enter'],
                                       ctx_cfg['voice_params'],
                                       invocation_ctx,
                                       hyp_str).communicate()
                                interaction_time = time.time()
                                decoder.set_search('lm')
                            else:
                                logging.debug('Unknown invocation or wrongly '
                                              'heard, silently ignoring')
                        else:
                            ctx_cfg = invocations[invocation_ctx]
                            matched = False
                            score_dict = defaultdict(list)

                            for command in ctx_cfg['commands']:
                                logging.info("- command: '%s':",
                                             command['name'])
                                for sentence in command['sentence']:
                                    score = calc_similarity(
                                        command, sentence.lower(), hyp_str)
                                    score_dict[score].append(command)
                                    logging.debug(
                                        " - similarity: %d for sentence: %s",
                                        score, sentence)
                                    if score == 1000:
                                        logging.debug(
                                            "... seems like found perfect "
                                            "match, ignoring the rest")
                                        break

                            # Only the highest score can pass the threshold.
                            best_score = max(score_dict) if score_dict else None
                            if best_score is not None and best_score > 90:
                                # here might be some randomness
                                command = score_dict[best_score][0]
                                logging.info(
                                    "The best matching command is '%s', "
                                    "executing: %s",
                                    command['name'], command['exec'])
                                launch(ctx_cfg['ack'],
                                       ctx_cfg['voice_params'],
                                       invocation_ctx,
                                       hyp_str).communicate()
                                launch(command['exec'],
                                       ctx_cfg['voice_params'],
                                       invocation_ctx,
                                       command['name']).communicate()
                                # The exit script is not waited for.
                                launch(ctx_cfg['exit'],
                                       ctx_cfg['voice_params'],
                                       invocation_ctx, hyp_str)
                                invocation_ctx = None
                                decoder.set_search('keyword')
                                matched = True

                            if not matched:
                                logging.info("... not matched, ignoring")
                                launch(ctx_cfg['noop'],
                                       ctx_cfg['voice_params'],
                                       invocation_ctx,
                                       hyp_str).communicate()

                    decoder.start_utt()

            if (invocation_ctx and interaction_time
                    and time.time() > interaction_time + interaction_timeout):
                logging.info("The invocation context has just timed out, "
                             "returning to listen for invocation word.")
                ctx_cfg = invocations[invocation_ctx]
                # The exit script is not waited for.
                launch(ctx_cfg['exit'], ctx_cfg['voice_params'],
                       invocation_ctx)
                invocation_ctx = None
                interaction_time = None
                decoder.end_utt()
                decoder.set_search('keyword')
                decoder.start_utt()
    finally:
        # Release the audio device however the loop ended.
        stream.stop_stream()
        stream.close()
        p.terminate()
def main():
    """Transcribe microphone speech and publish each result to redis.

    Configuration is loaded for the environment named by the
    ``ENVIRONMENT`` variable (default ``"dev"``). Every time the decoder
    leaves the in-speech state, the current hypothesis text, if any, is
    published on the ``subsystem.listener.recording`` channel.
    """
    environment: str = os.getenv("ENVIRONMENT", "dev")
    config: Dict = load_config(environment)
    initialize_logger(level=config["logging"]["level"],
                      filename=config["logging"]["filename"])

    redis_host = config["redis"]["host"]
    redis_port = config["redis"]["port"]
    logger.debug(f"Connecting to redis at {redis_host}:{redis_port}")
    redis_client: Redis = Redis(host=redis_host, port=redis_port, db=0)

    logger.debug("Initializing PyAudio interface")
    audio = pyaudio.PyAudio()
    microphone_index = get_microphone_index(audio,
                                            config["microphone"]["name"])
    logger.debug(
        f"Using microphone device '{config['microphone']['name']}' (card index {microphone_index})"
    )

    logger.debug(
        f"Intializing pocketsphinx Decoder using model dir {MODELDIR}")
    decoder_config: DecoderConfig = Decoder.default_config()
    decoder_config.set_string("-hmm", os.path.join(MODELDIR, "en-us/en-us"))
    decoder_config.set_string("-lm",
                              os.path.join(MODELDIR, "en-us/en-us.lm.bin"))
    decoder_config.set_string(
        "-dict", os.path.join(MODELDIR, "en-us/cmudict-en-us.dict"))
    decoder = Decoder(decoder_config)

    logger.debug("Opening audio stream")
    # NOTE(review): the stream is opened at 44100 Hz but no sample rate is
    # given to the decoder -- confirm the acoustic model expects this rate.
    stream = audio.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=44100,
                        input=True,
                        frames_per_buffer=2048,
                        input_device_index=microphone_index)
    stream.start_stream()

    in_speech_bf = False
    decoder.start_utt()

    try:
        logger.debug("Starting decoder loop")
        while True:
            buf = stream.read(2048)
            if not buf:
                logger.debug("Buffer closed. Ending")
                break

            logger.debug("Decoding raw audio")
            decoder.process_raw(buf, False, False)
            if decoder.get_in_speech() != in_speech_bf:
                logger.debug("GOT HERE")
                in_speech_bf = decoder.get_in_speech()
                if not in_speech_bf:
                    decoder.end_utt()
                    # hyp() may be None; that used to raise and end the
                    # whole listener via the broad except below.
                    hypothesis = decoder.hyp()
                    if hypothesis is not None:
                        transcription = hypothesis.hypstr
                        logger.debug(f"Result: {transcription}")
                        redis_client.publish("subsystem.listener.recording",
                                             transcription)
                    decoder.start_utt()
        decoder.end_utt()
    except Exception:
        logger.exception("Something bad happened")
    finally:
        try:
            # Release the audio device as well as the redis connection.
            stream.stop_stream()
            stream.close()
            audio.terminate()
        finally:
            redis_client.close()
class InstructionRecogniser(QThread):
    '''
    You should only use keyIn/keyOut, and shutdown after use.
    The thread starts itself when appropriate.
    Signals are emitted with any recognised instructions.
    '''

    def __init__(self, gui):
        QThread.__init__(self, gui)
        if settings.sphinx_acoustic_model_dir == '':
            # use default acoustic model
            acoustic_model_directory = path.join(get_model_path(), 'en-us')
        else:
            # use custom acoustic model
            acoustic_model_directory = settings.sphinx_acoustic_model_dir
        config = Decoder.default_config()
        config.set_string('-hmm', acoustic_model_directory)  # acoustic model
        # lexicon pronunciation
        config.set_string('-dict', settings.prepared_lexicon_file)
        # language model from grammar
        config.set_string('-jsgf', settings.prepared_grammar_file)
        config.set_string(
            '-logfn',
            settings.outputFileName(sphinx_decoder_log_file_base_name,
                                    ext='log'))
        self.listen = False
        self.decoder = Decoder(config)
        self.audio = None
        self.device = None

    def startup(self):
        '''Create the PyAudio instance and select the input device.'''
        self.audio = PyAudio()
        device_count = self.audio.get_device_count()
        if 0 <= settings.audio_input_device_index < device_count:
            self.device = settings.audio_input_device_index
        else:
            # out of range or -1 for default
            self.device = None

    def shutdown(self):
        '''Stop listening, wait for the thread, and release PyAudio.'''
        self.listen = False
        self.wait()
        # startup() may never have been called, or shutdown() called twice
        if self.audio is not None:
            self.audio.terminate()
            self.audio = None

    def keyIn(self):
        '''Start recording, unless the thread is already running.'''
        if not self.isRunning():
            self.listen = True
            self.start()

    def keyOut(self):
        '''Ask the recording loop to stop.'''
        self.listen = False

    def run(self):
        '''
        Record until keyOut, empty read or the duration limit, then decode
        the whole message at once and emit the matching signal.
        '''
        audio_stream = self.audio.open(input_device_index=self.device,
                                       channels=1,
                                       format=paInt16,
                                       rate=audio_sample_rate,
                                       frames_per_buffer=audio_chunk_size,
                                       input=True)
        chunks = []
        msg_duration = 0
        try:
            buff = audio_stream.read(audio_chunk_size)
            while (self.listen and len(buff) > 0
                   and msg_duration < message_duration_limit):
                chunks.append(buff)
                buff = audio_stream.read(audio_chunk_size)
                msg_duration += audio_chunk_size / audio_sample_rate
        finally:
            # close the stream even if a read fails
            audio_stream.close()
        audio_message = b''.join(chunks)

        # STYLE catch failures here (e.g. grammar/lex files not found)
        self.decoder.start_utt()
        self.decoder.process_raw(audio_message, False, True)
        self.decoder.end_utt()
        hyp = self.decoder.hyp()
        if hyp:
            SR_log('VOICE: "%s"' % hyp.hypstr)
            if settings.show_recognised_voice_strings:
                signals.statusBarMsg.emit('VOICE: "%s"' % hyp.hypstr)
            callsign_tokens, instr_lst = interpret_string(hyp.hypstr)
            signals.voiceMsgRecognised.emit(callsign_tokens, instr_lst)
        else:
            SR_log('VOICE: no hypothesis, message duration was %g s'
                   % msg_duration)
            signals.voiceMsgNotRecognised.emit()