def __init__(
        self,
        lmd=jasperpath.config("languagemodel.lm"),
        dictd=jasperpath.config("dictionary.dic"),
        lmd_persona=jasperpath.data("languagemodel_persona.lm"),
        dictd_persona=jasperpath.data("dictionary_persona.dic"),
        lmd_music=None,
        dictd_music=None,
        hmm_dir="/usr/local/share/pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k"):
    """
    Initiates the pocketsphinx instance.

    One decoder is created per transcription mode and stored in
    ``self._decoders``; each decoder logs to its own temporary file whose
    name is kept in ``self._logfiles``.

    Arguments:
    lmd -- filename of the full language model
    dictd -- filename of the full dictionary (.dic)
    lmd_persona -- filename of the 'Persona' language model
                   (containing, e.g., 'Jasper')
    dictd_persona -- filename of the 'Persona' dictionary (.dic)
    lmd_music -- (optional) filename of the music language model
    dictd_music -- (optional) filename of the music dictionary; the music
                   decoder is only created when both music files are given
    hmm_dir -- the path of the Hidden Markov Model (HMM)
    """
    self._logger = logging.getLogger(__name__)

    # quirky bug where first import doesn't work
    try:
        import pocketsphinx as ps
    except Exception:
        # Retry once; do not swallow KeyboardInterrupt/SystemExit.
        import pocketsphinx as ps

    # Reserve one log file per mode (the music log file is created even
    # when no music decoder is configured).
    self._logfiles = {}
    for mode, prefix in ((TranscriptionMode.MUSIC, 'psdecoder_music_'),
                         (TranscriptionMode.KEYWORD, 'psdecoder_keyword_'),
                         (TranscriptionMode.NORMAL, 'psdecoder_normal_')):
        with tempfile.NamedTemporaryFile(prefix=prefix, suffix='.log',
                                         delete=False) as f:
            self._logfiles[mode] = f.name

    self._decoders = {}
    if lmd_music and dictd_music:
        self._decoders[TranscriptionMode.MUSIC] = ps.Decoder(
            hmm=hmm_dir, lm=lmd_music, dict=dictd_music,
            logfn=self._logfiles[TranscriptionMode.MUSIC])
    self._decoders[TranscriptionMode.KEYWORD] = ps.Decoder(
        hmm=hmm_dir, lm=lmd_persona, dict=dictd_persona,
        logfn=self._logfiles[TranscriptionMode.KEYWORD])
    self._decoders[TranscriptionMode.NORMAL] = ps.Decoder(
        hmm=hmm_dir, lm=lmd, dict=dictd,
        logfn=self._logfiles[TranscriptionMode.NORMAL])
def __init__(self, lmd="languagemodel.lm", dictd="dictionary.dic",
             lmd_persona="languagemodel_persona.lm",
             dictd_persona="dictionary_persona.dic",
             lmd_music=None, dictd_music=None, **kwargs):
    """
    Initiates the pocketsphinx instance.

    Arguments:
    lmd -- filename of the full language model
    dictd -- filename of the full dictionary (.dic)
    lmd_persona -- filename of the 'Persona' language model
                   (containing, e.g., 'Jasper')
    dictd_persona -- filename of the 'Persona' dictionary (.dic)
    lmd_music -- (optional) filename of the music language model
    dictd_music -- (optional) filename of the music dictionary;
                   ``self.speechRec_music`` is only created when both
                   music files are given
    kwargs -- accepted for call compatibility and ignored here
    """
    # quirky bug where first import doesn't work
    try:
        import pocketsphinx as ps
    except Exception:
        # Retry once; do not swallow KeyboardInterrupt/SystemExit.
        import pocketsphinx as ps

    hmdir = "/usr/local/share/pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k"

    if lmd_music and dictd_music:
        self.speechRec_music = ps.Decoder(hmm=hmdir, lm=lmd_music,
                                          dict=dictd_music)
    self.speechRec_persona = ps.Decoder(hmm=hmdir, lm=lmd_persona,
                                        dict=dictd_persona)
    self.speechRec = ps.Decoder(hmm=hmdir, lm=lmd, dict=dictd)
def __init__(self, lmd, dictd, lmd_persona, dictd_persona,
             lmd_music=None, dictd_music=None):
    """
    Build the pocketsphinx decoders used by this instance.

    Arguments:
    lmd -- filename of the full language model
    dictd -- filename of the full dictionary (.dic)
    lmd_persona -- filename of the 'Persona' language model
                   (containing, e.g., 'Rex')
    dictd_persona -- filename of the 'Persona' dictionary (.dic)
    lmd_music -- (optional) filename of the music language model
    dictd_music -- (optional) filename of the music dictionary
    """
    acoustic_model = (
        "/usr/local/share/pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k")

    def make_decoder(language_model, dictionary):
        # Every decoder shares the same acoustic model.
        return ps.Decoder(hmm=acoustic_model, lm=language_model,
                          dict=dictionary)

    # The music decoder attribute only exists when both files are given.
    if lmd_music and dictd_music:
        self.speechRec_music = make_decoder(lmd_music, dictd_music)

    self.speechRec_persona = make_decoder(lmd_persona, dictd_persona)
    self.speechRec = make_decoder(lmd, dictd)
def __init__(self, vocabulary=None, vocabulary_keyword=None,
             vocabulary_music=None,
             hmm_dir="/usr/local/share/" +
                     "pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k"):
    """
    Initiates the pocketsphinx instance.

    One decoder is created per transcription mode and stored in
    ``self._decoders``; each decoder logs to its own temporary file whose
    name is kept in ``self._logfiles``.

    Arguments:
    vocabulary -- a PocketsphinxVocabulary instance
    vocabulary_keyword -- a PocketsphinxVocabulary instance
                          (containing, e.g., 'Jasper')
    vocabulary_music -- (optional) a PocketsphinxVocabulary instance
    hmm_dir -- the path of the Hidden Markov Model (HMM)

    NOTE(review): ``vocabulary`` and ``vocabulary_keyword`` default to None
    but their ``decoder_kwargs`` attribute is read unconditionally, so both
    must be supplied in practice.
    """
    self._logger = logging.getLogger(__name__)

    # quirky bug where first import doesn't work
    try:
        import pocketsphinx as ps
    except Exception:
        # Retry once; do not swallow KeyboardInterrupt/SystemExit.
        import pocketsphinx as ps

    # Reserve one log file per mode (the music log file is created even
    # when no music vocabulary is configured).
    self._logfiles = {}
    for mode, prefix in ((TranscriptionMode.MUSIC, 'psdecoder_music_'),
                         (TranscriptionMode.KEYWORD, 'psdecoder_keyword_'),
                         (TranscriptionMode.NORMAL, 'psdecoder_normal_')):
        with tempfile.NamedTemporaryFile(prefix=prefix, suffix='.log',
                                         delete=False) as f:
            self._logfiles[mode] = f.name

    self._decoders = {}
    if vocabulary_music is not None:
        self._decoders[TranscriptionMode.MUSIC] = ps.Decoder(
            hmm=hmm_dir,
            logfn=self._logfiles[TranscriptionMode.MUSIC],
            **vocabulary_music.decoder_kwargs)
    self._decoders[TranscriptionMode.KEYWORD] = ps.Decoder(
        hmm=hmm_dir,
        logfn=self._logfiles[TranscriptionMode.KEYWORD],
        **vocabulary_keyword.decoder_kwargs)
    self._decoders[TranscriptionMode.NORMAL] = ps.Decoder(
        hmm=hmm_dir,
        logfn=self._logfiles[TranscriptionMode.NORMAL],
        **vocabulary.decoder_kwargs)
def decodeSpeech(hmmd, lmdir, dictp, wavfile):
    """
    Decode a wav file with a freshly created pocketsphinx decoder.

    Arguments:
    hmmd -- acoustic model path, passed to the decoder as ``hmm``
    lmdir -- language model path, passed to the decoder as ``lm``
    dictp -- dictionary path, passed to the decoder as ``dict``
    wavfile -- path of the audio file to decode

    Returns the first element of the decoder's hypothesis.
    """
    speechRec = ps.Decoder(hmm=hmmd, lm=lmdir, dict=dictp)
    # open() works on Python 2 and 3 (file() is Python 2 only) and the
    # context manager closes the handle even if decoding fails.
    with open(wavfile, 'rb') as wavFile:
        # Skip the first 44 bytes (presumably the WAV header).
        wavFile.seek(44)
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return result[0]
def __init__(self):
    """
    Validate the acoustic model directory, then create the decoder and
    open the audio input stream.

    Sets ``self.decoder`` and ``self.stream``.

    Raises:
    EnvironmentError -- if ``HMM_DIR`` does not exist
    """
    # ensure the import for specific linux distrib as Ubuntu 14.04 for example
    try:
        import pocketsphinx as psphinx
    except Exception:
        # Retry once; do not swallow KeyboardInterrupt/SystemExit.
        import pocketsphinx as psphinx

    # Checking if hmm directory exists
    if not path.exists(HMM_DIR):
        # The original passed HMM_DIR as a second print argument, so the
        # '%s' placeholder was never filled in.
        message = "hmm_dir in '%s' does not exist!" % HMM_DIR
        print(message)
        raise EnvironmentError(message)

    # Checking for missing files in hmm directory
    required_files = ('feat.params', 'mdef', 'means', 'noisedict',
                      'transition_matrices', 'variances')
    missing_hmm_files = [
        name for name in required_files
        if not path.exists(path.join(HMM_DIR, name))
    ]

    # Checking the rest separately because we need only one of those two files
    mixweights = path.exists(path.join(HMM_DIR, 'mixture_weights'))
    sendump = path.exists(path.join(HMM_DIR, 'sendump'))
    if not mixweights and not sendump:
        missing_hmm_files.append('mixture_weights or sendump')

    if missing_hmm_files:
        print("[Warning] %s : hmm files are missing in hmm directory."
              % ', '.join(missing_hmm_files))

    # Decoding instance and config if everything is OK
    config = psphinx.Decoder.default_config()
    config.set_string('-hmm', HMM_DIR)
    config.set_string('-lm', path.join(HMM_DIR, '.lm.bin'))
    config.set_string('-dict', 'static/custom.dict')
    config.set_string('-logfn', '/dev/null')
    self.decoder = psphinx.Decoder(config)

    self.stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                         input=True, frames_per_buffer=2048)
def get_decoder():
    """
    Return the speech-to-text entry for the active mind, creating and
    caching it in ``oa.stt.decoders`` when the cached value is falsy.
    """
    mind = oa.mind.active
    cached = oa.stt.decoders[mind.name]
    if cached:
        return cached

    # Configure Speech to text dictionaries
    entry = config_stt(mind.cache_dir, mind.kws.keys(),
                       stat_mtime(mind.module))

    # Process audio chunk by chunk. On keyphrase detected perform action
    # and restart search
    config = pocketsphinx.Decoder.default_config()

    info('cur lang = ' + mind.lang)
    # The hidden Markov model (HMM) parameter files are looked up in the
    # 'model/<lang>' directory next to the pocketsphinx module.
    package_dir = os.path.dirname(pocketsphinx.pocketsphinx.__file__)
    config.set_string('-hmm', os.path.join(package_dir, 'model', mind.lang))
    config.set_string("-lm", entry.lang_file)
    config.set_string("-dict", entry.dic_file)
    # disable logging (logging causes unwanted output in terminal)
    config.set_string("-logfn", os.devnull)

    entry.decoder = pocketsphinx.Decoder(config)
    oa.stt.decoders[mind.name] = entry
    return entry
def decode_speech(wavfile,
                  hmm='/home/ec2-user/download/cmusphinx-5prealpha-en-us-2.0',
                  lm="/home/ec2-user/download/cmusphinx-5.0-en-us.lm.dmp",
                  dic="/home/ec2-user/download/pocketsphinx-0.8/model/lm/en_US/hub4.5000.dic"):
    """
    Decodes a speech file

    Arguments:
    wavfile -- path of the audio file to decode
    hmm -- acoustic model path
    lm -- language model path
    dic -- dictionary path

    Returns the result of ``analyze_response`` for the decoded text with
    the text itself added under the 'speech' key. If pocketsphinx or
    sphinxbase cannot be imported, a message is printed and the tuple
    ("Something went wrong", "") is returned instead.
    """
    try:
        import pocketsphinx as ps
        import sphinxbase  # noqa: F401 -- imported only to verify it exists
    except Exception:
        # print(...) with a single argument behaves the same on Python 2
        # and 3, unlike the original print statement.
        print("""Pocketsphinx and sphixbase is not installed
        in your system. Please install it with package manager.
        """)
        return ("Something went wrong", "")

    sample_rate = '16000'
    speechRec = ps.Decoder(hmm=hmm, lm=lm, dict=dic, samprate=sample_rate)

    # open() replaces the Python 2 only file(); the context manager closes
    # the handle even when decoding fails.
    with open(wavfile, 'rb') as wavFile:
        # NOTE(review): skips the first 100 bytes; other decoders in this
        # code base skip 44 -- confirm the intended header size.
        wavFile.seek(100)
        speechRec.decode_raw(wavFile)

    result = speechRec.get_hyp()
    print(type(result))
    print(result)

    speech = result[0]
    ling_stats = analyze_response(speech)
    ling_stats['speech'] = speech
    return ling_stats
async def _start(self):
    """Create the pocketsphinx decoder from this instance's model paths,
    then delegate to the parent class' ``_start``."""
    decoder_config = pocketsphinx.Decoder.default_config()
    # Option name -> attribute holding the corresponding path.
    for option, attribute in (('-hmm', 'hmm_path'),
                              ('-lm', 'lm_path'),
                              ('-dict', 'dict_path')):
        decoder_config.set_string(option, getattr(self, attribute))

    self._decoder = pocketsphinx.Decoder(decoder_config)
    await super(PocketSphinxTranscriber, self)._start()
def main():
    """
    Recognise the wav file named on the command line and write the
    recognised text plus its concept table to ``concept_table.txt``.

    Exits with the message "Incorrect number of arguments" unless exactly
    one command-line argument (the wav file) is given.
    """
    if len(sys.argv) != 2:
        print("Usage: {0} <wav file>".format(sys.argv[0]))
        sys.exit("Incorrect number of arguments")

    path = "/proj/speech/users/cs4706/asrhw/kmh2151/"
    grammar_file = "gram.jsgf"
    dictionary_file = "wlist5o.dic"
    wav_file = sys.argv[1]

    # Acoustic model
    am = ps_base + '/share/pocketsphinx/model/hmm/en_US/hub4_16k_4000s'

    decoder = ps.Decoder(am,
                         os.path.join(path, grammar_file),
                         os.path.join(path, dictionary_file))

    # Run the Recognizer. open() replaces the Python 2 only file(); the
    # context manager closes the handle even if decoding fails.
    with open(wav_file, 'rb') as fh:
        decoder.decode_raw(fh)
        result = decoder.get_hyp()

    # NOTE(review): assumes the hypothesis is a string whose text sits
    # between the first pair of single quotes -- confirm against the
    # pocketsphinx version in use.
    output = result.split("'")
    print("Our output:")
    print(output[1])

    with open("concept_table.txt", 'w') as concept_file:
        concept_file.write(output[1] + "\n\n")
        concept_file.write(createConceptTable(output[1]))
def recognise_sphinx(audio, dictionary=None):
    """
    Custom sphinx recogniser

    :param audio: an ``sr.AudioData`` instance holding the utterance
    :param dictionary: optional path of a pronunciation dictionary; when it
        is omitted or is not an existing file, the ``cmudict-en-us.dict``
        file from the pocketsphinx model path is used (a missing custom
        dictionary is also reported in ``log_file.txt``)
    :return: the decoder after it has processed the whole utterance
    """
    # Ensure audio is of the correct format
    assert isinstance(audio, sr.AudioData), "``audio_data`` must be audio data"

    # The included language models require audio to be 16-bit mono 16 kHz
    # in little-endian format
    raw_data = audio.get_raw_data(convert_rate=16000, convert_width=2)

    # Build model paths with os.path.join: the original concatenated
    # '\name' literals, which are invalid escape sequences and produce
    # wrong paths on non-Windows systems.
    model_path = ps.get_model_path()

    if dictionary is not None and os.path.isfile(dictionary):
        dictionary_path = dictionary
    else:
        if dictionary is not None:
            with open("log_file.txt", "a") as log_file:
                log_file.write(str(t.format_exc()))
                log_file.write(
                    'WARNING: "{0}" WAS NOT FOUND'.format(dictionary))
        dictionary_path = os.path.join(model_path, 'cmudict-en-us.dict')

    # Create decoder object
    config = ps.Decoder.default_config()
    config.set_string("-dict", dictionary_path)
    config.set_string("-hmm", os.path.join(model_path, 'en-us'))
    config.set_string("-lm", os.path.join(model_path, 'en-us.lm.bin'))
    config.set_string("-logfn", os.devnull)
    decoder = ps.Decoder(config)

    # Obtain recognition results
    decoder.start_utt()  # Begin utterance processing
    # Process audio data with recognition enabled (no_search = False), as a
    # full utterance (full_utt = True)
    decoder.process_raw(raw_data, False, True)
    decoder.end_utt()  # Stop utterance processing

    return decoder
def __init__(self, vocabulary,
             hmm_dir="/usr/local/share/" +
                     "pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k"):
    """
    Initiates the pocketsphinx instance.

    The decoder writes its log to a temporary file whose name is kept in
    ``self._logfile``.

    Arguments:
    vocabulary -- a PocketsphinxVocabulary instance
    hmm_dir -- the path of the Hidden Markov Model (HMM)
    """
    self._logger = logging.getLogger(__name__)

    # quirky bug where first import doesn't work
    try:
        import pocketsphinx as ps
    except Exception:
        # Retry once; do not swallow KeyboardInterrupt/SystemExit.
        import pocketsphinx as ps

    with tempfile.NamedTemporaryFile(prefix='psdecoder_', suffix='.log',
                                     delete=False) as f:
        self._logfile = f.name

    self._decoder = ps.Decoder(hmm=hmm_dir, logfn=self._logfile,
                               **vocabulary.decoder_kwargs)
def load_decoder(self) -> None:
    """Loads speech decoder if not cached.

    Reads the wake-word settings from the profile (falling back to the
    speech-to-text acoustic model and dictionary), warns about keyphrase
    words missing from the dictionary, and stores a keyphrase-spotting
    decoder in ``self.decoder``.

    Raises:
        AssertionError: if no wake keyphrase is configured.
    """
    if self.decoder is not None:
        return

    import pocketsphinx

    # Load decoder settings (use speech-to-text configuration as a fallback)
    hmm_path = self.profile.read_path(
        self.profile.get("wake.pocketsphinx.acoustic_model", None)
        or self.profile.get("speech_to_text.pocketsphinx.acoustic_model")
    )

    dict_path = self.profile.read_path(
        self.profile.get("wake.pocketsphinx.dictionary", None)
        or self.profile.get("speech_to_text.pocketsphinx.dictionary")
    )

    self.threshold = float(
        self.profile.get("wake.pocketsphinx.threshold", 1e-40)
    )
    self.keyphrase = self.profile.get("wake.pocketsphinx.keyphrase", "")
    # Raised explicitly (same exception type as before) so the check is
    # not stripped when Python runs with -O.
    if len(self.keyphrase) == 0:
        raise AssertionError("No wake keyphrase")

    # Verify that keyphrase words are in dictionary
    keyphrase_words = re.split(r"\s+", self.keyphrase)
    with open(dict_path, "r") as dict_file:
        word_dict = read_dict(dict_file)

    dict_upper = self.profile.get("speech_to_text.dictionary_upper", False)
    for word in keyphrase_words:
        word = word.upper() if dict_upper else word.lower()
        if word not in word_dict:
            self._logger.warning("%s not in dictionary", word)

    self._logger.debug(
        "Loading wake decoder with hmm=%s, dict=%s", hmm_path, dict_path
    )

    decoder_config = pocketsphinx.Decoder.default_config()
    decoder_config.set_string("-hmm", hmm_path)
    decoder_config.set_string("-dict", dict_path)
    decoder_config.set_string("-keyphrase", self.keyphrase)
    # os.devnull is "/dev/null" on POSIX and stays valid elsewhere.
    decoder_config.set_string("-logfn", os.devnull)
    decoder_config.set_float("-kws_threshold", self.threshold)

    mllr_path = self.profile.read_path(
        self.profile.get("wake.pocketsphinx.mllr_matrix")
    )

    if os.path.exists(mllr_path):
        self._logger.debug(
            "Using tuned MLLR matrix for acoustic model: %s", mllr_path
        )
        decoder_config.set_string("-mllr", mllr_path)

    self.decoder = pocketsphinx.Decoder(decoder_config)
    self.decoder_started = False
def __init__(self, vocabulary,
             hmm_dir="/usr/local/share/" +
                     "pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k",
             **kwargs):
    """
    Set up the pocketsphinx decoder for this instance.

    Arguments:
    vocabulary -- a PocketsphinxVocabulary instance
    hmm_dir -- the path of the Hidden Markov Model (HMM)

    Raises RuntimeError when hmm_dir does not exist; missing model files
    inside an existing hmm_dir only produce a warning.
    """
    self._logger = logging.getLogger(__name__)

    # quirky bug where first import doesn't work
    try:
        import pocketsphinx as ps
    except Exception:
        import pocketsphinx as ps

    # The decoder log goes to a temporary file that is kept on disk.
    log_handle = tempfile.NamedTemporaryFile(prefix='psdecoder_',
                                             suffix='.log', delete=False)
    with log_handle as f:
        self._logfile = f.name

    self._logger.debug(
        "Initializing PocketSphinx Decoder with hmm_dir '%s'", hmm_dir)

    # Sanity-check hmm_dir up front so failures produce a meaningful
    # message.
    if not os.path.exists(hmm_dir):
        msg = ("hmm_dir '%s' does not exist! Please make sure that you "
               "have set the correct hmm_dir in your profile.") % hmm_dir
        self._logger.error(msg)
        raise RuntimeError(msg)

    # Required acoustic model files; see
    # http://cmusphinx.sourceforge.net/wiki/acousticmodelformat
    required = ('mdef', 'feat.params', 'means', 'noisedict',
                'transition_matrices', 'variances')
    missing_hmm_files = [
        fname for fname in required
        if not os.path.exists(os.path.join(hmm_dir, fname))
    ]

    # Only one of mixture_weights / sendump has to be present.
    has_mixweights = os.path.exists(os.path.join(hmm_dir, 'mixture_weights'))
    has_sendump = os.path.exists(os.path.join(hmm_dir, 'sendump'))
    if not (has_mixweights or has_sendump):
        missing_hmm_files.append('mixture_weights or sendump')

    if missing_hmm_files:
        self._logger.warning(
            "hmm_dir '%s' is missing files: %s. Please "
            "make sure that you have set the correct "
            "hmm_dir in your profile.",
            hmm_dir, ', '.join(missing_hmm_files))

    self._decoder = ps.Decoder(hmm=hmm_dir, logfn=self._logfile,
                               **vocabulary.decoder_kwargs)
def load_decoder(self) -> None:
    """Create ``self.decoder`` from the profile's
    ``speech_to_text.pocketsphinx`` settings unless it is already loaded.

    The MLLR matrix is only applied when the configured file exists.
    """
    if self.decoder is not None:
        return

    # Load decoder
    import pocketsphinx

    ps_config = self.profile.get("speech_to_text.pocketsphinx")

    # Load decoder settings
    hmm_path = self.profile.read_path(ps_config["acoustic_model"])
    dict_path = self.profile.read_path(ps_config["dictionary"])
    lm_path = self.profile.read_path(ps_config["language_model"])

    # Pass arguments to the logger so formatting only happens when debug
    # logging is enabled.
    self._logger.debug(
        "Loading decoder with hmm=%s, dict=%s, lm=%s",
        hmm_path, dict_path, lm_path)

    decoder_config = pocketsphinx.Decoder.default_config()
    decoder_config.set_string("-hmm", hmm_path)
    decoder_config.set_string("-dict", dict_path)
    decoder_config.set_string("-lm", lm_path)
    # os.devnull is "/dev/null" on POSIX and stays valid elsewhere.
    decoder_config.set_string("-logfn", os.devnull)

    mllr_path = self.profile.read_path(ps_config["mllr_matrix"])
    if os.path.exists(mllr_path):
        self._logger.debug(
            "Using tuned MLLR matrix for acoustic model: %s", mllr_path)
        decoder_config.set_string("-mllr", mllr_path)

    self.decoder = pocketsphinx.Decoder(decoder_config)
def get_decoder(
    acoustic_model: str,
    dictionary: str,
    language_model: str,
    mllr_matrix: str,
    debug: bool = False,
) -> pocketsphinx.Decoder:
    """Build a pocketsphinx decoder from the given model paths.

    Decoder logging is sent to ``os.devnull`` unless ``debug`` is set, and
    the MLLR matrix is only applied when the path is non-empty and exists.
    The load time is reported through ``logger.debug``.
    """
    start_time = time.time()

    decoder_config = pocketsphinx.Decoder.default_config()
    model_options = (
        ("-hmm", acoustic_model),
        ("-dict", dictionary),
        ("-lm", language_model),
    )
    for option, value in model_options:
        decoder_config.set_string(option, value)

    if not debug:
        decoder_config.set_string("-logfn", os.devnull)

    use_mllr = mllr_matrix and os.path.exists(mllr_matrix)
    if use_mllr:
        decoder_config.set_string("-mllr", mllr_matrix)

    decoder = pocketsphinx.Decoder(decoder_config)
    end_time = time.time()

    logger.debug(
        f"Successfully loaded decoder in {end_time - start_time} second(s)")

    return decoder
def recognize(wav_file):
    """
    Run speech recognition on a given file.

    Arguments:
    wav_file -- path of the audio file to decode

    Returns the decoder's hypothesis.
    """
    speech_rec = pocketsphinx.Decoder(hmm=HMDIR, lm=LMDIR, dict=DICTD)
    # open() replaces the Python 2 only file(); the context manager closes
    # the handle even if decoding fails.
    with open(wav_file, 'rb') as wav_handle:
        speech_rec.decode_raw(wav_handle)
    return speech_rec.get_hyp()
def __init__(self, lmd, dictd, lmd_persona, dictd_persona,
             lmd_music=None, dictd_music=None):
    """
    Create the pocketsphinx decoders for this instance.

    Arguments:
    lmd -- language model file for ``self.speechRec``
    dictd -- dictionary file for ``self.speechRec``
    lmd_persona -- language model file for ``self.speechRec_persona``
    dictd_persona -- dictionary file for ``self.speechRec_persona``
    lmd_music -- (optional) language model file for the music decoder
    dictd_music -- (optional) dictionary file for the music decoder;
                   ``self.speechRec_music`` is only set when both music
                   files are given
    """
    # All decoders share one acoustic model.
    shared = {
        'hmm': "/usr/local/share/pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k",
    }

    have_music = lmd_music and dictd_music
    if have_music:
        self.speechRec_music = ps.Decoder(lm=lmd_music, dict=dictd_music,
                                          **shared)

    self.speechRec_persona = ps.Decoder(lm=lmd_persona, dict=dictd_persona,
                                        **shared)
    self.speechRec = ps.Decoder(lm=lmd, dict=dictd, **shared)
def decode(self, audio, do_finalize, sample_rate=DEFAULT_SAMPLE_RATE,
           stream_id=DEFAULT_STREAM_ID):
    """
    Feed a chunk of audio to the decoder of the given stream.

    A decoder is created per ``stream_id`` on first use and kept in
    ``self.asr_decoders``.

    Returns a ``(text, confidence)`` tuple. With the pocketsphinx engine,
    ``(None, 0.0)`` is returned until ``do_finalize`` is true.

    Raises Exception for an unknown engine, or when ``sample_rate`` differs
    from ``self.ps_samplerate`` with the pocketsphinx engine.
    """
    if self._engine == ASR_ENGINE_NNET3:
        if stream_id not in self.asr_decoders:
            self.asr_decoders[stream_id] = KaldiNNet3OnlineDecoder(
                self.nnet3_model)

        kaldi_decoder = self.asr_decoders[stream_id]
        samples = np.array(audio, dtype=np.float32)
        kaldi_decoder.decode(sample_rate, samples, do_finalize)

        text, confidence = kaldi_decoder.get_decoded_string()
        return text.strip(), confidence

    if self._engine == ASR_ENGINE_POCKETSPHINX:
        if sample_rate != self.ps_samplerate:
            raise Exception(
                'decode: samplerate does not match model: %d vs %d'
                % (sample_rate, self.ps_samplerate))

        if stream_id not in self.asr_decoders:
            import pocketsphinx
            self.asr_decoders[stream_id] = pocketsphinx.Decoder(
                self.ps_config)
            self.asr_in_utt[stream_id] = False

        sphinx_decoder = self.asr_decoders[stream_id]

        # Open an utterance on the first chunk of a stream.
        if not self.asr_in_utt[stream_id]:
            sphinx_decoder.start_utt()
            self.asr_in_utt[stream_id] = True

        # Pack the samples as little-endian 16-bit integers.
        packed = struct.pack('<%dh' % len(audio), *audio)
        sphinx_decoder.process_raw(packed, False, False)

        if not do_finalize:
            return None, 0.0

        sphinx_decoder.end_utt()
        self.asr_in_utt[stream_id] = False

        hypothesis = sphinx_decoder.hyp()
        logmath = sphinx_decoder.get_logmath()
        text = hypothesis.hypstr.decode('utf8').strip()
        return text, logmath.exp(hypothesis.prob)

    raise Exception('unknown ASR engine: %s' % self._engine)
def decodeSpeech(hmdir, lmd, dictd, audio_file):
    """
    Run the ``record`` shell command, then decode ``audio_file``.

    Arguments:
    hmdir -- acoustic model path, passed to the decoder as ``hmm``
    lmd -- language model path, passed to the decoder as ``lm``
    dictd -- dictionary path, passed to the decoder as ``dict``
    audio_file -- path of the audio file to decode

    Returns the first element of the decoder's hypothesis.
    """
    # pocketsphinx wav recognition process.
    subprocess.call(record, shell=True)
    speechRec = ps.Decoder(hmm=hmdir, lm=lmd, dict=dictd)
    # open() replaces the Python 2 only file(); the context manager closes
    # the handle even if decoding fails.
    with open(audio_file, 'rb') as audio_file2:
        # Skip the first 44 bytes (presumably the WAV header).
        audio_file2.seek(44)
        speechRec.decode_raw(audio_file2)
    result = speechRec.get_hyp()
    return result[0]
def __init__(self, acoustic_parameters_directory, language_model_file,
             phoneme_dictionary_file):
    """
    Store a pocketsphinx configuration in ``self.config`` and build
    ``self.decoder`` from it.

    Arguments:
    acoustic_parameters_directory -- path of the hidden Markov model (HMM)
                                     parameter files
    language_model_file -- language model path
    phoneme_dictionary_file -- dictionary path
    """
    self.config = pocketsphinx.Decoder.default_config()

    options = (
        ("-hmm", acoustic_parameters_directory),
        ("-lm", language_model_file),
        ("-dict", phoneme_dictionary_file),
        ("-logfn", os.devnull),  # decoder log output is discarded
    )
    for option, value in options:
        self.config.set_string(option, value)

    self.decoder = pocketsphinx.Decoder(self.config)
def decode_recording(filename):
    """
    Decode recording

    Arguments:
    filename -- path of the recording to decode

    Returns the decoder's hypothesis.
    """
    recognition = pocketsphinx.Decoder(hmm=HMDIR, lm=LMDIR, dict=DICTD)
    # open() replaces the Python 2 only file(); the context manager closes
    # the handle even if decoding fails.
    with open(filename, 'rb') as recording:
        # Skip the first 44 bytes (presumably the WAV header).
        recording.seek(44)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("debug")
        recognition.decode_raw(recording)
    command = recognition.get_hyp()
    return command
def main():
    """Register both language models on one decoder, then decode the same
    raw file four times, alternating between the two models."""
    decoder = ps.Decoder()

    for language_model, model_name in ((LM1, LM1_NAME), (LM2, LM2_NAME)):
        addLM(decoder, language_model, model_name)

    utterances = (
        ("utt01", LM1_NAME),
        ("utt02", LM2_NAME),
        ("utt03", LM1_NAME),
        ("utt04", LM2_NAME),
    )
    for utterance_id, model_name in utterances:
        decodeAudio(decoder, utterance_id, RAW_FILE, model_name)
def decodeSpeech(hmmd, lmdir, dictp, wavfile):
    """
    Decode a wav file with a freshly created pocketsphinx decoder.

    Arguments:
    hmmd -- acoustic model path, passed to the decoder as ``hmm``
    lmdir -- language model path, passed to the decoder as ``lm``
    dictp -- dictionary path, passed to the decoder as ``dict``
    wavfile -- path of the audio file to decode

    Returns the first element of the decoder's hypothesis.
    """
    import pocketsphinx as ps
    import sphinxbase  # noqa: F401 -- imported but not used directly here

    speechRec = ps.Decoder(hmm=hmmd, lm=lmdir, dict=dictp)
    # open() replaces the Python 2 only file(); the context manager closes
    # the handle even if decoding fails.
    with open(wavfile, 'rb') as wavFile:
        # Skip the first 44 bytes (presumably the WAV header).
        wavFile.seek(44)
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return result[0]
def load_decoder(self) -> None:
    """Load Pocketsphinx HMM/LM/Dictionary.

    Does nothing when ``self.decoder`` is already set. In open
    transcription mode the base dictionary and language model are used;
    otherwise the custom voice-command ones. An MLLR matrix is applied
    only when one is configured and the file exists.
    """
    if self.decoder is not None:
        return

    # Load decoder
    import pocketsphinx

    ps_config = self.profile.get("speech_to_text.pocketsphinx", {})

    # Load decoder settings
    hmm_path = self.profile.read_path(
        ps_config.get("acoustic_model", "acoustic_model")
    )

    if self.open_transcription:
        self._logger.debug("Open transcription mode")

        # Use base language model/dictionary
        dict_path = self.profile.read_path(
            ps_config.get("base_dictionary", "base_dictionary.txt")
        )
        lm_path = self.profile.read_path(
            ps_config.get("base_language_model", "base_language_model.txt")
        )
    else:
        # Custom voice commands
        dict_path = self.profile.read_path(
            ps_config.get("dictionary", "dictionary.txt")
        )
        lm_path = self.profile.read_path(
            ps_config.get("language_model", "language_model.txt")
        )

    self._logger.debug(
        "Loading decoder with hmm=%s, dict=%s, lm=%s",
        hmm_path,
        dict_path,
        lm_path,
    )

    decoder_config = pocketsphinx.Decoder.default_config()
    decoder_config.set_string("-hmm", hmm_path)
    decoder_config.set_string("-dict", dict_path)
    decoder_config.set_string("-lm", lm_path)
    # os.devnull is "/dev/null" on POSIX and stays valid elsewhere.
    decoder_config.set_string("-logfn", os.devnull)

    # ps_config may be the empty default, so look the key up with .get()
    # like every other setting instead of raising KeyError.
    mllr_setting = ps_config.get("mllr_matrix")
    if mllr_setting:
        mllr_path = self.profile.read_path(mllr_setting)
        if os.path.exists(mllr_path):
            self._logger.debug(
                "Using tuned MLLR matrix for acoustic model: %s", mllr_path
            )
            decoder_config.set_string("-mllr", mllr_path)

    self.decoder = pocketsphinx.Decoder(decoder_config)
def recognize_sphinx(self, frame_data,
                     lmd="/home/root/led-speech-edison/lm/0410.lm",
                     dictd="/home/root/led-speech-edison/lm/0410.dic"):
    """
    Recognise one utterance of raw audio with PocketSphinx.

    Arguments:
    frame_data -- raw audio data, processed as one full utterance
    lmd -- language model path (defaults to the previously hard-coded file)
    dictd -- dictionary path (defaults to the previously hard-coded file)

    Returns the first element of the hypothesis, or None when the decoder
    produced no hypothesis.
    """
    print("recognizing using PocketSphinx")

    # NOTE(review): a new decoder is built on every call.
    decoder = ps.Decoder(lm=lmd, dict=dictd)
    decoder.start_utt()
    # no_search=False, full_utt=True
    decoder.process_raw(frame_data, False, True)
    decoder.end_utt()

    hypothesis = decoder.get_hyp()
    if hypothesis is None:
        return None
    return hypothesis[0]
def stt_local(hmmd, lmdir, dictp, wavfile):
    """
    Decode a wav file with a locally installed pocketsphinx.

    Arguments:
    hmmd -- acoustic model path, passed to the decoder as ``hmm``
    lmdir -- language model path, passed to the decoder as ``lm``
    dictp -- dictionary path, passed to the decoder as ``dict``
    wavfile -- path of the audio file to decode

    Returns the first element of the decoder's hypothesis, or None (after
    printing a message) when pocketsphinx or sphinxbase cannot be imported.
    """
    try:
        import pocketsphinx as ps
        import sphinxbase  # noqa: F401 -- imported only to verify it exists
    except Exception:
        print(''' Pocket sphinx and sphinxbase is not installed..''')
        # The original fell through here and crashed on the unbound `ps`.
        return None

    speechRec = ps.Decoder(hmm=hmmd, lm=lmdir, dict=dictp)
    # open() replaces the Python 2 only file(); the context manager closes
    # the handle even if decoding fails.
    with open(wavfile, 'rb') as wavFile:
        # Skip the first 44 bytes (presumably the WAV header).
        wavFile.seek(44)
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return result[0]
def __init__(self, lmd="languagemodel.lm", dictd="dictionary.dic",
             lmd_persona="languagemodel_persona.lm",
             dictd_persona="dictionary_persona.dic",
             lmd_music=None, dictd_music=None, **kwargs):
    """
    Initiates the pocketsphinx instance.

    The acoustic model directory is read from the ``pocketsphinx.hmm_dir``
    entry of ``profile.yml`` next to this module when available, and falls
    back to the default install location otherwise.

    Arguments:
    lmd -- filename of the full language model
    dictd -- filename of the full dictionary (.dic)
    lmd_persona -- filename of the 'Persona' language model
                   (containing, e.g., 'Jasper')
    dictd_persona -- filename of the 'Persona' dictionary (.dic)
    lmd_music -- (optional) filename of the music language model
    dictd_music -- (optional) filename of the music dictionary;
                   ``self.speechRec_music`` is only created when both
                   music files are given
    kwargs -- accepted for call compatibility and ignored here
    """
    # quirky bug where first import doesn't work
    try:
        import pocketsphinx as ps
    except Exception:
        # Retry once; do not swallow KeyboardInterrupt/SystemExit.
        import pocketsphinx as ps

    hmm_dir = None

    # Try to get hmm_dir from config
    profile_path = os.path.join(os.path.dirname(__file__), 'profile.yml')
    if os.path.exists(profile_path):
        with open(profile_path, 'r') as f:
            profile = yaml.safe_load(f)
        # safe_load returns None for an empty file and the section may be
        # missing or not a mapping; only read hmm_dir from a real mapping.
        section = (profile.get('pocketsphinx')
                   if isinstance(profile, dict) else None)
        if isinstance(section, dict):
            hmm_dir = section.get('hmm_dir')

    if not hmm_dir:
        hmm_dir = ("/usr/local/share/pocketsphinx/model/hmm/en_US/"
                   "hub4wsj_sc_8k")

    if lmd_music and dictd_music:
        self.speechRec_music = ps.Decoder(hmm=hmm_dir, lm=lmd_music,
                                          dict=dictd_music)
    self.speechRec_persona = ps.Decoder(hmm=hmm_dir, lm=lmd_persona,
                                        dict=dictd_persona)
    self.speechRec = ps.Decoder(hmm=hmm_dir, lm=lmd, dict=dictd)
def run_decoder():
    """
    Listen on the microphone until the wake keyphrase is detected, then
    call ``wake_word_detected``.

    The keyphrase decoder is cached per profile name in ``wake_decoders``.
    """
    wake_decoder = wake_decoders.get(profile.name)

    if wake_decoder is None:
        logging.info('Loading wake decoder with hmm=%s, dict=%s'
                     % (hmm_path, dict_path))

        wake_config = pocketsphinx.Decoder.default_config()
        wake_config.set_string('-hmm', hmm_path)
        wake_config.set_string('-dict', dict_path)
        wake_config.set_string('-keyphrase', keyphrase)
        wake_config.set_float('-kws_threshold', kws_threshold)

        wake_decoder = pocketsphinx.Decoder(wake_config)
        wake_decoders[profile.name] = wake_decoder

    wake_decoder.start_utt()
    keyphrase_heard = threading.Event()

    def on_audio(data, frame_count, time_info, status):
        # Feed each chunk to the decoder; any hypothesis counts as a
        # detection and ends the stream.
        wake_decoder.process_raw(data, False, False)
        if not wake_decoder.hyp():
            return (data, pyaudio.paContinue)

        wake_decoder.end_utt()
        logging.debug('Keyphrase detected')
        keyphrase_heard.set()
        return (data, pyaudio.paComplete)

    audio = pyaudio.PyAudio()
    sample_format = pyaudio.get_format_from_width(2)
    mic = audio.open(format=sample_format,
                     channels=1,
                     rate=16000,
                     input=True,
                     input_device_index=device_index,
                     stream_callback=on_audio)

    # Block until wake word is detected
    mic.start_stream()
    keyphrase_heard.wait()

    # Shut down audio input
    mic.stop_stream()
    mic.close()
    audio.terminate()

    # Pass to next stage
    wake_word_detected()
def __init__(self):
    """Create ``self._decoder`` from fixed acoustic model, language model
    and dictionary paths."""
    # An earlier variant used the stock en-us model, language model and
    # cmudict files from a pocketsphinx-5prealpha checkout instead of the
    # custom 1894 language model and dictionary below.
    settings = (
        ('-hmm', '/usr/local/share/pocketsphinx/model/en-us/en-us'),
        ('-lm', '/home/sudhin/JARVIS/beg/mine/1894.lm'),
        ('-dict', '/home/sudhin/JARVIS/beg/mine/1894.dic'),
    )

    decoder_config = ps.Decoder.default_config()
    for option, value in settings:
        decoder_config.set_string(option, value)

    self._decoder = ps.Decoder(decoder_config)