def setup(self):
    """Create the PocketSphinx keyword-spotting decoder for this trigger.

    Reads the key phrase and sensitivity from ``self._tconfig`` and stores
    the ready decoder in ``self._decoder``.
    """
    model_dir = get_model_path()

    cfg = Decoder.default_config()
    # US-English acoustic model plus the stock pronunciation dictionary.
    cfg.set_string('-hmm', os.path.join(model_dir, 'en-us'))
    cfg.set_string('-dict', os.path.join(model_dir, 'cmudict-en-us.dict'))
    # Keyword spotting: the phrase to listen for and its detection threshold.
    cfg.set_string('-keyphrase', self._tconfig['phrase'])
    cfg.set_float('-kws_threshold', float(self._tconfig['threshold']))

    # PocketSphinx logs VERY verbosely; discard its output unless debugging.
    debug_active = logging.getLogger('alexapi').getEffectiveLevel() == logging.DEBUG
    if not debug_active:
        cfg.set_string('-logfn', '/dev/null')

    # Audio is processed chunk by chunk; on keyword detection the caller
    # performs its action and restarts the search.
    self._decoder = Decoder(cfg)
""" https://pypi.org/project/pocketsphinx/ https://cmusphinx.github.io/wiki/ """ import os from pocketsphinx import LiveSpeech, get_model_path model_path = get_model_path() speech = LiveSpeech( verbose=False, sampling_rate=16000, buffer_size=2048, no_search=False, full_utt=False, hmm=os.path.join(model_path, 'en-us'), lm=os.path.join(model_path, 'en-us.lm.bin'), dic=os.path.join(model_path, 'cmudict-en-us.dict') ) for phrase in speech: print(phrase)
from alexapi.device_platforms.desktop import DesktopPlatform

platform = DesktopPlatform(config)

# Setup
recorded = False
servers = ["127.0.0.1:11211"]
mc = Client(servers, debug=1)

# Directory containing this script, with a trailing separator kept for
# compatibility with the previous value. FIX: the old code used
# realpath(...).rstrip(basename(...)), but str.rstrip strips a *set of
# characters*, not a suffix, and can eat trailing directory characters;
# os.path.dirname is the correct tool.
path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '')
resources_path = os.path.join(path, 'resources', '')
tmp_path = os.path.join(tempfile.mkdtemp(prefix='AlexaPi-runtime-'), '')

# PocketSphinx configuration
ps_config = Decoder.default_config()

# Set recognition model to US
ps_config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
ps_config.set_string('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))

# Specify recognition key phrase
ps_config.set_string('-keyphrase', config['sphinx']['trigger_phrase'])
ps_config.set_float('-kws_threshold', 1e-5)

# Hide the VERY verbose logging information
if not debug:
    ps_config.set_string('-logfn', '/dev/null')

# Process audio chunk by chunk. On keyword detected perform action and restart search
decoder = Decoder(ps_config)
decoder.start_utt()

# Variables
import os
from pocketsphinx import LiveSpeech, get_model_path

model_path = get_model_path()

# Locations of a bundled (local) model, relative to this script.
BASE_PATH = os.path.dirname(os.path.realpath(__file__))
HMDIR = os.path.join(BASE_PATH, "hmm/en-us/cmusphinx-en-us.tar.gz")
LMDIR = os.path.join(BASE_PATH, "lm/en-us/en-us.lm.bin")
DICTD = os.path.join(BASE_PATH, "dict/en-us/cmudict-en-us.dict")

# Show the local language-model path next to the one pocketsphinx ships.
for lm_path in (LMDIR, os.path.join(model_path, 'en-us/en-us.lm.bin')):
    print(lm_path)

# Decode microphone audio with the stock US-English model.
decoder_options = {
    'verbose': False,
    'sampling_rate': 16000,
    'buffer_size': 2048,
    'no_search': False,
    'full_utt': False,
    'hmm': os.path.join(model_path, 'en-us'),
    'lm': os.path.join(model_path, 'en-us.lm.bin'),
    'dic': os.path.join(model_path, 'cmudict-en-us.dict'),
}
speech = LiveSpeech(**decoder_options)

# Print each recognized utterance (runs until interrupted).
for phrase in speech:
    print(phrase)
platform = DesktopPlatform(config)

# Setup
recorded = False
servers = ["127.0.0.1:11211"]
mc = Client(servers, debug=1)

# Directory containing this script, with a trailing separator kept for
# compatibility with the previous value. FIX: the old code used
# realpath(...).rstrip(basename(...)), but str.rstrip strips a *set of
# characters*, not a suffix, and can eat trailing directory characters;
# os.path.dirname is the correct tool.
path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '')
resources_path = os.path.join(path, 'resources', '')
tmp_path = os.path.join(tempfile.mkdtemp(prefix='AlexaPi-runtime-'), '')

# PocketSphinx configuration
ps_config = Decoder.default_config()

# Set recognition model to US
ps_config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
ps_config.set_string('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))

# Specify recognition key phrase
ps_config.set_string('-keyphrase', config['sphinx']['trigger_phrase'])
ps_config.set_float('-kws_threshold', 1e-5)

# Hide the VERY verbose logging information
ps_config.set_string('-logfn', '/dev/null')

# Process audio chunk by chunk. On keyword detected perform action and restart search
decoder = Decoder(ps_config)
decoder.start_utt()

# Variables
p = None
""" Constants, used by the app Attributes: _PACKAGE_LOCATION (str): location of the application WORKING_DIRECTORY (str): location of the working directory within the application SPEECH_MODEL_PATH (str): location of the speech recognition model SPEECH_DATA_PATH (str): location of the speech recognition data, used by the pocketsphinx library """ import os from pocketsphinx import get_data_path, get_model_path _PACKAGE_LOCATION = '/'.join(os.path.realpath(__file__).split('/')[:-2]) WORKING_DIRECTORY = _PACKAGE_LOCATION SPEECH_MODEL_PATH = get_model_path() SPEECH_DATA_PATH = get_data_path() if __name__ == '__main__': print(WORKING_DIRECTORY)
Two processes are always running:
'''
import wave
import time
import math
import struct
import threading
import os
from os import path

import numpy as np
import pyaudio
from pocketsphinx import get_model_path
from pocketsphinx.pocketsphinx import *

modeldir = get_model_path()  # directory holding the PocketSphinx dictionary files
md = path.dirname(__file__)  # directory of this script (local model files live here)

# Create a decoder with certain model
config = Decoder.default_config()
config.set_string('-logfn', '/dev/null')  # suppress pocketsphinx's verbose logging
config.set_string('-hmm', os.path.join(modeldir, 'en-us'))
config.set_string('-dict', os.path.join(md, '8070.dic'))
config.set_string('-lm', os.path.join(md, '8070.lm'))

THRESHOLD = 15  # loudness threshold — recognition triggers for louder sound
PRE_REC_LEN = 3  # pre-recording length (to catch the beginning of the word)
REC_LEN = 8  # recording length of the word itself
SHORT_NORMALIZE = (1.0 / 32768.0)
CHUNK = 6144  # array length for recording one quantum of 48 kHz audio
RATE = 16000
THRESHOLD = 1000  # The threshold intensity that defines silence
                  # and noise signal (an int. lower than THRESHOLD is silence).

SILENCE_LIMIT = 1  # Silence limit in seconds. The max amount of seconds where
                   # only silence is recorded. When this time passes the
                   # recording finishes and the file is delivered.

PREV_AUDIO = 1.0  # Previous audio (in seconds) to prepend. When noise
                  # is detected, how much of previously recorded audio is
                  # prepended. This helps to prevent chopping the beginning
                  # of the phrase.

HOTWORD_THRESHOLD = -5000  # Higher values indicate that pocketsphinx
                           # is sure about the word. Adjust to your needs.

# Pocketsphinx files
MODELDIR = get_model_path()  # Default model path, replace if
                             # you use a custom model in a custom location.

# WORDS
HOTWORD = "Anastasia"
SPEAK_UNDERSTOOD = "Understood."
SPEAK_FAILURE = "Sorry?"
SPEAK_SUCCESS = "Okay."
SPEAK_READY = HOTWORD + ". At your service."

# Decoder setup
config = DefaultConfig()
config.set_string('-hmm', path.join(MODELDIR, 'en-us'))  # you can use a custom model
config.set_string('-lm', path.join(MODELDIR, 'en-us.lm.bin'))  # you can use a custom language model
# Optional dependency: Google Cloud speech-to-text. ModuleNotFoundError is a
# subclass of ImportError, so catching ImportError alone covers both (the old
# two-member tuple was redundant).
try:
    import google.cloud.speech
    import google.auth.exceptions
except ImportError:
    logging.warning(
        "Speech-to-text recognition using Google online services is not "
        "available (use command `pip install google-api-core google-auth "
        "google-cloud google-cloud-speech googleapis-common-protos` to get "
        "it). Transcription will be unavailable using that service this "
        "session.")
    _hasGoogleCloud = False

# Optional dependency: CMU Pocket Sphinx (offline recognition). On success,
# enumerate the installed language packs (model folders named like `en-us`).
# Catch ImportError (not just ModuleNotFoundError) for consistency with the
# probe above, so a broken pocketsphinx install degrades gracefully too.
try:
    import pocketsphinx
    sphinxLangs = [folder.stem
                   for folder in Path(pocketsphinx.get_model_path()).glob('??-??')]
    haveSphinx = True
except ImportError:
    haveSphinx = False
    sphinxLangs = None

# Constants related to the transcription system.
TRANSCR_LANG_DEFAULT = 'en-US'

# Values for specifying recognizer engines. This dictionary is used by Builder
# to populate the component property dropdown.
recognizerEngineValues = {
    0: ('sphinx', "CMU Pocket Sphinx", "Offline, Built-in"),
    1: ('google', "Google Cloud Speech API", "Online, Key Required"),
}