def speech_to_intent(self):
    """
    Start a microphone audio stream and listen continuously, feeding each
    audio frame to a Rhino speech-to-intent engine.

    Blocks until Rhino finalizes an inference (the result is passed to
    ``self.print_intent``) or the user interrupts with Ctrl-C.

    :return: 1 once an inference has been produced; None if interrupted
        before any inference completed.
    """
    # Initialize libraries
    rhino = None
    pa = None
    stream = None
    spinner = None
    try:
        rhino = pvrhino.create(library_path=self.rhino_library_path,
                               model_path=self.rhino_model_path,
                               context_path=self.rhino_context_path)

        # Initialize PyAudio and an input audio stream matching Rhino's
        # required sample rate and frame length.
        pa = pyaudio.PyAudio()
        stream = pa.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=rhino.sample_rate,
            input=True,  # Specify as input
            frames_per_buffer=rhino.frame_length)

        # Progress indicator shown while listening.
        spinner = Halo(spinner='line', color='magenta')
        print(rhino.context_info)

        # Rhino detection loop: feed one frame at a time until the engine
        # reports that it has reached a final inference.
        while True:
            spinner.start()
            pcm = stream.read(rhino.frame_length)
            # Unpack "frame length" amount of "short" data types in C ("h"
            # string) from the raw byte buffer into a tuple of ints, which
            # is the format rhino.process() expects.
            unpacked = struct.unpack_from("h" * rhino.frame_length, pcm)
            done = rhino.process(unpacked)
            if done:
                result = rhino.get_inference()
                self.print_intent(result)
                return 1
    except KeyboardInterrupt:
        print("Stopping speech-to-intent detection...")
    finally:
        # Always stop the spinner so it does not keep animating after a
        # Ctrl-C (the original only stopped it on a completed inference),
        # then release native and audio resources.
        if spinner is not None:
            spinner.stop()
        if rhino is not None:
            rhino.delete()
        if stream is not None:
            stream.close()
        if pa is not None:
            pa.terminate()
def __init__(self, speaker): self.speaker = speaker # used to create the beep() sound self.handle = pvrhino.create(context_path='./models/Irma_Rules_2.rhn', sensitivity=0.25) print("sample_rate", self.handle.sample_rate, "frame_len:", self.handle.frame_length) self.pa = pyaudio.PyAudio() self.audio_stream = self.pa.open( rate=self.handle.sample_rate, channels=1, format=pyaudio.paInt16, input=True, frames_per_buffer=self.handle.frame_length) self.recognizer = sr.Recognizer() # obtain audio from the microphone print('NONSPEAKING', self.recognizer.non_speaking_duration) print('PAUSE THRESHOLD', self.recognizer.pause_threshold) self.recognizer.pause_threshold = 0.5 # default 0.8 self.recognizer.operation_timeout = 2 self.recognizer.energy_threshold = 3000 with sr.Microphone() as source: self.recognizer.adjust_for_ambient_noise(source) # Thread and flags self.ON = True self.running = True self.wakeword_flag = False self.voice_item = parsepy.item() self.voice_item.upc = ' ' self.voice_item.imageURL = ' ' self.voice_item.name = ' ' self.command = 'None' self.wakeword_thread = threading.Thread(target=self.wakeword_run, name="wakeword_thread") self.wakeword_thread.start() print('WakeWord Initialized')
def run(self):
    """
    Create an input audio stream, instantiate a Rhino engine, and infer
    intents from spoken commands, printing each inference to stdout.

    Runs until interrupted with Ctrl-C. If ``self._output_path`` is set,
    the captured audio is also written to that path as a WAV file.

    :raises pvrhino.RhinoError: (and subclasses) when the engine cannot be
        created; the specific cause is printed before re-raising.
    """
    rhino = None
    recorder = None
    wav_file = None
    try:
        rhino = pvrhino.create(
            access_key=self._access_key,
            library_path=self._library_path,
            model_path=self._model_path,
            context_path=self._context_path,
            require_endpoint=self._require_endpoint)

        recorder = PvRecorder(device_index=self._audio_device_index, frame_length=rhino.frame_length)
        recorder.start()

        if self._output_path is not None:
            # Open in binary mode and take the rate/frame length from the
            # engine instead of hard-coding 16000/512, so the WAV header
            # stays correct if the engine's parameters ever differ.
            wav_file = wave.open(self._output_path, "wb")
            wav_file.setparams((1, 2, rhino.sample_rate, rhino.frame_length, "NONE", "NONE"))

        print(rhino.context_info)
        print()
        print(f"Using device: {recorder.selected_device}")
        print("Listening...")
        print()

        while True:
            pcm = recorder.read()

            if wav_file is not None:
                wav_file.writeframes(struct.pack("h" * len(pcm), *pcm))

            is_finalized = rhino.process(pcm)
            if is_finalized:
                inference = rhino.get_inference()
                if inference.is_understood:
                    print('{')
                    print(" intent : '%s'" % inference.intent)
                    print(' slots : {')
                    for slot, value in inference.slots.items():
                        print(" %s : '%s'" % (slot, value))
                    print(' }')
                    print('}\n')
                else:
                    print("Didn't understand the command.\n")
    except pvrhino.RhinoInvalidArgumentError as e:
        print("One or more arguments provided to Rhino is invalid: {\n" +
              f"\t{self._access_key=}\n" +
              f"\t{self._library_path=}\n" +
              f"\t{self._model_path=}\n" +
              f"\t{self._context_path=}\n" +
              f"\t{self._require_endpoint=}\n" +
              "}")
        print(f"If all other arguments seem valid, ensure that '{self._access_key}' is a valid AccessKey")
        raise e
    except pvrhino.RhinoActivationError as e:
        print("AccessKey activation error")
        raise e
    except pvrhino.RhinoActivationLimitError as e:
        print(f"AccessKey '{self._access_key}' has reached it's temporary device limit")
        raise e
    except pvrhino.RhinoActivationRefusedError as e:
        print(f"AccessKey '{self._access_key}' refused")
        raise e
    except pvrhino.RhinoActivationThrottledError as e:
        print(f"AccessKey '{self._access_key}' has been throttled")
        raise e
    except pvrhino.RhinoError as e:
        # Plain string (was a placeholder-free f-string).
        print("Failed to initialize Rhino")
        raise e
    except KeyboardInterrupt:
        print('Stopping ...')
    finally:
        # Release native resources regardless of how the loop exits.
        if recorder is not None:
            recorder.delete()
        if rhino is not None:
            rhino.delete()
        if wav_file is not None:
            wav_file.close()
def run(self):
    """
    Create an input audio stream, instantiate a Rhino engine, and infer
    intents from spoken commands, printing each inference to stdout.

    Runs until interrupted with Ctrl-C. If ``self._output_path`` is set,
    every captured frame is written there as 16-bit PCM on exit.
    """
    rhino = None
    pa = None
    audio_stream = None
    sample_rate = None  # captured before rhino.delete() for use in finally
    try:
        rhino = pvrhino.create(
            library_path=self._library_path,
            model_path=self._model_path,
            context_path=self._context_path)
        sample_rate = rhino.sample_rate

        pa = pyaudio.PyAudio()
        audio_stream = pa.open(
            rate=rhino.sample_rate,
            channels=1,
            format=pyaudio.paInt16,
            input=True,
            frames_per_buffer=rhino.frame_length,
            input_device_index=self._audio_device_index)

        print(rhino.context_info)
        print()

        while True:
            pcm = audio_stream.read(rhino.frame_length)
            # Unpack frame_length C shorts ("h") from the raw byte buffer.
            pcm = struct.unpack_from("h" * rhino.frame_length, pcm)

            if self._output_path is not None:
                self._recorded_frames.append(pcm)

            is_finalized = rhino.process(pcm)
            if is_finalized:
                inference = rhino.get_inference()
                if inference.is_understood:
                    print('{')
                    print(" intent : '%s'" % inference.intent)
                    print(' slots : {')
                    for slot, value in inference.slots.items():
                        print(" %s : '%s'" % (slot, value))
                    print(' }')
                    print('}\n')
                else:
                    print("Didn't understand the command.\n")
    except KeyboardInterrupt:
        print('Stopping ...')
    finally:
        if audio_stream is not None:
            audio_stream.close()
        if pa is not None:
            pa.terminate()
        if rhino is not None:
            rhino.delete()
        if self._output_path is not None and len(self._recorded_frames) > 0:
            recorded_audio = np.concatenate(self._recorded_frames, axis=0).astype(np.int16)
            # BUG FIX: the original read rhino.sample_rate here, AFTER
            # rhino.delete() had already released the native handle; use
            # the value captured at creation time instead.
            soundfile.write(
                os.path.expanduser(self._output_path),
                recorded_audio,
                samplerate=sample_rate,
                subtype='PCM_16')
def main():
    """
    Run Rhino speech-to-intent inference over an audio file and print the
    first finalized inference (intent and slots) to stdout.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--input_audio_path', help='Absolute path to input audio file.', required=True)
    parser.add_argument('--context_path', help="Absolute path to context file.", required=True)
    parser.add_argument('--library_path', help='Absolute path to dynamic library.', default=pvrhino.LIBRARY_PATH)
    parser.add_argument(
        '--model_path',
        help='Absolute path to the file containing model parameters.',
        default=pvrhino.MODEL_PATH)
    parser.add_argument(
        '--sensitivity',
        help="Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value results in " +
             "fewer misses at the cost of (potentially) increasing the erroneous inference rate.",
        type=float,  # BUG FIX: without type=float the CLI value reaches pvrhino.create as a str
        default=0.5)

    args = parser.parse_args()

    rhino = pvrhino.create(
        library_path=args.library_path,
        model_path=args.model_path,
        context_path=args.context_path,
        sensitivity=args.sensitivity)

    try:
        audio, sample_rate = soundfile.read(args.input_audio_path, dtype='int16')
        if audio.ndim == 2:
            print("Picovoice processes single-channel audio but stereo file is provided. Processing left channel only.")
            # BUG FIX: soundfile returns shape (frames, channels); the left
            # channel is column 0 — audio[0, :] was just the first frame.
            audio = audio[:, 0]
        if sample_rate != rhino.sample_rate:
            raise ValueError(
                "Audio file should have a sample rate of %d. got %d" % (rhino.sample_rate, sample_rate))

        num_frames = len(audio) // rhino.frame_length
        for i in range(num_frames):
            frame = audio[i * rhino.frame_length:(i + 1) * rhino.frame_length]
            is_finalized = rhino.process(frame)
            if is_finalized:
                inference = rhino.get_inference()
                if inference.is_understood:
                    print('{')
                    print(" intent : '%s'" % inference.intent)
                    print(' slots : {')
                    for slot, value in inference.slots.items():
                        print(" %s : '%s'" % (slot, value))
                    print(' }')
                    print('}')
                else:
                    print("Didn't understand the command.")
                break
    finally:
        # Release the native handle even if reading or processing fails.
        rhino.delete()
def main():
    """
    Command-line entry point: parse arguments, create a Rhino engine
    (reporting any activation or initialization failure before re-raising),
    then run inference over the given audio file and print the result.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--input_audio_path', required=True, help='Absolute path to input audio file.')
    parser.add_argument(
        '--access_key',
        required=True,
        help='AccessKey obtained from Picovoice Console (https://picovoice.ai/console/)')
    parser.add_argument('--context_path', required=True, help="Absolute path to context file.")
    parser.add_argument('--library_path', default=pvrhino.LIBRARY_PATH, help='Absolute path to dynamic library.')
    parser.add_argument(
        '--model_path',
        default=pvrhino.MODEL_PATH,
        help='Absolute path to the file containing model parameters.')
    parser.add_argument(
        '--sensitivity',
        type=float,
        default=0.5,
        help="Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value results in " +
             "fewer misses at the cost of (potentially) increasing the erroneous inference rate.")
    parser.add_argument(
        '--require_endpoint',
        default='True',
        choices=['True', 'False'],
        help="If set to `False`, Rhino does not require an endpoint (chunk of silence) before finishing inference.")

    args = parser.parse_args()

    # The flag arrives as the string 'True'/'False'; anything but 'false'
    # (case-insensitive) enables endpoint detection.
    require_endpoint = args.require_endpoint.lower() != 'false'

    try:
        engine = pvrhino.create(
            access_key=args.access_key,
            library_path=args.library_path,
            model_path=args.model_path,
            context_path=args.context_path,
            sensitivity=args.sensitivity,
            require_endpoint=require_endpoint)
    except pvrhino.RhinoInvalidArgumentError as e:
        print(f"One or more arguments provided to Rhino is invalid: {args}")
        print(f"If all other arguments seem valid, ensure that '{args.access_key}' is a valid AccessKey")
        raise e
    except pvrhino.RhinoActivationError as e:
        print("AccessKey activation error")
        raise e
    except pvrhino.RhinoActivationLimitError as e:
        print(f"AccessKey '{args.access_key}' has reached it's temporary device limit")
        raise e
    except pvrhino.RhinoActivationRefusedError as e:
        print(f"AccessKey '{args.access_key}' refused")
        raise e
    except pvrhino.RhinoActivationThrottledError as e:
        print(f"AccessKey '{args.access_key}' has been throttled")
        raise e
    except pvrhino.RhinoError as e:
        print("Failed to initialize Rhino")
        raise e

    samples = read_file(args.input_audio_path, engine.sample_rate)

    # Walk the file one engine-sized frame at a time; stop after the first
    # finalized inference, mirroring the single-shot nature of the demo.
    step = engine.frame_length
    for start in range(0, (len(samples) // step) * step, step):
        if not engine.process(samples[start:start + step]):
            continue
        inference = engine.get_inference()
        if inference.is_understood:
            print('{')
            print(" intent : '%s'" % inference.intent)
            print(' slots : {')
            for slot, value in inference.slots.items():
                print(" %s : '%s'" % (slot, value))
            print(' }')
            print('}')
        else:
            print("Didn't understand the command.")
        break

    engine.delete()
def __init__(self, keyword_path, wake_word_callback, context_path, inference_callback, porcupine_library_path=None, porcupine_model_path=None, porcupine_sensitivity=0.5, rhino_library_path=None, rhino_model_path=None, rhino_sensitivity=0.5): """ Constructor. :param keyword_path: Absolute path to Porcupine's keyword model file. :param wake_word_callback: User-defined callback invoked upon detection of the wake phrase. The callback accepts no input arguments. :param context_path: Absolute path to file containing context parameters. A context represents the set of expressions (spoken commands), intents, and intent arguments (slots) within a domain of interest. :param inference_callback: User-defined callback invoked upon completion of intent inference. The callback accepts a single input argument of type `Inference` that exposes the following immutable fields: (1) `is_understood` is a flag indicating if the spoken command is understood. (2) `intent` is the inferred intent from the voice command. If the command is not understood then it's set to `None`. (3) `slots` is a dictionary mapping slot keys to their respective values. If the command is not understood then it's set to an empty dictionary. :param porcupine_library_path: Absolute path to Porcupine's dynamic library. :param porcupine_model_path: Absolute path to the file containing Porcupine's model parameters. :param porcupine_sensitivity: Wake word detection sensitivity. It should be a number within [0, 1]. A higher sensitivity results in fewer misses at the cost of increasing the false alarm rate. :param rhino_library_path: Absolute path to Rhino's dynamic library. :param rhino_model_path: Absolute path to the file containing Rhino's model parameters. :param rhino_sensitivity: Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value results in fewer misses at the cost of (potentially) increasing the erroneous inference rate. 
""" if not os.path.exists(keyword_path): raise ValueError( "Couldn't find Porcupine's keyword file at '%s'." % keyword_path) if not callable(wake_word_callback): raise ValueError("Invalid wake word callback.") if not os.path.exists(context_path): raise ValueError("Couldn't find Rhino's context file at '%s'." % context_path) if not callable(inference_callback): raise ValueError("Invalid inference callback.") if porcupine_library_path is not None and not os.path.exists( porcupine_library_path): raise ValueError( "Couldn't find Porcupine's dynamic library at '%s'." % porcupine_library_path) if porcupine_model_path is not None and not os.path.exists( porcupine_model_path): raise ValueError("Couldn't find Porcupine's model file at '%s'." % porcupine_model_path) if not 0 <= porcupine_sensitivity <= 1: raise ValueError( "Porcupine's sensitivity should be within [0, 1].") if rhino_library_path is not None and not os.path.exists( rhino_library_path): raise ValueError("Couldn't find Rhino's dynamic library at '%s'." % rhino_library_path) if rhino_model_path is not None and not os.path.exists( rhino_model_path): raise ValueError("Couldn't find Rhino's model file at '%s'." % rhino_model_path) if not 0 <= rhino_sensitivity <= 1: raise ValueError("Rhino's sensitivity should be within [0, 1]") self._porcupine = pvporcupine.create( library_path=porcupine_library_path, model_path=porcupine_model_path, keyword_paths=[keyword_path], sensitivities=[porcupine_sensitivity]) self._wake_word_callback = wake_word_callback self._is_wake_word_detected = False self._rhino = pvrhino.create(library_path=rhino_library_path, model_path=rhino_model_path, context_path=context_path, sensitivity=rhino_sensitivity) self._inference_callback = inference_callback assert self._porcupine.sample_rate == self._rhino.sample_rate self._sample_rate = self._porcupine.sample_rate assert self._porcupine.frame_length == self._rhino.frame_length self._frame_length = self._porcupine.frame_length
import pvrhino import struct import pyaudio import os pa = None handle = None audio_stream = None try: pa = pyaudio.PyAudio() inpath = "/home/pi/python/picovoice/chess_en_raspberry-pi_2021-08-02-utc_v1_6_0.rhn" handle = pvrhino.create(inpath) audio_stream = pa.open(rate=handle.sample_rate, channels=1, format=pyaudio.paInt16, input=True, frames_per_buffer=handle.frame_length) def get_next_audio_frame(): pcm = audio_stream.read(handle.frame_length) pcm = struct.unpack_from("h" * handle.frame_length, pcm) return pcm letterDic = { "ALPHA": 'A', "BRAVO": 'B', "CHARLIE": 'C', "DELTA": 'D',