def speech_to_intent(self):
        """
        Listen on an audio input stream until Rhino finalizes one inference.

        Creates a Rhino engine from ``self.rhino_library_path``, ``self.rhino_model_path`` and
        ``self.rhino_context_path``, opens a PyAudio input stream at Rhino's sample rate and feeds
        it to Rhino one frame at a time. As soon as Rhino reports that an inference is finalized,
        the inference is handed to ``self.print_intent``.

        The Rhino engine, the audio stream, the PyAudio instance and the console spinner are
        always released before returning, whatever the reason for leaving the loop.

        :return: ``1`` once an inference has been handled; ``None`` if listening was interrupted
        with Ctrl+C.
        """

        # Everything that needs explicit cleanup starts as None so the `finally` block can tell
        # what was actually created.
        rhino = None
        pa = None
        stream = None
        spinner = None

        try:

            rhino = pvrhino.create(library_path=self.rhino_library_path,
                                   model_path=self.rhino_model_path,
                                   context_path=self.rhino_context_path)

            # Initialize PyAudio and an audio stream
            pa = pyaudio.PyAudio()
            stream = pa.open(
                format=FORMAT,
                channels=CHANNELS,
                rate=rhino.sample_rate,
                input=True,  # Specify as input
                frames_per_buffer=rhino.frame_length)

            # Console spinner shown while listening
            spinner = Halo(spinner='line', color='magenta')
            print(rhino.context_info)

            # Loop invariants: one frame is `frame_length` C shorts ("h" in struct notation).
            frame_length = rhino.frame_length
            frame_format = "h" * frame_length

            spinner.start()

            # Rhino detection
            while True:
                pcm = stream.read(frame_length)
                unpacked = struct.unpack_from(frame_format, pcm)

                if rhino.process(unpacked):
                    # Stop the spinner before printing so its output does not mix with the result.
                    spinner.stop()
                    spinner = None
                    self.print_intent(rhino.get_inference())
                    return 1

        except KeyboardInterrupt:
            if spinner is not None:
                spinner.stop()
                spinner = None
            print("Stopping speech-to-intent detection...")

        finally:
            # Still set only if we are leaving because of an unexpected exception.
            if spinner is not None:
                spinner.stop()

            if rhino is not None:
                rhino.delete()

            if stream is not None:
                stream.close()

            if pa is not None:
                pa.terminate()
Exemplo n.º 2
0
    def __init__(self, speaker):
        """
        Set up the Rhino engine, the audio input stream, the speech recognizer and the
        wake-word thread.

        The constructor prints a few diagnostic values, calibrates the recognizer against ambient
        noise using ``sr.Microphone`` and finally starts a thread running ``self.wakeword_run``.

        :param speaker: stored as ``self.speaker``; used to create the beep() sound.
        """
        self.speaker = speaker

        # Rhino engine and an input stream matching its audio format
        engine = pvrhino.create(context_path='./models/Irma_Rules_2.rhn',
                                sensitivity=0.25)
        self.handle = engine
        print(f"sample_rate {engine.sample_rate} frame_len: {engine.frame_length}")

        audio = pyaudio.PyAudio()
        self.pa = audio
        stream_config = {
            'rate': engine.sample_rate,
            'channels': 1,
            'format': pyaudio.paInt16,
            'input': True,
            'frames_per_buffer': engine.frame_length,
        }
        self.audio_stream = audio.open(**stream_config)

        # Speech recognizer: report the current values first, then override them
        recognizer = sr.Recognizer()
        self.recognizer = recognizer
        print(f'NONSPEAKING {recognizer.non_speaking_duration}')
        print(f'PAUSE THRESHOLD {recognizer.pause_threshold}')
        recognizer.pause_threshold = 0.5  # default 0.8
        recognizer.operation_timeout = 2
        recognizer.energy_threshold = 3000

        with sr.Microphone() as microphone:
            recognizer.adjust_for_ambient_noise(microphone)

        # State flags
        self.ON = self.running = True
        self.wakeword_flag = False

        # Blank item used as the initial voice item
        blank_item = parsepy.item()
        self.voice_item = blank_item
        blank_item.upc = blank_item.imageURL = blank_item.name = ' '
        self.command = 'None'

        # Background wake-word thread
        worker = threading.Thread(name="wakeword_thread",
                                  target=self.wakeword_run)
        self.wakeword_thread = worker
        worker.start()
        print('WakeWord Initialized')
Exemplo n.º 3
0
    def run(self):
        """
        Creates an input audio stream, instantiates an instance of Rhino object, and infers the intent from spoken
        commands.

        Frames are read from a ``PvRecorder`` in an endless loop and passed to Rhino; every finalized
        inference is printed. When ``self._output_path`` is set, each captured frame is also written
        to that path as 16-bit mono WAV at Rhino's sample rate. The loop ends on Ctrl+C.

        :raises pvrhino.RhinoError: any Rhino error (including its more specific subclasses handled
        below) is re-raised after a diagnostic message has been printed.
        """

        # Start as None so the `finally` block only releases what was actually created.
        rhino = None
        recorder = None
        wav_file = None

        try:
            rhino = pvrhino.create(access_key=self._access_key,
                                   library_path=self._library_path,
                                   model_path=self._model_path,
                                   context_path=self._context_path,
                                   require_endpoint=self._require_endpoint)

            recorder = PvRecorder(device_index=self._audio_device_index,
                                  frame_length=rhino.frame_length)
            recorder.start()

            if self._output_path is not None:
                wav_file = wave.open(self._output_path, "w")
                # Mono, 2 bytes per sample, at the rate Rhino actually consumes. The frame count
                # (512) is only a placeholder: the wave module corrects it when frames are written
                # and the file is closed.
                wav_file.setparams(
                    (1, 2, rhino.sample_rate, 512, "NONE", "NONE"))

            print(rhino.context_info)
            print()

            print(f"Using device: {recorder.selected_device}")
            print("Listening...")
            print()

            while True:
                pcm = recorder.read()

                if wav_file is not None:
                    # Pack the samples as C shorts ("h") before writing them out.
                    wav_file.writeframes(struct.pack("h" * len(pcm), *pcm))

                is_finalized = rhino.process(pcm)
                if is_finalized:
                    inference = rhino.get_inference()
                    if inference.is_understood:
                        print('{')
                        print("  intent : '%s'" % inference.intent)
                        print('  slots : {')
                        for slot, value in inference.slots.items():
                            print("    %s : '%s'" % (slot, value))
                        print('  }')
                        print('}\n')
                    else:
                        print("Didn't understand the command.\n")
        except pvrhino.RhinoInvalidArgumentError:
            print("One or more arguments provided to Rhino is invalid: {\n" +
                  f"\t{self._access_key=}\n" + f"\t{self._library_path=}\n" +
                  f"\t{self._model_path=}\n" + f"\t{self._context_path=}\n" +
                  f"\t{self._require_endpoint=}\n" + "}")
            print(
                f"If all other arguments seem valid, ensure that '{self._access_key}' is a valid AccessKey"
            )
            raise
        except pvrhino.RhinoActivationError:
            print("AccessKey activation error")
            raise
        except pvrhino.RhinoActivationLimitError:
            print(
                f"AccessKey '{self._access_key}' has reached it's temporary device limit"
            )
            raise
        except pvrhino.RhinoActivationRefusedError:
            print(f"AccessKey '{self._access_key}' refused")
            raise
        except pvrhino.RhinoActivationThrottledError:
            print(f"AccessKey '{self._access_key}' has been throttled")
            raise
        except pvrhino.RhinoError:
            print("Failed to initialize Rhino")
            raise
        except KeyboardInterrupt:
            print('Stopping ...')

        finally:
            if recorder is not None:
                recorder.delete()

            if rhino is not None:
                rhino.delete()

            if wav_file is not None:
                wav_file.close()
Exemplo n.º 4
0
    def run(self):
        """
        Creates an input audio stream, instantiates an instance of Rhino object, and infers the intent from spoken
        commands.

        Frames are read from a PyAudio input stream in an endless loop and passed to Rhino; every
        finalized inference is printed. The loop ends on Ctrl+C. When ``self._output_path`` is set,
        each captured frame is appended to ``self._recorded_frames`` and, on exit, the recording is
        written to that path as 16-bit PCM.
        """

        # Start as None so the `finally` block only releases what was actually created.
        rhino = None
        pa = None
        audio_stream = None
        # Captured right after Rhino is created so the recording can still be saved once the
        # engine has been deleted; stays None if Rhino could not be created.
        sample_rate = None

        try:
            rhino = pvrhino.create(library_path=self._library_path,
                                   model_path=self._model_path,
                                   context_path=self._context_path)
            sample_rate = rhino.sample_rate
            frame_length = rhino.frame_length
            # One frame is `frame_length` C shorts ("h" in struct notation).
            frame_format = "h" * frame_length

            pa = pyaudio.PyAudio()

            audio_stream = pa.open(rate=sample_rate,
                                   channels=1,
                                   format=pyaudio.paInt16,
                                   input=True,
                                   frames_per_buffer=frame_length,
                                   input_device_index=self._audio_device_index)

            print(rhino.context_info)
            print()

            while True:
                pcm = audio_stream.read(frame_length)
                pcm = struct.unpack_from(frame_format, pcm)

                if self._output_path is not None:
                    self._recorded_frames.append(pcm)

                is_finalized = rhino.process(pcm)
                if is_finalized:
                    inference = rhino.get_inference()
                    if inference.is_understood:
                        print('{')
                        print("  intent : '%s'" % inference.intent)
                        print('  slots : {')
                        for slot, value in inference.slots.items():
                            print("    %s : '%s'" % (slot, value))
                        print('  }')
                        print('}\n')
                    else:
                        print("Didn't understand the command.\n")

        except KeyboardInterrupt:
            print('Stopping ...')

        finally:
            if audio_stream is not None:
                audio_stream.close()

            if pa is not None:
                pa.terminate()

            if rhino is not None:
                rhino.delete()

            # Save the recording last, using the cached sample rate rather than touching the
            # (possibly missing, already deleted) Rhino object.
            if (self._output_path is not None and sample_rate is not None
                    and len(self._recorded_frames) > 0):
                recorded_audio = np.concatenate(self._recorded_frames,
                                                axis=0).astype(np.int16)
                soundfile.write(os.path.expanduser(self._output_path),
                                recorded_audio,
                                samplerate=sample_rate,
                                subtype='PCM_16')
Exemplo n.º 5
0
def main():
    """
    Run Rhino speech-to-intent inference on an audio file given on the command line.

    Parses the command-line arguments, creates a Rhino engine, reads the audio file as 16-bit
    samples and feeds it to Rhino frame by frame. The first finalized inference is printed and
    processing stops. Trailing samples that do not fill a whole frame are ignored.

    :raises ValueError: if the audio file's sample rate differs from the one Rhino requires.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--input_audio_path',
                        help='Absolute path to input audio file.',
                        required=True)

    parser.add_argument('--context_path',
                        help="Absolute path to context file.",
                        required=True)

    parser.add_argument('--library_path',
                        help='Absolute path to dynamic library.',
                        default=pvrhino.LIBRARY_PATH)

    parser.add_argument(
        '--model_path',
        help='Absolute path to the file containing model parameters.',
        default=pvrhino.MODEL_PATH)

    # type=float: without it a value given on the command line would reach Rhino as a string.
    parser.add_argument(
        '--sensitivity',
        help=
        "Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value results in "
        +
        "fewer misses at the cost of (potentially) increasing the erroneous inference rate.",
        type=float,
        default=0.5)

    args = parser.parse_args()

    rhino = pvrhino.create(library_path=args.library_path,
                           model_path=args.model_path,
                           context_path=args.context_path,
                           sensitivity=args.sensitivity)

    # From here on the engine must be released even if reading or validating the audio fails.
    try:
        audio, sample_rate = soundfile.read(args.input_audio_path,
                                            dtype='int16')
        if audio.ndim == 2:
            print(
                "Picovoice processes single-channel audio but stereo file is provided. Processing left channel only."
            )
            # soundfile returns multi-channel audio as (frames, channels): column 0 is the
            # first (left) channel.
            audio = audio[:, 0]
        if sample_rate != rhino.sample_rate:
            raise ValueError(
                "Audio file should have a sample rate of %d. got %d" %
                (rhino.sample_rate, sample_rate))

        frame_length = rhino.frame_length
        num_frames = len(audio) // frame_length
        for i in range(num_frames):
            frame = audio[i * frame_length:(i + 1) * frame_length]
            is_finalized = rhino.process(frame)
            if is_finalized:
                inference = rhino.get_inference()
                if inference.is_understood:
                    print('{')
                    print("  intent : '%s'" % inference.intent)
                    print('  slots : {')
                    for slot, value in inference.slots.items():
                        print("    %s : '%s'" % (slot, value))
                    print('  }')
                    print('}')
                else:
                    print("Didn't understand the command.")
                break
    finally:
        rhino.delete()
Exemplo n.º 6
0
def main():
    """
    Run Rhino speech-to-intent inference on an audio file given on the command line.

    Parses the command-line arguments, creates a Rhino engine with the supplied AccessKey, loads
    the audio through ``read_file`` and feeds it to Rhino frame by frame. The first finalized
    inference is printed and processing stops. Trailing samples that do not fill a whole frame
    are ignored.

    :raises pvrhino.RhinoError: any error raised while creating Rhino (including its more
    specific subclasses handled below) is re-raised after a diagnostic message has been printed.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--input_audio_path',
                        help='Absolute path to input audio file.',
                        required=True)

    parser.add_argument(
        '--access_key',
        help=
        'AccessKey obtained from Picovoice Console (https://picovoice.ai/console/)',
        required=True)

    parser.add_argument('--context_path',
                        help="Absolute path to context file.",
                        required=True)

    parser.add_argument('--library_path',
                        help='Absolute path to dynamic library.',
                        default=pvrhino.LIBRARY_PATH)

    parser.add_argument(
        '--model_path',
        help='Absolute path to the file containing model parameters.',
        default=pvrhino.MODEL_PATH)

    parser.add_argument(
        '--sensitivity',
        help=
        "Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value results in "
        +
        "fewer misses at the cost of (potentially) increasing the erroneous inference rate.",
        type=float,
        default=0.5)

    parser.add_argument(
        '--require_endpoint',
        help=
        "If set to `False`, Rhino does not require an endpoint (chunk of silence) before finishing inference.",
        default='True',
        choices=['True', 'False'])

    args = parser.parse_args()

    # The flag arrives as the text 'True' or 'False'; anything but 'false' means True.
    require_endpoint = args.require_endpoint.lower() != 'false'

    try:
        rhino = pvrhino.create(access_key=args.access_key,
                               library_path=args.library_path,
                               model_path=args.model_path,
                               context_path=args.context_path,
                               sensitivity=args.sensitivity,
                               require_endpoint=require_endpoint)
    except pvrhino.RhinoInvalidArgumentError:
        print(f"One or more arguments provided to Rhino is invalid: {args}")
        print(
            f"If all other arguments seem valid, ensure that '{args.access_key}' is a valid AccessKey"
        )
        raise
    except pvrhino.RhinoActivationError:
        print("AccessKey activation error")
        raise
    except pvrhino.RhinoActivationLimitError:
        print(
            f"AccessKey '{args.access_key}' has reached it's temporary device limit"
        )
        raise
    except pvrhino.RhinoActivationRefusedError:
        print(f"AccessKey '{args.access_key}' refused")
        raise
    except pvrhino.RhinoActivationThrottledError:
        print(f"AccessKey '{args.access_key}' has been throttled")
        raise
    except pvrhino.RhinoError:
        print("Failed to initialize Rhino")
        raise

    # From here on the engine must be released even if loading or processing the audio fails.
    try:
        audio = read_file(args.input_audio_path, rhino.sample_rate)

        frame_length = rhino.frame_length
        num_frames = len(audio) // frame_length
        for i in range(num_frames):
            frame = audio[i * frame_length:(i + 1) * frame_length]
            is_finalized = rhino.process(frame)
            if is_finalized:
                inference = rhino.get_inference()
                if inference.is_understood:
                    print('{')
                    print("  intent : '%s'" % inference.intent)
                    print('  slots : {')
                    for slot, value in inference.slots.items():
                        print("    %s : '%s'" % (slot, value))
                    print('  }')
                    print('}')
                else:
                    print("Didn't understand the command.")
                break
    finally:
        rhino.delete()
Exemplo n.º 7
0
    def __init__(self,
                 keyword_path,
                 wake_word_callback,
                 context_path,
                 inference_callback,
                 porcupine_library_path=None,
                 porcupine_model_path=None,
                 porcupine_sensitivity=0.5,
                 rhino_library_path=None,
                 rhino_model_path=None,
                 rhino_sensitivity=0.5):
        """
        Constructor.

        Validates the arguments, then creates a Porcupine (wake word) engine and a Rhino
        (speech-to-intent) engine. If Rhino cannot be created, the already created Porcupine
        engine is released before the error propagates.

        :param keyword_path: Absolute path to Porcupine's keyword model file.
        :param wake_word_callback: User-defined callback invoked upon detection of the wake phrase. The callback accepts
        no input arguments.
        :param context_path: Absolute path to file containing context parameters. A context represents the set of
        expressions (spoken commands), intents, and intent arguments (slots) within a domain of interest.
        :param inference_callback: User-defined callback invoked upon completion of intent inference. The callback
        accepts a single input argument of type `Inference` that exposes the following immutable fields:
        (1) `is_understood` is a flag indicating if the spoken command is understood.
        (2) `intent` is the inferred intent from the voice command. If the command is not understood then it's set to
        `None`.
        (3) `slots` is a dictionary mapping slot keys to their respective values. If the command is not understood then
        it's set to an empty dictionary.
        :param porcupine_library_path: Absolute path to Porcupine's dynamic library.
        :param porcupine_model_path: Absolute path to the file containing Porcupine's model parameters.
        :param porcupine_sensitivity: Wake word detection sensitivity. It should be a number within [0, 1]. A higher
        sensitivity results in fewer misses at the cost of increasing the false alarm rate.
        :param rhino_library_path: Absolute path to Rhino's dynamic library.
        :param rhino_model_path: Absolute path to the file containing Rhino's model parameters.
        :param rhino_sensitivity: Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value
        results in fewer misses at the cost of (potentially) increasing the erroneous inference rate.
        :raises ValueError: if a required file does not exist, a callback is not callable, or a sensitivity is outside
        [0, 1].
        """

        # --- Argument validation (nothing is allocated until all checks pass) ---

        if not os.path.exists(keyword_path):
            raise ValueError(
                "Couldn't find Porcupine's keyword file at '%s'." %
                keyword_path)

        if not callable(wake_word_callback):
            raise ValueError("Invalid wake word callback.")

        if not os.path.exists(context_path):
            raise ValueError("Couldn't find Rhino's context file at '%s'." %
                             context_path)

        if not callable(inference_callback):
            raise ValueError("Invalid inference callback.")

        if porcupine_library_path is not None and not os.path.exists(
                porcupine_library_path):
            raise ValueError(
                "Couldn't find Porcupine's dynamic library at '%s'." %
                porcupine_library_path)

        if porcupine_model_path is not None and not os.path.exists(
                porcupine_model_path):
            raise ValueError("Couldn't find Porcupine's model file at '%s'." %
                             porcupine_model_path)

        if not 0 <= porcupine_sensitivity <= 1:
            raise ValueError(
                "Porcupine's sensitivity should be within [0, 1].")

        if rhino_library_path is not None and not os.path.exists(
                rhino_library_path):
            raise ValueError("Couldn't find Rhino's dynamic library at '%s'." %
                             rhino_library_path)

        if rhino_model_path is not None and not os.path.exists(
                rhino_model_path):
            raise ValueError("Couldn't find Rhino's model file at '%s'." %
                             rhino_model_path)

        if not 0 <= rhino_sensitivity <= 1:
            raise ValueError("Rhino's sensitivity should be within [0, 1]")

        # --- Engine creation ---

        self._porcupine = pvporcupine.create(
            library_path=porcupine_library_path,
            model_path=porcupine_model_path,
            keyword_paths=[keyword_path],
            sensitivities=[porcupine_sensitivity])

        self._wake_word_callback = wake_word_callback

        self._is_wake_word_detected = False

        try:
            self._rhino = pvrhino.create(library_path=rhino_library_path,
                                         model_path=rhino_model_path,
                                         context_path=context_path,
                                         sensitivity=rhino_sensitivity)
        except Exception:
            # The constructor is about to fail, so the caller will never get an object through
            # which the Porcupine engine could be released; release it here.
            self._porcupine.delete()
            raise

        self._inference_callback = inference_callback

        # Both engines are fed from the same audio frames, so their audio formats must agree.
        assert self._porcupine.sample_rate == self._rhino.sample_rate
        self._sample_rate = self._porcupine.sample_rate

        assert self._porcupine.frame_length == self._rhino.frame_length
        self._frame_length = self._porcupine.frame_length
Exemplo n.º 8
0
import pvrhino
import struct
import pyaudio
import os

pa = None
handle = None
audio_stream = None

try:
    pa = pyaudio.PyAudio()
    inpath = "/home/pi/python/picovoice/chess_en_raspberry-pi_2021-08-02-utc_v1_6_0.rhn"

    handle = pvrhino.create(inpath)

    audio_stream = pa.open(rate=handle.sample_rate,
                           channels=1,
                           format=pyaudio.paInt16,
                           input=True,
                           frames_per_buffer=handle.frame_length)

    def get_next_audio_frame():
        pcm = audio_stream.read(handle.frame_length)
        pcm = struct.unpack_from("h" * handle.frame_length, pcm)
        return pcm

    letterDic = {
        "ALPHA": 'A',
        "BRAVO": 'B',
        "CHARLIE": 'C',
        "DELTA": 'D',