def __init__(self, keyword_path, context_path, porcupine_library_path=None, porcupine_model_path=None, porcupine_sensitivity=0.5, rhino_library_path=None, rhino_model_path=None, rhino_sensitivity=0.5, output_path=None):
    """Build the Picovoice engine for this demo thread.

    :param keyword_path: path to a Porcupine wake-word (.ppn) file.
    :param context_path: path to a Rhino context (.rhn) file.
    :param porcupine_library_path: optional path to Porcupine's dynamic library.
    :param porcupine_model_path: optional path to Porcupine's model file.
    :param porcupine_sensitivity: wake-word sensitivity in [0, 1].
    :param rhino_library_path: optional path to Rhino's dynamic library.
    :param rhino_model_path: optional path to Rhino's model file.
    :param rhino_sensitivity: inference sensitivity in [0, 1].
    :param output_path: when given, captured audio is buffered for saving.
    """
    super(PicovoiceDemo, self).__init__()

    # Collect engine arguments in one place, then construct the engine.
    engine_args = dict(
        keyword_path=keyword_path,
        wake_word_callback=self._wake_word_callback,
        context_path=context_path,
        inference_callback=self._inference_callback,
        porcupine_library_path=porcupine_library_path,
        porcupine_model_path=porcupine_model_path,
        porcupine_sensitivity=porcupine_sensitivity,
        rhino_library_path=rhino_library_path,
        rhino_model_path=rhino_model_path,
        rhino_sensitivity=rhino_sensitivity,
    )
    self._picovoice = Picovoice(**engine_args)

    self.output_path = output_path
    if output_path is not None:
        # Buffer of captured PCM frames, written out when the demo stops.
        self._recorded_frames = []
def setUp(self):
    """Reset detection state and build a fresh Picovoice engine for each test."""
    # Flags inspected by the callbacks and assertions.
    self._is_wake_word_detected = False
    self._inference = None

    self._pv = Picovoice(
        keyword_path=pvporcupine.KEYWORD_PATHS['picovoice'],
        wake_word_callback=self._wake_word_callback,
        context_path=self._context_path(),
        inference_callback=self._inference_callback)
def __init__(self, keyword_path, context_path, porcupine_sensitivity=0.75, rhino_sensitivity=0.25):
    """Construct the demo: create the Picovoice engine and cache its context.

    :param keyword_path: path to a Porcupine wake-word (.ppn) file.
    :param context_path: path to a Rhino context (.rhn) file.
    :param porcupine_sensitivity: wake-word sensitivity in [0, 1].
    :param rhino_sensitivity: inference sensitivity in [0, 1].
    """
    super(PicovoiceDemo, self).__init__()

    self._picovoice = Picovoice(
        keyword_path=keyword_path,
        wake_word_callback=self._wake_word_callback,
        context_path=context_path,
        # Forward each inference through a small closure to the instance handler.
        inference_callback=lambda inference: self._inference_callback(inference),
        porcupine_sensitivity=porcupine_sensitivity,
        rhino_sensitivity=rhino_sensitivity)

    self._context = self._picovoice.context_info
    # Current LED color name; starts blue.
    self._color = 'blue'
def run(self):
    """Worker loop: create the engine and audio stream, then feed PCM frames
    to Picovoice until ``self._stop`` is set. Cleans everything up on exit
    and marks ``self._is_stopped`` so the owner can join safely."""
    engine = None
    recorder = None
    stream = None
    try:
        engine = Picovoice(
            keyword_path=self._keyword_path(),
            porcupine_sensitivity=0.75,
            wake_word_callback=self._wake_word_callback,
            context_path=self._context_path(),
            inference_callback=self._inference_callback)
        print(engine.context_info)

        recorder = pyaudio.PyAudio()
        stream = recorder.open(
            rate=engine.sample_rate,
            channels=1,
            format=pyaudio.paInt16,
            input=True,
            frames_per_buffer=engine.frame_length)

        # Signal readiness only after audio capture is actually live.
        self._is_ready = True

        while True:
            if self._stop:
                break
            raw = stream.read(engine.frame_length)
            # Decode little-endian int16 samples into a tuple of ints.
            samples = struct.unpack_from("h" * engine.frame_length, raw)
            engine.process(samples)
    finally:
        # Release resources in reverse order of acquisition.
        if stream is not None:
            stream.close()
        if recorder is not None:
            recorder.terminate()
        if engine is not None:
            engine.delete()
        self._is_stopped = True
class PicovoiceDemo(Thread):
    """Voice-controlled LED demo: listens on the default microphone and drives
    an LED ring (via the external ``driver`` module) from Rhino inferences."""

    def __init__(self, keyword_path, context_path, porcupine_sensitivity=0.75, rhino_sensitivity=0.25):
        """Create the Picovoice engine.

        :param keyword_path: path to a Porcupine wake-word (.ppn) file.
        :param context_path: path to a Rhino context (.rhn) file.
        :param porcupine_sensitivity: wake-word sensitivity in [0, 1].
        :param rhino_sensitivity: inference sensitivity in [0, 1].
        """
        super(PicovoiceDemo, self).__init__()

        def inference_callback(inference):
            return self._inference_callback(inference)

        self._picovoice = Picovoice(
            keyword_path=keyword_path,
            wake_word_callback=self._wake_word_callback,
            context_path=context_path,
            inference_callback=inference_callback,
            porcupine_sensitivity=porcupine_sensitivity,
            rhino_sensitivity=rhino_sensitivity)

        self._context = self._picovoice.context_info
        # Current LED color name; starts blue.
        self._color = 'blue'

    @staticmethod
    def _set_color(color):
        """Set all 12 pixels of the LED ring to the given (r, g, b) tuple."""
        for i in range(12):
            driver.set_pixel(i, color[0], color[1], color[2])
        driver.show()

    @staticmethod
    def _wake_word_callback():
        print('[wake word]\n')

    def _inference_callback(self, inference):
        """Print the inference and act on understood intents.

        :raises NotImplementedError: for understood intents other than
            'turnLights' and 'changeColor'.
        """
        print('{')
        # BUGFIX: the conditional expression must be parenthesized; without the
        # parentheses `%` binds tighter, so a not-understood inference printed
        # the bare string 'false' instead of the formatted line.
        print(" is_understood : '%s'," % ('true' if inference.is_understood else 'false'))
        if inference.is_understood:
            print(" intent : '%s'," % inference.intent)
            if len(inference.slots) > 0:
                print(' slots : {')
                for slot, value in inference.slots.items():
                    print(" '%s' : '%s'," % (slot, value))
                print(' }')
        print('}\n')

        if inference.is_understood:
            if inference.intent == 'turnLights':
                if inference.slots['state'] == 'off':
                    self._set_color((0, 0, 0))
                else:
                    self._set_color(COLORS_RGB[self._color])
            elif inference.intent == 'changeColor':
                self._color = inference.slots['color']
                self._set_color(COLORS_RGB[self._color])
            else:
                raise NotImplementedError()

    def run(self):
        """Capture audio and feed frames to Picovoice until Ctrl-C."""
        pa = None
        audio_stream = None
        try:
            pa = pyaudio.PyAudio()
            audio_stream = pa.open(
                rate=self._picovoice.sample_rate,
                channels=1,
                format=pyaudio.paInt16,
                input=True,
                frames_per_buffer=self._picovoice.frame_length)

            print(self._context)
            print('[Listening ...]')

            while True:
                pcm = audio_stream.read(self._picovoice.frame_length)
                pcm = struct.unpack_from("h" * self._picovoice.frame_length, pcm)
                self._picovoice.process(pcm)
        except KeyboardInterrupt:
            sys.stdout.write('\b' * 2)
            print('Stopping ...')
        finally:
            if audio_stream is not None:
                audio_stream.close()
            if pa is not None:
                pa.terminate()
            self._picovoice.delete()
# French "Lights" demo wiring. NOTE(review): `abs_path`, `keyword_path`,
# `model_path` and `wake_word_callback` are defined outside this chunk —
# presumably earlier in the script; verify before reuse.
context_path = abs_path + "/context/Lights_fr_windows_2021-05-07-utc_v1_6_0.rhn"
context_model_path = abs_path + "/context/rhino_params_fr.pv"


def inference_callback(inference):
    """Log every inference; for the 'power' intent, print each slot/value pair."""
    print("GOT inference")
    print(inference.is_understood)
    if inference.is_understood:
        control = inference.intent
        print(control)
        if control == "power":
            # NOTE(review): `.items()` yields (slot_name, slot_value); the names
            # `state`/`location` are therefore (name, value), not two slots.
            for state, location in inference.slots.items():
                # print(control[location] + ":"+control[state])
                print(" %s : '%s'" % (state, location))


# End-to-end engine: French Porcupine + French Rhino models, both at
# default-ish 0.5 sensitivity.
handle = Picovoice(keyword_path=keyword_path,
                   porcupine_model_path=model_path,
                   porcupine_sensitivity=0.5,
                   wake_word_callback=wake_word_callback,
                   context_path=context_path,
                   rhino_sensitivity=0.5,
                   rhino_model_path=context_model_path,
                   inference_callback=inference_callback)


def get_next_audio_frame():
    # Stub: audio acquisition is not implemented in this chunk.
    pass
def main():
    """CLI entry point: run Picovoice over a WAV/FLAC file and print results.

    Parses paths and sensitivities from the command line, streams the file
    through the engine frame by frame, and prints wake-word detections and
    inferences via the callbacks.

    :raises ValueError: if the input file's sample rate does not match the
        engine's required rate.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--input_audio_path', help='Absolute path to input audio file.', required=True)

    parser.add_argument('--keyword_path', help="Absolute path to a Porcupine keyword file.", required=True)

    parser.add_argument('--context_path', help="Absolute path to a Rhino context file.", required=True)

    parser.add_argument('--porcupine_library_path', help="Absolute path to Porcupine's dynamic library.", default=None)

    parser.add_argument('--porcupine_model_path', help="Absolute path to Porcupine's model file.", default=None)

    parser.add_argument(
        '--porcupine_sensitivity',
        help="Sensitivity for detecting wake word. Each value should be a number within [0, 1]. A higher sensitivity " +
             "results in fewer misses at the cost of increasing the false alarm rate.",
        # BUGFIX: without type=float a user-supplied value reached the engine
        # as a string; only the default was numeric.
        type=float,
        default=0.5)

    parser.add_argument('--rhino_library_path', help="Absolute path to Rhino's dynamic library.", default=None)

    parser.add_argument('--rhino_model_path', help="Absolute path to Rhino's model file.", default=None)

    parser.add_argument(
        '--rhino_sensitivity',
        # BUGFIX: the two concatenated fragments produced "fewermisses".
        help="Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value results in " +
             "fewer misses at the cost of (potentially) increasing the erroneous inference rate.",
        type=float,
        default=0.5)

    args = parser.parse_args()

    def wake_word_callback():
        print('[wake word]\n')

    def inference_callback(inference):
        # Pretty-print the inference in a brace-delimited, human-readable form.
        if inference.is_understood:
            print('{')
            print(" intent : '%s'" % inference.intent)
            print(' slots : {')
            for slot, value in inference.slots.items():
                print(" %s : '%s'" % (slot, value))
            print(' }')
            print('}\n')
        else:
            print("Didn't understand the command.\n")

    pv = Picovoice(
        keyword_path=args.keyword_path,
        wake_word_callback=wake_word_callback,
        context_path=args.context_path,
        inference_callback=inference_callback,
        porcupine_library_path=args.porcupine_library_path,
        porcupine_model_path=args.porcupine_model_path,
        porcupine_sensitivity=args.porcupine_sensitivity,
        rhino_library_path=args.rhino_library_path,
        rhino_model_path=args.rhino_model_path,
        rhino_sensitivity=args.rhino_sensitivity)

    audio, sample_rate = soundfile.read(args.input_audio_path, dtype='int16')
    if audio.ndim == 2:
        print("Picovoice processes single-channel audio but stereo file is provided. Processing left channel only.")
        # BUGFIX: soundfile returns (frames, channels); `audio[0, :]` was the
        # first *frame* (one sample per channel), not the left channel.
        audio = audio[:, 0]
    if sample_rate != pv.sample_rate:
        raise ValueError("Input audio file should have a sample rate of %d. got %d" % (pv.sample_rate, sample_rate))

    # Feed complete frames only; any trailing partial frame is dropped.
    for i in range(len(audio) // pv.frame_length):
        frame = audio[i * pv.frame_length:(i + 1) * pv.frame_length]
        pv.process(frame)

    pv.delete()
# NOTE(review): this chunk appears to start inside a text-to-speech helper
# (presumably `def say(text):`) whose definition lies outside this view;
# `text` is not defined in the visible code — confirm against the full file.
# `hash` shadows the builtin of the same name.
hash = hashlib.md5(text.encode()).hexdigest()
# Cache synthesized speech by content hash so repeated phrases replay instantly.
file = 'speech-cache/{}.wav'.format(hash)
cmd = 'play {}'.format(file)
if not os.path.isfile(file):
    # Synthesize with pico2wave first, then play.
    # NOTE(review): `text` is interpolated into a shell command unquoted —
    # shell-injection risk if the text can contain quotes/metacharacters.
    cmd = 'pico2wave -w {} "{}" && {}'.format(file, text, cmd)
os.system(cmd)


def joke():
    """Fetch a random joke as plain text, print it, and speak it."""
    j = requests.get('https://v2.jokeapi.dev/joke/Any?format=txt').text
    print(j)
    say(j)


# Engine wiring; `keyword_path`, `context_path` and the callbacks are defined
# outside this chunk — presumably earlier in the script.
handle = Picovoice(keyword_path=keyword_path,
                   wake_word_callback=wake_word_callback,
                   context_path=context_path,
                   inference_callback=inference_callback)

pa = pyaudio.PyAudio()
# NOTE(review): rate=16000 and frames_per_buffer=512 match typical Picovoice
# engine requirements, and input_device_index=6 is hard-coded for a specific
# machine — verify against `handle.sample_rate`/`handle.frame_length` and the
# local device list.
audio_stream = pa.open(rate=16000, channels=1, format=pyaudio.paInt16, input=True,
                       frames_per_buffer=512, input_device_index=6)

while True:
    # Drop overflowed buffers silently rather than raising mid-loop.
    pcm = audio_stream.read(512, exception_on_overflow=False)
    pcm = struct.unpack_from("h" * 512, pcm)
class PicovoiceTestCase(unittest.TestCase):
    """End-to-end test: run a bundled audio sample through Picovoice and check
    both the wake-word detection and the resulting Rhino inference."""

    @staticmethod
    def _context_path():
        """Return the platform-specific coffee-maker context (.rhn) path.

        Linux non-x86_64 boards are identified by the 'Hardware' line of
        /proc/cpuinfo ('BCM' -> Raspberry Pi, 'AM33' -> BeagleBone).

        :raises NotImplementedError: on unsupported CPUs or platforms.
        """
        if platform.system() == 'Darwin':
            return os.path.join(
                os.path.dirname(__file__),
                '../../resources/rhino/resources/contexts/mac/coffee_maker_mac.rhn'
            )
        elif platform.system() == 'Linux':
            if platform.machine() == 'x86_64':
                return os.path.join(
                    os.path.dirname(__file__),
                    '../../resources/rhino/resources/contexts/linux/coffee_maker_linux.rhn'
                )
            else:
                # Identify ARM boards by the 'Hardware' line in /proc/cpuinfo.
                # NOTE(review): raises IndexError if no 'Hardware' line exists.
                cpu_info = subprocess.check_output(['cat', '/proc/cpuinfo']).decode()
                hardware_info = [
                    x for x in cpu_info.split('\n') if 'Hardware' in x
                ][0]
                if 'BCM' in hardware_info:
                    return os.path.join(
                        os.path.dirname(__file__),
                        '../../resources/rhino/resources/contexts/raspberry-pi/coffee_maker_raspberry-pi.rhn'
                    )
                elif 'AM33' in hardware_info:
                    return os.path.join(
                        os.path.dirname(__file__),
                        '../../resources/rhino/resources/contexts/beaglebone/coffee_maker_beaglebone.rhn'
                    )
                else:
                    raise NotImplementedError('Unsupported CPU:\n%s' % cpu_info)
        elif platform.system() == 'Windows':
            return os.path.join(
                os.path.dirname(__file__),
                '../../resources/rhino/resources/contexts/windows/coffee_maker_windows.rhn'
            )
        else:
            raise NotImplementedError('Unsupported platform')

    def _wake_word_callback(self):
        # Invoked by the engine when the wake word is spotted.
        self._is_wake_word_detected = True

    def _inference_callback(self, inference):
        # Invoked by the engine with the follow-on command's inference.
        self._inference = inference

    def setUp(self):
        """Create a fresh engine and reset detection state before each test."""
        self._pv = Picovoice(
            keyword_path=pvporcupine.KEYWORD_PATHS['picovoice'],
            wake_word_callback=self._wake_word_callback,
            context_path=self._context_path(),
            inference_callback=self._inference_callback)

        self._is_wake_word_detected = False
        self._inference = None

    def tearDown(self):
        # Release native engine resources after each test.
        self._pv.delete()

    def test_process(self):
        """Feed the bundled 'picovoice ... coffee' sample frame by frame and
        verify the wake word fires and the order is understood as expected."""
        audio, sample_rate = \
            soundfile.read(
                os.path.join(os.path.dirname(__file__), '../../resources/audio_samples/picovoice-coffee.wav'),
                dtype='int16')

        for i in range(len(audio) // self._pv.frame_length):
            frame = audio[i * self._pv.frame_length:(i + 1) * self._pv.frame_length]
            self._pv.process(frame)

        self.assertTrue(self._is_wake_word_detected)
        self.assertEqual(self._inference.intent, 'orderBeverage')
        self.assertEqual(self._inference.slots, dict(size='large', beverage='coffee'))

    def test_process_again(self):
        # Re-running on the same engine instance must still work (engine resets).
        self.test_process()
class PicovoiceDemo(Thread):
    """Microphone demo thread: streams audio into Picovoice, prints wake-word
    detections and inferences, and can optionally save the captured audio."""

    def __init__(self, keyword_path, context_path, porcupine_library_path=None, porcupine_model_path=None, porcupine_sensitivity=0.5, rhino_library_path=None, rhino_model_path=None, rhino_sensitivity=0.5, output_path=None):
        """Build the engine and set up optional audio capture.

        :param keyword_path: path to a Porcupine wake-word (.ppn) file.
        :param context_path: path to a Rhino context (.rhn) file.
        :param porcupine_library_path: optional Porcupine dynamic-library path.
        :param porcupine_model_path: optional Porcupine model-file path.
        :param porcupine_sensitivity: wake-word sensitivity in [0, 1].
        :param rhino_library_path: optional Rhino dynamic-library path.
        :param rhino_model_path: optional Rhino model-file path.
        :param rhino_sensitivity: inference sensitivity in [0, 1].
        :param output_path: when given, captured audio is saved there on stop.
        """
        super(PicovoiceDemo, self).__init__()

        self._picovoice = Picovoice(
            keyword_path=keyword_path,
            wake_word_callback=self._wake_word_callback,
            context_path=context_path,
            inference_callback=self._inference_callback,
            porcupine_library_path=porcupine_library_path,
            porcupine_model_path=porcupine_model_path,
            porcupine_sensitivity=porcupine_sensitivity,
            rhino_library_path=rhino_library_path,
            rhino_model_path=rhino_model_path,
            rhino_sensitivity=rhino_sensitivity)

        self.output_path = output_path
        if output_path is not None:
            # Frames accumulated here are concatenated and written on shutdown.
            self._recorded_frames = []

    @staticmethod
    def _wake_word_callback():
        """Announce a wake-word detection on stdout."""
        print('[wake word]\n')

    @staticmethod
    def _inference_callback(inference):
        """Pretty-print an inference result on stdout."""
        if not inference.is_understood:
            print("Didn't understand the command.\n")
            return
        print('{')
        print(" intent : '%s'" % inference.intent)
        print(' slots : {')
        for slot, value in inference.slots.items():
            print(" %s : '%s'" % (slot, value))
        print(' }')
        print('}\n')

    def run(self):
        """Capture audio frames and feed them to the engine until Ctrl-C.

        On shutdown the stream and PyAudio handle are released, any buffered
        audio is written to ``output_path`` as 16-bit PCM, and the engine is
        deleted.
        """
        recorder = None
        stream = None
        try:
            recorder = pyaudio.PyAudio()
            stream = recorder.open(
                rate=self._picovoice.sample_rate,
                channels=1,
                format=pyaudio.paInt16,
                input=True,
                frames_per_buffer=self._picovoice.frame_length)

            print('[Listening ...]')

            # Hoist loop invariants out of the capture loop.
            frame_length = self._picovoice.frame_length
            unpack_format = "h" * frame_length
            capture = self.output_path is not None

            while True:
                frame = struct.unpack_from(unpack_format, stream.read(frame_length))
                if capture:
                    self._recorded_frames.append(frame)
                self._picovoice.process(frame)
        except KeyboardInterrupt:
            sys.stdout.write('\b' * 2)
            print('Stopping ...')
        finally:
            if stream is not None:
                stream.close()
            if recorder is not None:
                recorder.terminate()

            if self.output_path is not None and len(self._recorded_frames) > 0:
                recorded_audio = np.concatenate(self._recorded_frames, axis=0).astype(np.int16)
                soundfile.write(self.output_path, recorded_audio, samplerate=self._picovoice.sample_rate, subtype='PCM_16')

            self._picovoice.delete()

    @classmethod
    def show_audio_devices(cls):
        """Print index, name, sample rate and input-channel count of every audio device."""
        wanted = ('index', 'name', 'defaultSampleRate', 'maxInputChannels')

        pa = pyaudio.PyAudio()
        for device_index in range(pa.get_device_count()):
            info = pa.get_device_info_by_index(device_index)
            print(', '.join("'%s': '%s'" % (field, str(info[field])) for field in wanted))
        pa.terminate()