def detectKeywords(libpath):
    """Report hotword detections from the audio stream until interrupted.

    Registers the Firefox, Sheila, Marvin and Alexa hotword models, then
    loops forever: reads a frame from the audio stream, converts it to
    features and runs the detector on them. Every non-zero prediction
    plays the ding sound; predictions matching a registered keyword id
    additionally print a timestamped message.

    Ctrl+C prints "Terminating" and exits the process with status 0.

    Args:
        libpath: Library path handed to ``FeatureExtractor`` and
            ``AudioRecognition``.
    """
    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)
    detector = AudioRecognition(libpath)

    extactor_gain = 1.0

    # Add one or more keyword models
    keyword_models = (
        ("Firefox", '../../models/Hotword/firefox_v1.4.5.premium'),
        ("Sheila", '../../models/Hotword/sheila_v1.4.5.premium'),
        ("Marvin", '../../models/Hotword/marvin_v1.4.5.premium'),
        ("Alexa", '../../models/Hotword/alexa_v1.4.5.premium'),
    )
    keyword_labels = {}
    for label, model_path in keyword_models:
        keyword_id = detector.addModel(model_path, 0.6)
        # setdefault keeps the first registered label should two models
        # ever report the same id, matching an if/elif chain in this order.
        keyword_labels.setdefault(keyword_id, label)

    bufsize = detector.getInputDataSize()

    print("Audio Recognition Version: " + detector.getVersionString())

    audio_stream.start()
    try:
        while True:
            frame = audio_stream.read(bufsize * 2, bufsize * 2)
            if not frame:
                # Nothing available yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            features = extractor.signalToMel(frame, extactor_gain)
            prediction = detector.runDetection(features)
            if prediction == 0:
                continue

            now = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
            label = keyword_labels.get(prediction)
            if label is not None:
                print(label + " detected:" + now)

            os.system(play_command + " ../resources/ding.wav")
    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Stop the stream on every exit path, not only on Ctrl+C.
        audio_stream.stop()
#!/usr/bin/env python
# Minimal TCP server: accepts a single client on port 9999 and feeds every
# received chunk to the hotword detector, printing a line per detection.
import socket
import sys
import os

sys.path.append('../../python/src')

from libnyumaya import AudioRecognition
from auto_platform import default_libpath

serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    serversocket.bind(('', 9999))
    serversocket.listen(5)  # become a server socket, maximum 5 connections

    detector = AudioRecognition(default_libpath)
    keywordIdFirefox = detector.addModel(
        '../../models/Hotword/firefox_v2.0.23.premium', 0.8)

    connection, address = serversocket.accept()
    try:
        while True:
            # NOTE(review): recv(640) may return fewer than 640 bytes on a
            # TCP stream; confirm whether runDetection needs full-size chunks.
            buf = connection.recv(640)
            if not buf:
                # An empty read means the peer closed the connection; stop
                # here instead of busy-looping on recv() forever.
                break

            prediction = detector.runDetection(buf)
            if prediction != 0:
                print("Keyword detected")
    finally:
        connection.close()
finally:
    serversocket.close()
def recordActivations(libpath):
    """Save the audio preceding every hotword activation as a WAV file.

    One ``AudioRecognition`` detector is created per entry of the
    module-level ``models`` sequence (``(path, sensitivity, name)``
    tuples). For each model a rolling buffer holds the most recent
    ``recordBefore`` seconds of raw audio; when that model's detector
    fires, the buffer is written to ``saveDirectory`` via ``save_wav``.

    Ctrl+C prints "Terminating" and exits the process with status 0.

    Args:
        libpath: Library path handed to ``FeatureExtractor`` and
            ``AudioRecognition``.

    Raises:
        ValueError: If ``models`` is empty, so no detector exists to query
            the input size from.
    """
    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)
    detectors = {}
    framebuffersFront = {}

    extactor_gain = 1.0
    recordBefore = 2.5  # Seconds before the activation
    # FIXME: Recording audio *after* the activation (0.5 seconds was the
    # intended amount) is not implemented; only the audio before it is saved.

    activationCount = 0

    ensure_dir(saveDirectory)

    rbFrontSize = int(recordBefore * bytesPerSample * framesPerSecond)

    detector = None
    for mpath, msens, mname in models:
        detector = AudioRecognition(libpath)
        detector.addModel(mpath, msens)
        detectors[mname] = detector
        framebuffersFront[mname] = bytearray()

    if detector is None:
        raise ValueError("No models configured for recording activations")

    # The input size is taken from the last detector that was created.
    bufsize = detector.getInputDataSize()

    print("Audio Recognition Version: " + detector.getVersionString())

    audio_stream.start()
    try:
        while True:
            frame = audio_stream.read(bufsize * 2, bufsize * 2)
            if not frame:
                # Nothing available yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            # Fill audio before the activation, keeping only the newest
            # rbFrontSize bytes per model.
            for mname in detectors:
                history = framebuffersFront[mname] + frame
                if len(history) > rbFrontSize:
                    history = history[-rbFrontSize:]
                framebuffersFront[mname] = history

            features = extractor.signalToMel(frame, extactor_gain)

            for mname, model_detector in detectors.items():
                prediction = model_detector.runDetection(features)
                if prediction == 0:
                    continue

                savePath = saveDirectory + "/activation_{}_{}_{}.wav".format(
                    mname, activationCount, time.time_ns())
                save_wav(framebuffersFront[mname], savePath)
                print("Saving Activation to {}".format(savePath))
                activationCount += 1
    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Stop the stream on every exit path, not only on Ctrl+C.
        audio_stream.stop()
def detectKeywords(libpath):
    """Wait for the Alexa wake word, then transcribe speech with Vosk.

    The loop alternates between two modes:

    * Wakeword mode: frames are converted to features and passed to the
      hotword detector. A non-zero prediction plays the ding sound and
      switches to command mode.
    * Command mode: 4000-byte chunks are fed to a ``KaldiRecognizer``
      restricted to a digit vocabulary. Once ``AcceptWaveform`` reports a
      result, it is printed and the loop returns to wakeword mode.

    Ctrl+C prints "Terminating" and exits the process with status 0.

    Args:
        libpath: Library path handed to ``FeatureExtractor`` and
            ``AudioRecognition``.
    """
    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)
    detector = AudioRecognition(libpath)

    framerate = 16000
    model = Model("model")

    # Let's define a custom dictionary
    rec = KaldiRecognizer(
        model, framerate,
        '["oh one two three four five six seven eight nine zero", "[unk]"]')

    extactor_gain = 1.0

    # Add one or more keyword models
    keywordIdAlexa = detector.addModel(
        '../../models/Hotword/alexa_v3.0.35.premium', 0.85)

    bufsize = detector.getInputDataSize()

    print("Audio Recognition Version: " + detector.getVersionString())

    command_started = False

    audio_stream.start()
    try:
        while True:
            if not command_started:
                # Wakeword loop
                frame = audio_stream.read(bufsize * 2, bufsize * 2)
                if not frame:
                    time.sleep(0.01)
                    continue

                features = extractor.signalToMel(frame, extactor_gain)
                prediction = detector.runDetection(features)
                if prediction == 0:
                    continue

                now = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
                if prediction == keywordIdAlexa:
                    print("Alexa detected:" + now)

                os.system(play_command + " ../resources/ding.wav")
                command_started = True
            else:
                # vosk loop
                frame = audio_stream.read(4000, 4000)
                if not frame:
                    time.sleep(0.01)
                    continue

                if rec.AcceptWaveform(bytes(frame)):
                    print(rec.Result())
                    command_started = False
                    print(rec.FinalResult())
    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Stop the stream on every exit path, not only on Ctrl+C.
        audio_stream.stop()
class MultiDetector():
    """Recognise multi-word commands by chaining single keyword detections.

    A command is a comma-separated sequence of keyword names with a
    callback. Detected keywords are appended to ``history``; once the
    history equals a registered command, its callback runs and the history
    is cleared. Only keywords that can continue the current history are
    kept active in the underlying detector. If no further keyword is
    accepted within ``timeout`` calls to ``check_timeout`` (made once per
    ``run_frame``), the history is reset.
    """

    def __init__(self, libpath, timeout=40):
        """Create the underlying detector.

        Args:
            libpath: Library path handed to ``AudioRecognition``.
            timeout: Value ``countdown`` is reset to after each accepted
                keyword; it is decremented once per ``check_timeout`` call.
        """
        self.current_index = 0
        self.number_detectors = 0
        self.countdown = 0
        self.timeout = timeout
        self.commands = []
        self.libpath = libpath
        self.history = []
        self.last_frames = []
        self.max_last_frames = 5
        self.detector = AudioRecognition(self.libpath)
        self.keyword_map = {}
        # Initialised here so run_frame()/check_timeout() work even when no
        # command or callback has been registered yet.
        self.possible_words = []
        self.history_callback = None
        self.detected_callback = None

    # Given the current history which words are we checking for?
    def get_possible_words(self, history):
        """Return the distinct next words that could follow ``history``.

        ``command_starts_with_history`` supplies, per command, the index of
        the next expected word; a negative index means the command does not
        apply. An index past the end of a command is reported on stdout and
        yields an empty list.
        """
        words = []
        for cmd in self.commands:
            sequence = cmd['command']
            index = command_starts_with_history(sequence, history)
            if index >= len(sequence):
                print("Error index out of range:")
                print("Command: " + str(cmd))
                print("Index: " + str(index))
                print("History: " + str(history))
                return []
            if index >= 0:
                word = sequence[index]
                if word not in words:
                    words.append(word)
        return words

    def UpdateLastFrames(self, frame):
        """Remember ``frame``, dropping the oldest one beyond the limit."""
        self.last_frames.append(frame)
        if len(self.last_frames) > self.max_last_frames:
            self.last_frames.pop(0)

    def add_command(self, command, callback_function):
        """Register a comma-separated keyword sequence and its callback.

        A command without any non-blank word is rejected with a message on
        stdout and is not registered.
        """
        words = command.split(",")
        # str.split() never returns an empty list, so look for real content.
        if not any(word.strip() for word in words):
            print("No valid command")
            return
        self.commands.append({
            'command': words,
            'function': callback_function
        })
        self.update_word_and_detector()

    def add_word(self, graph, name, sensitivity):
        """Load a keyword model and map the id it is given to ``name``."""
        keywordId = self.detector.addModel(graph, sensitivity)
        self.keyword_map[keywordId] = name

    def add_reset_history_callback(self, callback_function):
        """Set the callback invoked after the history times out."""
        self.history_callback = callback_function

    def add_detected_callback(self, callback_function):
        """Set the callback invoked after every accepted keyword."""
        self.detected_callback = callback_function

    def GetInputDataSize(self):
        """Return the input data size reported by the detector."""
        return self.detector.getInputDataSize()

    def maby_execute(self):
        """Run the callback of every command equal to the current history.

        Returns:
            True if at least one command was executed, in which case the
            history, countdown and remembered frames have been cleared.
        """
        executed_cmd = False
        for cmd in self.commands:
            if cmd['command'] == self.history:
                cmd['function']()
                self.history = []
                self.countdown = 0
                self.last_frames = []
                executed_cmd = True
        return executed_cmd

    def check_timeout(self):
        """Advance the countdown and reset the history when it expires."""
        if self.countdown <= 0:
            return
        self.countdown -= 1
        if self.countdown == 0:
            self.history = []
            self.update_word_and_detector()
            if self.history_callback:
                self.history_callback()

    def update_word_and_detector(self):
        """Recompute the possible next words and (de)activate keywords."""
        self.possible_words = self.get_possible_words(self.history)
        # Set possible words active
        # Set impossible words inactive
        print(self.possible_words)
        for keyword_id, name in self.keyword_map.items():
            self.detector.setActive(keyword_id, name in self.possible_words)

    def run_frame(self, frame, update_frames=True):
        """Run detection on one frame and advance the command state.

        Args:
            frame: Input passed unchanged to the detector's ``runDetection``.
            update_frames: When true, the frame is also remembered for
                later replay by ``run_last_frames``.
        """
        if update_frames:
            self.UpdateLastFrames(frame)

        self.check_timeout()
        prediction = self.detector.runDetection(frame)
        if not prediction:
            return

        label = self.keyword_map[prediction]
        if label not in self.possible_words:
            return

        print("Got prediction: " + label)
        self.countdown = self.timeout
        self.history.append(label)
        result = self.maby_execute()
        self.update_word_and_detector()
        if self.detected_callback:
            self.detected_callback()

        # Command hasn't finished so run last frames in next detectors
        if not result:
            self.run_last_frames()

    def run_last_frames(self):
        """Replay the remembered frames through ``run_frame``.

        NOTE(review): a replayed frame that is accepted as a keyword
        triggers another replay from inside ``run_frame``.
        """
        # Iterate over a snapshot: maby_execute() may rebind last_frames.
        for frame in list(self.last_frames):
            self.run_frame(frame, update_frames=False)

    def print_commands(self):
        """Print every registered command."""
        for cmd in self.commands:
            print(cmd)