def main():
    """Run the wake-word listener on live audio, printing one mark per prediction.

    On each activation, plays the notify sound and (when --save-dir is given)
    writes the most recent audio window to a numbered .wav file.
    """
    cli_args = create_parser(usage).parse_args()
    print('chunk_size: ', cli_args.chunk_size)

    def on_activation():
        # Notify, then optionally persist the rolling audio window to disk
        activate_notify()
        if cli_args.save_dir:
            global chunk_num
            wav_name = join(cli_args.save_dir,
                            cli_args.save_prefix + session_id + '.' + str(chunk_num) + '.wav')
            save_audio(wav_name, rolling_audio)
            print()
            print('Saved to ' + wav_name + '.')
            chunk_num += 1

    def on_prediction(conf):
        # One character per network output: '!' above 0.8, '.' otherwise
        print('!' if conf > 0.8 else '.', end='', flush=True)

    wake_listener = Listener(cli_args.model, cli_args.chunk_size)
    rolling_audio = np.zeros(wake_listener.pr.buffer_samples, dtype=float)

    def get_prediction(chunk):
        # Keep a rolling window of the newest audio for on_activation to save
        nonlocal rolling_audio
        new_audio = buffer_to_audio(chunk)
        rolling_audio = np.concatenate((rolling_audio[len(new_audio):], new_audio))
        return wake_listener.update(chunk)

    engine = ListenerEngine(wake_listener, cli_args.chunk_size)
    engine.get_prediction = get_prediction
    runner = PreciseRunner(engine, cli_args.threshold,
                           on_activation=on_activation,
                           on_prediction=on_prediction)
    runner.start()
    Event().wait()  # Wait forever
def main():
    """Read raw audio from stdin and print one confidence per chunk to stdout.

    TensorFlow log noise is suppressed, and sys.stdout is redirected to stderr
    while running so that only confidence values reach the real stdout. The
    original stdout is restored on exit (fix: the original never restored it,
    unlike the sibling `run` method which does so in a finally block).
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    stdout = sys.stdout
    sys.stdout = sys.stderr  # Library chatter goes to stderr; predictions use `stdout`
    try:
        parser = create_parser(usage)
        parser.add_argument('-v', '--version', action='version', version=__version__)
        parser.add_argument(
            'chunk_size', type=int, nargs='?', default=-1,
            # Fix: implicit concatenation was missing a space ("prediction.Higher")
            help='Number of bytes to read before making a prediction. '
                 'Higher values are less computationally expensive')
        parser.usage = parser.format_usage().strip().replace('usage: ', '') + ' < audio.wav'
        args = parser.parse_args()
        if sys.stdin.isatty():
            parser.error('Please pipe audio via stdin using < audio.wav')

        listener = Listener(args.model_name, args.chunk_size)
        try:
            while True:
                conf = listener.update(sys.stdin.buffer)
                stdout.buffer.write((str(conf) + '\n').encode('ascii'))
                stdout.buffer.flush()
        except (EOFError, KeyboardInterrupt):
            pass  # Normal termination: input exhausted or user interrupt
    finally:
        sys.stdout = stdout
def __init__(self, connection, address):
    """Wire a per-client wake-word pipeline onto an accepted socket connection."""
    self.connection = connection  # type: socket.socket
    self.address = address
    self.stream = ReadWriteStream()
    engine = ListenerEngine(Listener(MODEL_NAME, CHUNK_SIZE), CHUNK_SIZE)
    self.runner = PreciseRunner(
        engine, 1, stream=self.stream,
        on_activation=self.on_activation,
        on_prediction=self.on_prediction
    )
    self.runner.start()
def __init__(self, args):
    """Build listener, engine, and runner from parsed CLI args."""
    super().__init__(args)
    self.listener = Listener(args.model, args.chunk_size)
    self.audio_buffer = np.zeros(self.listener.pr.buffer_samples, dtype=float)
    self.engine = ListenerEngine(self.listener, args.chunk_size)
    self.engine.get_prediction = self.get_prediction
    self.runner = PreciseRunner(
        self.engine, args.trigger_level,
        sensitivity=args.sensitivity,
        on_activation=self.on_activation,
        on_prediction=self.on_prediction
    )
    # Random 9-digit id groups the saved activation clips from this run
    self.session_id = '%09d' % randint(0, 999999999)
    self.chunk_num = 0
def run(self):
    """Stream stdin audio through the listener, writing confidences to the real stdout."""
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Silence TensorFlow info/warning logs
    real_stdout = sys.stdout
    sys.stdout = sys.stderr  # Route stray prints away from the prediction stream
    listener = Listener(self.args.model_name, self.args.chunk_size)
    try:
        while True:
            confidence = listener.update(sys.stdin.buffer)
            real_stdout.buffer.write((str(confidence) + '\n').encode('ascii'))
            real_stdout.buffer.flush()
    except (EOFError, KeyboardInterrupt):
        pass  # Normal termination: input exhausted or user interrupt
    finally:
        sys.stdout = real_stdout
def main():
    """Run the wake-word listener, rendering confidences as a terminal bar.

    With --light-mode, prints a compact '!'/'.' stream instead of the bar.
    Activations optionally save the recent audio window to --save-dir.
    """
    cli_args = create_parser(usage).parse_args()
    sensitivity = 0.5

    def on_activation():
        activate_notify()
        if cli_args.save_dir:
            global chunk_num
            wav_name = join(
                cli_args.save_dir,
                cli_args.save_prefix + session_id + '.' + str(chunk_num) + '.wav')
            save_audio(wav_name, rolling_audio)
            print()
            print('Saved to ' + wav_name + '.')
            chunk_num += 1

    def on_prediction(conf):
        if cli_args.light_mode:
            print('!' if conf > 0.7 else '.', end='', flush=True)
        else:
            # Bar of X's scaled to the terminal; dim ('x') past the sensitivity cutoff
            max_width = 80
            width = min(get_terminal_size()[0], max_width)
            filled = int(round(conf * width))
            bar = 'X' * filled + '-' * (width - filled)
            cutoff = round((1.0 - sensitivity) * width)
            print(bar[:cutoff] + bar[cutoff:].replace('X', 'x'))

    wake_listener = Listener(cli_args.model, cli_args.chunk_size)
    rolling_audio = np.zeros(wake_listener.pr.buffer_samples, dtype=float)

    def get_prediction(chunk):
        # Maintain a rolling window of the most recent audio for saving
        nonlocal rolling_audio
        new_audio = buffer_to_audio(chunk)
        rolling_audio = np.concatenate((rolling_audio[len(new_audio):], new_audio))
        return wake_listener.update(chunk)

    engine = ListenerEngine(wake_listener, cli_args.chunk_size)
    engine.get_prediction = get_prediction
    runner = PreciseRunner(engine, cli_args.threshold,
                           sensitivity=sensitivity,
                           on_activation=on_activation,
                           on_prediction=on_prediction)
    runner.start()
    Event().wait()  # Wait forever
def __init__(self):
    """Prepare generated-data folders and a Keras-backed listener for training."""
    super().__init__(create_parser(usage))
    generated_dirs = (
        join(self.args.folder, 'not-wake-word', 'generated'),
        join(self.args.folder, 'test', 'not-wake-word', 'generated'),
    )
    for folder in generated_dirs:
        makedirs(folder, exist_ok=True)
    self.trained_fns = load_trained_fns(self.args.model)
    self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
    if not isfile(self.args.model):
        # No model file on disk yet: create and save a fresh one
        params = ModelParams(skip_acc=self.args.no_validation,
                             extra_metrics=self.args.extra_metrics)
        create_model(self.args.model, params).save(self.args.model)
    self.listener = Listener(self.args.model, self.args.chunk_size,
                             runner_cls=KerasRunner)
def __init__(self, args):
    """Load tag data, checkpointing, and a Keras-backed listener from CLI args."""
    self.args = args
    self.trained_fns = load_trained_fns(args.model)
    pr = inject_params(args.model)
    self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

    from keras.callbacks import ModelCheckpoint
    self.checkpoint = ModelCheckpoint(args.model, monitor=args.metric_monitor,
                                      save_best_only=args.save_best)

    data = TrainData.from_tags(args.tags_file, args.tags_folder)
    self.tags_data = data.load(True, not args.no_validation)

    if not isfile(args.model):
        # Bootstrap a fresh model file when none exists yet
        create_model(args.model, args.no_validation, args.extra_metrics).save(args.model)
    self.listener = Listener(args.model, args.chunk_size, runner_cls=KerasRunner)
def main():
    """ROS wake-word node: run the Precise listener and react to activations.

    On activation: play an attention sound, probe internet connectivity, and
    either stop speech perception (online) or apologize via TTS (offline).
    """
    rospy.init_node('wake_word_detection_node')
    print("node is up")

    def on_activation():
        print("activate")
        playsound(res_path + "/attention.wav")
        try:
            # Fix: bound the connectivity probe — the original requests.get had
            # no timeout and could hang this activation callback indefinitely
            requests.get('http://www.google.com', timeout=5)
            try:
                response = stop_speech_perception_service(True)
                print(response)
            except rospy.ServiceException as exc:
                print("Service did not process request: " + str(exc))
        except requests.exceptions.RequestException:
            # RequestException covers ConnectionError plus the new Timeout case
            print("no internet")
            speak_pub.publish(
                "I'm sorry. I am not connected to the internet now and cannot answer"
            )
            set_emotion_service(state="SADNESS", timeout=5500, restore=True)

    def on_prediction(conf):
        print(".")

    listener = Listener(res_path + "/stevie_10_06.pb", chunk_size)
    audio_buffer = np.zeros(listener.pr.buffer_samples, dtype=float)

    def get_prediction(chunk):
        # Maintain a rolling window of recent audio alongside each prediction
        nonlocal audio_buffer
        audio = buffer_to_audio(chunk)
        audio_buffer = np.concatenate((audio_buffer[len(audio):], audio))
        return listener.update(chunk)

    engine = ListenerEngine(listener, chunk_size)
    engine.get_prediction = get_prediction
    runner = PreciseRunner(engine, trigger_level=3, sensitivity=0.5,
                           on_activation=on_activation,
                           on_prediction=on_prediction)
    runner.start()
    print("spinning")
    rospy.spin()
def main():
    """Detect the 'ok sunshine' wake word, announcing each detection once."""
    cli_args = create_parser(usage).parse_args()

    def on_activation():
        activate_notify()
        if cli_args.save_dir:
            global chunk_num
            wav_name = join(
                cli_args.save_dir,
                cli_args.save_prefix + session_id + '.' + str(chunk_num) + '.wav')
            save_audio(wav_name, rolling_audio)
            print()
            print('Saved to ' + wav_name + '.')
            chunk_num += 1

    def on_prediction(conf):
        # Announce only on the falling edge: after confidence rose above 0.5
        # and has now dropped back below it
        global detecting_flag
        # print('!' if conf > 0.5 else '.', end='', flush=True)
        if conf > 0.5:
            detecting_flag = True
        if conf < 0.5 and detecting_flag:
            print(colored("Yeah! I'm Here.", 'green'))
            detecting_flag = False

    sunshine_model = './ok-sunshine.net'
    wake_listener = Listener(sunshine_model, cli_args.chunk_size)
    rolling_audio = np.zeros(wake_listener.pr.buffer_samples, dtype=float)

    def get_prediction(chunk):
        # Keep a rolling window of the newest audio for on_activation to save
        nonlocal rolling_audio
        new_audio = buffer_to_audio(chunk)
        rolling_audio = np.concatenate((rolling_audio[len(new_audio):], new_audio))
        return wake_listener.update(chunk)

    engine = ListenerEngine(wake_listener, cli_args.chunk_size)
    engine.get_prediction = get_prediction
    runner = PreciseRunner(engine, cli_args.threshold,
                           on_activation=on_activation,
                           on_prediction=on_prediction)
    runner.start()
    Event().wait()  # Wait forever
def __init__(self, args):
    """Create generated-data folders and an incrementally trainable Keras listener."""
    super().__init__(args)
    for folder in (join(self.args.folder, 'not-wake-word', 'generated'),
                   join(self.args.folder, 'test', 'not-wake-word', 'generated')):
        makedirs(folder, exist_ok=True)
    self.trained_fns = load_trained_fns(self.args.model)
    self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

    params = ModelParams(
        skip_acc=self.args.no_validation,
        extra_metrics=self.args.extra_metrics,
        loss_bias=1.0 - self.args.sensitivity
    )
    model = create_model(self.args.model, params)
    # Hand the freshly built Keras model to the listener's runner so training
    # and inference share the same in-memory model
    self.listener = Listener(self.args.model, self.args.chunk_size,
                             runner_cls=KerasRunner)
    self.listener.runner = KerasRunner(self.args.model)
    self.listener.runner.model = model
    self.samples_since_train = 0
def __init__(self):
    """Set up model, callbacks, and cycling train-file iterators for generation training."""
    parser = create_parser(usage)
    self.args = args = TrainData.parse_args(parser)
    self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
    self.vals_buffer = np.zeros(pr.buffer_samples, dtype=float)

    params = ModelParams(skip_acc=args.no_validation,
                         extra_metrics=args.extra_metrics,
                         loss_bias=1.0 - args.sensitivity)
    self.model = create_model(args.model, params)
    # Listener is used only for its preprocessing; no runner is attached
    self.listener = Listener('', args.chunk_size, runner_cls=lambda x: None)

    from keras.callbacks import ModelCheckpoint, TensorBoard
    checkpoint = ModelCheckpoint(args.model, monitor=args.metric_monitor,
                                 save_best_only=args.save_best)

    # Persist the epoch counter next to the model so training resumes correctly
    epoch_fiti = Fitipy(splitext(args.model)[0] + '.epoch')
    self.epoch = epoch_fiti.read().read(0, int)

    def on_epoch_end(_epoch, _logs):
        self.epoch += 1
        epoch_fiti.write().write(self.epoch, str)

    self.model_base = splitext(self.args.model)[0]
    self.callbacks = [
        checkpoint,
        TensorBoard(log_dir=self.model_base + '.logs'),
        LambdaCallback(on_epoch_end=on_epoch_end)
    ]

    self.data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    pos_files, neg_files = self.data.train_files
    self.neg_files_it = iter(cycle(neg_files))
    self.pos_files_it = iter(cycle(pos_files))
def main():
    """Run the wake-word listener, rendering confidences as a terminal bar.

    With --basic-mode, prints a compact '!'/'.' stream instead of the bar.
    """
    cli_args = create_parser(usage).parse_args()

    def on_activation():
        activate_notify()
        # TODO: trigger VMSE

    def on_prediction(conf):
        if cli_args.basic_mode:
            print('!' if conf > 0.7 else '.', end='', flush=True)
        else:
            # Bar of X's scaled to the terminal; dim ('x') past the sensitivity cutoff
            max_width = 80
            width = min(get_terminal_size()[0], max_width)
            filled = int(round(conf * width))
            bar = 'X' * filled + '-' * (width - filled)
            cutoff = round((1.0 - cli_args.sensitivity) * width)
            print(bar[:cutoff] + bar[cutoff:].replace('X', 'x'))

    wake_listener = Listener(cli_args.model, cli_args.chunk_size)
    rolling_audio = np.zeros(wake_listener.pr.buffer_samples, dtype=float)

    def get_prediction(chunk):
        # Maintain a rolling window of recent audio alongside each prediction
        nonlocal rolling_audio
        new_audio = buffer_to_audio(chunk)
        rolling_audio = np.concatenate((rolling_audio[len(new_audio):], new_audio))
        return wake_listener.update(chunk)

    engine = ListenerEngine(wake_listener, cli_args.chunk_size)
    engine.get_prediction = get_prediction
    runner = PreciseRunner(engine, cli_args.trigger_level,
                           sensitivity=cli_args.sensitivity,
                           on_activation=on_activation,
                           on_prediction=on_prediction)
    runner.start()
    Event().wait()  # Wait forever
def main():
    """Control a Precise wake-word runner via text commands on stdin.

    Commands: "start normal", "start safe", "start normal visual",
    "start safe visual", "stop", "running". Prefers ./model.pb over
    ./model.net when starting.
    """
    args = create_parser(usage).parse_args()
    os.chdir(os.getcwd() + "/Precise")

    # Fix: `runner` was unbound until a recognized "start" command arrived, so
    # "stop"/"running" (or an unrecognized "start ..." line) raised NameError
    runner = None
    # Bound here so get_prediction's `nonlocal` always resolves, even when a
    # non-visual mode never allocates the buffer
    audio_buffer = None

    def on_activation_normal():
        print("activated\n", flush=True)

    def on_activation_safe():
        # Save the rolling audio window of this activation to disk
        global chunk_num
        nm = join(
            args.save_dir,
            args.save_prefix + session_id + '.' + str(chunk_num) + '.wav')
        save_audio(nm, audio_buffer)
        print()
        print('Saved to ' + nm + '.')
        chunk_num += 1

    def on_prediction(conf):
        # Render the confidence as a text bar, dimmed past the sensitivity cutoff
        max_width = 80
        width = min(get_terminal_size()[0], max_width)
        units = int(round(conf * width))
        bar = 'X' * units + '-' * (width - units)
        cutoff = round((1.0 - args.sensitivity) * width)
        print(bar[:cutoff] + bar[cutoff:].replace('X', 'x') + "\n", flush=True)

    def get_prediction(chunk):
        # Maintain a rolling window of recent audio alongside each prediction
        nonlocal audio_buffer
        audio = buffer_to_audio(chunk)
        audio_buffer = np.concatenate((audio_buffer[len(audio):], audio))
        return listener.update(chunk)

    while True:
        line = sys.stdin.readline()
        if not line:
            break  # Fix: EOF (empty read) used to spin this loop forever
        line = line.rstrip()
        if "start" in line:
            if os.path.isfile("./model.pb"):
                listener = Listener("model.pb", args.chunk_size)
            else:
                listener = Listener("model.net", args.chunk_size)
            engine = ListenerEngine(listener, args.chunk_size)
            if "visual" not in line:
                if line == "start normal":
                    runner = PreciseRunner(engine, args.trigger_level,
                                           sensitivity=args.sensitivity,
                                           on_activation=on_activation_normal)
                elif line == "start safe":
                    runner = PreciseRunner(engine, args.trigger_level,
                                           sensitivity=args.sensitivity,
                                           on_activation=on_activation_safe)
            else:
                if line == "start normal visual":
                    audio_buffer = np.zeros(listener.pr.buffer_samples, dtype=float)
                    engine.get_prediction = get_prediction
                    runner = PreciseRunner(engine, args.trigger_level,
                                           sensitivity=args.sensitivity,
                                           on_activation=on_activation_normal,
                                           on_prediction=on_prediction)
                elif line == "start safe visual":
                    audio_buffer = np.zeros(listener.pr.buffer_samples, dtype=float)
                    engine.get_prediction = get_prediction
                    runner = PreciseRunner(engine, args.trigger_level,
                                           sensitivity=args.sensitivity,
                                           on_activation=on_activation_safe,
                                           on_prediction=on_prediction)
            if runner is not None:
                runner.start()
        elif line == "stop":
            if runner is not None:
                runner.stop()
        elif line == "running":
            print(runner.running if runner is not None else False)
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils import to_categorical
import numpy as np
import os
import librosa
from keras import backend as K
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import graph_io
from sonopy import mfcc_spec, chop_array, power_spec, filterbanks, safe_log, dct
from precise.network_runner import Listener

# Load a fixed 16 kHz test clip and feed it to the listener in uneven chunks,
# printing one confidence value per chunk.
samples, sample_rate = librosa.load("/tmp/fixed.wav", sr=16000)
listener = Listener("qqq.pb", -1)

chunk_sizes = (4096, 4096, 4096, 3532, 4096, 4096, 4096, 4096, 4096, 4096, 4096,
               4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096)

remaining = samples[:]
for size in chunk_sizes:
    head = remaining[:size]
    print(listener.update(head))
    remaining = remaining[size:]

# This code produces 21 outputs
# bakerloo produces 75. Hmm.