def __init__(self, uri):
    """Create a worker websocket bound to ``uri`` with its own decoder pipeline.

    The decoder pipeline is constructed with this object's ``_on_word`` and
    ``_on_eos`` methods as callbacks, the websocket base class is initialised
    with a heartbeat frequency of 10, and the per-request bookkeeping
    attributes are reset to their initial values.
    """
    self.uri = uri

    # Hand the pipeline our callbacks before the websocket base is set up.
    word_callback, eos_callback = self._on_word, self._on_eos
    self.decoder_pipeline = DecoderPipeline(word_callback, eos_callback)
    WebSocketClient.__init__(self, url=uri, heartbeat_freq=10)

    # Connection / request life-cycle state.
    self.pipeline_initialized = False
    self.state = self.STATE_CREATED
    self.last_decoder_message = time.time()
    self.request_id = "<undefined>"
    self.timeout_decoder = 5

    # Transcript bookkeeping for the request in progress.
    self.num_segments = 0
    self.last_partial_result = ""
    self.partial_transcript = ""
def main():
    """Parse the command line, load the YAML configuration and run a worker.

    Steps, in order: configure logging, optionally fork into ``--fork``
    processes, read the ``--conf`` YAML file, start the optional
    post-processor subprocesses, build the decoder pipeline selected by the
    ``use-nnet2`` setting, start the GObject and tornado loops on background
    threads and finally call ``main_loop`` in the calling thread.

    Sets the module globals ``USE_NNET2`` and ``SILENCE_TIMEOUT`` from the
    configuration (defaults: ``False`` and ``5``).
    """
    global USE_NNET2
    global SILENCE_TIMEOUT

    logging.basicConfig(level=logging.DEBUG, format="%(levelname)8s %(asctime)s %(message)s ")
    logging.debug('Starting up worker')

    parser = argparse.ArgumentParser(description='Worker for kaldigstserver')
    parser.add_argument('-u', '--uri', default="ws://localhost:8888/worker/ws/speech", dest="uri",
                        help="Server<-->worker websocket URI")
    parser.add_argument('-f', '--fork', default=1, dest="fork", type=int)
    parser.add_argument('-c', '--conf', dest="conf", help="YAML file with decoder configuration")
    args = parser.parse_args()

    if args.fork > 1:
        logging.info("Forking into %d processes", args.fork)
        tornado.process.fork_processes(args.fork)

    conf = {}
    if args.conf:
        with open(args.conf) as f:
            # safe_load() yields None for an empty document; fall back to an
            # empty mapping so the lookups below do not raise TypeError.
            conf = yaml.safe_load(f) or {}

    if "logging" in conf:
        logging.config.dictConfig(conf["logging"])

    # fork off the post-processors before we load the model into memory
    # NOTE(review): both commands come from the config file and are executed
    # through the shell (shell=True); the config file must be trusted.
    post_processor = None
    if "post-processor" in conf:
        STREAM = tornado.process.Subprocess.STREAM
        post_processor = tornado.process.Subprocess(conf["post-processor"], shell=True,
                                                    stdin=PIPE, stdout=STREAM)

    full_post_processor = None
    if "full-post-processor" in conf:
        full_post_processor = Popen(conf["full-post-processor"], shell=True, stdin=PIPE, stdout=PIPE)

    USE_NNET2 = conf.get("use-nnet2", False)
    SILENCE_TIMEOUT = conf.get("silence-timeout", 5)

    if USE_NNET2:
        decoder_pipeline = DecoderPipeline2(conf)
    else:
        decoder_pipeline = DecoderPipeline(conf)

    # Both event loops run on background threads; main_loop runs in this one.
    loop = GObject.MainLoop()
    thread.start_new_thread(loop.run, ())
    thread.start_new_thread(tornado.ioloop.IOLoop.instance().start, ())
    main_loop(args.uri, decoder_pipeline, post_processor, full_post_processor)
def main():
    """Parse the command line, load the YAML configuration and serve forever.

    After configuring logging, optionally forking and reading the ``--conf``
    YAML file, this builds the decoder pipeline and the optional
    post-processor subprocesses, starts the GObject main loop on a background
    thread and then repeatedly creates a ``ServerWebsocket``, connects it and
    runs it until it stops. The loop never returns.

    Sets the module globals ``USE_NNET2`` and ``SILENCE_TIMEOUT`` from the
    configuration (defaults: ``False`` and ``5``).
    """
    global USE_NNET2
    global SILENCE_TIMEOUT

    logging.basicConfig(level=logging.DEBUG, format="%(levelname)8s %(asctime)s %(message)s ")
    logging.debug('Starting up worker')

    parser = argparse.ArgumentParser(description='Worker for kaldigstserver')
    parser.add_argument('-u', '--uri', default="ws://localhost:8888/worker/ws/speech", dest="uri",
                        help="Server<-->worker websocket URI")
    parser.add_argument('-f', '--fork', default=1, dest="fork", type=int)
    parser.add_argument('-c', '--conf', dest="conf", help="YAML file with decoder configuration")
    args = parser.parse_args()

    if args.fork > 1:
        # Imported lazily: tornado is only needed when forking is requested.
        import tornado.process
        logging.info("Forking into %d processes", args.fork)
        tornado.process.fork_processes(args.fork)

    conf = {}
    if args.conf:
        with open(args.conf) as f:
            # safe_load() yields None for an empty document; fall back to an
            # empty mapping so the lookups below do not raise TypeError.
            conf = yaml.safe_load(f) or {}

    if "logging" in conf:
        logging.config.dictConfig(conf["logging"])

    USE_NNET2 = conf.get("use-nnet2", False)
    SILENCE_TIMEOUT = conf.get("silence-timeout", 5)

    if USE_NNET2:
        decoder_pipeline = DecoderPipeline2(conf)
    else:
        decoder_pipeline = DecoderPipeline(conf)

    # NOTE(review): both commands come from the config file and are executed
    # through the shell (shell=True); the config file must be trusted.
    post_processor = None
    if "post-processor" in conf:
        post_processor = Popen(conf["post-processor"], shell=True, stdin=PIPE, stdout=PIPE)

    full_post_processor = None
    if "full-post-processor" in conf:
        full_post_processor = Popen(conf["full-post-processor"], shell=True, stdin=PIPE, stdout=PIPE)

    loop = GObject.MainLoop()
    thread.start_new_thread(loop.run, ())

    while True:
        ws = ServerWebsocket(args.uri, decoder_pipeline, post_processor,
                             full_post_processor=full_post_processor)
        try:
            logger.info("Opening websocket connection to master server")
            ws.connect()
            ws.run_forever()
        except Exception:
            # exc_info keeps the actual failure in the log instead of
            # discarding it; the retry behaviour is unchanged.
            logger.error("Couldn't connect to server, waiting for %d seconds", CONNECT_TIMEOUT,
                         exc_info=True)
            time.sleep(CONNECT_TIMEOUT)
        # fixes a race condition
        time.sleep(1)
def __init__(self):
    """Set up the English decoder pipeline and start the GObject main loop.

    Resets the class-level ``words`` / ``finished`` state, registers
    ``word_getter`` and ``set_finished`` as the pipeline's word and
    end-of-stream handlers, and runs ``GObject.MainLoop`` on a thread kept in
    ``self.gi_thread``.
    """
    # voxforge/tri2b_mmi_b0.05 model: every file lives under ENGLISH_MODEL_PATH
    model_files = (
        ("model", "final.mdl"),
        ("lda-mat", "final.mat"),
        ("word-syms", "words.txt"),
        ("fst", "HCLG.fst"),
    )
    decoder_conf = dict((option, ENGLISH_MODEL_PATH + filename)
                        for option, filename in model_files)
    decoder_conf["silence-phones"] = "6"
    self.decoder_pipeline = DecoderPipeline({"decoder": decoder_conf})

    # Recognition results are stored on the class, not on the instance.
    recognizer_cls = self.__class__
    recognizer_cls.words = []
    recognizer_cls.finished = False

    pipeline = self.decoder_pipeline
    pipeline.set_word_handler(self.word_getter)
    pipeline.set_eos_handler(self.set_finished, self.finished)

    GObject.threads_init()
    self.loop = GObject.MainLoop()
    self.gi_thread = Thread(target=self.loop.run, args=())
    self.gi_thread.start()
def setUpClass(cls):
    """Create the shared Estonian decoder pipeline used by the test class.

    Stores the pipeline plus empty ``words`` / ``finished`` state on ``cls``,
    registers ``cls.word_getter`` and ``cls.set_finished`` as handlers, and
    starts a ``GObject.MainLoop`` on a background thread.
    """
    model_dir = "test/models/estonian/tri2b_mmi_pruned"
    decoder_conf = {
        "model": model_dir + "/final.mdl",
        "lda-mat": model_dir + "/final.mat",
        "word-syms": model_dir + "/words.txt",
        "fst": model_dir + "/HCLG.fst",
        "silence-phones": "6",
    }
    pipeline = DecoderPipeline({"decoder": decoder_conf})
    cls.decoder_pipeline = pipeline

    cls.words = []
    cls.finished = False
    pipeline.set_word_handler(cls.word_getter)
    pipeline.set_eos_handler(cls.set_finished, cls.finished)

    # The GObject loop runs on its own thread for the lifetime of the tests.
    gobject_loop = GObject.MainLoop()
    thread.start_new_thread(gobject_loop.run, ())
def worker_thread(uri, conf):
    """Start the configured post-processors and decoder, then run ``main_loop``.

    ``conf`` is the configuration mapping; its optional ``"post-processor"``
    and ``"full-post-processor"`` entries are shell commands to launch. The
    decoder pipeline class is chosen by the module-level ``USE_NNET2`` flag.
    """
    # fork off the post-processors before we load the model into memory
    if "post-processor" in conf:
        stream_marker = tornado.process.Subprocess.STREAM
        post_processor = tornado.process.Subprocess(
            conf["post-processor"], shell=True, stdin=PIPE, stdout=stream_marker)
    else:
        post_processor = None

    if "full-post-processor" in conf:
        full_post_processor = Popen(
            conf["full-post-processor"], shell=True, stdin=PIPE, stdout=PIPE)
    else:
        full_post_processor = None

    pipeline_factory = DecoderPipeline2 if USE_NNET2 else DecoderPipeline
    decoder_pipeline = pipeline_factory(conf)

    main_loop(uri, decoder_pipeline, post_processor, full_post_processor)
def main():
    """Parse the command line, set up the utterance saver and run a worker.

    Steps, in order: configure logging, validate ``--saver``, optionally fork
    into ``--fork`` processes, build the saver (``GCSSaver`` or ``FSSaver``)
    for ``--path``, read the ``--conf`` YAML file, start the optional
    post-processor subprocesses, build the decoder pipeline, start the GObject
    loop and ``main_loop`` on background threads, and finally run the tornado
    IOLoop in the calling thread.

    Sets the module globals ``USE_NNET2`` and ``SILENCE_TIMEOUT`` from the
    configuration (defaults: ``False`` and ``5``). Exits through
    ``parser.error`` when ``--saver`` is neither "gcs" nor "filesystem"
    (compared case-insensitively).
    """
    global USE_NNET2
    global SILENCE_TIMEOUT

    logging.basicConfig(level=logging.DEBUG, format="%(levelname)8s %(asctime)s %(message)s ")
    logging.debug('Starting up worker')

    parser = argparse.ArgumentParser(description='Worker for kaldigstserver')
    parser.add_argument('-u', '--uri', default="ws://localhost:8888/worker/ws/speech", dest="uri",
                        help="Server<-->worker websocket URI")
    parser.add_argument('-f', '--fork', default=1, dest="fork", type=int)
    parser.add_argument('-c', '--conf', dest="conf", help="YAML file with decoder configuration")
    parser.add_argument(
        '-s', '--saver', dest="saver", default="GCS",
        help="""Platform for saving utterances ( \"gcs\" or \"filesystem\"""")
    parser.add_argument(
        '-p', '--path', dest="savepath", default="pagoda_utterances",
        help="""Path on the chosen platform where utterances will \
                be saved (bucket name for GCS, local folder for filesystem""")
    args = parser.parse_args()

    # Reject an unknown saver up front (and before forking); previously the
    # raw string was silently passed on to main_loop in place of a saver.
    saver_name = args.saver.lower()
    if saver_name not in ("gcs", "filesystem"):
        parser.error('unknown saver %r; expected "gcs" or "filesystem"' % args.saver)

    if args.fork > 1:
        logging.info("Forking into %d processes", args.fork)
        tornado.process.fork_processes(args.fork)

    if saver_name == "gcs":
        saver = GCSSaver(args.savepath)
    else:
        saver = FSSaver(args.savepath)

    conf = {}
    if args.conf:
        with open(args.conf) as f:
            # safe_load() yields None for an empty document; fall back to an
            # empty mapping so the lookups below do not raise TypeError.
            conf = yaml.safe_load(f) or {}

    if "logging" in conf:
        logging.config.dictConfig(conf["logging"])

    # fork off the post-processors before we load the model into memory
    # NOTE(review): both commands come from the config file and are executed
    # through the shell (shell=True); the config file must be trusted.
    tornado.process.Subprocess.initialize()
    post_processor = None
    if "post-processor" in conf:
        STREAM = tornado.process.Subprocess.STREAM
        post_processor = tornado.process.Subprocess(conf["post-processor"], shell=True,
                                                    stdin=PIPE, stdout=STREAM)

    full_post_processor = None
    if "full-post-processor" in conf:
        full_post_processor = Popen(conf["full-post-processor"], shell=True, stdin=PIPE, stdout=PIPE)

    USE_NNET2 = conf.get("use-nnet2", False)
    SILENCE_TIMEOUT = conf.get("silence-timeout", 5)

    if USE_NNET2:
        decoder_pipeline = DecoderPipeline2(conf)
    else:
        decoder_pipeline = DecoderPipeline(conf)

    # GObject loop and main_loop run on background threads; the tornado
    # IOLoop occupies the calling thread.
    loop = GObject.MainLoop()
    thread.start_new_thread(loop.run, ())
    thread.start_new_thread(main_loop, (args.uri, saver, decoder_pipeline, post_processor,
                                        full_post_processor))
    tornado.ioloop.IOLoop.current().start()
def main():
    """Parse the command line, load the YAML configuration and run a worker.

    Steps, in order: configure logging, optionally fork into ``--fork``
    processes, read the ``--conf`` YAML file, start the optional
    post-processor subprocesses, build the decoder pipeline selected by the
    ``use-nnet2`` setting, start the GObject loop and ``main_loop`` on
    background threads, and finally run the tornado IOLoop in the calling
    thread.

    Sets the module globals ``USE_NNET2`` and ``SILENCE_TIMEOUT`` from the
    configuration (defaults: ``False`` and ``5``).
    """
    global USE_NNET2
    global SILENCE_TIMEOUT

    logging.basicConfig(level=logging.DEBUG, format="%(levelname)8s %(asctime)s %(message)s ")
    logging.debug('Starting up worker')

    parser = argparse.ArgumentParser(description='Worker for kaldigstserver')
    parser.add_argument('-u', '--uri', default="ws://localhost:8888/worker/ws/speech", dest="uri",
                        help="Server<-->worker websocket URI")
    parser.add_argument('-f', '--fork', default=1, dest="fork", type=int)
    parser.add_argument('-c', '--conf', dest="conf", help="YAML file with decoder configuration")
    args = parser.parse_args()

    if args.fork > 1:
        logging.info("Forking into %d processes", args.fork)
        # starts multiple worker processes (no shared memory between any server code)
        tornado.process.fork_processes(args.fork)

    conf = {}
    if args.conf:
        with open(args.conf) as f:
            # Parse the first YAML document in the stream. safe_load() yields
            # None for an empty document, so fall back to an empty mapping to
            # keep the lookups below from raising TypeError.
            conf = yaml.safe_load(f) or {}

    if "logging" in conf:
        logging.config.dictConfig(conf["logging"])

    # fork off the post-processors before we load the model into memory
    # initializes the SIGCHLD signal handler (run on an .IOLoop to avoid locking issues)
    tornado.process.Subprocess.initialize()
    post_processor = None
    if "post-processor" in conf:
        # STREAM makes the corresponding attribute of the resulting Subprocess
        # a .PipeIOStream (the caller is responsible for closing the streams).
        STREAM = tornado.process.Subprocess.STREAM
        # PIPE indicates that a new pipe to the child should be created.
        # Since shell=True, the post-processor command is executed through the
        # shell. NOTE(review): the command comes from the config file, which
        # must therefore be trusted.
        post_processor = tornado.process.Subprocess(conf["post-processor"], shell=True,
                                                    stdin=PIPE, stdout=STREAM)

    full_post_processor = None
    if "full-post-processor" in conf:
        full_post_processor = Popen(conf["full-post-processor"], shell=True, stdin=PIPE, stdout=PIPE)

    # "use-nnet2" from the config, False when absent
    USE_NNET2 = conf.get("use-nnet2", False)
    # "silence-timeout" from the config, 5 when absent
    SILENCE_TIMEOUT = conf.get("silence-timeout", 5)

    if USE_NNET2:
        decoder_pipeline = DecoderPipeline2(conf)
    else:
        decoder_pipeline = DecoderPipeline(conf)

    loop = GObject.MainLoop()  # main event loop
    thread.start_new_thread(loop.run, ())
    thread.start_new_thread(main_loop, (args.uri, decoder_pipeline, post_processor,
                                        full_post_processor))
    # I/O event loop for non-blocking sockets
    tornado.ioloop.IOLoop.current().start()
class KaldiRecognizer():
    """Microphone speech recognizer built on a Kaldi ``DecoderPipeline``.

    Recognized words and the end-of-stream flag are stored on the class
    (``words`` and ``finished``), so they are shared by every instance.
    """

    def __init__(self):
        """Build the decoder pipeline and start the GObject main loop thread."""
        # voxforge/tri2b_mmi_b0.05 model:
        decoder_conf = {
            "model": ENGLISH_MODEL_PATH + "final.mdl",
            "lda-mat": ENGLISH_MODEL_PATH + "final.mat",
            "word-syms": ENGLISH_MODEL_PATH + "words.txt",
            "fst": ENGLISH_MODEL_PATH + "HCLG.fst",
            "silence-phones": "6"
        }
        self.decoder_pipeline = DecoderPipeline({"decoder": decoder_conf})
        self.__class__.words = []
        self.__class__.finished = False

        self.decoder_pipeline.set_word_handler(self.word_getter)
        self.decoder_pipeline.set_eos_handler(self.set_finished, self.finished)

        GObject.threads_init()
        self.loop = GObject.MainLoop()
        self.gi_thread = Thread(target=self.loop.run, args=())
        self.gi_thread.start()

    @classmethod
    def word_getter(cls, word):
        """Word handler: append ``word`` to the class-level ``words`` list."""
        cls.words.append(word)

    @classmethod
    def set_finished(cls, finished):
        """End-of-stream handler: mark the current utterance as finished.

        The ``finished`` argument is whatever the pipeline passes to the
        handler; it is ignored and the flag is always set to ``True``.
        """
        cls.finished = True

    def reset(self):
        """Clear the collected words and the finished flag on the class."""
        self.__class__.words = []
        self.__class__.finished = False

    def recognize(self, args):
        """Listen on the microphone and dispatch each recognized utterance.

        Reads ``CHUNK``-sized frames; when a frame's RMS reaches ``THRESHOLD``
        a decoding request is started and fed until ``SILENCE_DETECTION``
        consecutive quiet frames are seen. The recognized words (without the
        ``'<#s>'`` marker) are joined with spaces and passed, together with
        ``args``, to ``VirtualAssistant.command`` on a new thread.

        The audio stream and the PyAudio session are released whenever this
        method exits. On ``KeyboardInterrupt`` the GObject loop is also quit
        and the interrupt is re-raised.
        """
        with noalsaerr():
            p = pyaudio.PyAudio()  # Create a PyAudio session

        stream = None
        try:
            # Create a stream
            stream = p.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            output=True,
                            frames_per_buffer=CHUNK)

            data = stream.read(CHUNK)  # Get first data frame from the microphone
            # Loop over the frames of the audio / data chunks
            while data != '':
                rms = audioop.rms(data, 2)  # Root Mean Square of current chunk
                if rms >= THRESHOLD:  # Loud enough: start a decoding request
                    self.decoder_pipeline.init_request(
                        "recognize",
                        "audio/x-raw, layout=(string)interleaved, rate=(int)16000, format=(string)S16LE, channels=(int)1"
                    )
                    self.decoder_pipeline.process_data(data)
                    silence_counter = 0  # Consecutive quiet chunks seen so far
                    while silence_counter < SILENCE_DETECTION:
                        data = stream.read(CHUNK)  # Read a new chunk from the stream
                        if LISTENING:
                            stream.write(data, CHUNK)
                        self.decoder_pipeline.process_data(data)

                        rms = audioop.rms(data, 2)
                        if rms < THRESHOLD:
                            silence_counter += 1
                        else:
                            silence_counter = 0

                    # Pause capture while waiting for the end-of-stream
                    # handler to set the finished flag.
                    stream.stop_stream()
                    self.decoder_pipeline.end_request()
                    while not self.finished:
                        time.sleep(0.1)
                    stream.start_stream()

                    words = [x for x in self.words if x != '<#s>']
                    com = ' '.join(words)
                    t = Thread(target=VirtualAssistant.command, args=(com, args))
                    t.start()
                    self.reset()

                data = stream.read(CHUNK)  # Read a new chunk from the stream
                if LISTENING:
                    stream.write(data, CHUNK)

        except KeyboardInterrupt:
            self.loop.quit()
            raise  # bare raise keeps the original traceback
        finally:
            # Release audio resources on every exit path, not only on Ctrl-C.
            if stream is not None:
                stream.stop_stream()
                stream.close()
            p.terminate()
class ServerWebsocket(WebSocketClient):
    """Websocket client that feeds server audio to a decoder pipeline.

    Messages received from the server drive a small state machine (the
    ``STATE_*`` constants); words reported by the decoder are sent back to the
    server as JSON events.
    """

    STATE_CREATED = 0
    STATE_CONNECTED = 1
    STATE_INITIALIZED = 2
    STATE_PROCESSING = 3
    STATE_EOS_RECEIVED = 7
    STATE_CANCELLING = 8
    STATE_FINISHED = 100

    # States in which further EOS markers or audio data are ignored.
    _INPUT_CLOSED_STATES = (STATE_CANCELLING, STATE_EOS_RECEIVED, STATE_FINISHED)
    # States in which guard_timeout keeps watching for decoder silence.
    _GUARDED_STATES = (STATE_EOS_RECEIVED, STATE_CONNECTED, STATE_INITIALIZED, STATE_PROCESSING)

    def __init__(self, uri):
        """Create the decoder pipeline and the websocket client for ``uri``."""
        self.uri = uri
        self.decoder_pipeline = DecoderPipeline(self._on_word, self._on_eos)
        WebSocketClient.__init__(self, url=uri, heartbeat_freq=10)
        self.pipeline_initialized = False
        self.state = self.STATE_CREATED
        self.last_decoder_message = time.time()
        self.request_id = "<undefined>"
        self.timeout_decoder = 5
        self.num_segments = 0
        self.last_partial_result = ""
        self.partial_transcript = ""

    def opened(self):
        """Mark the connection as established and clear the last partial result."""
        logger.info("Opened websocket connection to server")
        self.state = self.STATE_CONNECTED
        self.last_partial_result = ""

    def guard_timeout(self):
        """Cancel the request if the decoder stays silent for too long.

        Polls once per second while the state is one of ``_GUARDED_STATES``.
        When more than ``SILENCE_TIMEOUT`` seconds have passed since
        ``last_decoder_message``, the request is finished, a
        ``STATUS_NO_SPEECH`` event is sent (best effort) and the socket is
        closed.
        """
        while self.state in self._GUARDED_STATES:
            if time.time() - self.last_decoder_message > SILENCE_TIMEOUT:
                logger.warning(
                    "%s: More than %d seconds from last decoder hypothesis update, cancelling",
                    self.request_id, SILENCE_TIMEOUT)
                self.finish_request()
                event = dict(status=common.STATUS_NO_SPEECH)
                try:
                    self.send(json.dumps(event))
                except Exception:
                    # Best effort only; unlike a bare except this no longer
                    # swallows KeyboardInterrupt / SystemExit.
                    logger.warning("%s: Failed to send error event to master", self.request_id)
                self.close()
                return
            logger.debug(
                "%s: Checking that decoder hasn't been silent for more than %d seconds",
                self.request_id, SILENCE_TIMEOUT)
            time.sleep(1)

    def received_message(self, m):
        """Handle one message from the server.

        In ``STATE_CONNECTED`` the message is parsed as the JSON request
        header (``content_type`` and ``id``). Otherwise a message whose data
        is ``"EOS"`` ends the request, and binary messages are passed to the
        decoder; both are ignored once the state is in
        ``_INPUT_CLOSED_STATES``.
        """
        # Parenthesised single-argument print: same output under the Python 2
        # print statement and the Python 3 print function.
        print("Received Message")
        logger.debug("%s: Got message from server of type %s", self.request_id, str(type(m)))
        if self.state == self.STATE_CONNECTED:
            props = json.loads(str(m))
            content_type = props['content_type']  # still required in the header
            self.request_id = props['id']
            self.num_segments = 0
            # NOTE(review): request initialisation and the timeout guard are
            # disabled in this version, yet "Started timeout guard" is still
            # logged below - confirm whether this is intentional.
            #self.decoder_pipeline.init_request(self.request_id, content_type)
            self.last_decoder_message = time.time()
            #thread.start_new_thread(self.guard_timeout, ())
            logger.info("%s: Started timeout guard", self.request_id)
            logger.info("%s: Initialized request", self.request_id)
            self.state = self.STATE_INITIALIZED
        elif m.data == "EOS":  # end of file indication
            if self.state not in self._INPUT_CLOSED_STATES:
                self.decoder_pipeline.end_request()
                self.state = self.STATE_EOS_RECEIVED
            else:
                logger.info("%s: Ignoring EOS, worker already in state %d",
                            self.request_id, self.state)
        else:
            if self.state not in self._INPUT_CLOSED_STATES:
                if isinstance(m, ws4py.messaging.BinaryMessage):
                    print("Process_data called, size: {}, type: {}".format(
                        len(m.data), type(m.data)))
                    self.decoder_pipeline.process_data(m.data)
                    self.state = self.STATE_PROCESSING
            else:
                logger.info("%s: Ignoring data, worker already in state %d",
                            self.request_id, self.state)

    def finish_request(self):
        """Bring the decoder pipeline to a finished state.

        If no audio was sent (``STATE_CONNECTED`` / ``STATE_INITIALIZED``) the
        pipeline is finished right away. Otherwise, unless already finished,
        decoding is cancelled and this method waits, one second per try and at
        most 30 tries, for the state to leave ``STATE_CANCELLING``.
        """
        if self.state in (self.STATE_CONNECTED, self.STATE_INITIALIZED):
            # connection closed when we are not doing anything, or when the
            # request was initialized but no data was sent
            self.decoder_pipeline.finish_request()
            self.state = self.STATE_FINISHED
            return
        if self.state != self.STATE_FINISHED:
            logger.info("%s: Master disconnected before decoder reached EOS?", self.request_id)
            self.state = self.STATE_CANCELLING
            self.decoder_pipeline.cancel()
            counter = 0
            while self.state == self.STATE_CANCELLING:
                counter += 1
                if counter > 30:
                    # lost hope that the decoder will ever finish, likely it has hung
                    # FIXME: this might introduce new bugs
                    logger.info("%s: Giving up waiting after %d tries", self.request_id, counter)
                    self.state = self.STATE_FINISHED
                else:
                    logger.info("%s: Waiting for EOS from decoder", self.request_id)
                    time.sleep(1)
            self.decoder_pipeline.finish_request()
            logger.info("%s: Finished waiting for EOS", self.request_id)

    def closed(self, code, reason=None):
        """Websocket close callback: finish whatever request is in progress."""
        logger.debug("%s: Websocket closed() called", self.request_id)
        self.finish_request()
        logger.debug("%s: Websocket closed() finished", self.request_id)

    def _on_word(self, word):
        """Decoder word callback: send the running transcript to the server.

        A regular word is appended to ``partial_transcript`` and sent as a
        non-final hypothesis. The ``"<#s>"`` marker sends the transcript as
        final, then clears it and advances ``num_segments``.
        """
        self.last_decoder_message = time.time()
        # TODO word=<#s> : never called
        is_final = word == "<#s>"
        if not is_final:
            if self.partial_transcript:
                self.partial_transcript += " "
            self.partial_transcript += word

        event = dict(status=common.STATUS_SUCCESS,
                     segment=self.num_segments,
                     result=dict(hypotheses=[dict(transcript=self.partial_transcript)],
                                 final=is_final))
        self.send(json.dumps(event))

        if is_final:
            self.partial_transcript = ""
            self.num_segments += 1

    def _on_eos(self, data=None):
        """Decoder end-of-stream callback: mark the request finished and close."""
        self.last_decoder_message = time.time()
        self.state = self.STATE_FINISHED
        self.close()