Exemplo n.º 1
0
class KaldiRecognizer():
    """Microphone speech recognizer that feeds audio into a ``DecoderPipeline``.

    The recognized words and the end-of-stream flag are stored on the class
    itself (``KaldiRecognizer.words`` / ``KaldiRecognizer.finished``), not on
    the instance, so every instance shares the same recognition state.
    """

    def __init__(self):
        """Build the decoder pipeline and start a GObject main loop thread."""
        #logging.basicConfig(level=logging.INFO)

        # voxforge/tri2b_mmi_b0.05 model:
        decoder_conf = {
            "model": ENGLISH_MODEL_PATH + "final.mdl",
            "lda-mat": ENGLISH_MODEL_PATH + "final.mat",
            "word-syms": ENGLISH_MODEL_PATH + "words.txt",
            "fst": ENGLISH_MODEL_PATH + "HCLG.fst",
            "silence-phones": "6"
        }
        self.decoder_pipeline = DecoderPipeline({"decoder": decoder_conf})
        # Recognition state lives on the class because the handlers below are
        # classmethods.
        self.__class__.words = []
        self.__class__.finished = False

        self.decoder_pipeline.set_word_handler(self.word_getter)
        self.decoder_pipeline.set_eos_handler(self.set_finished, self.finished)

        # NOTE(review): presumably the pipeline delivers its word/EOS callbacks
        # through this main loop -- confirm against DecoderPipeline.
        GObject.threads_init()
        self.loop = GObject.MainLoop()
        self.gi_thread = Thread(target=self.loop.run, args=())
        self.gi_thread.start()

    @classmethod
    def word_getter(cls, word):
        """Word handler: append ``word`` to the shared ``words`` list."""
        cls.words.append(word)

    @classmethod
    def set_finished(cls, finished):
        """End-of-stream handler: mark the current request as finished.

        ``finished`` is the user data registered with the handler; its value is
        ignored and the flag is always set to ``True``.
        """
        cls.finished = True

    def reset(self):
        """Clear the shared word list and the finished flag."""
        self.__class__.words = []
        self.__class__.finished = False

    def recognize(self, args):
        """Listen on the microphone and dispatch each recognized utterance.

        Opens a PyAudio input/output stream and keeps reading chunks. Each time
        a chunk is at least as loud as ``THRESHOLD`` an utterance is decoded
        and the resulting text is passed, together with ``args``, to
        ``VirtualAssistant.command`` on a new thread.

        Args:
            args: Opaque value forwarded unchanged to
                ``VirtualAssistant.command`` with every transcript.

        Raises:
            KeyboardInterrupt: Re-raised after the audio resources have been
                released and the main loop has been asked to quit. Any other
                exception is propagated the same way.
        """
        with noalsaerr():
            p = pyaudio.PyAudio()  # Create a PyAudio session
        try:
            # Create a stream
            stream = p.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            output=True,
                            frames_per_buffer=CHUNK)
            try:
                self._listen(stream, args)
            finally:
                # Release the audio device however the listening loop ended.
                stream.stop_stream()
                stream.close()
        except BaseException:
            # The main loop runs on a non-daemon thread; stop it on any
            # failure (not only Ctrl-C) so it cannot keep the process alive.
            self.loop.quit()
            raise  # bare raise keeps the original exception and traceback
        finally:
            p.terminate()

    def _listen(self, stream, args):
        """Read chunks from ``stream`` and hand off every detected utterance."""
        data = stream.read(CHUNK)  # Get first data frame from the microphone
        # Truthiness covers both '' (str) and b'' (bytes) as end-of-data.
        while data:
            # A chunk whose RMS reaches THRESHOLD starts an utterance.
            if audioop.rms(data, 2) >= THRESHOLD:
                com = self._decode_utterance(stream, data)
                t = Thread(target=VirtualAssistant.command,
                           args=(com, args))
                t.start()
                self.reset()

            data = stream.read(CHUNK)  # Read a new chunk from the stream
            if LISTENING:
                stream.write(data, CHUNK)

    def _decode_utterance(self, stream, data):
        """Decode one utterance starting at chunk ``data``; return its text."""
        self.decoder_pipeline.init_request(
            "recognize",
            "audio/x-raw, layout=(string)interleaved, rate=(int)16000, format=(string)S16LE, channels=(int)1"
        )
        self.decoder_pipeline.process_data(data)

        # Keep feeding audio until SILENCE_DETECTION consecutive chunks fall
        # below THRESHOLD; any louder chunk restarts the count.
        silence_counter = 0
        while silence_counter < SILENCE_DETECTION:
            data = stream.read(CHUNK)  # Read a new chunk from the stream
            if LISTENING:
                stream.write(data, CHUNK)
            self.decoder_pipeline.process_data(data)

            if audioop.rms(data, 2) < THRESHOLD:
                silence_counter += 1
            else:
                silence_counter = 0

        # Pause capture while waiting for the end-of-stream handler to fire.
        stream.stop_stream()
        self.decoder_pipeline.end_request()
        while not self.finished:
            time.sleep(0.1)
        stream.start_stream()

        # Drop the '<#s>' marker tokens and join the rest into one string.
        return ' '.join(x for x in self.words if x != '<#s>')
Exemplo n.º 2
0
class ServerWebsocket(WebSocketClient):
    """Websocket client that relays audio from a server into a decoder.

    Incoming messages drive a small state machine (the ``STATE_*`` constants).
    Words reported by the ``DecoderPipeline`` are sent back over the socket as
    JSON hypothesis events.
    """

    STATE_CREATED = 0
    STATE_CONNECTED = 1
    STATE_INITIALIZED = 2
    STATE_PROCESSING = 3
    STATE_EOS_RECEIVED = 7
    STATE_CANCELLING = 8
    STATE_FINISHED = 100

    def __init__(self, uri):
        """Create the decoder pipeline and the websocket client for ``uri``."""
        self.uri = uri
        self.decoder_pipeline = DecoderPipeline(self._on_word, self._on_eos)

        WebSocketClient.__init__(self, url=uri, heartbeat_freq=10)
        self.pipeline_initialized = False

        self.state = self.STATE_CREATED
        self.last_decoder_message = time.time()
        self.request_id = "<undefined>"
        self.timeout_decoder = 5
        self.num_segments = 0
        self.last_partial_result = ""
        self.partial_transcript = ""

    def opened(self):
        """Mark the connection as established."""
        logger.info("Opened websocket connection to server")
        self.state = self.STATE_CONNECTED
        self.last_partial_result = ""

    def guard_timeout(self):
        """Cancel the request if the decoder stays silent for too long.

        Polls once per second while a request is active. When more than
        ``SILENCE_TIMEOUT`` seconds have passed since the last decoder
        callback, the request is finished, a ``STATUS_NO_SPEECH`` event is
        sent on a best-effort basis, and the socket is closed.
        """
        while self.state in [
                self.STATE_EOS_RECEIVED, self.STATE_CONNECTED,
                self.STATE_INITIALIZED, self.STATE_PROCESSING
        ]:
            if time.time() - self.last_decoder_message > SILENCE_TIMEOUT:
                logger.warning(
                    "%s: More than %d seconds from last decoder hypothesis update, cancelling",
                    self.request_id, SILENCE_TIMEOUT)
                self.finish_request()
                event = dict(status=common.STATUS_NO_SPEECH)
                try:
                    self.send(json.dumps(event))
                except Exception:
                    # Best effort only: the peer may already be gone. Catching
                    # Exception (not a bare except) lets KeyboardInterrupt and
                    # SystemExit through.
                    logger.warning("%s: Failed to send error event to master",
                                   self.request_id)
                self.close()
                return
            logger.debug(
                "%s: Checking that decoder hasn't been silent for more than %d seconds",
                self.request_id, SILENCE_TIMEOUT)
            time.sleep(1)

    def _accepts_input(self):
        """Return True unless the request is cancelling, at EOS or finished."""
        return self.state not in (self.STATE_CANCELLING,
                                  self.STATE_EOS_RECEIVED,
                                  self.STATE_FINISHED)

    def received_message(self, m):  #reviewing
        """Handle one websocket message from the server.

        * In ``STATE_CONNECTED`` the message is parsed as the JSON request
          header (keys ``content_type`` and ``id``).
        * A message whose data is ``EOS`` ends the decoder request.
        * Any other binary message is passed to the decoder as audio data.
        """
        #print "Received Message {}".format(m)
        print("Received Message")

        logger.debug("%s: Got message from server of type %s",
                     self.request_id, str(type(m)))
        if self.state == self.__class__.STATE_CONNECTED:
            props = json.loads(str(m))
            # Only consumed by the commented-out init_request call below; the
            # lookup is kept so a header without the key still raises KeyError.
            content_type = props['content_type']
            self.request_id = props['id']
            self.num_segments = 0
            #self.decoder_pipeline.init_request(self.request_id, content_type)
            self.last_decoder_message = time.time()
            #thread.start_new_thread(self.guard_timeout, ())
            # NOTE(review): the guard thread start above is commented out, so
            # the next log line does not reflect what actually happens.
            logger.info("%s: Started timeout guard", self.request_id)
            logger.info("%s: Initialized request", self.request_id)
            self.state = self.STATE_INITIALIZED
        elif m.data in (b"EOS", "EOS"):  #end of file indication
            # Both spellings are accepted so the check also matches when the
            # message data is bytes rather than str.
            if self._accepts_input():
                self.decoder_pipeline.end_request()
                self.state = self.STATE_EOS_RECEIVED
            else:
                logger.info("%s: Ignoring EOS, worker already in state %d",
                            self.request_id, self.state)
        else:
            if self._accepts_input():
                # Non-binary messages in this branch are silently dropped.
                if isinstance(m, ws4py.messaging.BinaryMessage):
                    print("Process_data called, size: {}, type: {}".format(
                        len(m.data), type(m.data)))
                    self.decoder_pipeline.process_data(m.data)
                    self.state = self.STATE_PROCESSING
            else:
                logger.info("%s: Ignoring data, worker already in state %d",
                            self.request_id, self.state)

    def finish_request(self):
        """Tear down the current decoder request.

        If no audio has been processed yet the pipeline is finished directly.
        Otherwise the decoder is cancelled and this method waits, for about 30
        one-second polls, until ``_on_eos`` moves the state away from
        ``STATE_CANCELLING``. Does nothing when already finished.
        """
        if self.state in (self.STATE_CONNECTED, self.STATE_INITIALIZED):
            # connection closed when we are not doing anything, or when the
            # request was initialized but no data was sent
            self.decoder_pipeline.finish_request()
            self.state = self.STATE_FINISHED
            return
        if self.state != self.STATE_FINISHED:
            logger.info("%s: Master disconnected before decoder reached EOS?",
                        self.request_id)
            self.state = self.STATE_CANCELLING
            self.decoder_pipeline.cancel()
            counter = 0
            while self.state == self.STATE_CANCELLING:
                counter += 1
                if counter > 30:
                    # lost hope that the decoder will ever finish, likely it has hung
                    # FIXME: this might introduce new bugs
                    logger.info("%s: Giving up waiting after %d tries",
                                self.request_id, counter)
                    self.state = self.STATE_FINISHED
                else:
                    logger.info("%s: Waiting for EOS from decoder",
                                self.request_id)
                    time.sleep(1)
            self.decoder_pipeline.finish_request()
            logger.info("%s: Finished waiting for EOS", self.request_id)

    def closed(self, code, reason=None):  #done
        """Websocket close callback: finish any request still in progress."""
        logger.debug("%s: Websocket closed() called", self.request_id)
        self.finish_request()
        logger.debug("%s: Websocket closed() finished", self.request_id)

    def _send_hypothesis(self, final):
        """Send the current partial transcript as a hypothesis event."""
        event = dict(status=common.STATUS_SUCCESS,
                     segment=self.num_segments,
                     result=dict(hypotheses=[
                         dict(transcript=self.partial_transcript)
                     ],
                                 final=final))
        self.send(json.dumps(event))

    def _on_word(self, word):  #done
        """Decoder word callback: extend the transcript and report it.

        A regular word is appended to the partial transcript and sent as a
        non-final hypothesis. The ``<#s>`` marker sends the transcript as
        final, then clears it and advances the segment counter.
        """
        self.last_decoder_message = time.time()
        if word != "<#s>":
            if len(self.partial_transcript) > 0:
                self.partial_transcript += " "
            self.partial_transcript += word
            self._send_hypothesis(False)
        else:  #TODO word=<#s> : never called
            self._send_hypothesis(True)
            self.partial_transcript = ""
            self.num_segments += 1

    def _on_eos(self, data=None):  #done (called when stream is over)
        """Decoder end-of-stream callback: mark finished and close the socket."""
        self.last_decoder_message = time.time()
        self.state = self.STATE_FINISHED
        self.close()