Example #1
    def __worker(self, pipe, l_log):
        """The core of the STT program, this is the multiprocessed part

        Note:
            Multiprocessing will require a pipe between the parent and child subprocess.
            Since this is the case, the worker subprocess cannot access non-shared variables

        """

        l_log.debug("STT worker started")

        audio_processor = AudioProcessor()  # Create a new audio processing object
        # The text processor's nltk model can't be loaded until the client sets the language
        text_processor = TextProcessor()
        # Create a new pocketsphinx decoder configuration (defaults to English)
        config = Decoder.default_config()
        decoder = None
        nltk_model = None
        mutex_flags = {"keyphrases": {"use": False}}
        shutdown_flags = {"shutdown": False, "decoder": None}

        def send_json(pipe, to_send):
            """Internal worker method to send a json through the parent socket

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                to_send (:obj: dict): A dictionary to be sent to the parent socket

            """
            try:
                ret = self.__send_buffered(
                    pipe, to_send
                )  # Send the message passed by argument back to the parent process
                if not ret[0]:
                    l_log.error(
                        "Failed to send buffered message to the parent process! (err: %s)"
                        % ret[1])
            except Exception as err:
                l_log.error("Failed to send json! (err: %s)" % str(err))

        def send_error(pipe, error):
            """Internal worker method to send a json error through the parent socket

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                error (str): The string error message to send

            """
            send_json(pipe, {"error": error})

        def load_models(pipe, config, models):
            """Internal worker method to load the language model

            Note:
                Some languages take a long time to load. English is by far
                the fastest language model to load.

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                config (:obj: Decoder config): The pocketsphinx decoder configuration to populate
                models (dict): The language and nltk models provided by the parent process

            Returns: (Decoder, nltk model)
                The STT decoder object and the nltk model, or (None, None) on failure

            """

            language_model = models["language_model"]
            nltk_model = models["nltk_model"]

            # Validate both models before loading them into the decoder
            if False in [
                    language_model.is_valid_model(),
                    nltk_model.is_valid_model()
            ]:
                l_log.error("The language model %s is invalid!" %
                            str(language_model.name))
                send_error(pipe, "Failed loading language model!")
                return None, None  # Match the caller's tuple unpacking

            # Load the model configurations into pocketsphinx
            config.set_string('-hmm', str(language_model.hmm))
            config.set_string('-lm', str(language_model.lm))
            config.set_string('-dict', str(language_model.dict))
            decoder = Decoder(config)

            send_json(
                pipe,
                {"success": True})  # Send a success message to the client

            l_log.debug("Set the language model to %s" %
                        str(language_model.name))

            return decoder, nltk_model  # Return the new decoder and nltk model

        def process_text(pipe, text, is_final, args):
            """Internal worker method to process the Speech To Text phrase

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                text (str): The spoken text to further process
                is_final (bool): True if the text is the final result, False if it is a partial result
                args (dict): Any other flags specifically required for a final or partial speech result
            """

            generate_keyphrases = mutex_flags["keyphrases"]["use"]
            keyphrases = []

            if generate_keyphrases:
                text_processor.generate_keyphrases(
                    text)  # Generate keyphrases from the given text
                keyphrases_list = text_processor.get_keyphrases()

                for keyphrase in keyphrases_list:
                    to_append_keyphrase = {
                        "score": keyphrase[0],
                        "keyphrase": keyphrase[1]
                    }
                    keyphrases.append(to_append_keyphrase)
            else:
                keyphrases = text  # Don't do any processing and just pass the text into the keyphrases

            # Generate the json to be sent back to the client
            hypothesis_results = args
            hypothesis_results["keyphrases"] = generate_keyphrases
            if is_final:
                hypothesis_results["hypothesis"] = keyphrases
            else:
                hypothesis_results["partial_hypothesis"] = keyphrases

            l_log.debug("Hypothesis results: %s" % str(hypothesis_results))

            # Send the results back to the client
            send_json(pipe, hypothesis_results)
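
        # For illustration only (an assumption, not part of the original source): with
        # keyphrase generation enabled, a final result sent back through the pipe looks
        # roughly like
        #   {"silence": False, "score": ..., "confidence": ..., "keyphrases": True,
        #    "hypothesis": [{"score": 8.5, "keyphrase": "hello world"}, ...]}
        # and with it disabled, "hypothesis" carries the raw hypothesis string instead.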

        def start_audio(pipe, decoder, args):
            """Internal worker method to start the audio processing chunk sequence

            Note:
                This must be called before the process_audio method or the STT engine will not process the audio chunks

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                decoder (Decoder): The pocketsphinx decoder to control the STT engine
                args (dict): All of the available arguments passed by the parent process

            """

            if decoder is None:
                l_log.error("Language model is not loaded")
                send_error(pipe, "Language model not loaded!")
                send_json(pipe, {"decoder": False})
                return

            l_log.debug("Starting the audio processing...")

            decoder.start_utt()  # Start the pocketsphinx listener

            # Tell the client that the decoder has successfully been loaded
            send_json(pipe, {"decoder": True})

        def process_audio(pipe, decoder, args):
            """Internal worker method to process an audio chunk

            Note:
                The audio chunk is expected to be in base64 format

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                decoder (Decoder): The pocketsphinx decoder to control the STT engine
                args (dict): All of the available arguments passed by the parent process

            """
            if decoder is None:
                l_log.error("Language model is not loaded")
                send_error(pipe, "Language model not loaded!")
                return

            l_log.debug("Processing audio chunk!")

            audio_chunk = args["audio"]  # Retrieve the audio data
            processed_wav = audio_processor.process_chunk(
                audio_chunk)  # Process the base64 wrapped audio data

            l_log.debug("Recognizing speech...")

            decoder.process_raw(
                processed_wav, False,
                False)  # Process the chunk through the STT engine (no_search=False, full_utt=False)

            hypothesis = decoder.hyp()  # Get pocketsphinx's hypothesis

            # Send back the results of the decoding
            if hypothesis is None:
                l_log.debug("Silence detected")
                send_json(pipe, {
                    "partial_silence": True,
                    "partial_hypothesis": None
                })
            else:
                hypothesis_results = {
                    "partial_silence": len(hypothesis.hypstr) == 0,
                }

                l_log.debug("Partial speech detected: %s" %
                            str(hypothesis.hypstr))
                process_text(pipe, hypothesis.hypstr, False,
                             hypothesis_results)

            l_log.debug("Done decoding speech from audio chunk!")

        def stop_audio(pipe, decoder, args):
            """Internal worker method to stop the audio processing chunk sequence

            Note:
                This must be called after the process_audio method or the STT engine will continue to listen for audio chunks

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                decoder (Decoder): The pocketsphinx decoder to control the STT engine
                args (dict): All of the available arguments passed by the parent process

            """

            if decoder is None:
                l_log.error("Language model is not loaded")
                send_error(pipe, "Language model not loaded!")
                send_json({"decoder": False})
                return

            l_log.debug("Stopping the audio processing...")

            decoder.end_utt()  # Stop the pocketsphinx listener

            l_log.debug("Done recognizing speech!")

            hypothesis = decoder.hyp()  # Get pocketsphinx's hypothesis
            logmath = decoder.get_logmath()

            # Send back the results of the decoding
            if hypothesis is None:
                l_log.debug("Silence detected")
                send_json(pipe, {"silence": True, "hypothesis": None})
            else:
                hypothesis_results = {
                    "silence": False if len(hypothesis.hypstr) > 0 else True,
                    "score": hypothesis.best_score,
                    "confidence": logmath.exp(hypothesis.prob)
                }

                l_log.debug("Speech detected: %s" % str(hypothesis.hypstr))
                process_text(pipe, hypothesis.hypstr, True, hypothesis_results)

        def shutdown_thread(self, l_log):
            """Worker method to handle the checking of a shutdown call

            Note:
                To reduce overhead, this thread will only be called every 100 milliseconds

            """
            while not shutdown_flags["shutdown"]:
                try:
                    if self._shutdown_event.is_set():
                        l_log.debug("Shutting down worker thread!")
                        shutdown_flags["shutdown"] = True  # Exit the main loop
                        if shutdown_flags["decoder"] is not None:
                            try:
                                shutdown_flags["decoder"].end_utt()
                            except Exception as err:
                                l_log.debug(
                                    "STT decoder object returned a non-zero status"
                                )
                        else:
                            l_log.warning(
                                "The decoder object is already None!")

                        break
                    sleep(0.1)
                except Exception as err:
                    l_log.error(
                        "Failed shutting down worker thread! (err: %s)" %
                        str(err))

        shutdown_t = Thread(target=shutdown_thread, args=(self, l_log))
        shutdown_t.daemon = True  # Don't let the watcher thread block interpreter exit
        shutdown_t.start()

        p_out, p_in = pipe
        while not shutdown_flags["shutdown"]:
            try:
                try:
                    command = self.__get_buffered(
                        p_out)  # Wait for a command from the parent process
                    if "set_models" in command[
                            "exec"]:  # Check to see if our command is to
                        decoder, nltk_model = load_models(
                            p_out, config, command["args"])
                        text_processor.set_nltk_model(
                            nltk_model)  # Set the text processor nltk model
                        shutdown_flags["decoder"] = decoder
                    elif "start_audio" in command["exec"]:
                        start_audio(p_out, decoder, command["args"])
                    elif "process_audio" in command["exec"]:
                        process_audio(p_out, decoder, command["args"])
                    elif "stop_audio" in command["exec"]:
                        stop_audio(p_out, decoder, command["args"])
                    elif "set_keyphrases" in command["exec"]:
                        mutex_flags["keyphrases"] = command["args"]
                    else:
                        l_log.error("Invalid command %s" % str(command))
                        send_error(p_out, "Invalid command!")
                except (EOFError, IOError) as err:
                    continue
            except Exception as err:
                l_log.error(
                    "Failed recieving command from subprocess (id: %d) (err: %s)"
                    % (current_process().pid, str(err)))
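
A minimal parent-side sketch, not part of the original source: the command names and the "exec"/"args" layout mirror the dispatch loop above, but the helper below and its argument names (language_model, nltk_model, audio_chunk_b64) are hypothetical, and the actual transport goes through the class's private __send_buffered/__get_buffered helpers, which are not shown here.

def build_worker_commands(language_model, nltk_model, audio_chunk_b64):
    """Build the command dicts the worker's dispatch loop expects (hypothetical helper)."""
    return [
        # Both model objects must expose .is_valid_model(); the language model
        # also needs .hmm, .lm, .dict and .name for the pocketsphinx config
        {"exec": "set_models",
         "args": {"language_model": language_model, "nltk_model": nltk_model}},
        # Keyphrase extraction is toggled through the "use" flag read by process_text
        {"exec": "set_keyphrases", "args": {"use": True}},
        {"exec": "start_audio", "args": {}},
        # The audio payload must be a base64-encoded chunk, as process_audio expects
        {"exec": "process_audio", "args": {"audio": audio_chunk_b64}},
        {"exec": "stop_audio", "args": {}},
    ]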