Example #1
    def __worker(self, pipe, l_log):
        """The core of the STT program, this is the multiprocessed part

        Note:
            Multiprocessing will require a pipe between the parent and child subprocess.
            Since this is the case, the worker subprocess cannot access non-shared variables

        """

        l_log.debug("STT worker started")

        audio_processor = AudioProcessor()  # Create a new audio processing object
        # The text processor's nltk model can't be loaded until the client sets the language
        text_processor = TextProcessor()
        # Create a new pocketsphinx decoder configuration (defaults to English)
        config = Decoder.default_config()
        decoder = None
        nltk_model = None
        mutex_flags = {"keyphrases": {"use": False}}
        shutdown_flags = {"shutdown": False, "decoder": None}

        def send_json(pipe, to_send):
            """Internal worker method to send a json through the parent socket

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                to_send (:obj: dict): A dictionary to be sent to the parent socket

            """
            try:
                ret = self.__send_buffered(
                    pipe, to_send
                )  # Send the message passed by argument back to the parent process
                if not ret[0]:
                    l_log.error(
                        "Failed to send buffered message to the parent process! (err: %s)"
                        % ret[1])
            except Exception as err:
                l_log.error("Failed to send json! (err: %s)" % str(err))

        def send_error(pipe, error):
            """Internal worker method to send a json error through the parent socket

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                error (str): The string error message to send

            """
            send_json(pipe, {"error": error})

        def load_models(pipe, config, models):
            """Internal worker method to load the language model

            Note:
                Some languages take a long time to load. English is by far
                the fastest language model to load.

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                config (:obj: Decoder config): The pocketsphinx decoder configuration to populate
                models (dict): The language and nltk models provided by the parent process

            Returns: (Decoder, nltk model)
                The STT decoder object and the nltk model, or (None, None) on failure

            """

            language_model = models["language_model"]
            nltk_model = models["nltk_model"]

            # Validate both models before loading them into the decoder
            if False in [
                    language_model.is_valid_model(),
                    nltk_model.is_valid_model()
            ]:
                l_log.error("The language model %s is invalid!" %
                            str(language_model.name))
                send_error(pipe, "Failed loading language model!")
                return None, None  # Match the caller's tuple unpacking

            # Load the model configurations into pocketsphinx
            config.set_string('-hmm', str(language_model.hmm))
            config.set_string('-lm', str(language_model.lm))
            config.set_string('-dict', str(language_model.dict))
            decoder = Decoder(config)

            send_json(
                pipe,
                {"success": True})  # Send a success message to the client

            l_log.debug("Set the language model to %s" %
                        str(language_model.name))

            return decoder, nltk_model  # Return the new decoder and nltk model

        def process_text(pipe, text, is_final, args):
            """Internal worker method to process the Speech To Text phrase

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                text (str): The spoken text to further process
                is_final (bool): True if the text is the final result, False if it is a partial result
                args (dict): Any other flags specifically required for a final or partial speech result
            """

            generate_keyphrases = mutex_flags["keyphrases"]["use"]
            keyphrases = []

            if generate_keyphrases:
                text_processor.generate_keyphrases(
                    text)  # Generate keyphrases from the given text
                keyphrases_list = text_processor.get_keyphrases()

                for keyphrase in keyphrases_list:
                    to_append_keyphrase = {
                        "score": keyphrase[0],
                        "keyphrase": keyphrase[1]
                    }
                    keyphrases.append(to_append_keyphrase)
            else:
                keyphrases = text  # Don't do any processing and just pass the text into the keyphrases

            # Generate the json to be sent back to the client
            hypothesis_results = args
            hypothesis_results["keyphrases"] = generate_keyphrases
            if is_final:
                hypothesis_results["hypothesis"] = keyphrases
            else:
                hypothesis_results["partial_hypothesis"] = keyphrases

            l_log.debug("Hypothesis results: %s" % str(hypothesis_results))

            # Send the results back to the client
            send_json(pipe, hypothesis_results)
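
        # For illustration only (an assumption, not part of the original source): with
        # keyphrase generation enabled, a final result sent back through the pipe looks
        # roughly like
        #   {"silence": False, "score": ..., "confidence": ..., "keyphrases": True,
        #    "hypothesis": [{"score": 8.5, "keyphrase": "hello world"}, ...]}
        # and with it disabled, "hypothesis" carries the raw hypothesis string instead.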

        def start_audio(pipe, decoder, args):
            """Internal worker method to start the audio processing chunk sequence

            Note:
                This must be called before the process_audio method or the STT engine will not process the audio chunks

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                decoder (Decoder): The pocketsphinx decoder to control the STT engine
                args (dict): All of the available arguments passed by the parent process

            """

            if decoder is None:
                l_log.error("Language model is not loaded")
                send_error(pipe, "Language model not loaded!")
                send_json(pipe, {"decoder": False})
                return

            l_log.debug("Starting the audio processing...")

            decoder.start_utt()  # Start the pocketsphinx listener

            # Tell the client that the decoder has successfully been loaded
            send_json(pipe, {"decoder": True})

        def process_audio(pipe, decoder, args):
            """Internal worker method to process an audio chunk

            Note:
                The audio chunk is expected to be in base64 format

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                decoder (Decoder): The pocketsphinx decoder to control the STT engine
                args (dict): All of the available arguments passed by the parent process

            """
            if decoder is None:
                l_log.error("Language model is not loaded")
                send_error(pipe, "Language model not loaded!")
                return

            l_log.debug("Processing audio chunk!")

            audio_chunk = args["audio"]  # Retrieve the audio data
            processed_wav = audio_processor.process_chunk(
                audio_chunk)  # Process the base64 wrapped audio data

            l_log.debug("Recognizing speech...")

            decoder.process_raw(
                processed_wav, False,
                False)  # Process the chunk through the STT engine (no_search=False, full_utt=False)

            hypothesis = decoder.hyp()  # Get pocketsphinx's hypothesis

            # Send back the results of the decoding
            if hypothesis is None:
                l_log.debug("Silence detected")
                send_json(pipe, {
                    "partial_silence": True,
                    "partial_hypothesis": None
                })
            else:
                hypothesis_results = {
                    "partial_silence": len(hypothesis.hypstr) == 0,
                }

                l_log.debug("Partial speech detected: %s" %
                            str(hypothesis.hypstr))
                process_text(pipe, hypothesis.hypstr, False,
                             hypothesis_results)

            l_log.debug("Done decoding speech from audio chunk!")

        def stop_audio(pipe, decoder, args):
            """Internal worker method to stop the audio processing chunk sequence

            Note:
                This must be called after the process_audio method or the STT engine will continue to listen for audio chunks

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                decoder (Decoder): The pocketsphinx decoder to control the STT engine
                args (dict): All of the available arguments passed by the parent process

            """

            if decoder is None:
                l_log.error("Language model is not loaded")
                send_error(pipe, "Language model not loaded!")
                send_json({"decoder": False})
                return

            l_log.debug("Stopping the audio processing...")

            decoder.end_utt()  # Stop the pocketsphinx listener

            l_log.debug("Done recognizing speech!")

            hypothesis = decoder.hyp()  # Get pocketsphinx's hypothesis
            logmath = decoder.get_logmath()

            # Send back the results of the decoding
            if hypothesis is None:
                l_log.debug("Silence detected")
                send_json(pipe, {"silence": True, "hypothesis": None})
            else:
                hypothesis_results = {
                    "silence": False if len(hypothesis.hypstr) > 0 else True,
                    "score": hypothesis.best_score,
                    "confidence": logmath.exp(hypothesis.prob)
                }

                l_log.debug("Speech detected: %s" % str(hypothesis.hypstr))
                process_text(pipe, hypothesis.hypstr, True, hypothesis_results)

        def shutdown_thread(self, l_log):
            """Worker method to handle the checking of a shutdown call

            Note:
                To reduce overhead, this thread will only be called every 100 milliseconds

            """
            while not shutdown_flags["shutdown"]:
                try:
                    if self._shutdown_event.is_set():
                        l_log.debug("Shutting down worker thread!")
                        shutdown_flags["shutdown"] = True  # Exit the main loop
                        if shutdown_flags["decoder"] is not None:
                            try:
                                shutdown_flags["decoder"].end_utt()
                            except Exception as err:
                                l_log.debug(
                                    "STT decoder object returned a non-zero status"
                                )
                        else:
                            l_log.warning(
                                "The decoder object is already None!")

                        break
                    sleep(0.1)
                except Exception as err:
                    l_log.error(
                        "Failed shutting down worker thread! (err: %s)" %
                        str(err))

        shutdown_t = Thread(target=shutdown_thread, args=(self, l_log))
        shutdown_t.daemon = True  # Don't let the watcher thread block interpreter exit
        shutdown_t.start()

        p_out, p_in = pipe
        while not shutdown_flags["shutdown"]:
            try:
                try:
                    command = self.__get_buffered(
                        p_out)  # Wait for a command from the parent process
                    if "set_models" in command[
                            "exec"]:  # Check to see if our command is to
                        decoder, nltk_model = load_models(
                            p_out, config, command["args"])
                        text_processor.set_nltk_model(
                            nltk_model)  # Set the text processor nltk model
                        shutdown_flags["decoder"] = decoder
                    elif "start_audio" in command["exec"]:
                        start_audio(p_out, decoder, command["args"])
                    elif "process_audio" in command["exec"]:
                        process_audio(p_out, decoder, command["args"])
                    elif "stop_audio" in command["exec"]:
                        stop_audio(p_out, decoder, command["args"])
                    elif "set_keyphrases" in command["exec"]:
                        mutex_flags["keyphrases"] = command["args"]
                    else:
                        l_log.error("Invalid command %s" % str(command))
                        send_error(p_out, "Invalid command!")
                except (EOFError, IOError) as err:
                    continue
            except Exception as err:
                l_log.error(
                    "Failed recieving command from subprocess (id: %d) (err: %s)"
                    % (current_process().pid, str(err)))
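
A minimal parent-side sketch, not part of the original source: the command names and the "exec"/"args" layout mirror the dispatch loop above, but the helper below and its argument names (language_model, nltk_model, audio_chunk_b64) are hypothetical, and the actual transport goes through the class's private __send_buffered/__get_buffered helpers, which are not shown here.

def build_worker_commands(language_model, nltk_model, audio_chunk_b64):
    """Build the command dicts the worker's dispatch loop expects (hypothetical helper)."""
    return [
        # Both model objects must expose .is_valid_model(); the language model
        # also needs .hmm, .lm, .dict and .name for the pocketsphinx config
        {"exec": "set_models",
         "args": {"language_model": language_model, "nltk_model": nltk_model}},
        # Keyphrase extraction is toggled through the "use" flag read by process_text
        {"exec": "set_keyphrases", "args": {"use": True}},
        {"exec": "start_audio", "args": {}},
        # The audio payload must be a base64-encoded chunk, as process_audio expects
        {"exec": "process_audio", "args": {"audio": audio_chunk_b64}},
        {"exec": "stop_audio", "args": {}},
    ]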