async def handle_query(
    self, query: NluQuery
) -> typing.AsyncIterable[
    typing.Union[
        NluIntentParsed,
        typing.Tuple[NluIntent, TopicArgs],
        NluIntentNotRecognized,
        NluError,
    ]
]:
    """Do intent recognition."""
    original_input = query.input

    try:
        if not self.intent_graph and self.graph_path and self.graph_path.is_file():
            # Load graph from file
            _LOGGER.debug("Loading %s", self.graph_path)
            with open(self.graph_path, mode="rb") as graph_file:
                self.intent_graph = rhasspynlu.gzip_pickle_to_graph(graph_file)

        if self.intent_graph:

            def intent_filter(intent_name: str) -> bool:
                """Filter out intents."""
                if query.intent_filter:
                    return intent_name in query.intent_filter
                return True

            # Replace digits with words
            if self.replace_numbers:
                # Have to assume whitespace tokenization
                words = rhasspynlu.replace_numbers(query.input.split(), self.language)
                query.input = " ".join(words)

            input_text = query.input

            # Fix casing for output event
            if self.word_transform:
                input_text = self.word_transform(input_text)

            if self.failure_token and (self.failure_token in query.input.split()):
                # Failure token was found in input
                recognitions = []
            else:
                # Pass in raw query input so raw values will be correct
                recognitions = recognize(
                    query.input,
                    self.intent_graph,
                    intent_filter=intent_filter,
                    word_transform=self.word_transform,
                    fuzzy=self.fuzzy,
                    extra_converters=self.extra_converters,
                )
        else:
            _LOGGER.error("No intent graph loaded")
            recognitions = []

        if NluHermesMqtt.is_success(recognitions):
            # Use first recognition only.
            recognition = recognitions[0]
            assert recognition is not None
            assert recognition.intent is not None

            intent = Intent(
                intent_name=recognition.intent.name,
                confidence_score=recognition.intent.confidence,
            )
            slots = [
                Slot(
                    entity=(e.source or e.entity),
                    slot_name=e.entity,
                    confidence=1.0,
                    value=e.value_dict,
                    raw_value=e.raw_value,
                    range=SlotRange(
                        start=e.start,
                        end=e.end,
                        raw_start=e.raw_start,
                        raw_end=e.raw_end,
                    ),
                )
                for e in recognition.entities
            ]

            if query.custom_entities:
                # Copy user-defined entities
                for entity_name, entity_value in query.custom_entities.items():
                    slots.append(
                        Slot(
                            entity=entity_name,
                            confidence=1.0,
                            value={"value": entity_value},
                        )
                    )

            # intentParsed
            yield NluIntentParsed(
                input=recognition.text,
                id=query.id,
                site_id=query.site_id,
                session_id=query.session_id,
                intent=intent,
                slots=slots,
            )

            # intent
            yield (
                NluIntent(
                    input=recognition.text,
                    id=query.id,
                    site_id=query.site_id,
                    session_id=query.session_id,
                    intent=intent,
                    slots=slots,
                    asr_tokens=[NluIntent.make_asr_tokens(recognition.tokens)],
                    asr_confidence=query.asr_confidence,
                    raw_input=original_input,
                    wakeword_id=query.wakeword_id,
                    lang=(query.lang or self.lang),
                    custom_data=query.custom_data,
                ),
                {"intent_name": recognition.intent.name},
            )
        else:
            # Not recognized
            yield NluIntentNotRecognized(
                input=query.input,
                id=query.id,
                site_id=query.site_id,
                session_id=query.session_id,
                custom_data=query.custom_data,
            )
    except Exception as e:
        _LOGGER.exception("handle_query")
        yield NluError(
            site_id=query.site_id,
            session_id=query.session_id,
            error=str(e),
            context=original_input,
        )
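# Usage sketch (not from the source): handle_query is an async generator, so a
# caller must drain it and route each yielded item. The (NluIntent, TopicArgs)
# tuple carries keyword arguments for filling the intent's MQTT topic.
# "publish" below is a hypothetical stand-in for the service's real
# publishing machinery.
async def drain_query(handler, query: NluQuery, publish) -> None:
    async for message in handler.handle_query(query):
        if isinstance(message, tuple):
            # (NluIntent, TopicArgs): topic args supply intent_name for a
            # topic like hermes/intent/<intent_name>.
            intent_message, topic_args = message
            publish(intent_message, **topic_args)
        else:
            # NluIntentParsed, NluIntentNotRecognized, or NluError.
            publish(message)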
async def recognize(args: argparse.Namespace, core: Voice2JsonCore) -> None:
    """Recognize intent from sentence(s)."""
    import networkx as nx
    import rhasspynlu

    from .train import WordCasing

    # Make sure profile has been trained
    assert core.check_trained(), "Not trained"

    # Load settings
    language_code = pydash.get(core.profile, "language.code", "en-US")
    word_casing = WordCasing(
        pydash.get(core.profile, "training.word-casing", "ignore").lower()
    )
    intent_graph_path = core.ppath("training.intent-graph", "intent.pickle.gz")
    converters_dir = core.ppath("training.converters-directory", "converters")
    stop_words_path = core.ppath("intent-recognition.stop-words", "stop_words.txt")
    fuzzy = pydash.get(core.profile, "intent-recognition.fuzzy", True)

    # Load stop words
    stop_words: typing.Optional[typing.Set[str]] = None
    if stop_words_path and stop_words_path.is_file():
        stop_words = set()
        with open(stop_words_path, "r") as stop_words_file:
            for line in stop_words_file:
                line = line.strip()
                if line:
                    stop_words.add(line)

    # Load converters
    extra_converters: typing.Optional[typing.Dict[str, typing.Any]] = {}
    if converters_dir:
        extra_converters = load_converters(converters_dir)

    # Case transformation for input words
    word_transform = None
    if word_casing == WordCasing.UPPER:
        word_transform = str.upper
    elif word_casing == WordCasing.LOWER:
        word_transform = str.lower

    if args.sentence:
        sentences = args.sentence
    else:
        if os.isatty(sys.stdin.fileno()):
            print("Reading sentences from stdin", file=sys.stderr)

        sentences = sys.stdin

    # Whitelist function for intents
    if args.intent_filter:
        args.intent_filter = set(args.intent_filter)

    def intent_filter(intent_name: str) -> bool:
        """Filter out intents."""
        if args.intent_filter:
            return intent_name in args.intent_filter
        return True

    # Load intent graph
    _LOGGER.debug("Loading %s", intent_graph_path)
    with gzip.GzipFile(intent_graph_path, mode="rb") as graph_gzip:
        intent_graph = nx.readwrite.gpickle.read_gpickle(graph_gzip)

    # Process sentences
    try:
        for sentence in sentences:
            if args.text_input:
                # Input is plain text
                text = sentence
                sentence_object = {"text": text}
            else:
                # Input is JSON
                sentence_object = json.loads(sentence)
                text = sentence_object.get(args.transcription_property, "")

            # Tokenize
            text = text.strip()
            tokens = text.split()

            if args.replace_numbers:
                tokens = list(
                    rhasspynlu.replace_numbers(tokens, language=language_code)
                )

            # Recognize intent
            recognitions = rhasspynlu.recognize(
                tokens,
                intent_graph,
                fuzzy=fuzzy,
                stop_words=stop_words,
                word_transform=word_transform,
                extra_converters=extra_converters,
                intent_filter=intent_filter,
            )

            if recognitions:
                # Use first recognition
                recognition = recognitions[0]
            else:
                # Recognition failure
                recognition = rhasspynlu.intent.Recognition.empty()

            result = dataclasses.asdict(recognition)

            # Add slots
            result["slots"] = {e.entity: e.value for e in recognition.entities}

            # Merge with input object
            for key, value in result.items():
                if (key not in sentence_object) or (value is not None):
                    sentence_object[key] = value

            if not sentence_object["text"]:
                sentence_object["text"] = text

            # Keep text from transcription
            sentence_object["raw_text"] = text

            if args.perplexity:
                # Compute perplexity of input text for one or more language
                # models (stored in FST binary format).
                perplexity = {}
                for lm_fst_path in args.perplexity:
                    try:
                        perplexity[lm_fst_path] = rhasspynlu.arpa_lm.get_perplexity(
                            text, lm_fst_path, debug=args.debug
                        )
                    except Exception:
                        _LOGGER.exception(lm_fst_path)

                sentence_object["perplexity"] = perplexity

            print_json(sentence_object)
    except KeyboardInterrupt:
        pass
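# A self-contained sketch of the same rhasspynlu pipeline the CLI loop above
# drives, following the library's documented parse_ini / intents_to_graph /
# recognize API. The [LightOn] template and the demo function name are made
# up for illustration.
import rhasspynlu

def quick_recognize_demo() -> None:
    # Parse sentence templates in ini format into intents
    intents = rhasspynlu.parse_ini(
        """
        [LightOn]
        turn on [the] (living room lamp | kitchen light){name}
        """
    )

    # Convert intents to a recognition graph
    graph = rhasspynlu.intents_to_graph(intents)

    # recognize() returns a list of Recognition objects, best match first
    recognitions = rhasspynlu.recognize("turn on the kitchen light", graph)
    if recognitions:
        print(recognitions[0].intent, recognitions[0].entities)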
async def handle_query(
    self, query: NluQuery
) -> typing.AsyncIterable[
    typing.Union[
        NluIntentParsed,
        NluIntentNotRecognized,
        NluError,
    ]
]:
    """Do intent recognition."""
    try:
        # Replace digits with words
        if self.replace_numbers:
            # Have to assume whitespace tokenization
            words = rhasspynlu.replace_numbers(
                query.input.split(), self.number_language
            )
            query.input = " ".join(words)

        input_text = query.input

        # Fix casing for output event
        if self.word_transform:
            input_text = self.word_transform(input_text)

        parse_url = urljoin(self.rasa_url, "model/parse")
        _LOGGER.debug(parse_url)

        async with self.http_session.post(
            parse_url,
            json={"text": input_text, "project": self.rasa_project},
            ssl=self.ssl_context,
        ) as response:
            response.raise_for_status()
            intent_json = await response.json()
            intent = intent_json.get("intent", {})
            intent_name = intent.get("name", "")

            if intent_name and (
                query.intent_filter is None or intent_name in query.intent_filter
            ):
                confidence_score = float(intent.get("confidence", 0.0))
                slots = [
                    Slot(
                        entity=e.get("entity", ""),
                        slot_name=e.get("entity", ""),
                        confidence=float(e.get("confidence", 0.0)),
                        value={
                            "kind": "Unknown",
                            "value": e.get("value", ""),
                            "additional_info": e.get("additional_info", {}),
                            "extractor": e.get("extractor", None),
                        },
                        raw_value=e.get("value", ""),
                        range=SlotRange(
                            start=int(e.get("start", 0)),
                            end=int(e.get("end", 1)),
                            raw_start=int(e.get("start", 0)),
                            raw_end=int(e.get("end", 1)),
                        ),
                    )
                    for e in intent_json.get("entities", [])
                ]

                # intentParsed
                yield NluIntentParsed(
                    input=input_text,
                    id=query.id,
                    site_id=query.site_id,
                    session_id=query.session_id,
                    intent=Intent(
                        intent_name=intent_name, confidence_score=confidence_score
                    ),
                    slots=slots,
                )
            else:
                # Not recognized
                yield NluIntentNotRecognized(
                    input=query.input,
                    id=query.id,
                    site_id=query.site_id,
                    session_id=query.session_id,
                )
    except Exception as e:
        _LOGGER.exception("nlu query")
        yield NluError(
            site_id=query.site_id,
            session_id=query.session_id,
            error=str(e),
            context=query.input,
        )
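# A minimal standalone sketch of the HTTP call handle_query makes, assuming a
# Rasa server exposing POST /model/parse (the documented Rasa REST endpoint).
# The returned JSON mirrors what the code reads above:
#   {"intent": {"name": ..., "confidence": ...}, "entities": [...], "text": ...}
import asyncio
import aiohttp

async def rasa_parse(text: str, rasa_url: str = "http://localhost:5005/") -> dict:
    async with aiohttp.ClientSession() as session:
        async with session.post(
            rasa_url + "model/parse", json={"text": text}
        ) as response:
            response.raise_for_status()
            return await response.json()

# Example: asyncio.run(rasa_parse("turn on the light"))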
async def handle_query(
    self, query: NluQuery
) -> typing.AsyncIterable[
    typing.Union[
        NluIntentParsed,
        typing.Tuple[NluIntent, TopicArgs],
        NluIntentNotRecognized,
        NluError,
    ]
]:
    """Do intent recognition."""
    # Keep original input for raw_input and error context
    original_text = query.input

    # Check intent graph
    try:
        if (
            not self.intent_graph
            and self.intent_graph_path
            and self.intent_graph_path.is_file()
        ):
            _LOGGER.debug("Loading %s", self.intent_graph_path)
            with open(self.intent_graph_path, mode="rb") as graph_file:
                self.intent_graph = rhasspynlu.gzip_pickle_to_graph(graph_file)

        # Check examples
        if self.intent_graph and self.examples_path and self.examples_path.is_file():

            def intent_filter(intent_name: str) -> bool:
                """Filter out intents."""
                if query.intent_filter:
                    return intent_name in query.intent_filter
                return True

            # Replace digits with words
            if self.replace_numbers:
                # Have to assume whitespace tokenization
                words = rhasspynlu.replace_numbers(query.input.split(), self.language)
                query.input = " ".join(words)

            input_text = query.input

            # Fix casing
            if self.word_transform:
                input_text = self.word_transform(input_text)

            recognitions: typing.List[rhasspynlu.intent.Recognition] = []

            if input_text:
                recognitions = rhasspyfuzzywuzzy.recognize(
                    input_text,
                    self.intent_graph,
                    str(self.examples_path),
                    intent_filter=intent_filter,
                    extra_converters=self.extra_converters,
                )
        else:
            _LOGGER.error("No intent graph or examples loaded")
            recognitions = []

        # Use first recognition only if above threshold
        if (
            recognitions
            and recognitions[0]
            and recognitions[0].intent
            and (recognitions[0].intent.confidence >= self.confidence_threshold)
        ):
            recognition = recognitions[0]
            assert recognition.intent

            intent = Intent(
                intent_name=recognition.intent.name,
                confidence_score=recognition.intent.confidence,
            )
            slots = [
                Slot(
                    entity=(e.source or e.entity),
                    slot_name=e.entity,
                    confidence=1.0,
                    value=e.value_dict,
                    raw_value=e.raw_value,
                    range=SlotRange(
                        start=e.start,
                        end=e.end,
                        raw_start=e.raw_start,
                        raw_end=e.raw_end,
                    ),
                )
                for e in recognition.entities
            ]

            if query.custom_entities:
                # Copy user-defined entities
                for entity_name, entity_value in query.custom_entities.items():
                    slots.append(
                        Slot(
                            entity=entity_name,
                            confidence=1.0,
                            value={"value": entity_value},
                        )
                    )

            # intentParsed
            yield NluIntentParsed(
                input=recognition.text,
                id=query.id,
                site_id=query.site_id,
                session_id=query.session_id,
                intent=intent,
                slots=slots,
            )

            # intent
            yield (
                NluIntent(
                    input=recognition.text,
                    id=query.id,
                    site_id=query.site_id,
                    session_id=query.session_id,
                    intent=intent,
                    slots=slots,
                    asr_tokens=[NluIntent.make_asr_tokens(recognition.tokens)],
                    asr_confidence=query.asr_confidence,
                    raw_input=original_text,
                    wakeword_id=query.wakeword_id,
                    lang=(query.lang or self.lang),
                    custom_data=query.custom_data,
                ),
                {"intent_name": recognition.intent.name},
            )
        else:
            # Not recognized
            yield NluIntentNotRecognized(
                input=query.input,
                id=query.id,
                site_id=query.site_id,
                session_id=query.session_id,
                custom_data=query.custom_data,
            )
    except Exception as e:
        _LOGGER.exception("handle_query")
        yield NluError(
            site_id=query.site_id,
            session_id=query.session_id,
            error=str(e),
            context=original_text,
        )
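# The acceptance rule above, pulled out for clarity (hypothetical helper, not
# in the source): a fuzzy match is used only when a recognition exists, has an
# intent, and meets the confidence threshold; anything else is treated as
# "not recognized".
def accept_recognition(recognitions, confidence_threshold: float):
    if (
        recognitions
        and recognitions[0]
        and recognitions[0].intent
        and recognitions[0].intent.confidence >= confidence_threshold
    ):
        return recognitions[0]
    return None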
async def handle_query(
    self, query: NluQuery
) -> typing.AsyncIterable[
    typing.Union[
        NluIntentParsed,
        typing.Tuple[NluIntent, TopicArgs],
        NluIntentNotRecognized,
        NluError,
    ]
]:
    """Do intent recognition."""
    try:
        original_input = query.input

        # Replace digits with words
        if self.replace_numbers:
            # Have to assume whitespace tokenization
            words = rhasspynlu.replace_numbers(
                query.input.split(), self.number_language
            )
            query.input = " ".join(words)

        input_text = query.input

        # Fix casing for output event
        if self.word_transform:
            input_text = self.word_transform(input_text)

        parse_url = urljoin(self.rasa_url, "model/parse")
        _LOGGER.debug(parse_url)

        async with self.http_session.post(
            parse_url,
            json={"text": input_text, "project": self.rasa_project},
            ssl=self.ssl_context,
        ) as response:
            response.raise_for_status()
            intent_json = await response.json()
            intent = intent_json.get("intent", {})
            intent_name = intent.get("name", "")

            if intent_name and (
                query.intent_filter is None or intent_name in query.intent_filter
            ):
                confidence_score = float(intent.get("confidence", 0.0))
                slots = [
                    Slot(
                        entity=e.get("entity", ""),
                        slot_name=e.get("entity", ""),
                        confidence=float(e.get("confidence", 0.0)),
                        value={"kind": "Unknown", "value": e.get("value", "")},
                        raw_value=e.get("value", ""),
                        range=SlotRange(
                            start=int(e.get("start", 0)),
                            end=int(e.get("end", 1)),
                            raw_start=int(e.get("start", 0)),
                            raw_end=int(e.get("end", 1)),
                        ),
                    )
                    for e in intent_json.get("entities", [])
                ]

                if query.custom_entities:
                    # Copy user-defined entities
                    for entity_name, entity_value in query.custom_entities.items():
                        slots.append(
                            Slot(
                                entity=entity_name,
                                confidence=1.0,
                                value={"value": entity_value},
                            )
                        )

                # intentParsed
                yield NluIntentParsed(
                    input=input_text,
                    id=query.id,
                    site_id=query.site_id,
                    session_id=query.session_id,
                    intent=Intent(
                        intent_name=intent_name, confidence_score=confidence_score
                    ),
                    slots=slots,
                )

                # intent
                yield (
                    NluIntent(
                        input=input_text,
                        id=query.id,
                        site_id=query.site_id,
                        session_id=query.session_id,
                        intent=Intent(
                            intent_name=intent_name,
                            confidence_score=confidence_score,
                        ),
                        slots=slots,
                        asr_tokens=[NluIntent.make_asr_tokens(input_text.split())],
                        asr_confidence=query.asr_confidence,
                        raw_input=original_input,
                        lang=(query.lang or self.lang),
                        custom_data=query.custom_data,
                    ),
                    {"intent_name": intent_name},
                )
            else:
                # Not recognized
                yield NluIntentNotRecognized(
                    input=query.input,
                    id=query.id,
                    site_id=query.site_id,
                    session_id=query.session_id,
                    custom_data=query.custom_data,
                )
    except Exception as e:
        _LOGGER.exception("nlu query")
        yield NluError(
            site_id=query.site_id,
            session_id=query.session_id,
            error=str(e),
            context=query.input,
        )
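# Illustrative mapping (made-up values): how one Rasa entity from model/parse
# becomes a Hermes Slot in the comprehension above. The slot confidence comes
# from Rasa's extractor, and raw_value duplicates value because Rasa does not
# report a separate raw span.
rasa_entity = {
    "entity": "light_name",
    "value": "kitchen",
    "confidence": 0.87,
    "start": 12,
    "end": 19,
}
# maps to:
# Slot(
#     entity="light_name",
#     slot_name="light_name",
#     confidence=0.87,
#     value={"kind": "Unknown", "value": "kitchen"},
#     raw_value="kitchen",
#     range=SlotRange(start=12, end=19, raw_start=12, raw_end=19),
# )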