Example #1
    def in_loaded(self, message: Any, sender: RhasspyActor) -> None:
        """Handle messages in loaded state."""
        if isinstance(message, RecognizeIntent):
            try:
                self.load_graph()

                # Assume lower case, white-space separated tokens
                text = message.text
                tokens = re.split(r"\s+", text)

                if self.profile.get("intent.fsticuffs.ignore_unknown_words", True):
                    # Filter tokens
                    tokens = [w for w in tokens if w in self.words]

                recognitions = recognize(
                    tokens, self.graph, fuzzy=self.fuzzy, stop_words=self.stop_words
                )
                assert recognitions, "No intent recognized"

                # Use first intent
                recognition = recognitions[0]

                # Convert to a JSON-serializable dictionary
                intent = recognition.asdict()
            except Exception:
                self._logger.exception("in_loaded")
                intent = empty_intent()

            intent["speech_confidence"] = message.confidence
            self.send(
                message.receiver or sender,
                IntentRecognized(intent, handle=message.handle),
            )
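
For context, the graph this handler loads can be built with rhasspy-nlu's documented parse_ini/intents_to_graph helpers. Below is a minimal standalone sketch of the same recognize() call with fuzzy matching and stop words; the intent template and sentence are illustrative, not from the original source.

import rhasspynlu

# Hypothetical intent template, for illustration only
intents = rhasspynlu.parse_ini(
    """
[ChangeLightState]
turn (on | off){state} [the] living room lamp
"""
)
graph = rhasspynlu.intents_to_graph(intents)

# fuzzy=True lets recognition skip words (e.g. stop words) missing from the graph
recognitions = rhasspynlu.recognize(
    "please turn on the living room lamp", graph, fuzzy=True, stop_words={"please"}
)
if recognitions:
    print(recognitions[0].intent.name)  # expected: ChangeLightState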
Example #2
    def getIntent(self, text):
        """Recognize intent(s) in text using the loaded intent graph."""

        # recognitions = rhasspynlu.recognize("set brightness to two", graph)
        # assert recognitions[0].tokens[-1] == 2
        #
        # recognitions = rhasspynlu.recognize("set brightness to one", graph)
        # assert recognitions[0].tokens[-1] == 1

        recognitions = rhasspynlu.recognize(text, self.graph)
        return recognitions
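
The commented assertions above mirror rhasspy-nlu's converter feature, where substitutions (word:value) and the !int converter turn matched words into integers. A sketch of a graph that would satisfy them, assuming the documented template syntax:

import rhasspynlu

graph = rhasspynlu.intents_to_graph(
    rhasspynlu.parse_ini(
        """
[SetBrightness]
set brightness to (one:1 | two:2){brightness!int}
"""
    )
)

recognitions = rhasspynlu.recognize("set brightness to two", graph)
assert recognitions[0].tokens[-1] == 2  # "two" substituted to "2", converted by !int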
Example #3
    def handle_query(self, query: NluQuery):
        """Do intent recognition."""
        def intent_filter(intent_name: str) -> bool:
            """Filter out intents."""
            if query.intentFilter:
                return intent_name in query.intentFilter
            return True

        recognitions = recognize(query.input,
                                 self.graph,
                                 intent_filter=intent_filter)
        if recognitions:
            # Use first recognition only.
            recognition = recognitions[0]
            assert recognition is not None
            assert recognition.intent is not None

            self.publish(
                NluIntent(
                    input=query.input,
                    id=query.id,
                    siteId=query.siteId,
                    sessionId=query.sessionId,
                    intent=Intent(
                        intentName=recognition.intent.name,
                        confidenceScore=recognition.intent.confidence,
                    ),
                    slots=[
                        Slot(
                            entity=e.entity,
                            slotName=e.entity,
                            confidence=1,
                            value=e.value,
                            raw_value=e.raw_value,
                            range=SlotRange(start=e.raw_start, end=e.raw_end),
                        ) for e in recognition.entities
                    ],
                ),
                intentName=recognition.intent.name,
            )
        else:
            # Not recognized
            self.publish(
                NluIntentNotRecognized(
                    input=query.input,
                    id=query.id,
                    siteId=query.siteId,
                    sessionId=query.sessionId,
                ))
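
The intent_filter argument is just a callable from intent name to bool: recognize() consults it for each candidate intent and drops those that return False. A minimal sketch, assuming graph is an already-loaded intent graph and the intent names are illustrative:

from rhasspynlu import recognize

allowed = {"GetTime", "GetTemperature"}  # hypothetical whitelist

recognitions = recognize(
    "what time is it",
    graph,
    intent_filter=lambda intent_name: intent_name in allowed,
)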
Example #4
    async def handle_query(
        self, query: NluQuery
    ) -> typing.AsyncIterable[
        typing.Union[
            NluIntentParsed,
            typing.Tuple[NluIntent, TopicArgs],
            NluIntentNotRecognized,
            NluError,
        ]
    ]:
        """Do intent recognition."""
        original_input = query.input

        try:
            if not self.intent_graph and self.graph_path and self.graph_path.is_file():
                # Load graph from file
                _LOGGER.debug("Loading %s", self.graph_path)
                with open(self.graph_path, mode="rb") as graph_file:
                    self.intent_graph = rhasspynlu.gzip_pickle_to_graph(graph_file)

            if self.intent_graph:

                def intent_filter(intent_name: str) -> bool:
                    """Filter out intents."""
                    if query.intent_filter:
                        return intent_name in query.intent_filter
                    return True

                # Replace digits with words
                if self.replace_numbers:
                    # Have to assume whitespace tokenization
                    words = rhasspynlu.replace_numbers(query.input.split(),
                                                       self.language)
                    query.input = " ".join(words)

                input_text = query.input

                # Fix casing for output event
                if self.word_transform:
                    input_text = self.word_transform(input_text)

                if self.failure_token and (self.failure_token
                                           in query.input.split()):
                    # Failure token was found in input
                    recognitions = []
                else:
                    # Pass in raw query input so raw values will be correct
                    recognitions = recognize(
                        query.input,
                        self.intent_graph,
                        intent_filter=intent_filter,
                        word_transform=self.word_transform,
                        fuzzy=self.fuzzy,
                        extra_converters=self.extra_converters,
                    )
            else:
                _LOGGER.error("No intent graph loaded")
                recognitions = []

            if NluHermesMqtt.is_success(recognitions):
                # Use first recognition only.
                recognition = recognitions[0]
                assert recognition is not None
                assert recognition.intent is not None

                intent = Intent(
                    intent_name=recognition.intent.name,
                    confidence_score=recognition.intent.confidence,
                )
                slots = [
                    Slot(
                        entity=(e.source or e.entity),
                        slot_name=e.entity,
                        confidence=1.0,
                        value=e.value_dict,
                        raw_value=e.raw_value,
                        range=SlotRange(
                            start=e.start,
                            end=e.end,
                            raw_start=e.raw_start,
                            raw_end=e.raw_end,
                        ),
                    ) for e in recognition.entities
                ]

                if query.custom_entities:
                    # Copy user-defined entities
                    for entity_name, entity_value in query.custom_entities.items():
                        slots.append(
                            Slot(
                                entity=entity_name,
                                confidence=1.0,
                                value={"value": entity_value},
                            ))

                # intentParsed
                yield NluIntentParsed(
                    input=recognition.text,
                    id=query.id,
                    site_id=query.site_id,
                    session_id=query.session_id,
                    intent=intent,
                    slots=slots,
                )

                # intent
                yield (
                    NluIntent(
                        input=recognition.text,
                        id=query.id,
                        site_id=query.site_id,
                        session_id=query.session_id,
                        intent=intent,
                        slots=slots,
                        asr_tokens=[
                            NluIntent.make_asr_tokens(recognition.tokens)
                        ],
                        asr_confidence=query.asr_confidence,
                        raw_input=original_input,
                        wakeword_id=query.wakeword_id,
                        lang=(query.lang or self.lang),
                        custom_data=query.custom_data,
                    ),
                    {
                        "intent_name": recognition.intent.name
                    },
                )
            else:
                # Not recognized
                yield NluIntentNotRecognized(
                    input=query.input,
                    id=query.id,
                    site_id=query.site_id,
                    session_id=query.session_id,
                    custom_data=query.custom_data,
                )
        except Exception as e:
            _LOGGER.exception("handle_query")
            yield NluError(
                site_id=query.site_id,
                session_id=query.session_id,
                error=str(e),
                context=original_input,
            )
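
This example lazily loads a gzipped, pickled intent graph with rhasspynlu.gzip_pickle_to_graph(). A round-trip sketch of that persistence step; the graph_to_gzip_pickle counterpart is assumed from rhasspy-nlu and worth verifying against your installed version:

import rhasspynlu

graph = rhasspynlu.intents_to_graph(rhasspynlu.parse_ini("[GetTime]\nwhat time is it"))

with open("intent_graph.pickle.gz", "wb") as graph_file:
    rhasspynlu.graph_to_gzip_pickle(graph, graph_file)  # assumed counterpart API

with open("intent_graph.pickle.gz", "rb") as graph_file:
    intent_graph = rhasspynlu.gzip_pickle_to_graph(graph_file)  # as used above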
Example #5
async def recognize(args: argparse.Namespace, core: Voice2JsonCore) -> None:
    """Recognize intent from sentence(s)."""
    import networkx as nx
    import rhasspynlu
    from .train import WordCasing

    # Make sure profile has been trained
    assert core.check_trained(), "Not trained"

    # Load settings
    language_code = pydash.get(core.profile, "language.code", "en-US")
    word_casing = WordCasing(
        pydash.get(core.profile, "training.word-casing", "ignore").lower())
    intent_graph_path = core.ppath("training.intent-graph", "intent.pickle.gz")
    converters_dir = core.ppath("training.converters-directory", "converters")
    stop_words_path = core.ppath("intent-recognition.stop-words",
                                 "stop_words.txt")
    fuzzy = pydash.get(core.profile, "intent-recognition.fuzzy", True)

    # Load stop words
    stop_words: typing.Optional[typing.Set[str]] = None
    if stop_words_path and stop_words_path.is_file():
        stop_words = set()
        with open(stop_words_path, "r") as stop_words_file:
            for line in stop_words_file:
                line = line.strip()
                if line:
                    stop_words.add(line)

    # Load converters
    extra_converters: typing.Optional[typing.Dict[str, typing.Any]] = {}
    if converters_dir:
        extra_converters = load_converters(converters_dir)

    # Case transformation for input words
    word_transform = None
    if word_casing == WordCasing.UPPER:
        word_transform = str.upper
    elif word_casing == WordCasing.LOWER:
        word_transform = str.lower

    if args.sentence:
        sentences = args.sentence
    else:
        if os.isatty(sys.stdin.fileno()):
            print("Reading sentences from stdin", file=sys.stderr)

        sentences = sys.stdin

    # Whitelist function for intents
    if args.intent_filter:
        args.intent_filter = set(args.intent_filter)

    def intent_filter(intent_name: str) -> bool:
        """Filter out intents."""
        if args.intent_filter:
            return intent_name in args.intent_filter

        return True

    # Load intent graph
    _LOGGER.debug("Loading %s", intent_graph_path)
    with gzip.GzipFile(intent_graph_path, mode="rb") as graph_gzip:
        intent_graph = nx.readwrite.gpickle.read_gpickle(graph_gzip)

    # Process sentences
    try:
        for sentence in sentences:
            if args.text_input:
                # Input is plain text
                text = sentence
                sentence_object = {"text": text}
            else:
                # Input is JSON
                sentence_object = json.loads(sentence)
                text = sentence_object.get(args.transcription_property, "")

            # Tokenize
            text = text.strip()
            tokens = text.split()

            if args.replace_numbers:
                tokens = list(
                    rhasspynlu.replace_numbers(tokens, language=language_code))

            # Recognize intent
            recognitions = rhasspynlu.recognize(
                tokens,
                intent_graph,
                fuzzy=fuzzy,
                stop_words=stop_words,
                word_transform=word_transform,
                extra_converters=extra_converters,
                intent_filter=intent_filter,
            )

            if recognitions:
                # Use first recognition
                recognition = recognitions[0]
            else:
                # Recognition failure
                recognition = rhasspynlu.intent.Recognition.empty()

            result = dataclasses.asdict(recognition)

            # Add slots
            result["slots"] = {e.entity: e.value for e in recognition.entities}

            # Merge with input object
            for key, value in result.items():
                if (key not in sentence_object) or (value is not None):
                    sentence_object[key] = value

            if not sentence_object["text"]:
                sentence_object["text"] = text

            # Keep text from transcription
            sentence_object["raw_text"] = text

            if args.perplexity:
                # Compute perplexity of input text for one or more language
                # models (stored in FST binary format).
                perplexity = {}
                for lm_fst_path in args.perplexity:
                    try:
                        perplexity[lm_fst_path] = rhasspynlu.arpa_lm.get_perplexity(
                            text, lm_fst_path, debug=args.debug
                        )
                    except Exception:
                        _LOGGER.exception(lm_fst_path)

                sentence_object["perplexity"] = perplexity

            print_json(sentence_object)
    except KeyboardInterrupt:
        pass
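
Both this example and Example #4 normalize digits with rhasspynlu.replace_numbers() before matching, since trained graphs contain number words rather than digits. In isolation:

import rhasspynlu

tokens = "set a timer for 10 minutes".split()
words = list(rhasspynlu.replace_numbers(tokens, language="en-US"))
print(words)  # e.g. ["set", "a", "timer", "for", "ten", "minutes"] (exact form may vary)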
Example #6
import typing

import networkx as nx
import rhasspynlu
from rhasspynlu.intent import Entity, Intent, Recognition
from snips_nlu import SnipsNLUEngine


def recognize(
    text: str,
    engine: SnipsNLUEngine,
    slots_dict: typing.Optional[typing.Dict[str, typing.List[str]]] = None,
    slot_graphs: typing.Optional[typing.Dict[str, nx.DiGraph]] = None,
    **parse_args,
) -> typing.List[Recognition]:
    """Recognize intent using Snips NLU."""
    result = engine.parse(text, **parse_args)
    intent_name = result.get("intent", {}).get("intentName")

    if not intent_name:
        # Recognition failure
        return []

    slots_dict = slots_dict or {}
    slot_graphs = slot_graphs or {}

    recognition = Recognition(text=text,
                              raw_text=text,
                              intent=Intent(name=intent_name, confidence=1.0))

    # Replace Snips slot values with Rhasspy slot values (substituted)
    for slot in result.get("slots", []):
        slot_name = slot.get("slotName")
        slot_value_dict = slot.get("value", {})
        slot_value = slot_value_dict.get("value")

        entity = Entity(
            entity=slot_name,
            source=slot.get("entity", ""),
            value=slot_value,
            raw_value=slot.get("rawValue", slot_value),
            start=slot["range"]["start"],
            end=slot["range"]["end"],
        )
        recognition.entities.append(entity)

        if (not slot_name) or (not slot_value):
            continue

        slot_graph = slot_graphs.get(slot_name)
        if not slot_graph and (slot_name in slots_dict):
            # Convert slot values to graph
            slot_graph = rhasspynlu.sentences_to_graph({
                slot_name: [
                    rhasspynlu.jsgf.Sentence.parse(slot_line)
                    for slot_line in slots_dict[slot_name]
                    if slot_line.strip()
                ]
            })

            slot_graphs[slot_name] = slot_graph

        entity.tokens = slot_value.split()
        entity.raw_tokens = list(entity.tokens)

        if slot_graph:
            # Pass Snips value through graph
            slot_recognitions = rhasspynlu.recognize(entity.tokens, slot_graph)
            if slot_recognitions:
                # Pull out substituted value and replace in Rhasspy entity
                new_slot_value = slot_recognitions[0].text
                entity.value = new_slot_value
                entity.tokens = new_slot_value.split()

    return [recognition]
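
The slot-substitution step above can be exercised on its own: build a small graph from one slot's values with sentences_to_graph(), then pass a raw value through recognize() to get the substituted text. Slot values here are illustrative:

import rhasspynlu

slot_graph = rhasspynlu.sentences_to_graph(
    {
        "color": [
            rhasspynlu.jsgf.Sentence.parse("(red | crimson):red"),
            rhasspynlu.jsgf.Sentence.parse("blue"),
        ]
    }
)

slot_recognitions = rhasspynlu.recognize("crimson", slot_graph)
if slot_recognitions:
    print(slot_recognitions[0].text)  # expected: "red" (the substituted value)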