示例#1
0
文件: utils.py 项目: saonam/rhasspy
    def sample_sentences(intent_name: str, intent_fst_path: str):
        """Return randomly sampled sentences from the FST at ``intent_fst_path``.

        Runs the external ``fstrandgen`` command on the FST file, parses its
        output into an FST, and converts every printed path into an intent
        dictionary with ``symbols2intent``.

        Args:
            intent_name: Not used by the body of this function.
            intent_fst_path: Path to the FST file handed to ``fstrandgen``.

        Returns:
            A list with one ``symbols2intent`` result per path.

        Raises:
            subprocess.CalledProcessError: If ``fstrandgen`` exits non-zero.
        """
        # NOTE(review): ``num_samples`` is not defined in this function; it is
        # presumably captured from the enclosing scope - confirm.
        command = ["fstrandgen", f"--npath={num_samples}", intent_fst_path]
        sampled_fst = fst.Fst.read_from_string(subprocess.check_output(command))

        # Meta symbols are kept (exclude_meta=False) for symbols2intent.
        return [
            symbols2intent(path_symbols)
            for path_symbols in fstprintall(sampled_fst, exclude_meta=False)
        ]
示例#2
0
def make_sentences_by_intent(intent_fst: fst.Fst) -> Dict[str, Any]:
    """Group every sentence of an FST by the name of its intent.

    Args:
        intent_fst: FST whose paths are enumerated with ``fstprintall``.

    Returns:
        A ``defaultdict(list)`` mapping each intent name to the list of
        ``symbols2intent`` results that carry that name, in enumeration order.
    """
    from rhasspy.train.jsgf2fst import fstprintall, symbols2intent

    # Shape: { intent name: [ intent dict, ... ] }
    grouped: Dict[str, Any] = defaultdict(list)

    # Lazily convert each path so conversion and grouping stay interleaved.
    parsed_sentences = (
        symbols2intent(path_symbols)
        for path_symbols in fstprintall(intent_fst, exclude_meta=False)
    )
    for sentence in parsed_sentences:
        grouped[sentence["intent"]["name"]].append(sentence)

    return grouped
示例#3
0
    def recognize_fuzzy(self, text: str, eps: str = "<eps>") -> Dict[str, Any]:
        """Do fuzzy breadth-first search on FST as graph.

        The text is split on whitespace. When the profile setting
        ``intent.fsticuffs.ignore_unknown_words`` is true (the default used
        here), tokens that are not in ``self.words`` are dropped. Every
        candidate returned by ``FsticuffsRecognizer._get_symbols_and_costs``
        is converted with ``symbols2intent`` and scored with the confidence
        ``(len(tokens) - cost) / len(tokens)``.

        Args:
            text: Sentence to recognize; assumed to be lower case with
                whitespace-separated tokens.
            eps: Epsilon symbol forwarded to the graph search and to
                ``symbols2intent``.

        Returns:
            The highest-confidence intent, extended with ``"slots"`` (entity
            name -> value) and ``"intents"`` (the remaining candidates in
            descending confidence order). When there are no tokens or no
            candidates, the result of ``empty_intent()`` with ``"text"`` set
            to the input text.
        """
        from rhasspy.train.jsgf2fst import symbols2intent

        # Assume lower case, white-space separated tokens.
        # str.split() (unlike re.split(r"\s+", ...)) never yields empty
        # tokens for leading/trailing whitespace or for empty text, so they
        # cannot reach the search or inflate the confidence denominator.
        tokens = text.split()

        if self.profile.get("intent.fsticuffs.ignore_unknown_words", True):
            # Filter tokens
            tokens = [w for w in tokens if w in self.words]

        # Only run search if there are any tokens
        intents = []
        if tokens:
            intent_symbols_and_costs = FsticuffsRecognizer._get_symbols_and_costs(
                self.graph, tokens, stop_words=self.stop_words, eps=eps
            )
            num_tokens = len(tokens)
            for symbols, cost in intent_symbols_and_costs.values():
                intent = symbols2intent(symbols, eps=eps)
                intent["intent"]["confidence"] = (num_tokens - cost) / num_tokens
                intents.append(intent)

            # Best candidate first; the sort is stable, so ties keep their order.
            intents.sort(key=lambda i: i["intent"]["confidence"], reverse=True)

        self._logger.debug("Recognized %s intent(s)", len(intents))

        if not intents:
            # Nothing recognized: return an empty intent carrying the input.
            intent = empty_intent()
            intent["text"] = text
            return intent

        # Use first intent
        intent = intents[0]

        # Add slots (a later entity with the same name overrides an earlier one)
        intent["slots"] = {ev["entity"]: ev["value"] for ev in intent["entities"]}

        # Add alternative intents
        intent["intents"] = intents[1:]

        self._logger.debug(intents)

        return intent
示例#4
0
    def train(self, intent_fst: fst.Fst):
        """Train by piping the FST's sentences to an external program as JSON.

        All sentences of ``intent_fst`` are grouped by intent name, serialized
        to JSON and written to the standard input of ``self.command``.

        Args:
            intent_fst: FST whose paths are enumerated with ``fstprintall``.

        Any exception raised along the way (including a non-zero exit status
        of the command, because ``check=True``) is logged and not re-raised.
        """
        self._logger.debug(self.command)

        try:
            # Shape: { intent name: [ intent dict, ... ] }
            grouped: Dict[str, Any] = {}

            for path_symbols in fstprintall(intent_fst, exclude_meta=False):
                sentence = symbols2intent(path_symbols)
                grouped.setdefault(sentence["intent"]["name"], []).append(sentence)

            # JSON -> STDIN
            payload = json.dumps(grouped).encode()
            subprocess.run(self.command, input=payload, check=True)
        except Exception:
            self._logger.exception("train")