示例#1
0
    def parse(self,
              text: Text,
              time: Optional[datetime.datetime] = None,
              only_output_properties: bool = True) -> Dict[Text, Any]:
        """Run the input text through the pipeline and return the result.

        Args:
            text: The text to process. A falsy value (empty string or
                None) short-circuits the pipeline entirely.
            time: Optional timestamp handed to the created ``Message``.
            only_output_properties: Forwarded unchanged to
                ``Message.as_dict``.

        Returns:
            The default output attributes, updated with whatever the
            processed message reports via ``as_dict``. For falsy input
            only the defaults plus ``"text": ""`` are returned.
        """
        if not text:
            # Some components cannot cope with empty strings, so the
            # pipeline is skipped. The result then only carries the default
            # attributes rather than every component's output, which is
            # acceptable: callers should not pass empty text to begin with.
            fallback = self.default_output_attributes()
            fallback["text"] = ""
            return fallback

        parsed = Message(text, self.default_output_attributes(), time=time)

        # Each component mutates the message in place.
        for step in self.pipeline:
            step.process(parsed, **self.context)

        result = self.default_output_attributes()
        extracted = parsed.as_dict(
            only_output_properties=only_output_properties)
        result.update(extracted)
        return result
示例#2
0
    def read_from_json(self,
                       js: Dict[Text, Any],
                       **kwargs: Any) -> 'TrainingData':
        """Loads training data stored in the WIT.ai data format.

        Args:
            js: Parsed WIT.ai export. ``js["data"]`` is iterated and each
                item is read for its "text" and "entities" keys. The
                structure is not modified.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            A ``TrainingData`` holding one ``Message`` per item whose
            "entities" value is not None. Other items are skipped.

        Raises:
            KeyError: If ``js`` has no "data" key, if an entity has no
                "entity" key, or if an intent / kept entity has no "value".
        """
        from spawn_ai.training_data import Message, TrainingData

        training_examples = []

        for sample in js["data"]:
            raw_entities = sample.get("entities")
            if raw_entities is None:
                continue
            text = sample.get("text")

            # The intent is stored as a pseudo-entity named 'intent'; only
            # the first one found is used.
            intents = [e["value"]
                       for e in raw_entities
                       if e["entity"] == 'intent']
            intent = intents[0].strip("\"") if intents else None

            # Keep only entities that carry character offsets and are not
            # the intent pseudo-entity. For some reason wit adds additional
            # quotes around entity values, so they are stripped here. Each
            # entity is copied so the caller's ``js`` is left untouched.
            entities = [dict(e, value=e["value"].strip("\""))
                        for e in raw_entities
                        if ("start" in e and "end" in e and
                            e["entity"] != 'intent')]

            data = {}
            if intent:
                data["intent"] = intent
            data["entities"] = entities
            training_examples.append(Message(text, data))
        return TrainingData(training_examples)
示例#3
0
    def _parse_training_example(self, example):
        """Turn one annotated example into a plain-text ``Message``.

        Entities are collected from the annotated example, every match of
        ``ent_regex`` is replaced by its ``entity_text`` group, and the
        resulting text plus entities are passed on to ``_add_synonyms``.
        The message's intent is ``self.current_title``; an 'entities' entry
        is only set when at least one entity was found.
        """
        from spawn_ai.training_data import Message

        found_entities = self._find_entities_in_training_example(example)

        def _keep_entity_text(match):
            # Drop the annotation markup, keeping only the annotated text.
            return match.groupdict()['entity_text']

        stripped_text = re.sub(ent_regex, _keep_entity_text, example)
        self._add_synonyms(stripped_text, found_entities)

        parsed = Message(stripped_text, {'intent': self.current_title})
        if len(found_entities) == 0:
            return parsed

        parsed.set('entities', found_entities)
        return parsed
示例#4
0
    def read_from_json(self,
                       js: Dict[Text, Any],
                       **kwargs: Any) -> 'TrainingData':
        """Loads training data stored in the LUIS.ai data format.

        Args:
            js: Parsed LUIS export. Reads "luis_schema_version",
                "utterances" and, if present, "regex_features".
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            A ``TrainingData`` with one ``Message`` per utterance and the
            activated regex features.

        Raises:
            Exception: If the schema version does not start with "2".
        """
        from spawn_ai.training_data import Message, TrainingData

        # Simple check to ensure we support this luis data schema version
        if not js["luis_schema_version"].startswith("2"):
            raise Exception("Invalid luis data schema version {}, "
                            "should be 2.x.x. "
                            "Make sure to use the latest luis version "
                            "(e.g. by downloading your data again)."
                            "".format(js["luis_schema_version"]))

        # Only regex features flagged as activated are carried over.
        regex_features = [
            {"name": feature.get("name"), "pattern": feature.get("pattern")}
            for feature in js.get("regex_features", [])
            if feature.get("activated", False)
        ]

        training_examples = []
        for utterance in js["utterances"]:
            text = utterance.get("text")
            intent = utterance.get("intent")

            annotations = []
            for span in utterance.get("entities") or []:
                start = span["startPos"]
                # "endPos" is inclusive in the input; the stored "end" is
                # exclusive so it can be used directly as a slice bound.
                end = span["endPos"] + 1
                value = text[start:end]
                annotations.append({"entity": span["entity"],
                                    "value": value,
                                    "start": start,
                                    "end": end})

            data = {"entities": annotations}
            if intent:
                data["intent"] = intent
            training_examples.append(Message(text, data))

        return TrainingData(training_examples, regex_features=regex_features)
示例#5
0
    def filter_trainable_entities(
            self, entity_examples: List[Message]) -> List[Message]:
        """Drop entity annotations that belong to a different extractor.

        Builds new messages from ``entity_examples``. In each one, an
        entity is kept only if its ``extractor`` value is missing/falsy or
        equals ``self.name``. The input messages are not modified: each
        result gets a shallow copy of the original ``data`` with a fresh
        'entities' list.
        """
        result = []
        for example in entity_examples:
            kept = []
            for annotation in example.get("entities", []):
                source = annotation.get("extractor")
                # Skip annotations explicitly owned by another extractor.
                if source and source != self.name:
                    continue
                kept.append(annotation)

            payload = example.data.copy()
            payload['entities'] = kept
            rebuilt = Message(text=example.text,
                              data=payload,
                              output_properties=example.output_properties,
                              time=example.time)
            result.append(rebuilt)

        return result