示例#1
0
    def train(
        self,
        training_data: TrainingData,
        config: Optional[RasaNLUModelConfig] = None,
        **kwargs: Any,
    ) -> None:
        """Train the MITIE entity recognizer on the trainable NLU examples.

        Args:
            training_data: Source of the examples; its ``nlu_examples`` are
                passed through ``filter_trainable_entities`` before use.
            config: Not used by this method.
            **kwargs: Must contain ``mitie_file`` (handed to
                ``mitie.ner_trainer``); may contain ``num_threads``
                (defaults to 1).

        Raises:
            Exception: If ``mitie_file`` is missing or falsy.
        """
        import mitie

        feature_extractor_file = kwargs.get("mitie_file")
        if not feature_extractor_file:
            raise Exception(
                "Can not run MITIE entity extractor without a "
                "language model. Make sure this component is "
                "preceeded by the 'MitieNLP' component."
            )

        ner_trainer = mitie.ner_trainer(feature_extractor_file)
        ner_trainer.num_threads = kwargs.get("num_threads", 1)

        # Examples whose entities are pre-trained are dropped here.
        trainable_examples = self.filter_trainable_entities(
            training_data.nlu_examples
        )

        any_entity_tagged = False
        for message in trainable_examples:
            mitie_sample = self._prepare_mitie_sample(message)
            if mitie_sample.num_entities > 0:
                any_entity_tagged = True
            ner_trainer.add(mitie_sample)

        # MITIE fails to train when not a single entity is tagged, so the
        # training step is skipped in that case and ``self.ner`` is left as is.
        if any_entity_tagged:
            self.ner = ner_trainer.train()
    def train(self,
              training_data: TrainingData,
              config: RasaNLUModelConfig,
              **kwargs: Any) -> None:
        """Fit a MITIE NER model from the trainable training examples.

        Args:
            training_data: Provides ``training_examples``, which are filtered
                with ``filter_trainable_entities`` before training.
            config: Not read by this method.
            **kwargs: ``mitie_file`` is required and is passed to
                ``mitie.ner_trainer``; ``num_threads`` is optional and
                falls back to 1.

        Raises:
            Exception: When no ``mitie_file`` was supplied.
        """
        import mitie

        language_model_file = kwargs.get("mitie_file")
        if not language_model_file:
            raise Exception("Can not run MITIE entity extractor without a "
                            "language model. Make sure this component is "
                            "preceeded by the 'MitieNLP' component.")

        mitie_trainer = mitie.ner_trainer(language_model_file)
        mitie_trainer.num_threads = kwargs.get("num_threads", 1)

        # Skip examples whose entities come from pre-trained extractors.
        candidates = self.filter_trainable_entities(
            training_data.training_examples)

        has_tagged_entity = False
        for candidate in candidates:
            prepared = self._prepare_mitie_sample(candidate)
            if prepared.num_entities > 0:
                has_tagged_entity = True
            mitie_trainer.add(prepared)

        if not has_tagged_entity:
            # MITIE cannot train without at least one tagged entity.
            return
        self.ner = mitie_trainer.train()
    def train(self, training_data, config, **kwargs):
        # type: (TrainingData, RasaNLUConfig) -> None
        """Train a MITIE named entity recognizer on ``training_examples``.

        For every example a MITIE training instance is built from the texts of
        its ``tokens`` and each annotated entity is added to it. Entities that
        cannot be located or that MITIE rejects are logged and skipped; the
        example itself is still added to the trainer.

        Args:
            training_data: Object exposing ``training_examples``.
            config: Mapping providing ``mitie_file`` (passed to
                ``mitie.ner_trainer``) and ``num_threads``.
            **kwargs: Ignored.
        """
        import mitie

        trainer = mitie.ner_trainer(config["mitie_file"])
        trainer.num_threads = config["num_threads"]
        found_one_entity = False
        for example in training_data.training_examples:
            text = example.text
            tokens = example.get("tokens")
            sample = mitie.ner_training_instance([t.text for t in tokens])
            for ent in example.get("entities", []):
                try:
                    # if the token is not aligned an exception will be raised
                    start, end = MitieEntityExtractor.find_entity(
                        ent, text, tokens)
                except ValueError as e:
                    logger.warning("Example skipped: {}".format(str(e)))
                    continue
                try:
                    # mitie will raise an exception on malicious input - e.g.
                    # on overlapping entities
                    sample.add_entity(list(range(start, end)), ent["entity"])
                except Exception as e:
                    # Report the offending entity in the first placeholder;
                    # the exception itself goes into the "Reason" slot.
                    logger.warning(
                        "Failed to add entity example '{}' of sentence '{}'. Reason: {}"
                        .format(str(ent), str(text), e))
                    continue
                found_one_entity = True

            trainer.add(sample)
        # Mitie will fail to train if there is not a single entity tagged
        if found_one_entity:
            self.ner = trainer.train()
示例#4
0
    def train(self, training_data, config, **kwargs):
        # type: (TrainingData, RasaNLUConfig) -> None
        """Train a MITIE NER model from the entity examples.

        Args:
            training_data: Object exposing ``entity_examples``.
            config: Mapping with ``mitie_file`` (given to
                ``mitie.ner_trainer``) and ``num_threads``.
            **kwargs: Ignored.
        """
        import mitie

        ner_trainer = mitie.ner_trainer(config["mitie_file"])
        ner_trainer.num_threads = config["num_threads"]
        any_entity_added = False

        for message in training_data.entity_examples:
            sentence = message.text
            message_tokens = message.get("tokens")
            token_texts = [tok.text for tok in message_tokens]
            instance = mitie.ner_training_instance(token_texts)

            for entity in message.get("entities", []):
                try:
                    begin, stop = MitieEntityExtractor.find_entity(
                        entity, sentence, message_tokens)
                except ValueError as e:
                    # The entity could not be located; skip only this entity.
                    logger.warning("Example skipped: {}".format(str(e)))
                else:
                    instance.add_entity(list(range(begin, stop)),
                                        entity["entity"])
                    any_entity_added = True

            ner_trainer.add(instance)

        # MITIE fails to train without at least one tagged entity.
        if any_entity_added:
            self.ner = ner_trainer.train()
    def train(self, training_data, config, **kwargs):
        # type: (TrainingData, RasaNLUConfig) -> None
        """Train a MITIE named entity recognizer on ``entity_examples``.

        Each example becomes a MITIE training instance built from the texts of
        its ``tokens``. Entities that cannot be located or that MITIE refuses
        to add are logged and skipped; the example is still handed to the
        trainer.

        Args:
            training_data: Object exposing ``entity_examples``.
            config: Mapping providing ``mitie_file`` (passed to
                ``mitie.ner_trainer``) and ``num_threads``.
            **kwargs: Ignored.
        """
        import mitie

        trainer = mitie.ner_trainer(config["mitie_file"])
        trainer.num_threads = config["num_threads"]
        found_one_entity = False
        for example in training_data.entity_examples:
            text = example.text
            tokens = example.get("tokens")
            sample = mitie.ner_training_instance([t.text for t in tokens])
            for ent in example.get("entities", []):
                try:
                    # if the token is not aligned an exception will be raised
                    start, end = MitieEntityExtractor.find_entity(ent, text, tokens)
                except ValueError as e:
                    logger.warning("Example skipped: {}".format(str(e)))
                    continue
                try:
                    # mitie will raise an exception on malicious input - e.g.
                    # on overlapping entities
                    sample.add_entity(list(range(start, end)), ent["entity"])
                except Exception as e:
                    # First placeholder is the entity that failed, not the
                    # exception (which is already reported as the reason).
                    logger.warning("Failed to add entity example '{}' of sentence '{}'. Reason: {}".format(
                            str(ent), str(text), e))
                    continue
                found_one_entity = True

            trainer.add(sample)
        # Mitie will fail to train if there is not a single entity tagged
        if found_one_entity:
            self.ner = trainer.train()
示例#6
0
 def train(self, corpus):
     """Train a NER model on the corpus' utterances and store it on ``self.model``.

     The utterances are looked up with ``get(corpus, "utterances")`` and
     converted by ``self.prepare`` into trainer samples.

     Returns:
         The second value produced by ``self.prepare`` (the trained utterances).

     Raises:
         AssertionError: If the corpus yields no utterances (``None``).
     """
     corpus_utterances = get(corpus, "utterances")
     assert corpus_utterances is not None, "No training data available"

     prepared_samples, trained_utterances = self.prepare(corpus_utterances)

     model_trainer = ner_trainer(self.__get_model_path())
     model_trainer.num_threads = 4
     # The trainer's ``num_c`` setting is read from the application config.
     model_trainer.num_c = int(app_config['C'])

     for prepared in prepared_samples:
         model_trainer.add(prepared)

     self.model = model_trainer.train()
     return trained_utterances
def train_entity_extractor(entity_examples, fe_file, max_num_threads):
    """Train and return a MITIE entity extractor.

    Args:
        entity_examples: Iterable of mappings with a ``"text"`` string and an
            ``"entities"`` list; each entity carries an ``"entity"`` label.
        fe_file: Value passed to ``ner_trainer`` (the feature extractor file).
        max_num_threads: Value assigned to the trainer's ``num_threads``.

    Returns:
        Whatever ``trainer.train()`` returns (the trained extractor).
    """
    trainer = ner_trainer(fe_file)
    trainer.num_threads = max_num_threads
    for example in entity_examples:
        text = example["text"]
        tokens = tokenize(text)
        sample = ner_training_instance(tokens)
        for ent in example["entities"]:
            start, end = find_entity(ent, text)
            # ``xrange`` only exists on Python 2; ``list(range(...))`` yields
            # the same indices on both Python 2 and Python 3.
            sample.add_entity(list(range(start, end)), ent["entity"])

        trainer.add(sample)
    return trainer.train()
示例#8
0
 def train(self):
     """Train a MITIE NER model from ``data/training.json`` and save it.

     The JSON file must contain a ``samples`` list; each sample provides
     ``text`` and a list of ``entities`` with ``start``, ``stop`` and ``type``.
     Sample texts are tokenized with ``spacy_nlp``. The trained model is kept
     on ``self.ner`` and written to ``models/ner_model.dat``.
     """
     with open('data/training.json') as training_file:
         training = json.load(training_file)

     examples = []
     for sample in training['samples']:
         instance = mitie.ner_training_instance(
             [token.text for token in spacy_nlp(sample['text'])])
         for entity in sample['entities']:
             instance.add_entity(range(entity['start'], entity['stop']),
                                 entity['type'])
         examples.append(instance)

     try:
         trainer = mitie.ner_trainer(
             "models/total_word_feature_extractor.dat")
     except Exception:
         # Fall back to the alternate feature extractor location. Catching
         # ``Exception`` instead of a bare ``except`` keeps KeyboardInterrupt
         # and SystemExit from being swallowed here.
         trainer = mitie.ner_trainer(
             "botkit/models/total_word_feature_extractor.dat")

     trainer.num_threads = 2
     for example in examples:
         trainer.add(example)
     self.ner = trainer.train()

     # Make sure the output directory exists before saving the model.
     if not os.path.exists('models'):
         os.mkdir('models')
     self.ner.save_to_disk("models/ner_model.dat")
示例#9
0
    def train(self, training_data, mitie_file, num_threads):
        # type: (TrainingData, str, Optional[int]) -> None
        """Train a MITIE NER model from the entity examples.

        Args:
            training_data: Object exposing ``entity_examples``; each example
                is indexed by ``"text"`` and ``"entities"``.
            mitie_file: Passed to ``ner_trainer``.
            num_threads: Assigned to the trainer's ``num_threads``.
        """
        from mitie import ner_training_instance, ner_trainer, tokenize

        mitie_trainer = ner_trainer(mitie_file)
        mitie_trainer.num_threads = num_threads
        any_entity_added = False

        for entry in training_data.entity_examples:
            sentence = entry["text"]
            instance = ner_training_instance(tokenize(sentence))

            for entity in entry["entities"]:
                begin, stop = MitieEntityExtractor.find_entity(entity, sentence)
                covered = list(range(begin, stop))
                instance.add_entity(covered, entity["entity"])
                any_entity_added = True

            mitie_trainer.add(instance)

        if not any_entity_added:
            # MITIE fails to train when no entity at all has been tagged.
            return
        self.ner = mitie_trainer.train()
示例#10
0
    def train(self, training_data, config, **kwargs):
        # type: (TrainingData, RasaNLUConfig) -> None
        """Train a MITIE NER model from the entity examples.

        Args:
            training_data: Object exposing ``entity_examples``.
            config: Mapping with ``mitie_file`` (given to
                ``mitie.ner_trainer``) and ``num_threads``.
            **kwargs: Ignored.
        """
        import mitie

        ner_trainer = mitie.ner_trainer(config["mitie_file"])
        ner_trainer.num_threads = config["num_threads"]
        saw_entity = False

        for message in training_data.entity_examples:
            sentence = message.text
            message_tokens = message.get("tokens")
            words = [tok.text for tok in message_tokens]
            instance = mitie.ner_training_instance(words)

            for entity in message.get("entities", []):
                begin, stop = MitieEntityExtractor.find_entity(
                    entity, sentence, message_tokens)
                instance.add_entity(list(range(begin, stop)), entity["entity"])
                saw_entity = True

            ner_trainer.add(instance)

        # Training is skipped when nothing was tagged, because MITIE fails
        # to train without a single tagged entity.
        if saw_entity:
            self.ner = ner_trainer.train()
示例#11
0
    def train(self, training_data: TrainingData, model: MitieModel) -> Resource:
        """Train the MITIE named entity recognizer and persist the result.

        Args:
            training_data: Provides ``nlu_examples``, which are filtered with
                ``filter_trainable_entities`` before training.
            model: MitieModel whose ``model_path`` is handed to the trainer.

        Returns:
            The resource stored on ``self._resource``.
        """
        import mitie

        ner_trainer = mitie.ner_trainer(str(model.model_path))
        ner_trainer.num_threads = self._config["num_threads"]

        # Only examples with entities that are not pre-trained are used.
        trainable_examples = self.filter_trainable_entities(
            training_data.nlu_examples
        )

        # Tracks whether at least one sample carries a tagged entity.
        has_trainable_entity = False
        for message in trainable_examples:
            mitie_sample = self._prepare_mitie_sample(message)
            if mitie_sample.num_entities > 0:
                has_trainable_entity = True
            ner_trainer.add(mitie_sample)

        if not has_trainable_entity:
            # MITIE fails to train without a single tagged entity, so only
            # warn and leave ``self._ner`` untouched.
            component_name = self.__class__.__name__
            rasa.shared.utils.io.raise_warning(
                f"{component_name} could not be trained because no trainable "
                f"entities where found in the given training data. Please add some "
                f"NLU training examples that include entities where the `extractor` "
                f"is either `None` or '{component_name}'."
            )
        else:
            self._ner = ner_trainer.train()

        # Persisting happens regardless of whether training took place.
        self.persist()
        return self._resource
    def __init__(self, dat, **kwargs):

        self.trainer = mitie.ner_trainer(dat)