Exemplo n.º 1
0
    def _prepare_mitie_sample(training_example: Message) -> Any:
        """Prepare a message so that it can be passed to a MITIE trainer.

        A ``mitie.ner_training_instance`` is built from the texts of the
        message's tokens. Every entity annotation of the message is then
        mapped onto a token range and added to that instance. Entities that
        cannot be mapped (``find_entity`` raises ``ValueError``) or that MITIE
        refuses to add are reported through a warning and left out.

        Args:
            training_example: Message providing the text, its tokens and,
                optionally, its entity annotations.

        Returns:
            The MITIE training instance, containing every entity that could
            be added.
        """
        import mitie

        text = training_example.get(TEXT)
        tokens = training_example.get(TOKENS_NAMES[TEXT])
        sample = mitie.ner_training_instance([t.text for t in tokens])
        for ent in training_example.get(ENTITIES, []):
            try:
                # if the token is not aligned an exception will be raised
                start, end = MitieEntityExtractor.find_entity(
                    ent, text, tokens)
            except ValueError as e:
                # The sentences need an explicit separating space, otherwise
                # the message reads "...skipped.Error: ...".
                rasa.shared.utils.io.raise_warning(
                    f"Failed to use example '{text}' to train MITIE "
                    f"entity extractor. Example will be skipped. "
                    f"Error: {e}")
                continue
            try:
                # mitie will raise an exception on malicious
                # input - e.g. on overlapping entities
                sample.add_entity(list(range(start, end)), ent["entity"])
            except Exception as e:
                # Report the offending entity itself; the exception is
                # already shown as the reason.
                rasa.shared.utils.io.raise_warning(
                    f"Failed to add entity example "
                    f"'{ent}' of sentence '{text}'. "
                    f"Example will be ignored. Reason: "
                    f"{e}")
                continue
        return sample
Exemplo n.º 2
0
    def _prepare_mitie_sample(self, training_example: Message) -> Any:
        """Convert a training message into a MITIE NER training instance.

        The instance is created from the texts of the tokens returned by
        ``self._tokens_without_cls``. Each entity annotation of the message
        is mapped onto a token range and added to the instance. Entities that
        cannot be mapped (``find_entity`` raises ``ValueError``) or that MITIE
        refuses to add are reported through a warning and left out.

        Args:
            training_example: Message providing the text and, optionally,
                entity annotations.

        Returns:
            The MITIE training instance, containing every entity that could
            be added.
        """
        import mitie

        text = training_example.text
        tokens = self._tokens_without_cls(training_example)
        sample = mitie.ner_training_instance([t.text for t in tokens])
        for ent in training_example.get(ENTITIES_ATTRIBUTE, []):
            try:
                # if the token is not aligned an exception will be raised
                start, end = MitieEntityExtractor.find_entity(
                    ent, text, tokens)
            except ValueError as e:
                # The sentences need an explicit separating space, otherwise
                # the message reads "...skipped.Error: ...".
                raise_warning(f"Failed to use example '{text}' to train MITIE "
                              f"entity extractor. Example will be skipped. "
                              f"Error: {e}")
                continue
            try:
                # mitie will raise an exception on malicious
                # input - e.g. on overlapping entities
                sample.add_entity(list(range(start, end)), ent["entity"])
            except Exception as e:
                # Report the offending entity itself; the exception is
                # already shown as the reason.
                raise_warning(f"Failed to add entity example "
                              f"'{ent}' of sentence '{text}'. "
                              f"Example will be ignored. Reason: "
                              f"{e}")
                continue
        return sample
Exemplo n.º 3
0
 def get_sample(data):
     """Build a NER training sample from one training-data record.

     The utterance is read from the ``"case_converted_utterance"`` entry and
     split on whitespace into tokens. The ``"mapping"`` entry is parsed as
     JSON and its ``"tags"`` list is walked: a tag is ignored when its
     upper-cased label is in ``predefined_tags`` or its label is in
     ``patterns`` or ``phrases``; every other tag is added to the sample as
     an entity over ``range(start, end)`` with the upper-cased label, and
     that label is appended to ``label_list`` if not already present.

     ``data['ner_trained']`` is set to ``True`` on success and to ``False``
     when anything inside the guarded section fails.

     Returns:
         ``(sample, data)`` on success, ``(None, data)`` on failure.

     Raises:
         AssertionError: if ``data`` has no ``"mapping"`` or ``"utterance"``
             key (these checks run before the guarded section).
     """
     assert 'mapping' in data, "Token mapping missing from training data"
     assert "utterance" in data, "Utterance text missing from training data"
     try:
         utterance = get(data, "case_converted_utterance")
         logger.debug("Preparing utterance: %s", utterance)
         mapping = json.loads(get(data, "mapping"))
         assert "tags" in mapping, "Tags missing from training data"
         tags = get(mapping, 'tags')
         tokens = utterance.split()
         sample = ner_training_instance(tokens)
         for tag in tags:
             start = get(tag, 'start')
             end = get(tag, 'end')
             label = get(tag, 'tag')
             label = label.encode('utf-8')
             upper_label = label.upper()
             ignore_tag = (upper_label in predefined_tags) \
                 or (label in patterns) or (label in phrases)
             if not ignore_tag:
                 assert all(v is not None for v in [start, end, label]), \
                     "Missing information for adding entities to training"
                 logger.info("Adding entity: %s", label)
                 logger.info("Start range: %s", start)
                 logger.info("End range: %s", end)
                 sample.add_entity(range(start, end), upper_label)
                 if upper_label not in label_list:
                     label_list.append(upper_label)
                 logger.info("label_list %s", label_list)
         data['ner_trained'] = True
         return sample, data
     except Exception:
         # Still best effort (the caller gets ``None`` plus the flag), but
         # record the traceback so the failure is no longer silent.
         logger.exception("Failed to prepare NER training sample")
         data['ner_trained'] = False
         return None, data
    def train(self, training_data, config, **kwargs):
        # type: (TrainingData, RasaNLUConfig) -> None
        """Train a MITIE NER model on ``training_data.training_examples``.

        One MITIE training instance is built per example from the texts of
        its tokens. Each entity annotation is mapped onto a token range and
        added to the instance; entities that cannot be mapped or that MITIE
        refuses to add are logged as warnings and left out. The trained
        model is stored in ``self.ner`` only if at least one entity was
        added.

        Args:
            training_data: Object exposing ``training_examples``.
            config: Mapping read for the ``"mitie_file"`` and
                ``"num_threads"`` entries.
            **kwargs: Unused.
        """
        import mitie

        trainer = mitie.ner_trainer(config["mitie_file"])
        trainer.num_threads = config["num_threads"]
        found_one_entity = False
        for example in training_data.training_examples:
            text = example.text
            tokens = example.get("tokens")
            sample = mitie.ner_training_instance([t.text for t in tokens])
            for ent in example.get("entities", []):
                try:
                    # if the token is not aligned an exception will be raised
                    start, end = MitieEntityExtractor.find_entity(
                        ent, text, tokens)
                except ValueError as e:
                    logger.warning("Example skipped: {}".format(str(e)))
                    continue
                try:
                    # mitie will raise an exception on malicious input - e.g. on overlapping entities
                    sample.add_entity(list(range(start, end)), ent["entity"])
                except Exception as e:
                    # Report the offending entity itself; the exception is
                    # already shown as the reason.
                    logger.warning(
                        "Failed to add entity example '{}' of sentence '{}'. Reason: {}"
                        .format(ent, str(text), e))
                    continue
                found_one_entity = True

            trainer.add(sample)
        # Mitie will fail to train if there is not a single entity tagged
        if found_one_entity:
            self.ner = trainer.train()
Exemplo n.º 5
0
    def train(self, training_data, config, **kwargs):
        # type: (TrainingData, RasaNLUConfig) -> None
        """Fit a MITIE NER model on ``training_data.entity_examples``.

        Each example becomes one MITIE training instance built from its
        token texts. Entities whose token range cannot be found
        (``find_entity`` raises ``ValueError``) are logged and skipped.
        ``self.ner`` is assigned only when at least one entity was added.
        """
        import mitie

        ner_trainer = mitie.ner_trainer(config["mitie_file"])
        ner_trainer.num_threads = config["num_threads"]
        any_entity_added = False
        for message in training_data.entity_examples:
            message_text = message.text
            message_tokens = message.get("tokens")
            token_texts = [token.text for token in message_tokens]
            instance = mitie.ner_training_instance(token_texts)
            for entity in message.get("entities", []):
                try:
                    first, last = MitieEntityExtractor.find_entity(
                        entity, message_text, message_tokens)
                except ValueError as err:
                    logger.warning("Example skipped: {}".format(str(err)))
                else:
                    token_indices = list(range(first, last))
                    instance.add_entity(token_indices, entity["entity"])
                    any_entity_added = True

            ner_trainer.add(instance)
        # Mitie will fail to train if there is not a single entity tagged
        if any_entity_added:
            self.ner = ner_trainer.train()
Exemplo n.º 6
0
    def _prepare_mitie_sample(self, training_example) -> Any:
        """Convert a training example into a MITIE NER training instance.

        The instance is created from the texts of the example's tokens. Each
        entity annotation is mapped onto a token range and added to the
        instance. Entities that cannot be mapped (``find_entity`` raises
        ``ValueError``) or that MITIE refuses to add trigger a warning and
        are left out.

        Args:
            training_example: Example providing ``text``, its tokens and,
                optionally, entity annotations.

        Returns:
            The MITIE training instance, containing every entity that could
            be added.
        """
        import mitie

        text = training_example.text
        tokens = training_example.get(TOKENS_NAMES[TEXT_ATTRIBUTE])
        sample = mitie.ner_training_instance([t.text for t in tokens])
        for ent in training_example.get(ENTITIES_ATTRIBUTE, []):
            try:
                # if the token is not aligned an exception will be raised
                start, end = MitieEntityExtractor.find_entity(ent, text, tokens)
            except ValueError as e:
                warnings.warn(f"Example skipped: {e}")
                continue
            try:
                # mitie will raise an exception on malicious
                # input - e.g. on overlapping entities
                sample.add_entity(list(range(start, end)), ent["entity"])
            except Exception as e:
                # Report the offending entity itself; the exception is
                # already shown as the reason.
                warnings.warn(
                    "Failed to add entity example "
                    f"'{ent}' of sentence '{text}'. Reason: "
                    f"{e}"
                )
                continue
        return sample
Exemplo n.º 7
0
    def _prepare_mitie_sample(self, training_example):
        """Convert a training example into a MITIE NER training instance.

        The instance is created from the texts of the example's tokens. Each
        entity annotation is mapped onto a token range and added to the
        instance. Entities that cannot be mapped (``find_entity`` raises
        ``ValueError``) or that MITIE refuses to add are logged as warnings
        and left out.

        Args:
            training_example: Example providing ``text``, ``"tokens"`` and,
                optionally, ``"entities"``.

        Returns:
            The MITIE training instance, containing every entity that could
            be added.
        """
        import mitie

        text = training_example.text
        tokens = training_example.get("tokens")
        sample = mitie.ner_training_instance([t.text for t in tokens])
        for ent in training_example.get("entities", []):
            try:
                # if the token is not aligned an exception will be raised
                start, end = MitieEntityExtractor.find_entity(ent, text, tokens)
            except ValueError as e:
                logger.warning("Example skipped: {}".format(str(e)))
                continue
            try:
                # mitie will raise an exception on malicious
                # input - e.g. on overlapping entities
                sample.add_entity(list(range(start, end)), ent["entity"])
            except Exception as e:
                # Report the offending entity itself; the exception is
                # already shown as the reason.
                logger.warning(
                    "Failed to add entity example "
                    "'{}' of sentence '{}'. Reason: "
                    "{}".format(ent, str(text), e)
                )
                continue
        return sample
    def train(self, training_data, config, **kwargs):
        # type: (TrainingData, RasaNLUConfig) -> None
        """Train a MITIE NER model on ``training_data.entity_examples``.

        One MITIE training instance is built per example from the texts of
        its tokens. Each entity annotation is mapped onto a token range and
        added to the instance; entities that cannot be mapped or that MITIE
        refuses to add are logged as warnings and left out. The trained
        model is stored in ``self.ner`` only if at least one entity was
        added.

        Args:
            training_data: Object exposing ``entity_examples``.
            config: Mapping read for the ``"mitie_file"`` and
                ``"num_threads"`` entries.
            **kwargs: Unused.
        """
        import mitie

        trainer = mitie.ner_trainer(config["mitie_file"])
        trainer.num_threads = config["num_threads"]
        found_one_entity = False
        for example in training_data.entity_examples:
            text = example.text
            tokens = example.get("tokens")
            sample = mitie.ner_training_instance([t.text for t in tokens])
            for ent in example.get("entities", []):
                try:
                    # if the token is not aligned an exception will be raised
                    start, end = MitieEntityExtractor.find_entity(ent, text, tokens)
                except ValueError as e:
                    logger.warning("Example skipped: {}".format(str(e)))
                    continue
                try:
                    # mitie will raise an exception on malicious input - e.g. on overlapping entities
                    sample.add_entity(list(range(start, end)), ent["entity"])
                except Exception as e:
                    # Report the offending entity itself; the exception is
                    # already shown as the reason.
                    logger.warning("Failed to add entity example '{}' of sentence '{}'. Reason: {}".format(
                            ent, str(text), e))
                    continue
                found_one_entity = True

            trainer.add(sample)
        # Mitie will fail to train if there is not a single entity tagged
        if found_one_entity:
            self.ner = trainer.train()
    def add_phrase(self, phrase):
        """Turn ``phrase`` into a MITIE training instance and queue it.

        The instance is built from ``phrase.tokens()``; every
        ``(idx, tag)`` pair yielded by ``phrase.entities()`` is logged and
        added as an entity before the instance is handed to ``self.trainer``.
        """
        logging.info("add phrase %s" % phrase)

        words = list(phrase.tokens())
        instance = mitie.ner_training_instance(words)

        for position, label in phrase.entities():
            logging.info("%s at position %s" % (label, position))
            instance.add_entity(position, label)

        self.trainer.add(instance)
Exemplo n.º 10
0
def train_entity_extractor(entity_examples, fe_file, max_num_threads):
    """Train and return a MITIE NER model from annotated examples.

    Each example's ``"text"`` is tokenized with ``tokenize`` and turned into
    one training instance; every item in ``example["entities"]`` is located
    with ``find_entity`` and added under its ``"entity"`` label.

    Args:
        entity_examples: Iterable of mappings with ``"text"`` and
            ``"entities"`` entries.
        fe_file: Argument passed to ``ner_trainer`` (the feature extractor
            file).
        max_num_threads: Value assigned to ``trainer.num_threads``.

    Returns:
        The result of ``trainer.train()``.
    """
    trainer = ner_trainer(fe_file)
    trainer.num_threads = max_num_threads
    for example in entity_examples:
        text = example["text"]
        tokens = tokenize(text)
        sample = ner_training_instance(tokens)
        for ent in example["entities"]:
            start, end = find_entity(ent, text)
            # ``xrange`` only exists on Python 2; a materialized range works
            # on both major versions.
            sample.add_entity(list(range(start, end)), ent["entity"])

        trainer.add(sample)
    return trainer.train()
Exemplo n.º 11
0
    def train(self, training_data, mitie_file, num_threads):
        # type: (TrainingData, str, Optional[int]) -> None
        """Fit a MITIE NER model on ``training_data.entity_examples``.

        Every example's ``"text"`` is tokenized with MITIE's ``tokenize`` and
        each of its ``"entities"`` is added to the resulting training
        instance. ``self.ner`` is assigned only when at least one entity was
        added.
        """
        from mitie import ner_training_instance, ner_trainer, tokenize

        model_trainer = ner_trainer(mitie_file)
        model_trainer.num_threads = num_threads
        entities_added = 0
        for record in training_data.entity_examples:
            sentence = record["text"]
            instance = ner_training_instance(tokenize(sentence))
            for entity in record["entities"]:
                first, last = MitieEntityExtractor.find_entity(entity, sentence)
                token_indices = list(range(first, last))
                instance.add_entity(token_indices, entity["entity"])
                entities_added += 1

            model_trainer.add(instance)
        # Mitie will fail to train if there is not a single entity tagged
        if entities_added > 0:
            self.ner = model_trainer.train()
Exemplo n.º 12
0
    def train(self, training_data, config, **kwargs):
        # type: (TrainingData, RasaNLUConfig) -> None
        """Fit a MITIE NER model on ``training_data.entity_examples``.

        One training instance is created per example from its token texts,
        and each of the example's entities is added over the token range
        returned by ``find_entity``. ``self.ner`` is assigned only when at
        least one entity was added.
        """
        import mitie

        ner_trainer = mitie.ner_trainer(config["mitie_file"])
        ner_trainer.num_threads = config["num_threads"]
        has_tagged_entity = False
        for message in training_data.entity_examples:
            sentence = message.text
            message_tokens = message.get("tokens")
            token_texts = [token.text for token in message_tokens]
            instance = mitie.ner_training_instance(token_texts)
            for entity in message.get("entities", []):
                bounds = MitieEntityExtractor.find_entity(
                    entity, sentence, message_tokens)
                first, last = bounds
                instance.add_entity(list(range(first, last)), entity["entity"])
                has_tagged_entity = True

            ner_trainer.add(instance)
        # Mitie will fail to train if there is not a single entity tagged
        if has_tagged_entity:
            self.ner = ner_trainer.train()
Exemplo n.º 13
0
 def train(self):
     """Train a MITIE NER model from ``data/training.json`` and save it.

     Each entry of the file's ``"samples"`` list is tokenized with
     ``spacy_nlp`` and turned into a training instance; its entities are
     added over ``range(entity['start'], entity['stop'])`` with the label
     ``entity['type']``. The trainer is created from
     ``models/total_word_feature_extractor.dat``, falling back to the copy
     under ``botkit/models`` if that fails. The trained model is stored in
     ``self.ner`` and written to ``models/ner_model.dat``.
     """
     with open('data/training.json') as training_file:
         training = json.load(training_file)
     examples = list()
     for sample in training['samples']:
         examples.append(
             mitie.ner_training_instance(
                 [token.text for token in spacy_nlp(sample['text'])]))
         for entity in sample['entities']:
             examples[-1].add_entity(range(entity['start'], entity['stop']),
                                     entity['type'])
     try:
         trainer = mitie.ner_trainer(
             "models/total_word_feature_extractor.dat")
     except Exception:
         # Not a bare ``except``: KeyboardInterrupt / SystemExit must not
         # be mistaken for a missing feature-extractor file.
         trainer = mitie.ner_trainer(
             "botkit/models/total_word_feature_extractor.dat")
     trainer.num_threads = 2
     for example in examples:
         trainer.add(example)
     self.ner = trainer.train()
     if not os.path.exists('models'):
         os.mkdir('models')
     self.ner.save_to_disk("models/ner_model.dat")
Exemplo n.º 14
0
    def _prepare_mitie_sample(self, training_example):
        """Convert a training example into a MITIE NER training instance.

        The instance is created from the texts of the example's tokens. Each
        entity annotation is mapped onto a token range and added to the
        instance. Entities that cannot be mapped (``find_entity`` raises
        ``ValueError``) or that MITIE refuses to add are logged as warnings
        and left out.

        Args:
            training_example: Example providing ``text``, ``"tokens"`` and,
                optionally, ``"entities"``.

        Returns:
            The MITIE training instance, containing every entity that could
            be added.
        """
        import mitie

        text = training_example.text
        tokens = training_example.get("tokens")
        sample = mitie.ner_training_instance([t.text for t in tokens])
        for ent in training_example.get("entities", []):
            try:
                # if the token is not aligned an exception will be raised
                start, end = MitieEntityExtractor.find_entity(
                    ent, text, tokens)
            except ValueError as e:
                logger.warning("Example skipped: {}".format(str(e)))
                continue
            try:
                # mitie will raise an exception on malicious
                # input - e.g. on overlapping entities
                sample.add_entity(list(range(start, end)), ent["entity"])
            except Exception as e:
                # Report the offending entity itself; the exception is
                # already shown as the reason.
                logger.warning("Failed to add entity example "
                               "'{}' of sentence '{}'. Reason: "
                               "{}".format(ent, str(text), e))
                continue
        return sample