def train(
    self,
    training_data: TrainingData,
    config: Optional[RasaNLUModelConfig] = None,
    **kwargs: Any,
) -> None:
    """Train the MITIE entity recognizer on the trainable entity examples.

    The MITIE language model is taken from the ``mitie_file`` keyword
    argument and the number of threads from ``num_threads`` (default 1).
    ``self.ner`` is only assigned when at least one prepared sample
    contains an entity.

    Raises:
        Exception: if ``mitie_file`` is missing or empty in ``kwargs``.
    """
    import mitie

    language_model = kwargs.get("mitie_file")
    if not language_model:
        raise Exception(
            "Can not run MITIE entity extractor without a "
            "language model. Make sure this component is "
            "preceeded by the 'MitieNLP' component."
        )

    ner_trainer = mitie.ner_trainer(language_model)
    ner_trainer.num_threads = kwargs.get("num_threads", 1)

    # Examples carrying only pre-trained entities are filtered out first.
    trainable_examples = self.filter_trainable_entities(
        training_data.nlu_examples
    )

    any_entity_seen = False
    for message in trainable_examples:
        mitie_sample = self._prepare_mitie_sample(message)
        if mitie_sample.num_entities > 0:
            any_entity_seen = True
        ner_trainer.add(mitie_sample)

    # MITIE cannot train unless at least one entity has been tagged.
    if not any_entity_seen:
        return
    self.ner = ner_trainer.train()
def train(
    self, training_data: TrainingData, config: RasaNLUModelConfig, **kwargs: Any
) -> None:
    """Train the MITIE entity recognizer on the trainable entity examples.

    The MITIE language model is taken from the ``mitie_file`` keyword
    argument and the number of threads from ``num_threads`` (default 1).
    ``self.ner`` is only assigned when at least one prepared sample
    contains an entity.

    Raises:
        Exception: if ``mitie_file`` is missing or empty in ``kwargs``.
    """
    import mitie

    language_model = kwargs.get("mitie_file")
    if not language_model:
        raise Exception(
            "Can not run MITIE entity extractor without a "
            "language model. Make sure this component is "
            "preceeded by the 'MitieNLP' component."
        )

    ner_trainer = mitie.ner_trainer(language_model)
    ner_trainer.num_threads = kwargs.get("num_threads", 1)

    # Examples carrying only pre-trained entities are filtered out first.
    trainable_examples = self.filter_trainable_entities(
        training_data.training_examples
    )

    any_entity_seen = False
    for message in trainable_examples:
        mitie_sample = self._prepare_mitie_sample(message)
        if mitie_sample.num_entities > 0:
            any_entity_seen = True
        ner_trainer.add(mitie_sample)

    # MITIE cannot train unless at least one entity has been tagged.
    if not any_entity_seen:
        return
    self.ner = ner_trainer.train()
def train(self, training_data, config, **kwargs):
    # type: (TrainingData, RasaNLUConfig) -> None
    """Train a MITIE NER model from the training examples.

    Every example in ``training_data.training_examples`` becomes one MITIE
    training instance built from its ``tokens``. Entities that cannot be
    aligned to tokens, or that MITIE refuses to add, are logged and skipped.
    ``self.ner`` is only assigned when at least one entity was added.

    Args:
        training_data: object exposing ``training_examples``.
        config: mapping providing ``"mitie_file"`` and ``"num_threads"``.
        **kwargs: unused here.
    """
    import mitie

    trainer = mitie.ner_trainer(config["mitie_file"])
    trainer.num_threads = config["num_threads"]
    found_one_entity = False
    for example in training_data.training_examples:
        text = example.text
        tokens = example.get("tokens")
        sample = mitie.ner_training_instance([t.text for t in tokens])
        for ent in example.get("entities", []):
            try:
                # if the token is not aligned an exception will be raised
                start, end = MitieEntityExtractor.find_entity(ent, text, tokens)
            except ValueError as e:
                logger.warning("Example skipped: {}".format(str(e)))
                continue
            try:
                # mitie raises on invalid input, e.g. on overlapping entities
                sample.add_entity(list(range(start, end)), ent["entity"])
            except Exception as e:
                # Log the offending entity (not the exception) in the first
                # placeholder; the exception belongs in "Reason".
                logger.warning(
                    "Failed to add entity example '{}' of sentence '{}'. "
                    "Reason: {}".format(str(ent), str(text), e)
                )
                continue
            found_one_entity = True
        trainer.add(sample)

    # Mitie will fail to train if there is not a single entity tagged
    if found_one_entity:
        self.ner = trainer.train()
def train(self, training_data, config, **kwargs):
    # type: (TrainingData, RasaNLUConfig) -> None
    """Train a MITIE NER model from the entity examples.

    One MITIE training instance is built per example from its ``tokens``.
    Entities whose span cannot be located (``find_entity`` raises
    ``ValueError``) are logged and skipped. ``self.ner`` is only assigned
    when at least one entity was added.
    """
    import mitie

    ner_trainer = mitie.ner_trainer(config["mitie_file"])
    ner_trainer.num_threads = config["num_threads"]

    has_entity = False
    for message in training_data.entity_examples:
        sentence = message.text
        token_list = message.get("tokens")
        words = [token.text for token in token_list]
        instance = mitie.ner_training_instance(words)
        for entity in message.get("entities", []):
            try:
                first, last = MitieEntityExtractor.find_entity(
                    entity, sentence, token_list
                )
            except ValueError as err:
                logger.warning("Example skipped: {}".format(str(err)))
            else:
                # Not guarded: errors from add_entity propagate to the caller.
                instance.add_entity(list(range(first, last)), entity["entity"])
                has_entity = True
        ner_trainer.add(instance)

    # MITIE cannot train unless at least one entity has been tagged.
    if has_entity:
        self.ner = ner_trainer.train()
def train(self, training_data, config, **kwargs):
    # type: (TrainingData, RasaNLUConfig) -> None
    """Train a MITIE NER model from the entity examples.

    Every example in ``training_data.entity_examples`` becomes one MITIE
    training instance built from its ``tokens``. Entities that cannot be
    aligned to tokens, or that MITIE refuses to add, are logged and skipped.
    ``self.ner`` is only assigned when at least one entity was added.

    Args:
        training_data: object exposing ``entity_examples``.
        config: mapping providing ``"mitie_file"`` and ``"num_threads"``.
        **kwargs: unused here.
    """
    import mitie

    trainer = mitie.ner_trainer(config["mitie_file"])
    trainer.num_threads = config["num_threads"]
    found_one_entity = False
    for example in training_data.entity_examples:
        text = example.text
        tokens = example.get("tokens")
        sample = mitie.ner_training_instance([t.text for t in tokens])
        for ent in example.get("entities", []):
            try:
                # if the token is not aligned an exception will be raised
                start, end = MitieEntityExtractor.find_entity(ent, text, tokens)
            except ValueError as e:
                logger.warning("Example skipped: {}".format(str(e)))
                continue
            try:
                # mitie raises on invalid input, e.g. on overlapping entities
                sample.add_entity(list(range(start, end)), ent["entity"])
            except Exception as e:
                # Log the offending entity (not the exception) in the first
                # placeholder; the exception belongs in "Reason".
                logger.warning(
                    "Failed to add entity example '{}' of sentence '{}'. "
                    "Reason: {}".format(str(ent), str(text), e)
                )
                continue
            found_one_entity = True
        trainer.add(sample)

    # Mitie will fail to train if there is not a single entity tagged
    if found_one_entity:
        self.ner = trainer.train()
def train(self, corpus):
    """Train a MITIE NER model from the utterances of ``corpus``.

    The utterances are converted to MITIE samples via ``self.prepare``, fed
    to a trainer created from ``self.__get_model_path()``, and the trained
    model is stored on ``self.model``.

    Args:
        corpus: container from which ``"utterances"`` is looked up with ``get``.

    Returns:
        The second value returned by ``self.prepare`` (the trained utterances).

    Raises:
        AssertionError: if the corpus has no utterances.
    """
    utterances = get(corpus, "utterances")
    if utterances is None:
        # Raised explicitly (instead of `assert`) so the validation is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError("No training data available")

    samples, trained_utterances = self.prepare(utterances)

    trainer = ner_trainer(self.__get_model_path())
    trainer.num_threads = 4
    trainer.num_c = int(app_config['C'])
    for sample in samples:
        trainer.add(sample)

    self.model = trainer.train()
    return trained_utterances
def train_entity_extractor(entity_examples, fe_file, max_num_threads):
    """Train and return a MITIE NER model from annotated examples.

    Args:
        entity_examples: iterable of mappings with ``"text"`` and
            ``"entities"`` keys; each entity mapping has an ``"entity"`` label.
        fe_file: value passed to ``ner_trainer`` (the feature extractor file).
        max_num_threads: value assigned to the trainer's ``num_threads``.

    Returns:
        The result of ``trainer.train()``.
    """
    trainer = ner_trainer(fe_file)
    trainer.num_threads = max_num_threads
    for example in entity_examples:
        text = example["text"]
        tokens = tokenize(text)
        sample = ner_training_instance(tokens)
        for ent in example["entities"]:
            start, end = find_entity(ent, text)
            # `xrange` does not exist on Python 3; a materialized range works
            # on both Python 2 and 3.
            sample.add_entity(list(range(start, end)), ent["entity"])
        trainer.add(sample)
    return trainer.train()
def train(self):
    """Train a MITIE NER model from ``data/training.json`` and save it.

    Each entry of ``training['samples']`` is tokenized with ``spacy_nlp`` and
    turned into a MITIE training instance annotated with the entry's
    entities (``start``/``stop`` indices and ``type``). The feature
    extractor is loaded from ``models/`` with a fallback to
    ``botkit/models/``. The trained model is stored on ``self.ner`` and
    written to ``models/ner_model.dat``.
    """
    with open('data/training.json') as training_file:
        training = json.load(training_file)

    examples = list()
    for sample in training['samples']:
        examples.append(
            mitie.ner_training_instance(
                [token.text for token in spacy_nlp(sample['text'])]))
        for entity in sample['entities']:
            examples[-1].add_entity(range(entity['start'], entity['stop']),
                                    entity['type'])

    try:
        trainer = mitie.ner_trainer(
            "models/total_word_feature_extractor.dat")
    except Exception:
        # Fall back to the alternative location. Catching Exception (not a
        # bare except) lets KeyboardInterrupt/SystemExit propagate.
        trainer = mitie.ner_trainer(
            "botkit/models/total_word_feature_extractor.dat")

    trainer.num_threads = 2
    for example in examples:
        trainer.add(example)
    self.ner = trainer.train()

    # Make sure the output directory exists before saving the model.
    if not os.path.exists('models'):
        os.mkdir('models')
    self.ner.save_to_disk("models/ner_model.dat")
def train(self, training_data, mitie_file, num_threads):
    # type: (TrainingData, str, Optional[int]) -> None
    """Train a MITIE NER model from the entity examples.

    Each example's text is tokenized with MITIE's ``tokenize`` and its
    entities are added to the resulting training instance. ``self.ner`` is
    only assigned when at least one entity was added.
    """
    from mitie import ner_training_instance, ner_trainer, tokenize

    mitie_trainer = ner_trainer(mitie_file)
    mitie_trainer.num_threads = num_threads

    has_entity = False
    for record in training_data.entity_examples:
        sentence = record["text"]
        instance = ner_training_instance(tokenize(sentence))
        for entity in record["entities"]:
            first, last = MitieEntityExtractor.find_entity(entity, sentence)
            token_indices = list(range(first, last))
            instance.add_entity(token_indices, entity["entity"])
            has_entity = True
        mitie_trainer.add(instance)

    # MITIE cannot train unless at least one entity has been tagged.
    if has_entity:
        self.ner = mitie_trainer.train()
def train(self, training_data, config, **kwargs):
    # type: (TrainingData, RasaNLUConfig) -> None
    """Train a MITIE NER model from the entity examples.

    One MITIE training instance is built per example from its ``tokens``,
    and every entity of the example is added to it. ``self.ner`` is only
    assigned when at least one entity was added.
    """
    import mitie

    ner_trainer = mitie.ner_trainer(config["mitie_file"])
    ner_trainer.num_threads = config["num_threads"]

    has_entity = False
    for message in training_data.entity_examples:
        sentence = message.text
        token_list = message.get("tokens")
        words = [token.text for token in token_list]
        instance = mitie.ner_training_instance(words)
        for entity in message.get("entities", []):
            first, last = MitieEntityExtractor.find_entity(
                entity, sentence, token_list
            )
            token_indices = list(range(first, last))
            instance.add_entity(token_indices, entity["entity"])
            has_entity = True
        ner_trainer.add(instance)

    # MITIE cannot train unless at least one entity has been tagged.
    if has_entity:
        self.ner = ner_trainer.train()
def train(self, training_data: TrainingData, model: MitieModel) -> Resource:
    """Fit a MITIE named entity recognizer and persist it.

    Args:
        training_data: training data whose ``nlu_examples`` are used.
        model: a MitieModel; its ``model_path`` is handed to the MITIE trainer.

    Returns:
        ``self._resource``, returned after ``self.persist()`` has been called.
    """
    import mitie

    ner_trainer = mitie.ner_trainer(str(model.model_path))
    ner_trainer.num_threads = self._config["num_threads"]

    # Examples carrying only pre-trained entities are filtered out first.
    trainable_examples = self.filter_trainable_entities(
        training_data.nlu_examples
    )

    # Tracks whether any (not pre-trained) entity occurs in the samples.
    has_trainable_entity = False
    for message in trainable_examples:
        mitie_sample = self._prepare_mitie_sample(message)
        if mitie_sample.num_entities > 0:
            has_trainable_entity = True
        ner_trainer.add(mitie_sample)

    # MITIE cannot train unless at least one entity has been tagged.
    if not has_trainable_entity:
        rasa.shared.utils.io.raise_warning(
            f"{self.__class__.__name__} could not be trained because no trainable "
            f"entities where found in the given training data. Please add some "
            f"NLU training examples that include entities where the `extractor` "
            f"is either `None` or '{self.__class__.__name__}'."
        )
    else:
        self._ner = ner_trainer.train()

    self.persist()
    return self._resource
def __init__(self, dat, **kwargs):
    """Create a MITIE NER trainer from ``dat`` and keep it on ``self.trainer``.

    Extra keyword arguments are accepted but not used here.
    """
    ner_trainer = mitie.ner_trainer(dat)
    self.trainer = ner_trainer