def fit(self, dataset, intent):
    """Train the slot filler on one intent of the dataset

    Args:
        dataset (dict): A valid Snips dataset
        intent (str): Name of the intent whose utterances are used to
            train the slot filler

    Returns:
        :class:`CRFSlotFiller`: This very instance. When the intent
        defines no slots, it is returned right away and no CRF is trained.
    """
    logger.debug("Fitting %s slot filler...", intent)

    dataset = validate_and_format_dataset(dataset)
    self.fit_builtin_entity_parser_if_needed(dataset)
    self.fit_custom_entity_parser_if_needed(dataset)

    self.language = dataset[LANGUAGE]
    self.intent = intent
    self.slot_name_mapping = get_slot_name_mapping(dataset, intent)
    if not self.slot_name_mapping:
        # An intent without slots gives the CRF nothing to learn
        return self

    # Build the training samples from the augmented utterances
    rng = check_random_state(self.config.random_seed)
    augmentation_kwargs = self.config.data_augmentation_config.to_dict()
    utterances = augment_utterances(
        dataset, self.intent, language=self.language, random_state=rng,
        **augmentation_kwargs)

    samples = []
    for utterance in utterances:
        samples.append(utterance_to_sample(
            utterance[DATA], self.config.tagging_scheme, self.language))

    # The feature factories must be fitted before features are computed
    for feature_factory in self.features_factories:
        feature_factory.fit(dataset, intent)

    features = []
    for sample in samples:
        features.append(self.compute_features(sample[TOKENS], drop_out=True))
    labels = [list(sample[TAGS]) for sample in samples]

    # Make the training data safe and guarantee that the OUTSIDE label is
    # learnt, which avoids a segfault at inference time
    features, labels = _ensure_safe(features, labels)

    # Tags handed to the CRF must be ascii
    encoded_labels = []
    for sequence in labels:
        encoded_labels.append([_encode_tag(tag) for tag in sequence])

    self.crf_model = _get_crf_model(self.config.crf_args)
    self.crf_model.fit(features, encoded_labels)

    logger.debug(
        "Most relevant features for %s:\n%s", self.intent,
        DifferedLoggingMessage(self.log_weights))
    return self
def fit(self, dataset, intent, verbose=False):
    """Train the slot filler on one intent of the dataset

    Args:
        dataset (dict): A valid Snips dataset
        intent (str): Name of the intent whose utterances are used to
            train the slot filler
        verbose (bool, optional): When *True*, the CRF weights are printed
            after training

    Returns:
        :class:`CRFSlotFiller`: This very instance, once trained
    """
    dataset = validate_and_format_dataset(dataset)

    self.intent = intent
    self.slot_name_mapping = get_slot_name_mapping(dataset, intent)
    self.language = dataset[LANGUAGE]

    # Build the training samples from the augmented utterances
    rng = check_random_state(self.config.random_seed)
    augmentation_kwargs = self.config.data_augmentation_config.to_dict()
    utterances = augment_utterances(
        dataset, self.intent, language=self.language, random_state=rng,
        **augmentation_kwargs)

    samples = []
    for utterance in utterances:
        samples.append(utterance_to_sample(
            utterance[DATA], self.config.tagging_scheme, self.language))

    # The feature factories must be fitted before features are computed
    for feature_factory in self.features_factories:
        feature_factory.fit(dataset, intent)

    features = []
    for sample in samples:
        features.append(self.compute_features(sample[TOKENS], drop_out=True))

    # Tags handed to the CRF must be ascii
    labels = []
    for sample in samples:
        labels.append([_encode_tag(tag) for tag in sample[TAGS]])

    self.crf_model = _get_crf_model(self.config.crf_args)
    self.crf_model.fit(features, labels)

    if verbose:
        self.print_weights()
    return self
def fit(self, dataset, intent):
    """Train the slot filler on one intent of the dataset

    Args:
        dataset (dict): A valid Snips dataset
        intent (str): Name of the intent whose utterances are used to
            train the slot filler

    Returns:
        :class:`CRFSlotFiller`: This very instance, once trained
    """
    logger.debug("Fitting %s slot filler...", intent)

    dataset = validate_and_format_dataset(dataset)

    self.intent = intent
    self.slot_name_mapping = get_slot_name_mapping(dataset, intent)
    self.language = dataset[LANGUAGE]

    # Build the training samples from the augmented utterances
    rng = check_random_state(self.config.random_seed)
    augmentation_kwargs = self.config.data_augmentation_config.to_dict()
    utterances = augment_utterances(
        dataset, self.intent, language=self.language, random_state=rng,
        **augmentation_kwargs)

    samples = []
    for utterance in utterances:
        samples.append(utterance_to_sample(
            utterance[DATA], self.config.tagging_scheme, self.language))

    # The feature factories must be fitted before features are computed
    for feature_factory in self.features_factories:
        feature_factory.fit(dataset, intent)

    features = []
    for sample in samples:
        features.append(self.compute_features(sample[TOKENS], drop_out=True))

    # Tags handed to the CRF must be ascii
    labels = []
    for sample in samples:
        labels.append([_encode_tag(tag) for tag in sample[TAGS]])

    self.crf_model = _get_crf_model(self.config.crf_args)
    self.crf_model.fit(features, labels)

    logger.debug(
        "Most relevant features for %s:\n%s", self.intent,
        DifferedLoggingMessage(self.log_weights))
    return self
def fit(self, dataset, intent, verbose=False):
    """Fit the CRF of this slot filler for a given intent

    Args:
        dataset (dict): A valid Snips dataset
        intent (str): The intent of the dataset on which the slot filler
            is trained
        verbose (bool, optional): Set to *True* to print the CRF weights
            at the end of the training

    Returns:
        :class:`CRFSlotFiller`: The instance itself, after training
    """
    dataset = validate_and_format_dataset(dataset)

    self.intent = intent
    self.slot_name_mapping = get_slot_name_mapping(dataset, intent)
    self.language = dataset[LANGUAGE]

    # Augment the utterances of the intent and convert them to CRF samples
    seeded_state = check_random_state(self.config.random_seed)
    augmentation_params = self.config.data_augmentation_config.to_dict()
    augmented = augment_utterances(
        dataset, self.intent, language=self.language,
        random_state=seeded_state, **augmentation_params)

    training_samples = []
    for augmented_utterance in augmented:
        training_samples.append(utterance_to_sample(
            augmented_utterance[DATA], self.config.tagging_scheme,
            self.language))

    # Features can only be computed once every factory has been fitted
    for factory in self.features_factories:
        factory.fit(dataset, intent)

    inputs = []
    for training_sample in training_samples:
        inputs.append(
            self.compute_features(training_sample[TOKENS], drop_out=True))

    # The CRF expects ascii tags
    targets = []
    for training_sample in training_samples:
        targets.append([_encode_tag(tag) for tag in training_sample[TAGS]])

    self.crf_model = _get_crf_model(self.config.crf_args)
    self.crf_model.fit(inputs, targets)

    if verbose:
        self.print_weights()
    return self