def test_differed_logging_message(self):
    """Check that a DifferedLoggingMessage only evaluates its callable
    when the record is actually emitted by the logger."""
    # Given
    def add3(x, y, z):
        return x + y + z

    fn_mock = MagicMock()
    fn_mock.side_effect = add3

    class Greeter(object):
        def greet(self):
            return "Yo!"

    log_levels = [logging.DEBUG, logging.INFO, logging.WARNING]
    logger = logging.Logger("my_dummy_logger", logging.INFO)
    logger.addHandler(logging.StreamHandler())
    a_, b_, c_ = 1, 2, 3

    with self.fail_if_exception("Failed to log"):
        # When/Then
        greeter = Greeter()
        logger.log(logging.INFO, "Greeting: %s",
                   DifferedLoggingMessage(greeter.greet))
        for level in log_levels:
            logger.log(level, "Level: %s -> %s", str(level),
                       DifferedLoggingMessage(fn_mock, a_, b_, c=c_))
        # DEBUG is below the logger's INFO threshold, so the wrapped
        # callable must have been invoked only for INFO and WARNING
        self.assertEqual(2, fn_mock.call_count)
def _get_intents(self, text, intents_filter):
    """Classify *text* and return one result per intent, sorted by
    decreasing probability, optionally restricted to *intents_filter*."""
    # Normalize the filter to a set (or None) for cheap membership tests
    if isinstance(intents_filter, str):
        intents_filter = {intents_filter}
    elif isinstance(intents_filter, list):
        intents_filter = set(intents_filter)

    if not text or not self.intent_list or not self.featurizer:
        # Degenerate case: all the probability mass goes to the None intent
        results = [intent_classification_result(None, 1.0)]
        results += [intent_classification_result(i, 0.0)
                    for i in self.intent_list if i is not None]
        return results

    if len(self.intent_list) == 1:
        return [intent_classification_result(self.intent_list[0], 1.0)]

    # pylint: disable=C0103
    X = self.featurizer.transform([text_to_utterance(text)])
    # pylint: enable=C0103
    proba_vec = self._predict_proba(X)
    logger.debug(
        "%s", DifferedLoggingMessage(self.log_activation_weights, text, X))

    results = []
    for intent, proba in zip(self.intent_list, proba_vec[0]):
        # The None intent always passes the filter
        if intents_filter is None or intent is None \
                or intent in intents_filter:
            results.append(intent_classification_result(intent, proba))
    return sorted(results, key=lambda res: res[RES_PROBA], reverse=True)
def get_slots(self, text):
    """Extracts slots from the provided text

    Returns:
        list of dict: The list of extracted slots

    Raises:
        NotTrained: When the slot filler is not fitted
    """
    if not self.slot_name_mapping:
        # Early return if the intent has no slots
        return []

    tokens = tokenize(text, self.language)
    if not tokens:
        return []
    features = self.compute_features(tokens)

    tags = self.crf_model.predict_single(features)
    # Pass an explicit "%s" format string rather than the
    # DifferedLoggingMessage object itself, consistent with the other
    # logging call sites in this package; the wrapped call is still only
    # evaluated when DEBUG logging is enabled
    logger.debug(
        "%s",
        DifferedLoggingMessage(self.log_inference_weights, text,
                               tokens=tokens, features=features, tags=tags))
    decoded_tags = [_decode_tag(t) for t in tags]
    return tags_to_slots(text, tokens, decoded_tags,
                         self.config.tagging_scheme,
                         self.slot_name_mapping)
def fit(self, dataset):
    """Fits the intent classifier with a valid Snips dataset

    Returns:
        :class:`LogRegIntentClassifier`: The same instance, trained
    """
    import numpy as np
    from sklearn.linear_model import SGDClassifier
    from sklearn.utils import compute_class_weight

    logger.info("Fitting LogRegIntentClassifier...")
    dataset = validate_and_format_dataset(dataset)
    self.load_resources_if_needed(dataset[LANGUAGE])
    self.fit_builtin_entity_parser_if_needed(dataset)
    self.fit_custom_entity_parser_if_needed(dataset)
    language = dataset[LANGUAGE]

    data_augmentation_config = self.config.data_augmentation_config
    utterances, classes, intent_list = build_training_data(
        dataset, language, data_augmentation_config, self.resources,
        self.random_state)

    self.intent_list = intent_list
    if len(self.intent_list) <= 1:
        # Nothing to discriminate: at most one intent
        return self

    self.featurizer = Featurizer(
        config=self.config.featurizer_config,
        builtin_entity_parser=self.builtin_entity_parser,
        custom_entity_parser=self.custom_entity_parser,
        resources=self.resources,
        random_state=self.random_state,
    )
    self.featurizer.language = language

    none_class = max(classes)
    try:
        x = self.featurizer.fit_transform(
            dataset, utterances, classes, none_class)
    except _EmptyDatasetUtterancesError:
        logger.warning("No (non-empty) utterances found in dataset")
        self.featurizer = None
        return self

    alpha = get_regularization_factor(dataset)

    # scikit-learn >= 0.24 makes `classes` and `y` keyword-only in
    # compute_class_weight (positional use was deprecated, then removed),
    # and expects an array rather than a lazy range
    class_weights_arr = compute_class_weight(
        class_weight="balanced",
        classes=np.arange(none_class + 1),
        y=classes)
    # Re-weight the noise class
    class_weights_arr[-1] *= self.config.noise_reweight_factor
    class_weight = {idx: w for idx, w in enumerate(class_weights_arr)}

    self.classifier = SGDClassifier(random_state=self.random_state,
                                    alpha=alpha,
                                    class_weight=class_weight,
                                    **LOG_REG_ARGS)
    self.classifier.fit(x, classes)
    logger.debug("%s", DifferedLoggingMessage(self.log_best_features))
    return self
def fit(self, dataset):
    """Fits the intent classifier with a valid Snips dataset

    Returns:
        :class:`LogRegIntentClassifier`: The same instance, trained
    """
    logger.info("Fitting LogRegIntentClassifier...")
    dataset = validate_and_format_dataset(dataset)
    self.load_resources_if_needed(dataset[LANGUAGE])
    self.fit_builtin_entity_parser_if_needed(dataset)
    self.fit_custom_entity_parser_if_needed(dataset)
    language = dataset[LANGUAGE]

    data_augmentation_config = self.config.data_augmentation_config
    utterances, classes, intent_list = build_training_data(
        dataset, language, data_augmentation_config, self.resources,
        self.random_state)

    self.intent_list = intent_list
    if len(self.intent_list) <= 1:
        # Nothing to discriminate: at most one intent
        return self

    self.featurizer = Featurizer(
        config=self.config.featurizer_config,
        builtin_entity_parser=self.builtin_entity_parser,
        custom_entity_parser=self.custom_entity_parser,
        resources=self.resources,
        random_state=self.random_state,
    )
    self.featurizer.language = language

    none_class = max(classes)
    try:
        x = self.featurizer.fit_transform(
            dataset, utterances, classes, none_class)
    except _EmptyDatasetUtterancesError:
        # Don't swallow this silently: surface why training was skipped
        logger.warning("No (non-empty) utterances found in dataset")
        self.featurizer = None
        return self

    alpha = get_regularization_factor(dataset)
    self.classifier = SGDClassifier(random_state=self.random_state,
                                    alpha=alpha, **LOG_REG_ARGS)
    self.classifier.fit(x, classes)
    logger.debug("%s", DifferedLoggingMessage(self.log_best_features))
    return self
def fit(self, dataset, intent):
    """Fits the slot filler

    Args:
        dataset (dict): A valid Snips dataset
        intent (str): The specific intent of the dataset to train
            the slot filler on

    Returns:
        :class:`CRFSlotFiller`: The same instance, trained
    """
    logger.debug("Fitting %s slot filler...", intent)
    dataset = validate_and_format_dataset(dataset)
    self.load_resources_if_needed(dataset[LANGUAGE])
    self.fit_builtin_entity_parser_if_needed(dataset)
    self.fit_custom_entity_parser_if_needed(dataset)

    # Share the entity parsers and resources with every feature factory
    for feature_factory in self.features_factories:
        feature_factory.custom_entity_parser = self.custom_entity_parser
        feature_factory.builtin_entity_parser = self.builtin_entity_parser
        feature_factory.resources = self.resources

    self.language = dataset[LANGUAGE]
    self.intent = intent
    self.slot_name_mapping = get_slot_name_mapping(dataset, intent)

    if not self.slot_name_mapping:
        # No need to train the CRF if the intent has no slots
        return self

    random_state = check_random_state(self.config.random_seed)
    augmented_intent_utterances = augment_utterances(
        dataset, self.intent, language=self.language,
        resources=self.resources, random_state=random_state,
        **self.config.data_augmentation_config.to_dict())

    crf_samples = [
        utterance_to_sample(utterance[DATA], self.config.tagging_scheme,
                            self.language)
        for utterance in augmented_intent_utterances]

    for feature_factory in self.features_factories:
        feature_factory.fit(dataset, intent)

    # Ensure that X, Y are safe and that the OUTSIDE label is learnt to
    # avoid segfault at inference time
    # pylint: disable=C0103
    X = [self.compute_features(sample[TOKENS], drop_out=True)
         for sample in crf_samples]
    Y = [list(sample[TAGS]) for sample in crf_samples]
    X, Y = _ensure_safe(X, Y)
    # The CRF expects ascii tags
    Y = [[_encode_tag(tag) for tag in tag_seq] for tag_seq in Y]
    # pylint: enable=C0103

    self.crf_model = _get_crf_model(self.config.crf_args)
    self.crf_model.fit(X, Y)

    logger.debug("Most relevant features for %s:\n%s", self.intent,
                 DifferedLoggingMessage(self.log_weights))
    return self