def test_mitie():
    """Check that MITIETokenizer splits English and polytonic Greek text into word tokens."""
    from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer

    tk = MITIETokenizer()

    # Every comparison has to be asserted: a bare `a == b` expression is
    # evaluated and thrown away, so the test could never fail.
    assert tk.tokenize(u"Hi. My name is rasa") == [
        u'Hi', u'My', u'name', u'is', u'rasa'
    ]
    # NOTE(review): the Greek input carries no trailing period here; confirm
    # how MITIE tokenizes a sentence-final '.' before adding one back.
    assert tk.tokenize(u"ὦ ἄνδρες ᾿Αθηναῖοι") == [
        u'ὦ', u'ἄνδρες', u'᾿Αθηναῖοι'
    ]
def test_mitie():
    """Check MITIETokenizer token output and the offsets it reports for each token."""
    from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer

    tk = MITIETokenizer()

    # Every comparison has to be asserted: a bare `a == b` expression is
    # evaluated and thrown away, so the test could never fail.
    assert tk.tokenize(u"Hi. My name is rasa") == [
        u'Hi', u'My', u'name', u'is', u'rasa'
    ]
    # NOTE(review): the Greek input carries no trailing period here; confirm
    # how MITIE tokenizes a sentence-final '.' before adding one back.
    assert tk.tokenize(u"ὦ ἄνδρες ᾿Αθηναῖοι") == [
        u'ὦ', u'ἄνδρες', u'᾿Αθηναῖοι'
    ]
    # tokenize_with_offsets is expected to return a (tokens, offsets) pair,
    # the offsets being the start index of each token in the input.
    assert tk.tokenize_with_offsets(u"Forecast for lunch") == (
        [u'Forecast', u'for', u'lunch'],
        [0, 9, 13],
    )
class MITIESklearnInterpreter(Interpreter):
    """Interpreter that pairs a MITIE entity extractor with a MITIE text categorizer."""

    def __init__(self, metadata):
        """Load both models from the paths stored in ``metadata``.

        :param metadata: mapping providing the "entity_extractor" and
                         "intent_classifier" entries
        """
        extractor_path = metadata["entity_extractor"]
        self.extractor = named_entity_extractor(extractor_path)
        classifier_path = metadata["intent_classifier"]
        self.classifier = text_categorizer(classifier_path)
        self.tokenizer = MITIETokenizer()

    def get_entities(self, tokens):
        """Map each extracted entity label to the space-joined tokens it covers.

        A later entity with the same label overwrites an earlier one.

        :param tokens: token list the extractor is run on
        :return: dict of entity label -> entity text
        """
        found = {}
        for entity in self.extractor.extract_entities(tokens):
            token_range = entity[0]
            words = [tokens[idx] for idx in token_range]
            found[entity[1]] = " ".join(words)
        return found

    def get_intent(self, tokens):
        """Return the intent label predicted for ``tokens``; the score is dropped."""
        intent, _score = self.classifier(tokens)
        return intent

    def parse(self, text):
        """Tokenize ``text`` and return its intent and entities as a dict."""
        tokens = self.tokenizer.tokenize(text)
        result = {'intent': self.get_intent(tokens)}
        result['entities'] = self.get_entities(tokens)
        return result
def test_mitie():
    """Exercise MITIETokenizer.tokenize and tokenize_with_offsets on ASCII and non-ASCII text."""
    from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer

    tokenizer = MITIETokenizer()

    # Plain tokenization.
    english = tokenizer.tokenize(u"Hi. My name is rasa")
    assert english == [u'Hi', u'My', u'name', u'is', u'rasa']

    greek = tokenizer.tokenize(u"ὦ ἄνδρες ᾿Αθηναῖοι")
    assert greek == [u'ὦ', u'ἄνδρες', u'᾿Αθηναῖοι']

    # Tokenization with start offsets; the result is a (tokens, offsets) pair.
    forecast = tokenizer.tokenize_with_offsets(u"Forecast for lunch")
    assert forecast == ([u'Forecast', u'for', u'lunch'], [0, 9, 13])

    # The expected offsets below are character positions: the accented
    # eight-character word starts at 4 and the next token at 13.
    accented = tokenizer.tokenize_with_offsets(u"hey ńöñàśçií how're you?")
    expected_tokens = [u'hey', u'ńöñàśçií', u'how', u'\'re', 'you', '?']
    expected_offsets = [0, 4, 13, 16, 20, 23]
    assert accented == (expected_tokens, expected_offsets)
class MITIEInterpreter(Interpreter):
    """Interpreter using MITIE for both intent classification and entity extraction."""

    def __init__(self, intent_classifier=None, entity_extractor=None,
                 feature_extractor=None, entity_synonyms=None, **kwargs):
        """Load whichever models were supplied; each one is optional.

        :param intent_classifier: passed to ``text_categorizer`` when truthy
        :param entity_extractor: passed to ``named_entity_extractor`` when truthy
        :param feature_extractor: second argument handed to both MITIE loaders
        :param entity_synonyms: passed to ``Interpreter.load_synonyms`` when truthy
        :param kwargs: accepted and ignored
        """
        self.extractor = None
        self.classifier = None
        if entity_extractor:
            self.extractor = named_entity_extractor(entity_extractor,
                                                    feature_extractor)
        if intent_classifier:
            self.classifier = text_categorizer(intent_classifier,
                                               feature_extractor)
        self.tokenizer = MITIETokenizer()
        self.ent_synonyms = None
        if entity_synonyms:
            # Keep the loaded synonyms: discarding the return value would leave
            # ent_synonyms as None and parse() would never replace synonyms.
            self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)

    def get_intent(self, tokens):
        """Classify ``tokens``.

        :return: ``(label, score)``; ``("None", 0.0)`` when no classifier is loaded
        """
        if self.classifier:
            label, score = self.classifier(tokens)
        else:
            label, score = "None", 0.0
        return label, score

    def parse(self, text):
        """Tokenize ``text`` and return text, intent, entities and confidence as a dict."""
        tokens = self.tokenizer.tokenize(text)
        intent, score = self.get_intent(tokens)
        entities = get_entities(text, tokens, self.extractor)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)
        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': score,
        }
class MITIEInterpreter(Interpreter):
    """Interpreter using MITIE for both intent classification and entity extraction."""

    def __init__(self, intent_classifier=None, entity_extractor=None,
                 feature_extractor=None, **kwargs):
        """Load the MITIE entity extractor and text categorizer.

        :param intent_classifier: first argument to ``text_categorizer``
        :param entity_extractor: first argument to ``named_entity_extractor``
        :param feature_extractor: second argument handed to both MITIE loaders
        :param kwargs: accepted and ignored
        """
        self.extractor = named_entity_extractor(entity_extractor,
                                                feature_extractor)
        self.classifier = text_categorizer(intent_classifier,
                                           feature_extractor)
        self.tokenizer = MITIETokenizer()

    def get_entities(self, text):
        """Extract entities from ``text`` and locate each one in the original string.

        :return: list of dicts with "entity", "value", "start" and "end" keys,
                 where start/end are the span of the match inside ``text``
        """
        tokens = self.tokenizer.tokenize(text)
        ents = []
        entities = self.extractor.extract_entities(tokens)
        for e in entities:
            _range = e[0]
            # Tokens are literal text, so they must be escaped before being
            # used in a pattern; otherwise a token such as "?" or "(" makes
            # the regex invalid or match the wrong span.
            _regex = u"\\s*".join(re.escape(tokens[i]) for i in _range)
            expr = re.compile(_regex)
            # The search starts at the beginning of text, so the first
            # occurrence of the token sequence is the one reported.
            m = expr.search(text)
            start, end = m.start(), m.end()
            ents.append({
                "entity": e[1],
                "value": text[start:end],
                "start": start,
                "end": end
            })
        return ents

    def get_intent(self, text):
        """Return the intent label predicted for ``text``; the score is dropped."""
        # NOTE(review): this uses the module-level `tokenize`, not
        # self.tokenizer as get_entities does - confirm that is intended.
        tokens = tokenize(text)
        label, _ = self.classifier(tokens)  # don't use the score
        return label

    def parse(self, text):
        """Return the text, its intent and its entities as a dict."""
        intent = self.get_intent(text)
        entities = self.get_entities(text)
        return {'text': text, 'intent': intent, 'entities': entities}
class MITIEInterpreter(Interpreter):
    """Interpreter using MITIE for both intent classification and entity extraction."""

    @staticmethod
    def load(meta):
        """Build an interpreter from the model paths recorded in ``meta``.

        :type meta: ModelMetadata
        :rtype: MITIEInterpreter
        :raises Exception: if an entity extractor is configured but the
                           feature extractor path is missing or not a file
        """
        if meta.entity_extractor_path:
            if meta.feature_extractor_path is None or \
                    not os.path.isfile(meta.feature_extractor_path):
                # str() is required here: concatenating the metadata object
                # directly raises TypeError and hides the intended message.
                raise Exception(
                    "Invalid feature extractor path for MITIE model. "
                    "Meta data: " + str(meta))
            extractor = named_entity_extractor(
                meta.entity_extractor_path, meta.feature_extractor_path)
        else:
            extractor = None

        if meta.intent_classifier_path:
            classifier = text_categorizer(
                meta.intent_classifier_path, meta.feature_extractor_path)
        else:
            classifier = None

        if meta.entity_synonyms_path:
            entity_synonyms = Interpreter.load_synonyms(
                meta.entity_synonyms_path)
        else:
            entity_synonyms = None

        return MITIEInterpreter(classifier, extractor, entity_synonyms)

    def __init__(self, intent_classifier=None, entity_extractor=None,
                 entity_synonyms=None):
        """Store already-loaded components; any of them may be None.

        :param intent_classifier: callable returning ``(label, score)`` for tokens
        :param entity_extractor: extractor handed to ``get_entities``
        :param entity_synonyms: synonyms handed to ``Interpreter.replace_synonyms``
        """
        self.extractor = entity_extractor
        self.classifier = intent_classifier
        self.ent_synonyms = entity_synonyms
        self.tokenizer = MITIETokenizer()

    def get_intent(self, tokens):
        """Classify ``tokens``.

        :return: ``(label, score)``; ``("None", 0.0)`` when no classifier is loaded
        """
        if self.classifier:
            label, score = self.classifier(tokens)
        else:
            label, score = "None", 0.0
        return label, score

    def parse(self, text):
        """Tokenize ``text`` and return text, intent, entities and confidence as a dict."""
        tokens = self.tokenizer.tokenize(text)
        intent, score = self.get_intent(tokens)
        entities = get_entities(text, tokens, self.extractor)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)
        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': score,
        }
class MITIESklearnInterpreter(Interpreter):
    """Interpreter combining MITIE entity extraction with a pickled intent classifier."""

    def __init__(self, intent_classifier=None, entity_extractor=None,
                 feature_extractor=None, entity_synonyms=None, **kwargs):
        """Load whichever components were supplied; each one is optional.

        :param intent_classifier: path of the pickled classifier, read with cloudpickle
        :param entity_extractor: passed to ``named_entity_extractor`` when truthy
        :param feature_extractor: passed to the MITIE loader and to ``MITIEFeaturizer``
        :param entity_synonyms: passed to ``Interpreter.load_synonyms`` when truthy
        :param kwargs: accepted and ignored
        """
        self.extractor = None
        self.classifier = None

        if entity_extractor:
            self.extractor = named_entity_extractor(entity_extractor,
                                                    feature_extractor)

        if intent_classifier:
            with open(intent_classifier, 'rb') as classifier_file:
                self.classifier = cloudpickle.load(classifier_file)
            # The featurizer is only created alongside a classifier.
            self.featurizer = MITIEFeaturizer(feature_extractor)

        self.tokenizer = MITIETokenizer()

        if entity_synonyms:
            self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)
        else:
            self.ent_synonyms = None

    def get_intent(self, sentence_tokens):
        """Returns the most likely intent and its probability for the input text.

        :param sentence_tokens: text to classify
        :return: tuple of most likely intent name and its probability"""
        # Without a classifier there is nothing to predict.
        if not self.classifier:
            return "None", 0.0

        token_features = self.featurizer.features_for_tokens(sentence_tokens)
        X = token_features.reshape(1, -1)  # single-row feature matrix
        intent_ids, probabilities = self.classifier.predict(X)
        intents = self.classifier.transform_labels_num2str(intent_ids)
        return intents[0], probabilities[0]

    def parse(self, text):
        """Tokenize ``text`` and return text, intent, entities and confidence as a dict."""
        tokens = self.tokenizer.tokenize(text)
        intent, probability = self.get_intent(tokens)
        entities = get_entities(text, tokens, self.extractor)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)

        result = {'text': text, 'intent': intent}
        result['entities'] = entities
        result['confidence'] = probability
        return result
class MITIESklearnInterpreter(Interpreter):
    """Interpreter combining MITIE entity extraction with a pickled intent classifier."""

    def __init__(self, intent_classifier_file=None, entity_extractor_file=None,
                 feature_extractor_file=None, **kwargs):
        """Load whichever components were supplied; each one is optional.

        :param intent_classifier_file: path of the pickled classifier, read with cloudpickle
        :param entity_extractor_file: passed to ``named_entity_extractor`` when truthy
        :param feature_extractor_file: passed to ``MITIEFeaturizer``
        :param kwargs: accepted and ignored
        """
        # Every attribute gets a defined value up front so that a missing
        # model results in empty output rather than an AttributeError later.
        self.extractor = None
        self.classifier = None
        self.featurizer = None
        if entity_extractor_file:
            self.extractor = named_entity_extractor(entity_extractor_file)
        # All parameters default to None, so the classifier file must be
        # checked before opening it; get_intent already handles a missing
        # classifier.
        if intent_classifier_file:
            with open(intent_classifier_file, 'rb') as f:
                self.classifier = cloudpickle.load(f)
            self.featurizer = MITIEFeaturizer(feature_extractor_file)
        self.tokenizer = MITIETokenizer()

    def get_entities(self, tokens):
        """Map each extracted entity label to the space-joined tokens it covers.

        A later entity with the same label overwrites an earlier one.

        :param tokens: token list the extractor is run on
        :return: dict of entity label -> entity text; empty without an extractor
        """
        if self.extractor is None:
            return {}
        d = {}
        entities = self.extractor.extract_entities(tokens)
        for e in entities:
            _range = e[0]
            d[e[1]] = " ".join(tokens[i] for i in _range)
        return d

    def get_intent(self, text):
        """Returns the most likely intent and its probability for the input text.

        :param text: text to classify
        :return: tuple of most likely intent name and its probability"""
        if self.classifier:
            X = self.featurizer.create_bow_vecs([text])
            intent_ids, probabilities = self.classifier.predict(X)
            intents = self.classifier.transform_labels_num2str(intent_ids)
            intent, score = intents[0], probabilities[0]
        else:
            intent, score = "None", 0.0
        return intent, score

    def parse(self, text):
        """Tokenize ``text`` and return text, intent, entities and confidence as a dict."""
        tokens = self.tokenizer.tokenize(text)
        # NOTE(review): get_intent documents its argument as text but is
        # handed the token list here - confirm what create_bow_vecs expects.
        intent, probability = self.get_intent(tokens)
        entities = self.get_entities(tokens)
        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': probability,
        }
class MITIEInterpreter(Interpreter):
    """Interpreter using MITIE for both intent classification and entity extraction."""

    def __init__(self, intent_classifier=None, entity_extractor=None,
                 feature_extractor=None, entity_synonyms=None, **kwargs):
        """Load whichever models were supplied; each one is optional.

        :param intent_classifier: passed to ``text_categorizer`` when truthy
        :param entity_extractor: passed to ``named_entity_extractor`` when truthy
        :param feature_extractor: second argument handed to both MITIE loaders
        :param entity_synonyms: passed to ``Interpreter.load_synonyms`` when truthy
        :param kwargs: accepted and ignored
        """
        self.extractor = None
        self.classifier = None
        if entity_extractor:
            self.extractor = named_entity_extractor(entity_extractor,
                                                    feature_extractor)
        if intent_classifier:
            self.classifier = text_categorizer(intent_classifier,
                                               feature_extractor)
        self.tokenizer = MITIETokenizer()
        self.ent_synonyms = None
        if entity_synonyms:
            # Keep the loaded synonyms: discarding the return value would leave
            # ent_synonyms as None and parse() would never replace synonyms.
            self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)

    def get_entities(self, text):
        """Extract entities from ``text`` and locate each one in the original string.

        :return: list of dicts with "entity", "value", "start" and "end" keys,
                 where start/end are the span of the match inside ``text``;
                 empty when no extractor is loaded
        """
        tokens = self.tokenizer.tokenize(text)
        ents = []
        if self.extractor:
            entities = self.extractor.extract_entities(tokens)
            for e in entities:
                _range = e[0]
                # Tokens are escaped so they match literally; any amount of
                # whitespace is allowed between consecutive tokens.
                _regex = u"\\s*".join(re.escape(tokens[i]) for i in _range)
                expr = re.compile(_regex)
                # The search starts at the beginning of text, so the first
                # occurrence of the token sequence is the one reported.
                m = expr.search(text)
                start, end = m.start(), m.end()
                entity_value = text[start:end]
                ents.append({
                    "entity": e[1],
                    "value": entity_value,
                    "start": start,
                    "end": end
                })
        return ents

    def get_intent(self, text):
        """Classify ``text``.

        :return: ``(label, score)``; ``("None", 0.0)`` when no classifier is loaded
        """
        if self.classifier:
            # NOTE(review): this uses the module-level `tokenize`, not
            # self.tokenizer as get_entities does - confirm that is intended.
            tokens = tokenize(text)
            label, score = self.classifier(tokens)
        else:
            label, score = "None", 0.0
        return label, score

    def parse(self, text):
        """Return text, intent, entities and confidence for ``text`` as a dict."""
        intent, score = self.get_intent(text)
        entities = self.get_entities(text)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)
        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': score
        }
class MITIESklearnInterpreter(Interpreter):
    """Interpreter combining MITIE entity extraction with a pickled intent classifier."""

    @staticmethod
    def load(meta, featurizer=None):
        """Build an interpreter from the model paths recorded in ``meta``.

        A ``MITIEFeaturizer`` is created from ``meta.feature_extractor_path``
        unless one is passed in.

        :type meta: rasa_nlu.model.Metadata
        :rtype: MITIESklearnInterpreter
        """
        extractor = None
        if meta.entity_extractor_path:
            extractor = named_entity_extractor(meta.entity_extractor_path)

        if featurizer is None:
            featurizer = MITIEFeaturizer(meta.feature_extractor_path)

        classifier = None
        if meta.intent_classifier_path:
            with open(meta.intent_classifier_path, 'rb') as classifier_file:
                classifier = cloudpickle.load(classifier_file)

        entity_synonyms = None
        if meta.entity_synonyms_path:
            entity_synonyms = Interpreter.load_synonyms(
                meta.entity_synonyms_path)

        return MITIESklearnInterpreter(classifier, extractor, featurizer,
                                       entity_synonyms)

    def __init__(self, intent_classifier=None, entity_extractor=None,
                 featurizer=None, entity_synonyms=None):
        """Store already-loaded components; any of them may be None."""
        self.extractor = entity_extractor
        self.classifier = intent_classifier
        self.featurizer = featurizer
        self.tokenizer = MITIETokenizer()
        self.ent_synonyms = entity_synonyms

    def get_intent(self, sentence_tokens):
        """Returns the most likely intent and its probability for the input text.

        :param sentence_tokens: text to classify
        :return: tuple of most likely intent name and its probability"""
        # Without a classifier there is nothing to predict.
        if not self.classifier:
            return "None", 0.0

        token_features = self.featurizer.features_for_tokens(sentence_tokens)
        X = token_features.reshape(1, -1)  # single-row feature matrix
        intent_ids, probabilities = self.classifier.predict(X)
        intents = self.classifier.transform_labels_num2str(intent_ids)
        return intents[0], probabilities[0]

    def parse(self, text):
        """Tokenize ``text`` and return text, intent, entities and confidence as a dict."""
        tokens = self.tokenizer.tokenize(text)
        intent, probability = self.get_intent(tokens)
        entities = get_entities(text, tokens, self.extractor, self.featurizer)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)

        result = {'text': text, 'intent': intent}
        result['entities'] = entities
        result['confidence'] = probability
        return result
class MITIEInterpreter(Interpreter):
    """Interpreter using MITIE for both intent classification and entity extraction."""

    @staticmethod
    def load(meta, featurizer=None):
        """Build an interpreter from the model paths recorded in ``meta``.

        A ``MITIEFeaturizer`` is created from ``meta.feature_extractor_path``
        unless one is passed in.

        :type meta: rasa_nlu.model.Metadata
        :rtype: MITIEInterpreter
        """
        extractor = None
        if meta.entity_extractor_path:
            extractor = named_entity_extractor(meta.entity_extractor_path)

        classifier = None
        if meta.intent_classifier_path:
            classifier = text_categorizer(meta.intent_classifier_path)

        if featurizer is None:
            featurizer = MITIEFeaturizer(meta.feature_extractor_path)

        entity_synonyms = None
        if meta.entity_synonyms_path:
            entity_synonyms = Interpreter.load_synonyms(
                meta.entity_synonyms_path)

        return MITIEInterpreter(classifier, extractor, featurizer,
                                entity_synonyms)

    def __init__(self, intent_classifier=None, entity_extractor=None,
                 featurizer=None, entity_synonyms=None):
        """Store already-loaded components; any of them may be None."""
        self.extractor = entity_extractor
        self.featurizer = featurizer
        self.classifier = intent_classifier
        self.ent_synonyms = entity_synonyms
        self.tokenizer = MITIETokenizer()

    def get_intent(self, tokens):
        """Classify ``tokens`` using the featurizer's feature extractor.

        :return: ``(label, score)``; ``("None", 0.0)`` when no classifier is loaded
        """
        if not self.classifier:
            return "None", 0.0

        label, score = self.classifier(tokens,
                                       self.featurizer.feature_extractor)
        return label, score

    def parse(self, text):
        """Tokenize ``text`` and return text, intent, entities and confidence as a dict."""
        tokens = self.tokenizer.tokenize(text)
        intent, score = self.get_intent(tokens)
        entities = get_entities(text, tokens, self.extractor, self.featurizer)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)

        result = {'text': text, 'intent': intent}
        result['entities'] = entities
        result['confidence'] = score
        return result