def __init__(self, intent_classifier_file=None, entity_extractor_file=None, feature_extractor_file=None, **kwargs):
    """Load a MITIE-based interpreter from serialized model files.

    :param intent_classifier_file: path to a cloudpickled intent classifier (optional)
    :param entity_extractor_file: path to a trained MITIE NER model (optional)
    :param feature_extractor_file: path to the MITIE word-feature extractor file
    """
    # Bug fix: default both components to None so the attributes always
    # exist (the original left ``self.extractor`` unset when no extractor
    # file was given) and ``open(None)`` is never attempted even though
    # ``intent_classifier_file`` defaults to None.
    self.extractor = None
    self.classifier = None
    if entity_extractor_file:
        self.extractor = named_entity_extractor(entity_extractor_file)
    if intent_classifier_file:
        # The classifier was serialized with cloudpickle during training.
        with open(intent_classifier_file, 'rb') as f:
            self.classifier = cloudpickle.load(f)
    self.featurizer = MITIEFeaturizer(feature_extractor_file)
    self.tokenizer = MITIETokenizer()
Example #2
0
 def __init__(self,
              intent_classifier=None,
              entity_extractor=None,
              entity_synonyms=None):
     """Keep references to already-constructed model components.

     Components are injected ready-made; nothing is loaded from disk here.
     """
     self.tokenizer = MITIETokenizer()
     self.classifier = intent_classifier
     self.extractor = entity_extractor
     self.ent_synonyms = entity_synonyms
Example #3
0
 def __init__(self,
              intent_classifier=None,
              entity_extractor=None,
              feature_extractor=None,
              **kwargs):
     """Load MITIE NER and text-categorizer models from the given paths."""
     self.tokenizer = MITIETokenizer()
     # Both MITIE models are loaded against the same word-feature extractor.
     self.extractor = named_entity_extractor(entity_extractor, feature_extractor)
     self.classifier = text_categorizer(intent_classifier, feature_extractor)
Example #4
0
 def find_entity(cls, ent, text):
     """Map an entity's character span onto token indices.

     Returns ``(start_token, end_token)`` such that ``tokens[start:end]``
     covers the annotated value. Raises ValueError when the entity does
     not begin on a token boundary.
     """
     tokens, offsets = MITIETokenizer().tokenize_with_offsets(text)
     if ent["start"] not in offsets:
         raise ValueError(
         u"invalid entity {0} in example {1}:".format(ent, text) +
         u" entities must span whole tokens")
     start_token = offsets.index(ent["start"])
     # Number of tokens in the annotated value determines the end index.
     value_tokens = tokenize(text[ent["start"]:ent["end"]])
     return start_token, start_token + len(value_tokens)
Example #5
0
 def __init__(self,
              intent_classifier=None,
              entity_extractor=None,
              feature_extractor=None,
              entity_synonyms=None,
              **kwargs):
     """Load optional MITIE NER, intent and synonym models.

     :param intent_classifier: path to a trained MITIE text categorizer
     :param entity_extractor: path to a trained MITIE NER model
     :param feature_extractor: path to the MITIE word-feature extractor
     :param entity_synonyms: path to a synonyms mapping file
     """
     self.extractor = None
     self.classifier = None
     if entity_extractor:
         self.extractor = named_entity_extractor(entity_extractor, feature_extractor)
     if intent_classifier:
         self.classifier = text_categorizer(intent_classifier, feature_extractor)
     self.tokenizer = MITIETokenizer()
     self.ent_synonyms = None
     if entity_synonyms:
         # Bug fix: the loaded synonyms were discarded, leaving
         # ``self.ent_synonyms`` permanently None so synonym replacement
         # never ran. Keep the returned mapping.
         self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)
Example #6
0
class MITIEInterpreter(Interpreter):
    """Interpreter backed by MITIE NER and text-categorizer models."""

    def __init__(self,
                 intent_classifier=None,
                 entity_extractor=None,
                 feature_extractor=None,
                 entity_synonyms=None,
                 **kwargs):
        """Load the optional entity, intent and synonym models from paths."""
        self.extractor = None
        self.classifier = None
        if entity_extractor:
            self.extractor = named_entity_extractor(entity_extractor,
                                                    feature_extractor)
        if intent_classifier:
            self.classifier = text_categorizer(intent_classifier,
                                               feature_extractor)
        self.tokenizer = MITIETokenizer()
        self.ent_synonyms = None
        if entity_synonyms:
            # Bug fix: the result of load_synonyms was discarded, so
            # ``self.ent_synonyms`` stayed None and parse() never
            # replaced synonyms. Keep the loaded mapping.
            self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)

    def get_intent(self, tokens):
        """Return (intent_label, confidence); ("None", 0.0) without a classifier."""
        if self.classifier:
            label, score = self.classifier(tokens)
        else:
            label, score = "None", 0.0
        return label, score

    def parse(self, text):
        """Tokenize ``text`` and return intent, entities and confidence."""
        tokens = self.tokenizer.tokenize(text)
        intent, score = self.get_intent(tokens)
        entities = get_entities(text, tokens, self.extractor)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)

        return {'text': text, 'intent': intent, 'entities': entities, 'confidence': score}
Example #7
0
    def __init__(self, resource_name, backend, language_name):
        """Discover training files, select a tokenizer and load the data."""
        self.intent_examples = []
        self.entity_examples = []
        self.resource_name = resource_name
        self.language_name = language_name
        self.files = util.recursively_find_files(resource_name)
        self.fformat = self.guess_format(self.files)
        self.tokenizer = None

        if backend in ['mitie', 'mitie_sklearn']:
            from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer
            self.tokenizer = MITIETokenizer()
        elif backend in ['spacy_sklearn']:
            from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
            self.tokenizer = SpacyTokenizer(language_name)
        else:
            # Unknown backend: fall back to naive whitespace splitting.
            from rasa_nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer
            self.tokenizer = WhitespaceTokenizer()
            warnings.warn(
                "backend not recognised by TrainingData : defaulting to tokenizing by splitting on whitespace"
            )

        # Dispatch on the guessed training-data format.
        loaders = {
            'luis': lambda: self.load_luis_data(self.files[0]),
            'wit': lambda: self.load_wit_data(self.files[0]),
            'api': lambda: self.load_api_data(self.files),
            'rasa_nlu': lambda: self.load_data(self.files[0]),
        }
        loader = loaders.get(self.fformat)
        if loader is None:
            raise ValueError("unknown training file format : {0}".format(
                self.fformat))
        loader()
Example #8
0
class MITIEInterpreter(Interpreter):
    """Interpreter using MITIE models; all model paths are required at init."""

    def __init__(self, intent_classifier=None, entity_extractor=None, feature_extractor=None, **kwargs):
        """Load NER and intent models against the shared feature extractor."""
        self.extractor = named_entity_extractor(entity_extractor, feature_extractor)
        self.classifier = text_categorizer(intent_classifier, feature_extractor)
        self.tokenizer = MITIETokenizer()

    def get_entities(self, text):
        """Extract entities and locate their character spans in ``text``."""
        tokens = self.tokenizer.tokenize(text)
        ents = []
        entities = self.extractor.extract_entities(tokens)
        for e in entities:
            _range = e[0]
            # Bug fix: tokens must be regex-escaped before being joined
            # into a pattern; tokens containing metacharacters (e.g. "?",
            # "(", "+") otherwise break the regex or match a wrong span.
            _regex = u"\s*".join(re.escape(tokens[i]) for i in _range)
            expr = re.compile(_regex)
            m = expr.search(text)
            start, end = m.start(), m.end()
            ents.append({
                "entity": e[1],
                "value": text[start:end],
                "start": start,
                "end": end
            })

        return ents

    def get_intent(self, text):
        """Return the most likely intent label (score discarded)."""
        tokens = tokenize(text)
        label, _ = self.classifier(tokens)  # don't use the score
        return label

    def parse(self, text):
        """Return intent and entities for ``text``."""
        intent = self.get_intent(text)
        entities = self.get_entities(text)

        return {'text': text, 'intent': intent, 'entities': entities}
Example #9
0
class MITIESklearnInterpreter(Interpreter):
    """Interpreter constructed from a metadata dict of model file paths."""

    def __init__(self, metadata):
        """Load the NER model, intent model and tokenizer from metadata."""
        self.extractor = named_entity_extractor(
            metadata["entity_extractor"])  # ,metadata["feature_extractor"])
        self.classifier = text_categorizer(
            metadata["intent_classifier"])  # ,metadata["feature_extractor"])
        self.tokenizer = MITIETokenizer()

    def get_entities(self, tokens):
        """Return a {label: value} dict of extracted entities."""
        return {e[1]: " ".join(tokens[i] for i in e[0])
                for e in self.extractor.extract_entities(tokens)}

    def get_intent(self, tokens):
        """Return the most likely intent label (score discarded)."""
        label, _ = self.classifier(tokens)
        return label

    def parse(self, text):
        """Parse ``text`` into an intent and an entity dict."""
        words = self.tokenizer.tokenize(text)
        intent = self.get_intent(words)
        entities = self.get_entities(words)
        return {'intent': intent, 'entities': entities}
Example #10
0
def test_mitie():
    """Check MITIE tokenization and offset computation.

    Bug fix: the comparisons were bare expressions whose results were
    discarded, so the test could never fail; they are now asserted
    (matching the asserted variant elsewhere in this collection).
    """
    from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer
    tk = MITIETokenizer()

    assert tk.tokenize(u"Hi. My name is rasa") == [
        u'Hi', u'My', u'name', u'is', u'rasa'
    ]
    assert tk.tokenize(u"ὦ ἄνδρες ᾿Αθηναῖοι.") == [u'ὦ', u'ἄνδρες', u'᾿Αθηναῖοι']
    assert tk.tokenize_with_offsets(u"Forecast for lunch") == ([
        u'Forecast', u'for', u'lunch'
    ], [0, 9, 13])
 def __init__(self,
              intent_classifier=None,
              entity_extractor=None,
              feature_extractor=None,
              entity_synonyms=None,
              **kwargs):
     """Load optional NER, cloudpickled intent and synonym models."""
     self.extractor = (named_entity_extractor(entity_extractor,
                                              feature_extractor)
                       if entity_extractor else None)
     if intent_classifier:
         # The intent classifier was serialized with cloudpickle.
         with open(intent_classifier, 'rb') as f:
             self.classifier = cloudpickle.load(f)
     else:
         self.classifier = None
     self.featurizer = MITIEFeaturizer(feature_extractor)
     self.tokenizer = MITIETokenizer()
     self.ent_synonyms = (Interpreter.load_synonyms(entity_synonyms)
                          if entity_synonyms else None)
Example #12
0
 def init_tokenizer(self, backend, nlp):
     """Select a tokenizer implementation matching the configured backend."""
     if backend in [mitie.MITIE_BACKEND_NAME, mitie.MITIE_SKLEARN_BACKEND_NAME]:
         from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer
         self.tokenizer = MITIETokenizer()
         return
     if backend in [spacy.SPACY_BACKEND_NAME]:
         from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
         self.tokenizer = SpacyTokenizer(nlp)
         return
     # Unknown backend: fall back to naive whitespace splitting.
     from rasa_nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer
     self.tokenizer = WhitespaceTokenizer()
     warnings.warn(
         "backend not recognised by TrainingData : defaulting to tokenizing by splitting on whitespace")
Example #13
0
class MITIEInterpreter(Interpreter):
    """Interpreter whose components are loaded via a ModelMetadata object."""

    @staticmethod
    def load(meta):
        """Build an interpreter from model metadata.

        :type meta: ModelMetadata
        :rtype: MITIEInterpreter
        :raises Exception: if an entity extractor is configured without a
            valid feature extractor file.
        """
        if meta.entity_extractor_path:
            if meta.feature_extractor_path is None or not os.path.isfile(meta.feature_extractor_path):
                # Bug fix: concatenating the metadata object directly onto a
                # str raised TypeError and masked the intended error message.
                raise Exception("Invalid feature extractor path for MITIE model. Meta data: " + str(meta))
            extractor = named_entity_extractor(
                meta.entity_extractor_path, meta.feature_extractor_path)
        else:
            extractor = None
        if meta.intent_classifier_path:
            classifier = text_categorizer(
                meta.intent_classifier_path, meta.feature_extractor_path)
        else:
            classifier = None
        if meta.entity_synonyms_path:
            entity_synonyms = Interpreter.load_synonyms(meta.entity_synonyms_path)
        else:
            entity_synonyms = None
        return MITIEInterpreter(
            classifier,
            extractor,
            entity_synonyms)

    def __init__(self,
                 intent_classifier=None,
                 entity_extractor=None,
                 entity_synonyms=None):
        """Keep references to already-loaded model components."""
        self.extractor = entity_extractor
        self.classifier = intent_classifier
        self.ent_synonyms = entity_synonyms
        self.tokenizer = MITIETokenizer()

    def get_intent(self, tokens):
        """Return (intent_label, confidence); ("None", 0.0) without a classifier."""
        if self.classifier:
            label, score = self.classifier(tokens)
        else:
            label, score = "None", 0.0
        return label, score

    def parse(self, text):
        """Tokenize ``text`` and return intent, entities and confidence."""
        tokens = self.tokenizer.tokenize(text)
        intent, score = self.get_intent(tokens)
        entities = get_entities(text, tokens, self.extractor)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)

        return {'text': text, 'intent': intent, 'entities': entities, 'confidence': score}
Example #14
0
def test_mitie():
    """Check MITIE tokenization.

    Bug fix: the comparisons were bare expressions whose results were
    discarded, so the test could never fail; they are now asserted.
    """
    from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer
    tk = MITIETokenizer()

    assert tk.tokenize(u"Hi. My name is rasa") == [
        u'Hi', u'My', u'name', u'is', u'rasa'
    ]
    assert tk.tokenize(u"ὦ ἄνδρες ᾿Αθηναῖοι.") == [u'ὦ', u'ἄνδρες', u'᾿Αθηναῖοι']
class MITIESklearnInterpreter(Interpreter):
    """Interpreter combining MITIE features with an sklearn intent model."""

    def __init__(self,
                 intent_classifier=None,
                 entity_extractor=None,
                 feature_extractor=None,
                 entity_synonyms=None,
                 **kwargs):
        """Load the optional NER, cloudpickled intent and synonym models."""
        self.extractor = None
        self.classifier = None
        if entity_extractor:
            self.extractor = named_entity_extractor(entity_extractor,
                                                    feature_extractor)
        if intent_classifier:
            # The sklearn classifier was serialized with cloudpickle.
            with open(intent_classifier, 'rb') as f:
                self.classifier = cloudpickle.load(f)
        self.featurizer = MITIEFeaturizer(feature_extractor)
        self.tokenizer = MITIETokenizer()
        self.ent_synonyms = None
        if entity_synonyms:
            self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)

    def get_intent(self, sentence_tokens):
        """Return (intent_name, probability) for the tokenized sentence.

        Falls back to ("None", 0.0) when no classifier is loaded.
        """
        if not self.classifier:
            return "None", 0.0
        feature_vector = self.featurizer.features_for_tokens(
            sentence_tokens).reshape(1, -1)
        intent_ids, probabilities = self.classifier.predict(feature_vector)
        labels = self.classifier.transform_labels_num2str(intent_ids)
        return labels[0], probabilities[0]

    def parse(self, text):
        """Tokenize, classify intent and extract entities for ``text``."""
        sentence_tokens = self.tokenizer.tokenize(text)
        intent, probability = self.get_intent(sentence_tokens)
        entities = get_entities(text, sentence_tokens, self.extractor)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)

        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': probability
        }
Example #16
0
def test_mitie_featurizer():
    """Smoke-test MITIEFeaturizer against known feature values."""
    from rasa_nlu.featurizers.mitie_featurizer import MITIEFeaturizer

    # Prefer the model file from the environment; fall back to the
    # repository default path.
    model_file = os.environ.get('MITIE_FILE')
    if not model_file or not os.path.isfile(model_file):
        model_file = "data/total_word_feature_extractor.dat"

    featurizer = MITIEFeaturizer(model_file)
    tokens = MITIETokenizer().tokenize("Hey how are you today")
    vecs = featurizer.features_for_tokens(tokens)
    expected = np.array([0., -4.4551446, 0.26073121, -1.46632245, -1.84205751])
    assert np.allclose(vecs[:5], expected, atol=1e-5)
Example #17
0
def test_mitie():
    """Verify MITIE tokenization and character offsets."""
    from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer
    tokenizer = MITIETokenizer()

    assert tokenizer.tokenize(u"Hi. My name is rasa") == [
        u'Hi', u'My', u'name', u'is', u'rasa']
    assert tokenizer.tokenize(u"ὦ ἄνδρες ᾿Αθηναῖοι") == [
        u'ὦ', u'ἄνδρες', u'᾿Αθηναῖοι']
    assert tokenizer.tokenize_with_offsets(u"Forecast for lunch") == (
        [u'Forecast', u'for', u'lunch'], [0, 9, 13])
    assert tokenizer.tokenize_with_offsets(u"hey ńöñàśçií how're you?") == (
        [u'hey', u'ńöñàśçií', u'how', u'\'re', 'you', '?'],
        [0, 4, 13, 16, 20, 23])
class MITIESklearnInterpreter(Interpreter):
    """MITIE + sklearn interpreter loaded from serialized model files."""

    def __init__(self, intent_classifier_file=None, entity_extractor_file=None, feature_extractor_file=None, **kwargs):
        """Load the optional NER and cloudpickled intent classifier models.

        Bug fix: ``self.extractor``/``self.classifier`` now default to None
        so the attributes always exist, and ``open(None)`` is no longer
        attempted when no classifier file is given (the parameter defaults
        to None).
        """
        self.extractor = None
        self.classifier = None
        if entity_extractor_file:
            self.extractor = named_entity_extractor(entity_extractor_file)
        if intent_classifier_file:
            with open(intent_classifier_file, 'rb') as f:
                self.classifier = cloudpickle.load(f)
        self.featurizer = MITIEFeaturizer(feature_extractor_file)
        self.tokenizer = MITIETokenizer()

    def get_entities(self, tokens):
        """Return a {label: value} dict of entities found in the tokens."""
        d = {}
        # Guard: extraction only makes sense when an extractor was loaded.
        if self.extractor:
            entities = self.extractor.extract_entities(tokens)
            for e in entities:
                _range = e[0]
                d[e[1]] = " ".join(tokens[i] for i in _range)
        return d

    def get_intent(self, text):
        """Returns the most likely intent and its probability for the input text.

        :param text: text to classify
        :return: tuple of most likely intent name and its probability"""
        if self.classifier:
            X = self.featurizer.create_bow_vecs([text])
            intent_ids, probabilities = self.classifier.predict(X)
            intents = self.classifier.transform_labels_num2str(intent_ids)
            intent, score = intents[0], probabilities[0]
        else:
            intent, score = "None", 0.0

        return intent, score

    def parse(self, text):
        """Tokenize ``text`` and return intent, entities and confidence."""
        tokens = self.tokenizer.tokenize(text)
        # NOTE(review): get_intent is called with the token list although
        # its parameter is named ``text`` — confirm create_bow_vecs accepts
        # tokenized input.
        intent, probability = self.get_intent(tokens)
        entities = self.get_entities(tokens)

        return {'text': text, 'intent': intent, 'entities': entities, 'confidence': probability}
Example #19
0
 def __init__(self, metadata):
     """Build interpreter components from a metadata dictionary.

     The feature-extractor argument is intentionally not passed to the
     MITIE loaders (it was commented out in the original call).
     """
     self.tokenizer = MITIETokenizer()
     self.extractor = named_entity_extractor(metadata["entity_extractor"])
     self.classifier = text_categorizer(metadata["intent_classifier"])
Example #20
0
class MITIEInterpreter(Interpreter):
    """Interpreter backed by MITIE NER and text-categorizer models."""

    def __init__(self,
                 intent_classifier=None,
                 entity_extractor=None,
                 feature_extractor=None,
                 entity_synonyms=None,
                 **kwargs):
        """Load the optional entity, intent and synonym models from paths."""
        self.extractor = None
        self.classifier = None
        if entity_extractor:
            self.extractor = named_entity_extractor(entity_extractor,
                                                    feature_extractor)
        if intent_classifier:
            self.classifier = text_categorizer(intent_classifier,
                                               feature_extractor)
        self.tokenizer = MITIETokenizer()
        self.ent_synonyms = None
        if entity_synonyms:
            # Bug fix: the result of load_synonyms was discarded, so
            # ``self.ent_synonyms`` stayed None and parse() never
            # replaced synonyms. Keep the loaded mapping.
            self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)

    def get_entities(self, text):
        """Extract entities and locate their character spans in ``text``."""
        tokens = self.tokenizer.tokenize(text)
        ents = []
        if self.extractor:
            entities = self.extractor.extract_entities(tokens)
            for e in entities:
                _range = e[0]
                # Tokens are escaped so regex metacharacters in them
                # cannot corrupt the search pattern.
                _regex = u"\s*".join(re.escape(tokens[i]) for i in _range)
                expr = re.compile(_regex)
                m = expr.search(text)
                start, end = m.start(), m.end()
                entity_value = text[start:end]
                ents.append({
                    "entity": e[1],
                    "value": entity_value,
                    "start": start,
                    "end": end
                })

        return ents

    def get_intent(self, text):
        """Return (intent_label, confidence); ("None", 0.0) without a classifier."""
        if self.classifier:
            tokens = tokenize(text)
            label, score = self.classifier(tokens)
        else:
            label, score = "None", 0.0
        return label, score

    def parse(self, text):
        """Return intent, entities and confidence for ``text``."""
        intent, score = self.get_intent(text)
        entities = self.get_entities(text)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)

        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': score
        }
Example #21
0
class MITIESklearnInterpreter(Interpreter):
    """Interpreter combining MITIE features with an sklearn intent model."""

    @staticmethod
    def load(meta, featurizer=None):
        """
        :type meta: rasa_nlu.model.Metadata
        :rtype: MITIESklearnInterpreter
        """
        extractor = (named_entity_extractor(meta.entity_extractor_path)
                     if meta.entity_extractor_path else None)

        if featurizer is None:
            featurizer = MITIEFeaturizer(meta.feature_extractor_path)

        classifier = None
        if meta.intent_classifier_path:
            # The sklearn classifier was serialized with cloudpickle.
            with open(meta.intent_classifier_path, 'rb') as f:
                classifier = cloudpickle.load(f)

        entity_synonyms = (Interpreter.load_synonyms(meta.entity_synonyms_path)
                           if meta.entity_synonyms_path else None)

        return MITIESklearnInterpreter(classifier, extractor, featurizer,
                                       entity_synonyms)

    def __init__(self,
                 intent_classifier=None,
                 entity_extractor=None,
                 featurizer=None,
                 entity_synonyms=None):
        """Keep references to already-constructed model components."""
        self.classifier = intent_classifier
        self.extractor = entity_extractor
        self.featurizer = featurizer
        self.tokenizer = MITIETokenizer()
        self.ent_synonyms = entity_synonyms

    def get_intent(self, sentence_tokens):
        """Return (intent_name, probability) for the tokenized sentence.

        Falls back to ("None", 0.0) when no classifier is loaded.
        """
        if not self.classifier:
            return "None", 0.0
        feature_vector = self.featurizer.features_for_tokens(
            sentence_tokens).reshape(1, -1)
        intent_ids, probabilities = self.classifier.predict(feature_vector)
        labels = self.classifier.transform_labels_num2str(intent_ids)
        return labels[0], probabilities[0]

    def parse(self, text):
        """Tokenize, classify intent and extract entities for ``text``."""
        sentence_tokens = self.tokenizer.tokenize(text)
        intent, probability = self.get_intent(sentence_tokens)
        entities = get_entities(text, sentence_tokens, self.extractor,
                                self.featurizer)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)

        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': probability
        }
Example #22
0
class MITIEInterpreter(Interpreter):
    """Interpreter whose NER and intent models are plain MITIE models."""

    @staticmethod
    def load(meta, featurizer=None):
        """
        :type meta: rasa_nlu.model.Metadata
        :rtype: MITIEInterpreter
        """
        extractor = (named_entity_extractor(meta.entity_extractor_path)
                     if meta.entity_extractor_path else None)

        classifier = (text_categorizer(meta.intent_classifier_path)
                      if meta.intent_classifier_path else None)

        if featurizer is None:
            featurizer = MITIEFeaturizer(meta.feature_extractor_path)

        entity_synonyms = (Interpreter.load_synonyms(meta.entity_synonyms_path)
                           if meta.entity_synonyms_path else None)

        return MITIEInterpreter(classifier, extractor, featurizer,
                                entity_synonyms)

    def __init__(self,
                 intent_classifier=None,
                 entity_extractor=None,
                 featurizer=None,
                 entity_synonyms=None):
        """Keep references to already-constructed model components."""
        self.classifier = intent_classifier
        self.extractor = entity_extractor
        self.featurizer = featurizer
        self.ent_synonyms = entity_synonyms
        self.tokenizer = MITIETokenizer()

    def get_intent(self, tokens):
        """Return (label, score); ("None", 0.0) when no classifier is loaded."""
        if not self.classifier:
            return "None", 0.0
        label, score = self.classifier(tokens,
                                       self.featurizer.feature_extractor)
        return label, score

    def parse(self, text):
        """Parse ``text`` into intent, entities and confidence."""
        tokens = self.tokenizer.tokenize(text)
        intent, score = self.get_intent(tokens)
        entities = get_entities(text, tokens, self.extractor, self.featurizer)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)

        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': score
        }