Exemplo n.º 1
0
def test_duckling_entity_extractor_and_synonyms(component_builder):
    """Smoke-test the synonym processor on entities with int values.

    The duckling extractor is limited to the "number" dimension and run
    first; the same message is then passed through the synonym component.
    """
    pipeline_cfg = RasaNLUModelConfig({"pipeline": [{"name": "ner_duckling"}]})
    pipeline_cfg.set_component_attr("ner_duckling", dimensions=["number"])
    extractor = component_builder.create_component("ner_duckling",
                                                   pipeline_cfg)
    synonym_mapper = component_builder.create_component("ner_synonyms",
                                                        pipeline_cfg)
    msg = Message("He was 6 feet away")

    # Extraction must happen before synonym handling so that the synonym
    # processor receives the entities produced by duckling.
    for component in (extractor, synonym_mapper):
        component.process(msg)

    assert msg is not None
Exemplo n.º 2
0
def test_duckling_entity_extractor(component_builder):
    """Check time extraction without and with an explicit reference time."""
    cfg = RasaNLUModelConfig({"pipeline": [{"name": "ner_duckling"}]})
    cfg.set_component_attr("ner_duckling", dimensions=["time"])
    extractor = component_builder.create_component("ner_duckling", cfg)

    # First pass: no reference time is attached to the message.
    undated = Message("Today is the 5th of May. Let us meet tomorrow.")
    extractor.process(undated)
    assert len(undated.get("entities")) == 3

    # Second pass: same extractor, message carries a defined date.
    # 1381536182000 == 2013/10/12 02:03:02
    dated = Message("Let us meet tomorrow.", time="1381536182000")
    extractor.process(dated)
    found = dated.get("entities")
    assert len(found) == 1

    only_entity = found[0]
    assert only_entity["text"] == "tomorrow"
    assert only_entity["value"] == "2013-10-13T00:00:00.000Z"
Exemplo n.º 3
0
    def create_component(self,
                         component_config: Dict[Text, Any],
                         cfg: RasaNLUModelConfig) -> Component:
        """Return a component for `component_config`, using the cache.

        The cache is consulted first. On a miss the component is created
        through the registry and handed to the cache before being returned.

        Args:
            component_config: configuration of the component; its `name`
                entry is used in the error message.
            cfg: the model configuration, passed to the registry and used
                to build the `Metadata` for the cache lookup.

        Raises:
            Exception: if a `MissingArgumentError` occurs; the original
                error is attached as the explicit cause.
        """
        from rasa_nlu import registry
        from rasa_nlu.model import Metadata

        try:
            component, cache_key = self.__get_cached_component(
                component_config, Metadata(cfg.as_dict(), None))
            if component is None:
                component = registry.create_component_by_config(
                    component_config, cfg)
                self.__add_to_cache(component, cache_key)
            return component
        except MissingArgumentError as e:  # pragma: no cover
            # Chain explicitly so the traceback shows the missing-argument
            # error as the direct cause of this failure.
            raise Exception("Failed to create component `{}`. "
                            "{}".format(component_config['name'], e)) from e
Exemplo n.º 4
0
def test_spacy_ner_extractor(component_builder, spacy_nlp):
    """Check SpacyEntityExtractor output, unfiltered and PERSON-only."""
    cfg = RasaNLUModelConfig(
        {"pipeline": [{"name": "SpacyEntityExtractor"}]})
    extractor = component_builder.create_component(cfg.for_component(0), cfg)
    msg = Message("anywhere in the West", {
        "intent": "restaurant_search",
        "entities": [],
        "spacy_doc": spacy_nlp("anywhere in the west")})
    extractor.process(msg, spacy_nlp=spacy_nlp)

    expected_location = {
        'start': 16,
        'extractor': 'SpacyEntityExtractor',
        'end': 20,
        'value': 'West',
        'entity': 'LOC',
        'confidence': None}
    assert len(msg.get("entities", [])) == 1
    assert msg.get("entities")[0] == expected_location

    # Test dimension filtering includes only specified dimensions

    msg = Message("anywhere in the West with Sebastian Thrun", {
        "intent": "example_intent",
        "entities": [],
        "spacy_doc": spacy_nlp("anywhere in the West with Sebastian Thrun")})
    # A fresh config is built on purpose: the PERSON restriction must not
    # touch the configuration used for the first extractor.
    cfg = RasaNLUModelConfig(
        {"pipeline": [{"name": "SpacyEntityExtractor"}]})
    cfg.set_component_attr(0, dimensions=["PERSON"])
    extractor = component_builder.create_component(cfg.for_component(0), cfg)
    extractor.process(msg, spacy_nlp=spacy_nlp)

    expected_person = {
        'start': 26,
        'extractor': 'SpacyEntityExtractor',
        'end': 41,
        'value': 'Sebastian Thrun',
        'entity': 'PERSON',
        'confidence': None}
    assert len(msg.get("entities", [])) == 1
    assert msg.get("entities")[0] == expected_person
Exemplo n.º 5
0
import logging

from rasa_core.agent import Agent
from rasa_core.policies.keras_policy import KerasPolicy
from rasa_core.policies.memoization import MemoizationPolicy
from rasa_core.policies.form_policy import FormPolicy

import warnings
import ruamel.yaml as yaml
warnings.simplefilter('ignore', yaml.error.UnsafeLoaderWarning)
# Configure the root logger so that INFO-level records are emitted.
logging.basicConfig(level='INFO')
'''
training the nlu
'''
# NOTE(review): RasaNLUModelConfig, Trainer and load_data are not imported in
# the visible part of this script — confirm they are brought into scope.
# Build a trainer for the "tensorflow_embedding" pipeline.
args1 = {"pipeline": "tensorflow_embedding"}
conf1 = RasaNLUModelConfig(args1)
trainer1 = Trainer(conf1)

# NLU for agent 1: load the examples, train, and persist the model under
# './models' with the fixed name "ner_a2".
training_data1 = load_data("./data2/nlu.md")
Interpreter1 = trainer1.train(training_data1)
model_directory1 = trainer1.persist('./models', fixed_model_name="ner_a2")

# Core for agent 1: domain file, stories file and model path.
# Presumably all three feed the Agent set-up that follows — only the use of
# domain_file is visible here; verify the other two against the rest of the
# script.
domain_file = "domain2.yml"
training_data_file = './data2/stories.md'
model_path = './models/dialogue_agent_2'
agent = Agent(domain_file,
              policies=[
                  MemoizationPolicy(max_history=3),
                  KerasPolicy(max_history=3, epochs=500, batch_size=10),
Exemplo n.º 6
0
import json
import random
from rasa_nlu.training_data import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer

# Pipeline component names, in the order they appear in the pipeline.
_component_names = (
    "nlp_spacy",
    "tokenizer_spacy",
    "intent_entity_featurizer_regex",
    "intent_featurizer_spacy",
    "ner_crf",
    "ner_synonyms",
    "intent_classifier_sklearn",
    "ner_spacy",
)

# Each pipeline entry is a dict holding only the component's name.
config = RasaNLUModelConfig(
    configuration_values={
        "pipeline": [{"name": component} for component in _component_names]
    })

# Train an interpreter on the JSON training data.
trainer = Trainer(config)
training_data = load_data("training_data.json")
interpreter = trainer.train(training_data)

# Module-level state, initialised empty.
response = ''
adr = ''
params = {}
suggestions = []
excluded = []
Exemplo n.º 7
0
def test_crf_extractor(spacy_nlp, ner_crf_pos_feature_config):
    """Train the CRF extractor on two examples; check features and filtering.

    After training, the test inspects the token/feature conversion for one
    sentence, runs entity extraction on it, and verifies that
    `filter_trainable_entities` keeps only entities that either carry no
    extractor or name `CRFEntityExtractor`, without mutating the inputs.
    """
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    extractor = CRFEntityExtractor(
        component_config=ner_crf_pos_feature_config)

    west_example = Message("anywhere in the west", {
        "intent": "restaurant_search",
        "entities": [
            {"start": 16, "end": 20, "value": "west", "entity": "location"},
        ],
        "spacy_doc": spacy_nlp("anywhere in the west"),
    })
    restaurant_example = Message("central indian restaurant", {
        "intent": "restaurant_search",
        "entities": [
            {
                "start": 0,
                "end": 7,
                "value": "central",
                "entity": "location",
                "extractor": "random_extractor",
            },
            {
                "start": 8,
                "end": 14,
                "value": "indian",
                "entity": "cuisine",
                "extractor": "CRFEntityExtractor",
            },
        ],
        "spacy_doc": spacy_nlp("central indian restaurant"),
    })
    training_examples = [west_example, restaurant_example]

    # uses BILOU and the default features
    extractor.train(TrainingData(training_examples=training_examples),
                    RasaNLUModelConfig())

    text = 'anywhere in the west'
    attributes = {"spacy_doc": spacy_nlp(text)}
    crf_tokens = extractor._from_text_to_crf(Message(text, attributes))
    first_fields = [token[0] for token in crf_tokens]
    assert first_fields == ['anywhere', 'in', 'the', 'west']

    features = extractor._sentence_to_features(crf_tokens)
    assert 'BOS' in features[0]
    assert 'EOS' in features[-1]
    assert features[1]['0:low'] == "in"

    # Extraction is only exercised here; its result is not inspected.
    text = 'anywhere in the west'
    extractor.extract_entities(
        Message(text, {"spacy_doc": spacy_nlp(text)}))

    # The expected values below are fresh literals on purpose: comparing
    # against the dicts used to build the examples would hide mutation.
    kept = extractor.filter_trainable_entities(training_examples)
    assert kept[0].get('entities') == [{
        "start": 16,
        "end": 20,
        "value": "west",
        "entity": "location"
    }], 'Entity without extractor remains'
    assert kept[1].get('entities') == [{
        "start": 8,
        "end": 14,
        "value": "indian",
        "entity": "cuisine",
        "extractor": "CRFEntityExtractor"
    }], 'Only CRFEntityExtractor entity annotation remains'
    assert training_examples[1].get('entities')[0] == {
        "start": 0,
        "end": 7,
        "value": "central",
        "entity": "location",
        "extractor": "random_extractor"
    }, 'Original examples are not mutated'
Exemplo n.º 8
0
def test_duckling_entity_extractor(component_builder):
    """Exercise DucklingHTTPExtractor against a mocked duckling endpoint.

    Three scenarios are covered: extraction without a reference time,
    extraction from a message carrying an explicit time, and filtering of
    the mocked response down to the configured dimensions.

    HTTP mocking is switched off again in a ``finally`` clause so that the
    patched sockets and registered responses do not leak into other tests,
    even when an assertion fails.
    """
    httpretty.register_uri(
        httpretty.POST,
        "http://localhost:8000/parse",
        body="""[{"body":"Today","start":0,"value":{"values":[{
             "value":"2018-11-13T00:00:00.000-08:00","grain":"day",
             "type":"value"}],"value":"2018-11-13T00:00:00.000-08:00",
             "grain":"day","type":"value"},"end":5,
             "dim":"time","latent":false},{"body":"the 5th","start":9,
             "value":{"values":[{
             "value":"2018-12-05T00:00:00.000-08:00","grain":"day",
             "type":"value"},
             {"value":"2019-01-05T00:00:00.000-08:00","grain":"day",
             "type":"value"},
             {"value":"2019-02-05T00:00:00.000-08:00","grain":"day",
             "type":"value"}],
             "value":"2018-12-05T00:00:00.000-08:00","grain":"day",
             "type":"value"},"end":16,"dim":"time",
             "latent":false},{"body":"5th of May","start":13,"value":{
             "values":[{
             "value":"2019-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"},
             {"value":"2020-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"},
             {"value":"2021-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"}],
             "value":"2019-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"},"end":23,"dim":"time",
             "latent":false},{"body":"tomorrow","start":37,"value":{
             "values":[{
             "value":"2018-11-14T00:00:00.000-08:00","grain":"day",
             "type":"value"}],
             "value":"2018-11-14T00:00:00.000-08:00","grain":"day",
             "type":"value"},"end":45,"dim":"time",
             "latent":false}]""")
    httpretty.enable()

    try:
        _config = RasaNLUModelConfig(
            {"pipeline": [{
                "name": "DucklingHTTPExtractor"
            }]})
        _config.set_component_attr(0,
                                   dimensions=["time"],
                                   timezone="UTC",
                                   url="http://localhost:8000")
        duckling = component_builder.create_component(
            _config.for_component(0), _config)
        message = Message("Today is the 5th of May. Let us meet tomorrow.")
        duckling.process(message)
        entities = message.get("entities")
        assert len(entities) == 4

        # Test duckling with a defined date

        # Registering the same URL again replaces the mocked response.
        httpretty.register_uri(
            httpretty.POST,
            "http://localhost:8000/parse",
            body="""[{"body":"tomorrow","start":12,"value":{"values":[{
             "value":"2013-10-13T00:00:00.000Z","grain":"day",
             "type":"value"}],"value":"2013-10-13T00:00:00.000Z",
             "grain":"day","type":"value"},"end":20,
             "dim":"time","latent":false}]""")

        # 1381536182 == 2013/10/12 02:03:02
        message = Message("Let us meet tomorrow.", time="1381536182")
        duckling.process(message)
        entities = message.get("entities")
        assert len(entities) == 1
        assert entities[0]["text"] == "tomorrow"
        assert entities[0]["value"] == "2013-10-13T00:00:00.000Z"

        # Test dimension filtering includes only specified dimensions
        _config = RasaNLUModelConfig(
            {"pipeline": [{
                "name": "DucklingHTTPExtractor"
            }]})
        _config.set_component_attr(0,
                                   dimensions=["number"],
                                   url="http://localhost:8000")
        duckling_number = component_builder.create_component(
            _config.for_component(0), _config)
        # The mocked response holds a "time" and a "number" entity; only
        # the "number" one is expected to survive.
        httpretty.register_uri(
            httpretty.POST,
            "http://localhost:8000/parse",
            body="""[{"body":"Yesterday","start":0,"value":{"values":[{
            "value":"2019-02-28T00:00:00.000+01:00","grain":"day",
            "type":"value"}],"value":"2019-02-28T00:00:00.000+01:00",
            "grain":"day","type":"value"},"end":9,"dim":"time"},
            {"body":"5","start":21,"value":{"value":5,"type":"value"},
            "end":22,"dim":"number"}]""")

        message = Message("Yesterday there were 5 people in a room")
        duckling_number.process(message)
        entities = message.get("entities")
        assert len(entities) == 1
        assert entities[0]["text"] == "5"
        assert entities[0]["value"] == 5
    finally:
        # Undo the global socket patching and drop the registered URIs.
        httpretty.disable()
        httpretty.reset()
Exemplo n.º 9
0
def train_nlu(data, config, model_dir):
    """Train an NLU model and persist it under the fixed name 'weathernlu'.

    Args:
        data: passed to `load_data` to obtain the training data.
        config: passed to `load_json`; the result configures the pipeline.
        model_dir: directory handed to `Trainer.persist`.

    Returns:
        Whatever `Trainer.persist` returns (presumably the path of the
        persisted model — confirm against the rasa_nlu version in use).
        Earlier versions of this function discarded that value.
    """
    training_data = load_data(data)
    rasa_model_config = RasaNLUModelConfig(load_json(config))
    trainer = Trainer(rasa_model_config)
    trainer.train(training_data)
    return trainer.persist(model_dir, fixed_model_name='weathernlu')
Exemplo n.º 10
0
    def create(cls, config: RasaNLUModelConfig) -> 'DucklingHTTPExtractor':
        """Build an extractor from the model configuration.

        The component's settings are looked up via `cls.name`, with
        `cls.defaults` passed along, and the configured language is
        forwarded as the second constructor argument.
        """
        component_settings = config.for_component(cls.name, cls.defaults)
        language = config.language
        return cls(component_settings, language)
Exemplo n.º 11
0
from rasa_nlu.training_data import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer

# Load the training examples.
training_data = load_data('nlu.md')

# Every pipeline entry is a dict holding only the component's name.
pipeline = [
    {"name": component_name}
    for component_name in (
        "nlp_spacy",
        "tokenizer_spacy",
        "intent_featurizer_spacy",
        "intent_classifier_sklearn",
    )
]

# Train on the loaded data and persist the resulting model.
trainer = Trainer(RasaNLUModelConfig({"pipeline": pipeline}))
interpreter = trainer.train(training_data)
model_directory = trainer.persist('./projects/default/')
Exemplo n.º 12
0
        "name": "tokenizer_bert"
    }, {
        "name": "intent_featurizer_bert",
        "lm_spell_checker": True,
        "mask_spell_checker": False,
        "mul_similar_matrix": True,
        "spell_checker_score": 1
    }]

    # pipeline = [{"name": "tokenizer_bert"},
    #             {"name": "intent_featurizer_bert",
    #              "lm_spell_checker": False,
    #              "mask_spell_checker": True,
    #              "mul_similar_matrix": True,
    #              "spell_checker_score": 1}
    #             ]
    training_data = load_data('./data/examples/rasa/demo-rasa_zh.json')
    trainer = Trainer(
        RasaNLUModelConfig({
            "pipeline": pipeline,
            "language": "zh"
        }))
    interpreter = trainer.train(training_data)
    model = interpreter
    # model = Interpreter.load('./projects/spell_checker/default/model_20190115-163425')
    print('loaded')
    tornado.options.parse_command_line()
    http_server = tornado.httpserver.HTTPServer(Application())
    http_server.listen(options.port)
    tornado.ioloop.IOLoop.instance().start()
Exemplo n.º 13
0
from rasa_nlu.train import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.utils.spacy_utils import SpacyNLP
from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer
import numpy as np, spacy

# Load the demo training data and run the spaCy components' train() steps
# over it, in order: language model, tokenizer, featurizer.
training_data = load_data("data/examples/rasa/demo-rasa.json")
config = RasaNLUModelConfig()
SpacyNLP(nlp=spacy.load("en")).train(training_data, config)
SpacyTokenizer().train(training_data, config)
SpacyFeaturizer().train(training_data, config)

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# One intent label per intent example.
labels = [e.get("intent") for e in training_data.intent_examples]
le = LabelEncoder()

# y: the intent labels encoded as integers by the LabelEncoder.
y = le.fit_transform(labels)
# X: the per-example "text_features" stacked into a single array.
# Presumably these features were attached by the featurizer above — verify.
X = np.stack([
    example.get("text_features") for example in training_data.intent_examples
])

defaults = {
    # C parameter of the svm - cross validation will select the best value
    "C": [1, 2, 5, 10, 20, 100],

    # the kernels to use for the svm training - cross validation will
    # decide which one of them performs best
Exemplo n.º 14
0
def test_duckling_entity_extractor(component_builder):
    """Exercise DucklingHTTPExtractor against a mocked duckling endpoint.

    Three scenarios are covered: extraction without a reference time,
    extraction from a message carrying an explicit time, and filtering of
    the mocked response down to the configured dimensions.

    HTTP mocking is switched off again in a ``finally`` clause so that the
    patched sockets and registered responses do not leak into other tests,
    even when an assertion fails.
    """
    httpretty.register_uri(
        httpretty.POST,
        "http://localhost:8000/parse",
        body="""[{"body":"Today","start":0,"value":{"values":[{
             "value":"2018-11-13T00:00:00.000-08:00","grain":"day",
             "type":"value"}],"value":"2018-11-13T00:00:00.000-08:00",
             "grain":"day","type":"value"},"end":5,
             "dim":"time","latent":false},{"body":"the 5th","start":9,
             "value":{"values":[{
             "value":"2018-12-05T00:00:00.000-08:00","grain":"day",
             "type":"value"},
             {"value":"2019-01-05T00:00:00.000-08:00","grain":"day",
             "type":"value"},
             {"value":"2019-02-05T00:00:00.000-08:00","grain":"day",
             "type":"value"}],
             "value":"2018-12-05T00:00:00.000-08:00","grain":"day",
             "type":"value"},"end":16,"dim":"time",
             "latent":false},{"body":"5th of May","start":13,"value":{
             "values":[{
             "value":"2019-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"},
             {"value":"2020-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"},
             {"value":"2021-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"}],
             "value":"2019-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"},"end":23,"dim":"time",
             "latent":false},{"body":"tomorrow","start":37,"value":{
             "values":[{
             "value":"2018-11-14T00:00:00.000-08:00","grain":"day",
             "type":"value"}],
             "value":"2018-11-14T00:00:00.000-08:00","grain":"day",
             "type":"value"},"end":45,"dim":"time",
             "latent":false}]"""
    )
    httpretty.enable()

    try:
        _config = RasaNLUModelConfig(
            {"pipeline": [{"name": "DucklingHTTPExtractor"}]}
        )
        _config.set_component_attr(0, dimensions=["time"], timezone="UTC",
                                   url="http://localhost:8000")
        duckling = component_builder.create_component(
            _config.for_component(0),
            _config)
        message = Message("Today is the 5th of May. Let us meet tomorrow.")
        duckling.process(message)
        entities = message.get("entities")
        assert len(entities) == 4

        # Test duckling with a defined date

        # Registering the same URL again replaces the mocked response.
        httpretty.register_uri(
            httpretty.POST,
            "http://localhost:8000/parse",
            body="""[{"body":"tomorrow","start":12,"value":{"values":[{
             "value":"2013-10-13T00:00:00.000Z","grain":"day",
             "type":"value"}],"value":"2013-10-13T00:00:00.000Z",
             "grain":"day","type":"value"},"end":20,
             "dim":"time","latent":false}]"""
        )

        # 1381536182 == 2013/10/12 02:03:02
        message = Message("Let us meet tomorrow.", time="1381536182")
        duckling.process(message)
        entities = message.get("entities")
        assert len(entities) == 1
        assert entities[0]["text"] == "tomorrow"
        assert entities[0]["value"] == "2013-10-13T00:00:00.000Z"

        # Test dimension filtering includes only specified dimensions
        _config = RasaNLUModelConfig(
            {"pipeline": [{"name": "DucklingHTTPExtractor"}]}
        )
        _config.set_component_attr(0, dimensions=["number"],
                                   url="http://localhost:8000")
        duckling_number = component_builder.create_component(
            _config.for_component(0),
            _config)
        # The mocked response holds a "time" and a "number" entity; only
        # the "number" one is expected to survive.
        httpretty.register_uri(
            httpretty.POST,
            "http://localhost:8000/parse",
            body="""[{"body":"Yesterday","start":0,"value":{"values":[{
            "value":"2019-02-28T00:00:00.000+01:00","grain":"day",
            "type":"value"}],"value":"2019-02-28T00:00:00.000+01:00",
            "grain":"day","type":"value"},"end":9,"dim":"time"},
            {"body":"5","start":21,"value":{"value":5,"type":"value"},
            "end":22,"dim":"number"}]"""
        )

        message = Message("Yesterday there were 5 people in a room")
        duckling_number.process(message)
        entities = message.get("entities")
        assert len(entities) == 1
        assert entities[0]["text"] == "5"
        assert entities[0]["value"] == 5
    finally:
        # Undo the global socket patching and drop the registered URIs.
        httpretty.disable()
        httpretty.reset()