Пример #1
0
def test_spacy_ner_extractor(component_builder, spacy_nlp):
    """Exercise SpacyEntityExtractor with and without a dimension filter.

    Both runs expect exactly one entity on the processed message and compare
    it against a fully spelled-out entity dict. The second run sets
    ``dimensions=["PERSON"]`` on the component before building it.
    """

    def build_extractor(**component_attrs):
        # Every run gets its own config object built from a fresh dict.
        config = RasaNLUModelConfig(
            {"pipeline": [{"name": "SpacyEntityExtractor"}]})
        if component_attrs:
            config.set_component_attr(0, **component_attrs)
        return component_builder.create_component(
            config.for_component(0), config)

    # Run 1: component built without any extra attributes.
    extractor = build_extractor()
    message = Message(
        "anywhere in the West",
        {
            "intent": "restaurant_search",
            "entities": [],
            "spacy_doc": spacy_nlp("anywhere in the west"),
        },
    )
    extractor.process(message, spacy_nlp=spacy_nlp)

    expected_location = {
        'entity': 'LOC',
        'value': 'West',
        'start': 16,
        'end': 20,
        'confidence': None,
        'extractor': 'SpacyEntityExtractor',
    }
    assert len(message.get("entities", [])) == 1
    assert message.get("entities")[0] == expected_location

    # Run 2: dimension filtering includes only the specified dimensions.
    message = Message(
        "anywhere in the West with Sebastian Thrun",
        {
            "intent": "example_intent",
            "entities": [],
            "spacy_doc": spacy_nlp(
                "anywhere in the West with Sebastian Thrun"),
        },
    )
    extractor = build_extractor(dimensions=["PERSON"])
    extractor.process(message, spacy_nlp=spacy_nlp)

    expected_person = {
        'entity': 'PERSON',
        'value': 'Sebastian Thrun',
        'start': 26,
        'end': 41,
        'confidence': None,
        'extractor': 'SpacyEntityExtractor',
    }
    assert len(message.get("entities", [])) == 1
    assert message.get("entities")[0] == expected_person
Пример #2
0
def test_duckling_entity_extractor_and_synonyms(component_builder):
    """Run DucklingHTTPExtractor and then EntitySynonymMapper on one message.

    The duckling component is restricted to the ``number`` dimension. The
    test passes as long as both ``process`` calls complete without raising.
    """
    pipeline = [
        {"name": "DucklingHTTPExtractor"},
        {"name": "EntitySynonymMapper"},
    ]
    _config = RasaNLUModelConfig({"pipeline": pipeline})
    _config.set_component_attr(0, dimensions=["number"])

    # Build the components in pipeline order: extractor first, mapper second.
    duckling, synonyms = (
        component_builder.create_component(_config.for_component(idx), _config)
        for idx in (0, 1)
    )

    message = Message("He was 6 feet away")
    duckling.process(message)
    # checks that the synonym processor
    # can handle entities that have int values
    synonyms.process(message)
    assert message is not None
Пример #3
0
def test_duckling_entity_extractor_and_synonyms(component_builder):
    """Feed a message through the duckling extractor and the synonym mapper.

    Only the ``number`` dimension is configured for the extractor. The final
    assertion merely confirms the message object survived both steps.
    """
    extractor_entry = {"name": "DucklingHTTPExtractor"}
    mapper_entry = {"name": "EntitySynonymMapper"}
    _config = RasaNLUModelConfig(
        {"pipeline": [extractor_entry, mapper_entry]})
    _config.set_component_attr(0, dimensions=["number"])

    components = []
    for position in (0, 1):
        component_cfg = _config.for_component(position)
        components.append(
            component_builder.create_component(component_cfg, _config))

    message = Message("He was 6 feet away")
    # The synonym mapper runs second, so it sees whatever the extractor
    # produced -- this checks it can handle entities that have int values.
    for component in components:
        component.process(message)
    assert message is not None
Пример #4
0
    def create(cls, cfg: RasaNLUModelConfig) -> 'Component':
        """Instantiate this component from the model configuration.

        The configured language is checked with ``cls.can_handle_language``
        first; only then is the component-specific configuration (merged
        with ``cls.defaults`` by ``cfg.for_component``) passed to the
        constructor.

        Raises:
            UnsupportedLanguageError: if the configured language is rejected
                by ``cls.can_handle_language``.
        """
        configured_language = cfg.language

        if cls.can_handle_language(configured_language):
            component_cfg = cfg.for_component(cls.name, cls.defaults)
            return cls(component_cfg)

        # language check failed
        raise UnsupportedLanguageError(cls.name, configured_language)
Пример #5
0
def test_spacy_ner_extractor(component_builder, spacy_nlp):
    """Verify the entities SpacyEntityExtractor attaches to a message.

    Scenario one uses the component as configured by name only; scenario two
    additionally sets ``dimensions=["PERSON"]``. Each scenario must yield
    exactly one entity matching the expected dict.
    """
    # --- scenario 1: no dimension filter -------------------------------
    _config = RasaNLUModelConfig(
        {"pipeline": [{"name": "SpacyEntityExtractor"}]}
    )
    ext = component_builder.create_component(_config.for_component(0), _config)

    west_data = {
        "intent": "restaurant_search",
        "entities": [],
        "spacy_doc": spacy_nlp("anywhere in the west"),
    }
    example = Message("anywhere in the West", west_data)
    ext.process(example, spacy_nlp=spacy_nlp)

    location_entity = {
        'extractor': 'SpacyEntityExtractor',
        'entity': 'LOC',
        'value': 'West',
        'start': 16,
        'end': 20,
        'confidence': None,
    }
    assert len(example.get("entities", [])) == 1
    assert example.get("entities")[0] == location_entity

    # --- scenario 2: dimension filtering includes only specified dimensions
    person_text = "anywhere in the West with Sebastian Thrun"
    person_data = {
        "intent": "example_intent",
        "entities": [],
        "spacy_doc": spacy_nlp("anywhere in the West with Sebastian Thrun"),
    }
    example = Message(person_text, person_data)

    _config = RasaNLUModelConfig(
        {"pipeline": [{"name": "SpacyEntityExtractor"}]}
    )
    _config.set_component_attr(0, dimensions=["PERSON"])
    ext = component_builder.create_component(_config.for_component(0), _config)
    ext.process(example, spacy_nlp=spacy_nlp)

    person_entity = {
        'extractor': 'SpacyEntityExtractor',
        'entity': 'PERSON',
        'value': 'Sebastian Thrun',
        'start': 26,
        'end': 41,
        'confidence': None,
    }
    assert len(example.get("entities", [])) == 1
    assert example.get("entities")[0] == person_entity
Пример #6
0
    def _build_pipeline(cfg: RasaNLUModelConfig,
                        component_builder: ComponentBuilder
                        ) -> List[Component]:
        """Instantiate one component per entry of ``cfg.pipeline``.

        Each pipeline position is resolved through ``cfg.for_component`` and
        handed, together with the full config, to
        ``component_builder.create_component``. The resulting components are
        returned in pipeline order.
        """
        return [
            component_builder.create_component(cfg.for_component(position), cfg)
            for position in range(len(cfg.pipeline))
        ]
Пример #7
0
 def create(cls,
            cfg: RasaNLUModelConfig) -> 'Word2vecKerasIntentClassifier':
     """Build the classifier component from its pipeline configuration.

     Reads ``clf_config_file_path`` (required), ``clf_file_path`` and
     ``labels`` (both optional) from the component configuration, parses the
     JSON file at ``clf_config_file_path`` and, when ``clf_file_path`` is
     set, loads the stored model and wraps it in ``KerasBaseModel``.
     """
     settings: Dict[str, Any] = cfg.for_component(cls.name)
     LOGGER.info(f'CLASSIFIER CONFIG: {settings}')

     config_path: str = settings['clf_config_file_path']
     model_path: Optional[str] = settings.get('clf_file_path')
     labels: Optional[List[str]] = settings.get('labels')

     with open(config_path) as config_file:
         clf_config: Dict[str, Any] = json.load(config_file)

     # Without a stored model path the component is created with clf=None.
     clf = (None if model_path is None
            else KerasBaseModel(config_path, load_model(model_path)))
     return cls(settings, clf_config, clf, labels)
Пример #8
0
    def create(cls, cfg: RasaNLUModelConfig) -> 'MitieNLP':
        """Build a MitieNLP component from the pipeline configuration.

        Reads the component's ``model`` setting, loads a MITIE
        ``total_word_feature_extractor`` from that file and passes it to
        ``cls.ensure_proper_language_model`` before constructing the
        component.

        Raises:
            Exception: if no ``model`` value is configured for the component.
        """
        # Imported lazily so the dependency is only needed when this
        # component is actually created.
        import mitie

        component_conf = cfg.for_component(cls.name, cls.defaults)
        model_file = component_conf.get("model")
        if not model_file:
            # Each fragment ends with a space so the concatenated sentences
            # stay separated in the rendered message.
            raise Exception("The MITIE component 'nlp_mitie' needs "
                            "the configuration value for 'model'. "
                            "Please take a look at the "
                            "documentation in the pipeline section "
                            "to get more info about this "
                            "parameter.")
        extractor = mitie.total_word_feature_extractor(model_file)
        cls.ensure_proper_language_model(extractor)

        return MitieNLP(component_conf, extractor)
Пример #9
0
    def create(cls, cfg: RasaNLUModelConfig) -> 'SpacyNLP':
        """Build a SpacyNLP component by loading the configured spaCy model.

        The model name comes from the component's ``model`` setting. When
        that is empty, ``cfg.language`` is used instead and written back
        into the component configuration. The loaded model is passed to
        ``cls.ensure_proper_language_model`` before the component is built.
        """
        import spacy

        component_conf = cfg.for_component(cls.name, cls.defaults)

        model_name = component_conf.get("model")
        if not model_name:
            # no explicit model: fall back to the language string and
            # record that choice in the component configuration
            model_name = component_conf["model"] = cfg.language

        message = "Trying to load spacy model with name '{}'"
        logger.info(message.format(model_name))

        nlp = spacy.load(model_name, parser=False)
        cls.ensure_proper_language_model(nlp)
        return SpacyNLP(component_conf, nlp)
Пример #10
0
    def train(self, training_data: TrainingData, config: RasaNLUModelConfig,
              **kwargs: Any) -> None:
        """Train the entity model when the data holds entity annotations.

        The component configuration is refreshed from ``config`` and
        validated on every call. If ``training_data.entity_examples`` is
        empty, nothing else happens.
        """
        self.component_config = config.for_component(self.name, self.defaults)
        self._validate_configuration()

        # Without at least one example carrying an entity annotation there
        # is nothing to train on.
        if not training_data.entity_examples:
            return

        examples = training_data.training_examples
        self._check_spacy_doc(examples[0])

        # Drop pre-trained entity examples, then featurize. Note that this
        # trains on ALL examples, even the ones without annotations.
        trainable_examples = self.filter_trainable_entities(examples)
        self._train_model(self._create_dataset(trainable_examples))
Пример #11
0
def test_duckling_entity_extractor(component_builder):
    """Check DucklingHTTPExtractor against canned HTTP responses.

    Three scenarios are run against a mocked ``/parse`` endpoint:

    1. a sentence with four ``time`` entities,
    2. a message carrying an explicit reference ``time``,
    3. a component limited to the ``number`` dimension, which must drop the
       ``time`` entity contained in the response.

    The HTTP mocking is always switched off again at the end, even when an
    assertion fails.
    """
    duckling_url = "http://localhost:8000"
    parse_url = "http://localhost:8000/parse"

    four_times_body = """[{"body":"Today","start":0,"value":{"values":[{
             "value":"2018-11-13T00:00:00.000-08:00","grain":"day",
             "type":"value"}],"value":"2018-11-13T00:00:00.000-08:00",
             "grain":"day","type":"value"},"end":5,
             "dim":"time","latent":false},{"body":"the 5th","start":9,
             "value":{"values":[{
             "value":"2018-12-05T00:00:00.000-08:00","grain":"day",
             "type":"value"},
             {"value":"2019-01-05T00:00:00.000-08:00","grain":"day",
             "type":"value"},
             {"value":"2019-02-05T00:00:00.000-08:00","grain":"day",
             "type":"value"}],
             "value":"2018-12-05T00:00:00.000-08:00","grain":"day",
             "type":"value"},"end":16,"dim":"time",
             "latent":false},{"body":"5th of May","start":13,"value":{
             "values":[{
             "value":"2019-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"},
             {"value":"2020-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"},
             {"value":"2021-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"}],
             "value":"2019-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"},"end":23,"dim":"time",
             "latent":false},{"body":"tomorrow","start":37,"value":{
             "values":[{
             "value":"2018-11-14T00:00:00.000-08:00","grain":"day",
             "type":"value"}],
             "value":"2018-11-14T00:00:00.000-08:00","grain":"day",
             "type":"value"},"end":45,"dim":"time",
             "latent":false}]"""

    fixed_date_body = """[{"body":"tomorrow","start":12,"value":{"values":[{
             "value":"2013-10-13T00:00:00.000Z","grain":"day",
             "type":"value"}],"value":"2013-10-13T00:00:00.000Z",
             "grain":"day","type":"value"},"end":20,
             "dim":"time","latent":false}]"""

    time_and_number_body = """[{"body":"Yesterday","start":0,"value":{"values":[{
            "value":"2019-02-28T00:00:00.000+01:00","grain":"day",
            "type":"value"}],"value":"2019-02-28T00:00:00.000+01:00",
            "grain":"day","type":"value"},"end":9,"dim":"time"},
            {"body":"5","start":21,"value":{"value":5,"type":"value"},
            "end":22,"dim":"number"}]"""

    httpretty.register_uri(httpretty.POST, parse_url, body=four_times_body)
    httpretty.enable()
    try:
        _config = RasaNLUModelConfig(
            {"pipeline": [{
                "name": "DucklingHTTPExtractor"
            }]})
        _config.set_component_attr(0,
                                   dimensions=["time"],
                                   timezone="UTC",
                                   url=duckling_url)
        duckling = component_builder.create_component(
            _config.for_component(0), _config)
        message = Message("Today is the 5th of May. Let us meet tomorrow.")
        duckling.process(message)
        entities = message.get("entities")
        assert len(entities) == 4

        # Test duckling with a defined date

        httpretty.register_uri(httpretty.POST, parse_url,
                               body=fixed_date_body)

        # 1381536182 == 2013/10/12 02:03:02
        message = Message("Let us meet tomorrow.", time="1381536182")
        duckling.process(message)
        entities = message.get("entities")
        assert len(entities) == 1
        assert entities[0]["text"] == "tomorrow"
        assert entities[0]["value"] == "2013-10-13T00:00:00.000Z"

        # Test dimension filtering includes only specified dimensions
        _config = RasaNLUModelConfig(
            {"pipeline": [{
                "name": "DucklingHTTPExtractor"
            }]})
        _config.set_component_attr(0,
                                   dimensions=["number"],
                                   url=duckling_url)
        duckling_number = component_builder.create_component(
            _config.for_component(0), _config)
        httpretty.register_uri(httpretty.POST, parse_url,
                               body=time_and_number_body)

        message = Message("Yesterday there were 5 people in a room")
        duckling_number.process(message)
        entities = message.get("entities")
        assert len(entities) == 1
        assert entities[0]["text"] == "5"
        assert entities[0]["value"] == 5
    finally:
        # httpretty stays active until explicitly disabled; switch it off and
        # drop the registered URIs so whatever runs after this test is not
        # served these canned responses.
        httpretty.disable()
        httpretty.reset()
Пример #12
0
 def create(cls, config: RasaNLUModelConfig) -> 'LanguageSetter':
     """Construct the component from its configuration and the model language.

     The component configuration is obtained from ``config.for_component``
     using ``cls.name`` and ``cls.defaults``; ``config.language`` is passed
     as the second constructor argument.
     """
     component_config = config.for_component(cls.name, cls.defaults)
     language = config.language
     return cls(component_config, language)
Пример #13
0
    def create(cls, config: RasaNLUModelConfig) -> 'DucklingHTTPExtractor':
        """Construct the extractor from its configuration and the language.

        The component configuration comes from ``config.for_component``
        (called with ``cls.name`` and ``cls.defaults``); ``config.language``
        is handed to the constructor as the second argument.
        """
        component_config = config.for_component(cls.name, cls.defaults)
        language = config.language
        return cls(component_config, language)
Пример #14
0
def test_duckling_entity_extractor(component_builder):
    """Check DucklingHTTPExtractor against canned HTTP responses.

    Three scenarios are run against a mocked ``/parse`` endpoint:

    1. a sentence with four ``time`` entities,
    2. a message carrying an explicit reference ``time``,
    3. a component limited to the ``number`` dimension, which must drop the
       ``time`` entity contained in the response.

    The HTTP mocking is always switched off again at the end, even when an
    assertion fails.
    """
    duckling_url = "http://localhost:8000"
    parse_url = "http://localhost:8000/parse"

    four_times_body = """[{"body":"Today","start":0,"value":{"values":[{
             "value":"2018-11-13T00:00:00.000-08:00","grain":"day",
             "type":"value"}],"value":"2018-11-13T00:00:00.000-08:00",
             "grain":"day","type":"value"},"end":5,
             "dim":"time","latent":false},{"body":"the 5th","start":9,
             "value":{"values":[{
             "value":"2018-12-05T00:00:00.000-08:00","grain":"day",
             "type":"value"},
             {"value":"2019-01-05T00:00:00.000-08:00","grain":"day",
             "type":"value"},
             {"value":"2019-02-05T00:00:00.000-08:00","grain":"day",
             "type":"value"}],
             "value":"2018-12-05T00:00:00.000-08:00","grain":"day",
             "type":"value"},"end":16,"dim":"time",
             "latent":false},{"body":"5th of May","start":13,"value":{
             "values":[{
             "value":"2019-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"},
             {"value":"2020-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"},
             {"value":"2021-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"}],
             "value":"2019-05-05T00:00:00.000-07:00","grain":"day",
             "type":"value"},"end":23,"dim":"time",
             "latent":false},{"body":"tomorrow","start":37,"value":{
             "values":[{
             "value":"2018-11-14T00:00:00.000-08:00","grain":"day",
             "type":"value"}],
             "value":"2018-11-14T00:00:00.000-08:00","grain":"day",
             "type":"value"},"end":45,"dim":"time",
             "latent":false}]"""

    fixed_date_body = """[{"body":"tomorrow","start":12,"value":{"values":[{
             "value":"2013-10-13T00:00:00.000Z","grain":"day",
             "type":"value"}],"value":"2013-10-13T00:00:00.000Z",
             "grain":"day","type":"value"},"end":20,
             "dim":"time","latent":false}]"""

    time_and_number_body = """[{"body":"Yesterday","start":0,"value":{"values":[{
            "value":"2019-02-28T00:00:00.000+01:00","grain":"day",
            "type":"value"}],"value":"2019-02-28T00:00:00.000+01:00",
            "grain":"day","type":"value"},"end":9,"dim":"time"},
            {"body":"5","start":21,"value":{"value":5,"type":"value"},
            "end":22,"dim":"number"}]"""

    httpretty.register_uri(httpretty.POST, parse_url, body=four_times_body)
    httpretty.enable()
    try:
        _config = RasaNLUModelConfig(
            {"pipeline": [{"name": "DucklingHTTPExtractor"}]}
        )
        _config.set_component_attr(0, dimensions=["time"], timezone="UTC",
                                   url=duckling_url)
        duckling = component_builder.create_component(
            _config.for_component(0), _config)
        message = Message("Today is the 5th of May. Let us meet tomorrow.")
        duckling.process(message)
        entities = message.get("entities")
        assert len(entities) == 4

        # Test duckling with a defined date

        httpretty.register_uri(httpretty.POST, parse_url,
                               body=fixed_date_body)

        # 1381536182 == 2013/10/12 02:03:02
        message = Message("Let us meet tomorrow.", time="1381536182")
        duckling.process(message)
        entities = message.get("entities")
        assert len(entities) == 1
        assert entities[0]["text"] == "tomorrow"
        assert entities[0]["value"] == "2013-10-13T00:00:00.000Z"

        # Test dimension filtering includes only specified dimensions
        _config = RasaNLUModelConfig(
            {"pipeline": [{"name": "DucklingHTTPExtractor"}]}
        )
        _config.set_component_attr(0, dimensions=["number"],
                                   url=duckling_url)
        duckling_number = component_builder.create_component(
            _config.for_component(0),
            _config)
        httpretty.register_uri(httpretty.POST, parse_url,
                               body=time_and_number_body)

        message = Message("Yesterday there were 5 people in a room")
        duckling_number.process(message)
        entities = message.get("entities")
        assert len(entities) == 1
        assert entities[0]["text"] == "5"
        assert entities[0]["value"] == 5
    finally:
        # httpretty stays active until explicitly disabled; switch it off and
        # drop the registered URIs so whatever runs after this test is not
        # served these canned responses.
        httpretty.disable()
        httpretty.reset()