Пример #1
0
def test_invalid_pipeline_template():
    """A made-up pipeline template name must be rejected at load time."""
    config_file = write_file_config({"pipeline": "my_made_up_name"})

    with pytest.raises(config.InvalidConfigError) as execinfo:
        config.load(config_file.name)
    assert "unknown pipeline template" in str(execinfo.value)
Пример #2
0
def test_invalid_config_json():
    """Malformed YAML in a config file must raise InvalidConfigError."""
    broken_yaml = """pipeline: [pretrained_embeddings_spacy"""  # unterminated list -> invalid yaml
    with tempfile.NamedTemporaryFile("w+", suffix="_tmp_config_file.json") as tmp:
        tmp.write(broken_yaml)
        tmp.flush()
        with pytest.raises(config.InvalidConfigError):
            config.load(tmp.name)
Пример #3
0
def test_invalid_config_json(tmp_path):
    """Loading a file containing broken YAML must raise InvalidConfigError."""
    broken_yaml = """pipeline: [pretrained_embeddings_spacy"""  # invalid yaml

    config_path = tmp_path / "tmp_config_file.json"
    config_path.write_text(broken_yaml)

    with pytest.raises(config.InvalidConfigError):
        config.load(str(config_path))
def train_eval_rasa_nlu_model(lang='en', cross=False, save=''):
    """Train a Rasa NLU model on SemEval 2020 task-1 data, optionally evaluating it.

    :param lang: abbreviated language name (e.g. 'en')
    :param cross: when True, also run Rasa's evaluation on the held-out
        test file produced by the data builder
    :param save: version tag used in the model/result directory names
    :rtype: None
    """
    from rasa.nlu.training_data import load_data
    from rasa.nlu.model import Trainer
    from rasa.nlu.components import ComponentBuilder
    from rasa.nlu import config
    from rasa.nlu.test import run_evaluation

    # NOTE(review): "converrt" looks like a typo for "convert", but the name
    # must match the config file on disk — kept unchanged.
    config_file = source_config / "config_rasa_converrt.yml"

    if cross:
        filename_results = source_result / "rasa_cross_semeval_2020_model_task1_{}".format(save)

        # presumably returns (train_file, test_file) in cross mode — the code
        # below indexes [0] for training and [1] for evaluation; verify in builder
        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_data = train_data_obj.build_rasa_data_task1()

        training_data = load_data(str(train_data[0]))
        builder = ComponentBuilder(use_cache=True)  
        trainer = Trainer(config.load(str(config_file)), builder)
        
        print("--> Training patent data with Rasa...")
        trainer.train(training_data, num_threads=8, n_jobs=-1, verbose=True)
        
        print("--> Saving model trained with Rasa (Rasa)...")
        model_directory = trainer.persist(filename_results)
        
        print("--> Evaluating training data with Rasa metrics (Cross-validation)...")
        import os
        from datetime import datetime
        filename_test = str(train_data[1])
        print(filename_test)
        # timestamped output directory so repeated runs don't overwrite results
        dmtime = "test_{}_{}".format(save, datetime.now().strftime("%Y%m%d-%H%M%S"))
        out_test = source_result / "rasa_cross_evaluation_task1" / dmtime
        # pick the most recently persisted model directory (nlu_*) for evaluation
        model_directory = sorted(filename_results.glob("nlu_*"), key=os.path.getmtime)[-1] 
        run_evaluation(filename_test, str(model_directory), output_directory=str(out_test))

    else:
        filename_results = source_result / "rasa_semeval_2020_model_task1_{}".format(save)
        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_file = train_data_obj.build_rasa_data_task1()

        training_data = load_data(train_file)
        builder = ComponentBuilder(use_cache=True)  
        trainer = Trainer(config.load(str(config_file)), builder)
        
        print("--> Training patent data with Rasa...")
        trainer.train(training_data, num_threads=8, verbose=True, n_jobs=-1, fixed_model_name="nlu")
        
        print("--> Saving model trained with Rasa (Rasa)...")
        model_directory = trainer.persist(filename_results)
Пример #5
0
def test_set_attr_on_component(default_config):
    """set_component_attr must only touch the targeted component."""
    cfg = config.load("sample_configs/config_pretrained_embeddings_spacy.yml")
    cfg.set_component_attr(6, C=324)

    # component 1 is untouched; component 6 gains the new attribute
    assert cfg.for_component(1) == {"name": "SpacyTokenizer"}
    assert cfg.for_component(6) == {"name": "SklearnIntentClassifier", "C": 324}
Пример #6
0
def test_run_cv_evaluation():
    """Cross-validation must report every metric for each of the n folds."""
    td = training_data.load_data("data/examples/rasa/demo-rasa.json")
    nlu_config = config.load(
        "sample_configs/config_pretrained_embeddings_spacy.yml")

    n_folds = 2
    intent_results, entity_results = cross_validate(td, n_folds, nlu_config)

    for metric in ("Accuracy", "Precision", "F1-score"):
        assert len(intent_results.train[metric]) == n_folds
        assert len(intent_results.test[metric]) == n_folds
        assert len(
            entity_results.train["CRFEntityExtractor"][metric]) == n_folds
        assert len(
            entity_results.test["CRFEntityExtractor"][metric]) == n_folds
Пример #7
0
def train(nlu_config: Union[Text, RasaNLUModelConfig],
          data: Text,
          path: Optional[Text] = None,
          project: Optional[Text] = None,
          fixed_model_name: Optional[Text] = None,
          storage: Optional[Text] = None,
          component_builder: Optional[ComponentBuilder] = None,
          training_data_endpoint: Optional[EndpointConfig] = None,
          **kwargs: Any) -> Tuple[Trainer, Interpreter, Text]:
    """Load the config and data, run training, and optionally persist the model."""

    # A config path is loaded; an already-built config object is used directly.
    if isinstance(nlu_config, str):
        nlu_config = config.load(nlu_config)

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(nlu_config, component_builder)
    persistor = create_persistor(storage)

    # Training data comes from an HTTP endpoint when one is configured,
    # otherwise from the local path.
    if training_data_endpoint is None:
        training_data = load_data(data, nlu_config.language)
    else:
        training_data = load_data_from_endpoint(training_data_endpoint,
                                                nlu_config.language)

    interpreter = trainer.train(training_data, **kwargs)

    persisted_path = (trainer.persist(path, persistor, project,
                                      fixed_model_name)
                      if path else None)

    return trainer, interpreter, persisted_path
Пример #8
0
def train_nlu(data, configs, model_dir):
    """Train an NLU model and persist it under *model_dir* as 'nlu'."""
    trainer = Trainer(config.load(configs))
    trainer.train(load_data(data))
    model_directory = trainer.persist(model_dir, fixed_model_name="nlu")
    logger.info(f"Model trained. Stored in '{model_directory}'.")
    return model_directory
Пример #9
0
def test_run_cv_evaluation_with_response_selector():
    """CV with a response selector reports per-fold metrics; no entity results."""
    training_data_obj = training_data.load_data("data/examples/rasa/demo-rasa.md")
    training_data_responses_obj = training_data.load_data(
        "data/examples/rasa/demo-rasa-responses.md"
    )
    training_data_obj = training_data_obj.merge(training_data_responses_obj)
    training_data_obj.fill_response_phrases()

    nlu_config = config.load(
        "sample_configs/config_embedding_intent_response_selector.yml"
    )

    n_folds = 2
    intent_results, entity_results, response_selection_results = cross_validate(
        training_data_obj, n_folds, nlu_config
    )

    for metric in ("Accuracy", "Precision", "F1-score"):
        assert len(intent_results.train[metric]) == n_folds
        assert len(intent_results.test[metric]) == n_folds
        assert len(response_selection_results.train[metric]) == n_folds
        assert len(response_selection_results.test[metric]) == n_folds

    # No entity extractor in pipeline
    assert len(entity_results.train) == 0
    assert len(entity_results.test) == 0
Пример #10
0
def test_train_docker_and_docs_configs(config_file: Text):
    """Each shipped config must load and agree with its raw YAML contents."""
    raw_content = io_utils.read_yaml_file(config_file)
    loaded_config = config.load(config_file)

    assert len(loaded_config.component_names) > 1
    assert loaded_config.language == raw_content["language"]
Пример #11
0
def test_pipeline_looksup_registry():
    """A template name in the config must expand via the pipeline registry."""
    template = list(registered_pipeline_templates)[0]
    f = write_file_config({"pipeline": template})
    final_config = config.load(f.name)

    names = [component.get("name") for component in final_config.pipeline]
    assert names == registered_pipeline_templates[template]
Пример #12
0
def train_nlu(data_path, configs, model_path):
    """Train an NLU model, persist it as 'nlu', then evaluate on the training data."""
    logging.basicConfig(filename=logfile, level=logging.DEBUG)
    trainer = Trainer(config.load(configs))
    trainer.train(load_data(data_path))
    model_directory = trainer.persist(model_path, fixed_model_name='nlu')
    run_evaluation(data_path, model_directory)
Пример #13
0
def train_nlu():
    """Train the NLU model from the local data and persist it as 'current'."""
    nlu_data = load_data('./data/nlu.md')
    trainer = Trainer(config.load("config.yml"))
    trainer.train(nlu_data)
    return trainer.persist('./models/nlu/',
                           fixed_model_name="current")
Пример #14
0
def test_pipeline_registry_lookup(pipeline_template: Text):
    """The loaded pipeline must equal the registered template, key order aside."""
    f = write_file_config({"pipeline": pipeline_template})
    final_config = config.load(f.name)
    components = list(final_config.pipeline)

    expected = registered_pipeline_templates[pipeline_template]
    # compare via canonical JSON so dict key order is irrelevant
    assert json.dumps(components, sort_keys=True) == json.dumps(expected,
                                                                sort_keys=True)
Пример #15
0
def load_training_data(data_file="../data/testData.json",
                       config_file="../configs/config_spacy.yml"):
    """Train a model from *data_file* using *config_file* and persist it.

    :return: path of the folder the trained model was persisted to
    """
    trainer = Trainer(config.load(config_file))
    trainer.train(load_data(data_file))

    # where model_directory points to the model folder
    model_directory = trainer.persist('./projects/default/')
    return model_directory
Пример #16
0
def train_nlu(lang="en", production_build=False):
    """Train an NLU model for *lang*; name it by build type."""
    # production builds overwrite the 'production' model, others the 'latest' one
    model_name = "production" if production_build else "latest"

    training_data = load_data('./data/nlu/' + lang + "/")
    trainer = Trainer(config.load("config.yml"))
    trainer.train(training_data)
    trainer.persist('./models/nlu/' + lang + "/", fixed_model_name=model_name)
Пример #17
0
def test_train_featurizer():
    """Training with the USE-featurizer config must yield a non-empty pipeline."""
    trained, _, _ = train.do_train(
        config.load('sample_configs/sample_use_featurizer.yml'),
        data='data/examples/dialogflow',
        path='models',
        project='current',
        fixed_model_name='use-featurizer')

    assert trained.pipeline
Пример #18
0
    def train(cfg_name, project_name):
        """Train a model from the config at *cfg_name* and persist it
        under *project_name* (uses `component_builder` and `data` from the
        enclosing scope)."""
        # Fixed: the original rebound the local name `training_data` over the
        # imported `training_data` module, shadowing it for the rest of the body.
        from rasa.nlu import training_data as td_loader

        cfg = config.load(cfg_name)
        trainer = Trainer(cfg, component_builder)
        td = td_loader.load_data(data)

        trainer.train(td)
        trainer.persist("test_projects", project_name=project_name)
def train_model(td_file, config_file, model_dir):
    """Train a model from training data and config.

    Creates the model and returns the path to it for evaluation.
    """
    trainer = Trainer(config.load(config_file))
    trainer.train(load_data(td_file))
    return trainer.persist(model_dir)
Пример #20
0
def test_nlu_interpreter():
    """A model trained on the sample data should classify 'hello' as greet."""
    training_data = load_data("data")
    trainer = Trainer(config.load("config.yml"))
    interpreter = trainer.train(training_data)
    test_interpreter_dir = trainer.persist("./tests/models", project_name="nlu")

    parsing = interpreter.parse('hello')
    assert parsing['intent']['name'] == 'greet'
    assert test_interpreter_dir
Пример #21
0
async def test_train_docker_and_docs_configs(config_file: Text,
                                             monkeypatch: MonkeyPatch):
    """Every shipped docker/docs config must load with its declared language."""
    # prevent autoconfig from writing the resolved config back to disk
    monkeypatch.setattr(autoconfig, "_dump_config", Mock())

    importer = RasaFileImporter(config_file=config_file)
    imported_config = await importer.get_config()
    loaded_config = config.load(imported_config)

    assert len(loaded_config.component_names) > 1
    assert loaded_config.language == imported_config["language"]
Пример #22
0
def load_entity_extractor(data_file, config_file):
    """Train a CRF entity extractor and persist it.

    :param data_file: path to the NLU training data
    :param config_file: path to the model configuration file
    :return: directory the trained extractor was persisted to
    """
    training_data = load_data(data_file)
    configuration = config.load(config_file)

    # Removed: an unused ComponentBuilder instance and two commented-out
    # experiments that were never executed.
    crf = CRFEntityExtractor()
    crf.train(training_data, configuration)
    model_directory = crf.persist('./models/default/')
    return model_directory
Пример #23
0
def _train_nlu_with_validated_data(
    config: Dict[Text, Text],
    nlu_data_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    retrain_nlu: Union[bool, List[Text]] = True
) -> Optional[Text]:
    """Train per-language NLU models with validated training and config data.

    :param config: mapping from language code to config-file path
    :param nlu_data_directory: directory containing per-language NLU data files
    :param output: where the packaged model goes when no train_path is given
    :param train_path: existing training directory; a temp dir is used if None
    :param fixed_model_name: optional fixed name for the packaged model
    :param retrain_nlu: True/False to retrain all/no languages, or a list of
        language codes to retrain
    :return: the train path, or the packaged model path when only NLU trained
    """

    import rasa.nlu.train
    import re

    with ExitStack() as stack:
        models = {}
        from rasa.nlu import config as cfg_loader

        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        # Language code: the two letters right before the file extension.
        pattern = r'(\w\w)*(?=\.)'
        for file in os.listdir(nlu_data_directory):
            lang = re.search(pattern, file).groups()[0]
            # Fixed: `isinstance(retrain_nlu, bool) and retrain_nlu or lang in
            # retrain_nlu` parsed as `(A and B) or C`, so retrain_nlu=False fell
            # through to `lang in False` and raised TypeError.
            should_train = (retrain_nlu if isinstance(retrain_nlu, bool)
                            else lang in retrain_nlu)
            if should_train:
                nlu_file_path = os.path.join(nlu_data_directory, file)
                print_color("Start training {} NLU model ...".format(lang), color=bcolors.OKBLUE)
                nlu_config = cfg_loader.load(config[lang])
                nlu_config.language = lang
                _, models[lang], _ = rasa.nlu.train(
                    nlu_config, nlu_file_path, _train_path, fixed_model_name="nlu-{}".format(lang)
                )
            else:
                print_color("{} NLU data didn't change, skipping training...".format(lang), color=bcolors.OKBLUE)

        print_color("NLU model training completed.", color=bcolors.OKBLUE)

        if train_path is None:
            # Only NLU was trained
            new_fingerprint = model.model_fingerprint(
                config, nlu_data=nlu_data_directory
            )

            return _package_model(
                new_fingerprint=new_fingerprint,
                output_path=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path
Пример #24
0
    def comps(self):
        """List the component names of the sample tokenizer config.

        $ python -m saai.saai_cli comps
        $ python -m saai comps

        :return: list of pipeline component names
        """
        from rasa.nlu import config
        loaded = config.load('saai/sample_configs/config_tokenizer.yml')
        return loaded.component_names
Пример #25
0
 def __init__(self):
     """Load the persisted NLU model; train a fresh one if loading fails."""
     try:
         self.interpreter = Interpreter.load("./models/nlu/current")
     except Exception:
         # no usable model on disk: train from scratch and persist it
         nlu_data = load_data("./data/nlu.md")
         trainer = Trainer(config.load("config.yml"))
         self.interpreter = trainer.train(nlu_data)
         model_directory = trainer.persist("./models/nlu",
                                           fixed_model_name="current")
     self.music_verbs = ['Riproduci', 'Suona', 'Fai partire', 'Avvia']
Пример #26
0
def train_nlu():
    """Train the NLU model from data/nlu.md and persist it as 'current'."""
    from rasa.nlu import config
    from rasa.nlu.model import Trainer
    from rasa.nlu.training_data import load_data

    trainer = Trainer(config.load("config.yml"))
    trainer.train(load_data('data/nlu.md'))

    return trainer.persist('models/nlu/',
                           fixed_model_name="current")
Пример #27
0
 def train(self):
     """Train the NLU pipeline on self.data and persist the resulting model."""
     # load the nlu training samples
     nlu_samples = load_data(self.data)
     # build a trainer around the configured pipeline
     trainer = Trainer(config.load(self.pipeline))
     # train, keeping the interpreter for later parsing
     self.interpreter = trainer.train(nlu_samples)
     # store the model for future use
     self.model_directory = trainer.persist(
         "opennlu/data/model/rasa",
         fixed_model_name=self.name,
         persist_nlu_training_data=nlu_samples)
Пример #28
0
def test_override_defaults_supervised_embeddings_pipeline():
    """Values in the config file must override component defaults."""
    cfg = config.load("data/test/config_embedding_test.yml")
    builder = ComponentBuilder()

    first = builder.create_component(cfg.for_component(0), cfg)
    assert first.max_ngram == 3

    second = builder.create_component(cfg.for_component(1), cfg)
    assert second.epochs == 10
Пример #29
0
def test_validate_required_components_from_data(
        config_path: Text, data_path: Text,
        expected_warning_excerpts: List[Text]):
    """Every expected excerpt must appear in the single emitted UserWarning."""
    loaded_config = config.load(config_path)
    trainer = Trainer(loaded_config)
    training_data = load_data(data_path)
    with pytest.warns(UserWarning) as record:
        components.validate_required_components_from_data(
            trainer.pipeline, training_data)
    assert len(record) == 1
    # Fixed: the closing bracket was misplaced — `all([excerpt in ...] for ...)`
    # iterated over one-element lists (always truthy), so it could never fail.
    warning_text = record[0].message.args[0]
    assert all(excerpt in warning_text
               for excerpt in expected_warning_excerpts)
Пример #30
0
def train_test(td_file, config_file, model_dir):
    """Split the data 60/40, train on the train split, evaluate on the test split."""
    td = load_data(td_file)
    trainer = Trainer(config.load(config_file))

    train, test = td.train_test_split(train_frac=0.6)
    trainer.train(train)
    model_loc = trainer.persist(model_dir)

    # keep both splits on disk for inspection / reuse
    with open("data/tmp/temp_test.json", "w", encoding="utf8") as test_out:
        test_out.write(test.as_json())
    with open("data/temp_train.json", "w", encoding="utf8") as train_out:
        train_out.write(train.as_json())

    evaluate_model("data/tmp/temp_test.json", model_loc)