Пример #1
0
def train_nlu(data_path, configs, model_path):
    """Train an NLU model, persist it under the name 'nlu' and evaluate it.

    Args:
        data_path: Location of the training data; the same value is passed
            to ``run_evaluation`` afterwards.
        configs: Pipeline configuration handed to ``config.load``.
        model_path: Target passed to ``trainer.persist``.
    """
    # NOTE: `logfile` is not a parameter; it must be defined outside this
    # function.
    logging.basicConfig(filename=logfile, level=logging.DEBUG)

    examples = load_data(data_path)
    pipeline_config = config.load(configs)
    nlu_trainer = Trainer(pipeline_config)
    nlu_trainer.train(examples)

    persisted_dir = nlu_trainer.persist(model_path, fixed_model_name='nlu')
    run_evaluation(data_path, persisted_dir)
Пример #2
0
def train_nlu():
    """Train on ``./data/nlu.md`` with ``config.yml`` and persist as "current".

    Returns:
        The value returned by ``trainer.persist`` for ``./models/nlu/``.
    """
    examples = load_data('./data/nlu.md')
    pipeline_config = config.load("config.yml")
    nlu_trainer = Trainer(pipeline_config)
    nlu_trainer.train(examples)
    return nlu_trainer.persist('./models/nlu/', fixed_model_name="current")
Пример #3
0
def train_nlu(data, configs, model_dir):
    """Train an NLU model, persist it under the name "nlu" and log where.

    Args:
        data: Training data source passed to ``load_data``.
        configs: Pipeline configuration passed to ``config.load``.
        model_dir: Target passed to ``trainer.persist``.

    Returns:
        The value returned by ``trainer.persist``.
    """
    examples = load_data(data)
    pipeline_config = config.load(configs)

    nlu_trainer = Trainer(pipeline_config)
    nlu_trainer.train(examples)

    model_directory = nlu_trainer.persist(model_dir, fixed_model_name="nlu")
    logger.info(f"Model trained. Stored in '{model_directory}'.")
    return model_directory
Пример #4
0
def trained_nlu_model(request):
    """Train a keyword-pipeline NLU model, package it and schedule cleanup.

    Args:
        request: Object exposing ``addfinalizer``; the cleanup callback is
            registered on it.

    Returns:
        The package path ``NLU_MODEL_PATH/NLU_MODEL_NAME``.
    """
    keyword_config = RasaNLUModelConfig({"pipeline": "keyword"})
    keyword_trainer = Trainer(keyword_config)
    examples = training_data.load_data(DEFAULT_DATA_PATH)
    keyword_trainer.train(examples)

    persisted_dir = keyword_trainer.persist(NLU_MODEL_PATH)

    nlu_directory = data.get_nlu_directory(DEFAULT_DATA_PATH)
    output_path = os.path.join(NLU_MODEL_PATH, NLU_MODEL_NAME)
    fingerprint = model.model_fingerprint(NLU_DEFAULT_CONFIG_PATH,
                                          nlu_data=nlu_directory)
    model.create_package_rasa(persisted_dir, output_path, fingerprint)

    def fin():
        # Remove whatever this helper left on disk, in the original order.
        for leftover in (NLU_MODEL_PATH, output_path):
            if os.path.exists(leftover):
                shutil.rmtree(leftover)

    request.addfinalizer(fin)

    return output_path
Пример #5
0
    def train(cfg_name, project_name):
        """Train a model from ``cfg_name`` and persist it in ``test_projects``.

        ``data`` and ``component_builder`` are not parameters; they are
        resolved from the surrounding scope.
        """
        from rasa.nlu import training_data

        loaded_config = config.load(cfg_name)
        nlu_trainer = Trainer(loaded_config, component_builder)
        # Keep the module name intact instead of rebinding it to the data.
        examples = training_data.load_data(data)

        nlu_trainer.train(examples)
        nlu_trainer.persist("test_projects", project_name=project_name)
Пример #6
0
def train_nlu(lang="en", production_build=False):
    """Train the NLU model for one language and persist it.

    Args:
        lang: Language folder name used below ``./data/nlu/`` and
            ``./models/nlu/``.
        production_build: When truthy the model is stored as "production",
            otherwise as "latest".
    """
    model_name = "production" if production_build else "latest"

    examples = load_data('./data/nlu/' + lang + "/")
    pipeline_config = config.load("config.yml")
    nlu_trainer = Trainer(pipeline_config)
    nlu_trainer.train(examples)
    nlu_trainer.persist('./models/nlu/' + lang + "/", fixed_model_name=model_name)
def train_model(td_file, config_file, model_dir):
    """Train a model from the given training data and config.

    Args:
        td_file: Training data source passed to ``load_data``.
        config_file: Configuration passed to ``config.load``.
        model_dir: Target passed to ``trainer.persist``.

    Returns:
        The path to the persisted model, for later evaluation.
    """
    examples = load_data(td_file)
    pipeline_config = config.load(config_file)

    model_trainer = Trainer(pipeline_config)
    model_trainer.train(examples)

    return model_trainer.persist(model_dir)
Пример #8
0
def load_training_data(data_file="../data/testData.json",
                       config_file="../configs/config_spacy.yml"):
    """Train a model from ``data_file`` and persist it to ``./projects/default/``.

    Args:
        data_file: Training data source passed to ``load_data``.
        config_file: Configuration passed to ``config.load``.

    Returns:
        The directory of the persisted model folder.
    """
    examples = load_data(data_file)
    pipeline_config = config.load(config_file)

    model_trainer = Trainer(pipeline_config)
    model_trainer.train(examples)

    # persist() hands back the location of the model folder.
    return model_trainer.persist('./projects/default/')
Пример #9
0
def train_update(
    repository_version_language_id, by_user, repository_authorization, from_queue="celery"
):  # pragma: no cover
    """Train a model for a repository version and push it via the persistor.

    The backend is told that training starts, the examples are fetched and
    turned into messages, a pipeline is built from the update request, and
    the trained model is persisted through ``BothubPersistor``.

    On any exception the failure is logged, the backend is notified through
    ``request_backend_trainfail_nlu`` and the exception is re-raised. The
    captured log (``pl.getvalue()``) is always sent to the backend.
    """
    update_request = backend().request_backend_start_training_nlu(
        repository_version_language_id, by_user, repository_authorization, from_queue
    )

    examples_list = get_examples_request(repository_version_language_id, repository_authorization)

    with PokeLogging() as pl:
        try:
            examples = [
                Message.build(
                    text=item.get("text"),
                    intent=item.get("intent"),
                    entities=item.get("entities"),
                )
                for item in examples_list
            ]

            update_request["dataset_size"] = len(examples)

            pipeline_builder = PipelineBuilder(update_request)
            pipeline_builder.print_pipeline()
            nlu_model_config = pipeline_builder.get_nlu_model()

            trainer = Trainer(nlu_model_config, ComponentBuilder(use_cache=False))
            dataset = TrainingData(
                training_examples=examples, lookup_tables=None
            )
            trainer.train(dataset)

            persistor = BothubPersistor(
                repository_version_language_id, repository_authorization, rasa_version
            )
            # The temp dir is created before the name is built, exactly as in
            # the original argument evaluation order.
            staging_dir = mkdtemp()
            model_name = (
                f"{update_request.get('repository_version')}_"
                f"{update_request.get('total_training_end') + 1}_"
                f"{update_request.get('language')}"
            )
            trainer.persist(
                staging_dir,
                persistor=persistor,
                fixed_model_name=model_name,
            )
        except Exception as e:
            logger.exception(e)
            backend().request_backend_trainfail_nlu(
                repository_version_language_id, repository_authorization
            )
            raise e
        finally:
            backend().request_backend_traininglog_nlu(
                repository_version_language_id, pl.getvalue(), repository_authorization
            )
Пример #10
0
def test_train_with_empty_data(language, pipeline, component_builder, tmpdir):
    """Train on empty data, persist with a persistor, reload and parse."""
    model_config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})

    empty_trainer = Trainer(model_config, component_builder)
    empty_trainer.train(TrainingData())

    persistor = create_persistor(model_config)
    model_dir = empty_trainer.persist(tmpdir.strpath, persistor)

    reloaded = Interpreter.load(model_dir, component_builder)
    assert reloaded.pipeline
    for utterance in ("hello", "Hello today is Monday, again!"):
        assert reloaded.parse(utterance) is not None
Пример #11
0
def trained_nlu_model():
    """Train a keyword-pipeline model on the default data and persist it.

    Returns:
        The value returned by ``trainer.persist`` for project
        "test_model_keyword" under ``test_models``.
    """
    keyword_config = RasaNLUModelConfig({"pipeline": "keyword"})
    keyword_trainer = Trainer(keyword_config)
    examples = training_data.load_data(DEFAULT_DATA_PATH)
    keyword_trainer.train(examples)

    return keyword_trainer.persist("test_models",
                                   project_name="test_model_keyword")
def train_eval_rasa_nlu_model(lang='en', cross=False, save=''):
    """Train a Rasa NLU model on the task-1 data and, in cross mode, evaluate it.

    :param lang: abbreviated language name, passed to the data builder
    :param cross: when truthy, train on the first element of the builder
        result and evaluate the newest ``nlu_*`` model on the second one
    :param save: suffix of the results folder name; also passed to the data
        builder as ``vers``
    :rtype: None
    """
    from rasa.nlu.training_data import load_data
    from rasa.nlu.model import Trainer
    from rasa.nlu.components import ComponentBuilder
    from rasa.nlu import config
    from rasa.nlu.test import run_evaluation

    config_file = source_config / "config_rasa_converrt.yml"

    if cross:
        results_name = "rasa_cross_semeval_2020_model_task1_{}".format(save)
    else:
        results_name = "rasa_semeval_2020_model_task1_{}".format(save)
    filename_results = source_result / results_name

    data_builder = BuildSnipsDataTask1(lang, cross=cross, vers=save)
    built = data_builder.build_rasa_data_task1()

    # In cross mode the builder result is indexed: [0] is used for training
    # and [1] for evaluation; otherwise it is passed to load_data as is.
    training_data = load_data(str(built[0])) if cross else load_data(built)
    builder = ComponentBuilder(use_cache=True)
    trainer = Trainer(config.load(str(config_file)), builder)

    print("--> Training patent data with Rasa...")
    if cross:
        trainer.train(training_data, num_threads=8, n_jobs=-1, verbose=True)
    else:
        # NOTE(review): fixed_model_name is handed to train() here, not to
        # persist() - confirm this is intended.
        trainer.train(training_data, num_threads=8, verbose=True, n_jobs=-1, fixed_model_name="nlu")

    print("--> Saving model trained with Rasa (Rasa)...")
    trainer.persist(filename_results)

    if not cross:
        return

    print("--> Evaluating training data with Rasa metrics (Cross-validation)...")
    import os
    from datetime import datetime

    filename_test = str(built[1])
    print(filename_test)
    run_label = "test_{}_{}".format(save, datetime.now().strftime("%Y%m%d-%H%M%S"))
    out_test = source_result / "rasa_cross_evaluation_task1" / run_label
    # Evaluate the most recently modified "nlu_*" entry in the results folder.
    candidates = sorted(filename_results.glob("nlu_*"), key=os.path.getmtime)
    model_directory = candidates[-1]
    run_evaluation(filename_test, str(model_directory), output_directory=str(out_test))
Пример #13
0
def test_train_model_without_data(language, pipeline, component_builder, tmpdir):
    """Train on empty data, persist, reload and check the model still parses."""
    model_config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
    empty_trainer = Trainer(model_config, component_builder)
    empty_trainer.train(TrainingData())

    model_dir = empty_trainer.persist(tmpdir.strpath)
    reloaded = Interpreter.load(model_dir, component_builder)

    assert reloaded.pipeline
    assert reloaded.parse("Rasa is great!") is not None
Пример #14
0
def train_nlu():
    """Train on ``data/nlu.md`` with ``config.yml`` and persist as "current".

    Returns:
        The value returned by ``trainer.persist`` for ``models/nlu/``.
    """
    from rasa.nlu.training_data import load_data
    from rasa.nlu import config
    from rasa.nlu.model import Trainer

    examples = load_data('data/nlu.md')
    pipeline_config = config.load("config.yml")

    nlu_trainer = Trainer(pipeline_config)
    nlu_trainer.train(examples)

    return nlu_trainer.persist('models/nlu/', fixed_model_name="current")
Пример #15
0
    async def train(self):
        """Train the engine.

        Downloads the NLTK 'punkt' resource, then - unless
        ``data/<skill-id>`` already exists - generates stories, trains and
        persists the NLU model, and trains and persists the Core policy
        ensemble and domain below ``data/<skill-id>/core``.
        """
        nltk.download('punkt')
        lang = self.config['language']
        skill_id = self.config['skill-id']
        skill_dir = 'data/' + skill_id

        # An existing skill folder means there is nothing to do.
        if os.path.exists(skill_dir):
            return

        _LOGGER.info("Starting Skill training.")
        _LOGGER.info("Generating stories.")
        data, domain_data, stories = await GenerateStories.run(
            skill_id, lang, self.asm)

        # --- NLU ---
        training_data = TrainingData(training_examples=data)
        nlu_config = RasaNLUModelConfig({
            "language": lang,
            "pipeline": self.config['pipeline'],
            "data": None
        })

        trainer = Trainer(nlu_config, None, True)
        _LOGGER.info("Training Arcus NLU")
        trainer.train(training_data)
        trainer.persist(skill_dir, None, 'nlu')

        # --- Rasa core ---
        domain = Domain.from_dict(domain_data)

        reader = StoryFileReader(domain, RegexInterpreter(), None, False)
        story_steps = await reader.process_lines(stories)
        graph = StoryGraph(story_steps)

        generator = TrainingDataGenerator(
            graph,
            domain,
            remove_duplicates=True,
            unique_last_num_states=None,
            augmentation_factor=20,
            tracker_limit=None,
            use_story_concatenation=True,
            debug_plots=False,
        )
        training_trackers = generator.generate()

        policy_list = SimplePolicyEnsemble.from_dict(
            {"policies": self.config['policies']})
        policy_ensemble = SimplePolicyEnsemble(policy_list)

        _LOGGER.info("Training Arcus Core")
        policy_ensemble.train(training_trackers, domain)

        core_dir = skill_dir + "/core"
        policy_ensemble.persist(core_dir, False)
        domain.persist(core_dir + "/model")
        domain.persist_specification(core_dir)
Пример #16
0
def train_test(td_file, config_file, model_dir):
    """Split the data 60/40, train on the train part and evaluate on the rest.

    Both splits are written out as JSON; the test split file is then passed
    to ``evaluate_model`` together with the persisted model location.
    """
    test_path = "data/tmp/temp_test.json"
    train_path = "data/temp_train.json"

    td = load_data(td_file)
    trainer = Trainer(config.load(config_file))
    train_split, test_split = td.train_test_split(train_frac=0.6)
    trainer.train(train_split)
    model_loc = trainer.persist(model_dir)

    # Test split first, then train split - same order as before.
    for target, split in ((test_path, test_split), (train_path, train_split)):
        with open(target, "w", encoding="utf8") as handle:
            handle.write(split.as_json())

    evaluate_model(test_path, model_loc)
Пример #17
0
def call():
    """Train on ``./data/weapon.md`` and persist the model as "model".

    Returns:
        The value returned by ``trainer.persist`` for ``./models``.
    """
    from rasa.nlu.training_data import load_data
    from rasa.nlu import config
    from rasa.nlu.components import ComponentBuilder
    from rasa.nlu.model import Trainer

    cached_builder = ComponentBuilder(use_cache=True)

    examples = load_data('./data/weapon.md')
    pipeline_config = config.load("./config.yml")
    weapon_trainer = Trainer(pipeline_config, cached_builder)
    weapon_trainer.train(examples)

    persisted_dir = weapon_trainer.persist('./models', fixed_model_name="model")
    print('done')
    return persisted_dir
Пример #18
0
def train():
    """Train on the demo data in ``prj_dir``, reload the model and run inference.

    The parse result of the second utterance is shown without its
    "intent_ranking" entry.
    """
    td = load_data("{}/demo_rasa.json".format(prj_dir))
    model_config = RasaNLUModelConfig(load_json("{}/config.json".format(prj_dir)))

    demo_trainer = Trainer(model_config)
    demo_trainer.train(td)
    model_dir = demo_trainer.persist("{}/models".format(prj_dir))

    reloaded = Interpreter.load(model_dir)
    assert reloaded.pipeline

    # Inference: the first utterance is parsed but its result is not kept.
    reloaded.parse("i'm looking for a place in the north of town")
    parsed = reloaded.parse("show me chinese restaurants")
    trimmed = {
        key: value
        for key, value in parsed.items()
        if key not in ["intent_ranking"]
    }
    show_dict(trimmed)
def train_test(td_file, config_file, model_dir, key="company", noise=0.1):
    """Train on an 80/20 split and evaluate on a noised test split.

    Args:
        td_file: Training data source passed to ``load_data``.
        config_file: Configuration passed to ``config.load``.
        model_dir: Target passed to ``trainer.persist``.
        key: Forwarded to ``add_noise`` together with the test split.
        noise: Forwarded to ``add_noise`` as ``noise``.
    """
    tmp_fname = "data/tmp/temp_test.json"

    td = load_data(td_file)
    trainer = Trainer(config.load(config_file))

    train_split, test_split = td.train_test_split(train_frac=0.8)
    noisy_test = add_noise(test_split, key, noise=noise)

    trainer.train(train_split)
    model_loc = trainer.persist(model_dir)

    with open(tmp_fname, "w", encoding="utf8") as handle:
        handle.write(noisy_test.as_json())

    evaluate_model(tmp_fname, model_loc)
Пример #20
0
def train(nlu_config: Union[Text, RasaNLUModelConfig],
          data: Text,
          path: Optional[Text] = None,
          project: Optional[Text] = None,
          fixed_model_name: Optional[Text] = None,
          storage: Optional[Text] = None,
          component_builder: Optional[ComponentBuilder] = None,
          training_data_endpoint: Optional[EndpointConfig] = None,
          **kwargs: Any) -> Tuple[Trainer, Interpreter, Text]:
    """Load the trainer and the data, train the model and optionally persist it.

    Args:
        nlu_config: Config object, or a string that is loaded via
            ``config.load``.
        data: Training data source, used when no endpoint is given.
        path: Persist target; when falsy the model is not persisted.
        project: Forwarded to ``trainer.persist``.
        fixed_model_name: Forwarded to ``trainer.persist``.
        storage: Passed to ``create_persistor``.
        component_builder: Passed to the ``Trainer`` constructor.
        training_data_endpoint: When not ``None`` the data is loaded from
            this endpoint instead of ``data``.
        **kwargs: Forwarded to ``trainer.train``.

    Returns:
        ``(trainer, interpreter, persisted_path)``; ``persisted_path`` is
        ``None`` when ``path`` is falsy.
    """
    if isinstance(nlu_config, str):
        nlu_config = config.load(nlu_config)

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(nlu_config, component_builder)
    persistor = create_persistor(storage)

    if training_data_endpoint is None:
        training_data = load_data(data, nlu_config.language)
    else:
        training_data = load_data_from_endpoint(training_data_endpoint,
                                                nlu_config.language)
    interpreter = trainer.train(training_data, **kwargs)

    persisted_path = (
        trainer.persist(path, persistor, project, fixed_model_name)
        if path
        else None
    )

    return trainer, interpreter, persisted_path
Пример #21
0
    def _async_train(self, config, nlu_data, model_name):
        """Train, package and cache a model while tracking its status.

        The status entry for ``model_name`` is set to "TRAINING" at the
        start and to "READY" (with the elapsed time formatted to two
        decimals) once the interpreter has been stored in the cache.
        """
        started_at = timer()
        with self.lock:
            self.training_status[model_name] = {"status": "TRAINING"}

        parsed_data = self.data_reader.read_from_json({'rasa_nlu_data': nlu_data})
        # The trainer is built while holding the interpreter cache lock.
        with self.interpreter_cache.lock:
            trainer = Trainer(RasaNLUModelConfig(config), self.interpreter_cache.component_builder)

        interpreter = trainer.train(parsed_data)
        staging_dir = tempfile.mkdtemp()
        trainer.persist(staging_dir, None, "nlu")

        # The return value of the packaging step is not used.
        create_package_rasa(staging_dir, os.path.join("models", model_name))

        self.interpreter_cache.store(model_name, interpreter)

        with self.lock:
            elapsed = timer() - started_at
            self.training_status[model_name] = {
                "status": "READY",
                "training_time": f"{elapsed:.2f}",
            }
Пример #22
0
def train_nlu(
    config_file="config.yml", model_path="models/nlu", training_data_file="data/nlu.md"
):
    """Train an NLU model and persist it to ``model_path``.

    Args:
        config_file: Configuration passed to ``config.load``.
        model_path: Target passed to ``trainer.persist``.
        training_data_file: Training data source passed to ``load_data``.

    Returns:
        The directory the model was stored in.
    """
    from rasa.nlu.training_data import load_data
    from rasa.nlu import config
    from rasa.nlu.model import Trainer

    examples = load_data(training_data_file)
    pipeline_config = config.load(config_file)
    nlu_trainer = Trainer(pipeline_config)
    nlu_trainer.train(examples)

    # trainer.persist writes the model and all meta data into a folder; the
    # folder itself is not zipped.
    stored_in = nlu_trainer.persist(model_path)
    logger.info("Model trained. Stored in '{}'.".format(stored_in))

    return stored_in
Пример #23
0
def train_update(update, examples_data, label_examples_data, algorithm,
                 ner_spacy, similarity_type, language, connection):
    """Train a model for ``update`` and persist it through ``BothubPersistor``.

    Examples and label examples are converted to messages, a config is
    derived from the algorithm settings, and the trained model is persisted
    under the name ``str(update)``. Any exception is logged and re-raised.
    """
    with PokeLogging() as pl:
        try:
            examples = [
                Message.build(
                    text=item.get("text"),
                    intent=item.get("intent"),
                    entities=item.get("entities"),
                )
                for item in examples_data
            ]
            label_examples = [
                Message.build(
                    text=item.get("text"),
                    entities=item.get("entities"),
                )
                for item in label_examples_data
            ]

            nlu_model_config = get_rasa_nlu_config_from_update(
                algorithm, ner_spacy, similarity_type, language)
            trainer = Trainer(nlu_model_config,
                              ComponentBuilder(use_cache=False))
            dataset = BothubTrainingData(
                label_training_examples=label_examples,
                training_examples=examples)

            trainer.train(dataset)

            persistor = BothubPersistor(update, connection)
            trainer.persist(
                mkdtemp(),
                persistor=persistor,
                fixed_model_name=str(update),
            )
        except Exception as e:
            logger.exception(e)

            raise e
Пример #24
0
def train_data(domain, locale, prop):
    """Train and persist the model for a domain/locale and report data counts.

    Args:
        domain: Domain name; part of the data file and model names.
        locale: Locale name; part of the data file and model names.
        prop: Mapping providing 'format' and 'config_file'.

    Returns:
        A dict with a single 'response' key. It holds an error or warning
        string when the format is unsupported or training is skipped, and
        otherwise a dict with domain, locale and utterance/intent counts.
    """
    data_format = prop.get('format')
    dataFile = os.path.join(scriptDir, '..', 'data', domain + '_' + locale + '.' + data_format)
    configFile = os.path.join(scriptDir, 'config', prop.get('config_file'))
    modelFile = os.path.join(scriptDir, '..', 'models', 'dnn')
    model_name = domain + '_' + locale

    if data_format not in ('md', 'json'):
        logger.error("unsupported format. Exiting...")
        return json.loads('{"response":"ERROR: unsupported format. Exiting..."}')

    training_data = load_data(dataFile)
    trainer = Trainer(config.load(configFile))

    # NOTE(review): training is skipped when is_config_stale() is truthy and
    # reported as "no changes found" - confirm the helper's polarity.
    if is_config_stale(domain, locale, prop):
        logger.warning("no changes found to training data, using pre-trained model")
        return json.loads('{"response":"WARNING: no changes found to training data, using pre-trained model"}')

    trainer.train(training_data)
    trainer.persist(modelFile, fixed_model_name=model_name)

    # Group the examples by intent, keeping first-seen intent order.
    by_intent = OrderedDict()
    for example in [e.as_dict_nlu() for e in training_data.training_examples]:
        by_intent.setdefault(example['intent'], []).append(example)

    utterance_count = sum(len(group) for group in by_intent.values())
    intent_count = len(by_intent)

    logger.info(f'Identified domain: {domain}')
    logger.info(f'Identified locale: {locale}')
    logger.info(f'Number of utterances for training: {utterance_count}')
    logger.info(f'Number of intents for training: {intent_count}')

    return {
        'response': {
            'domain': domain,
            'locale': locale,
            'Number of utterances': str(utterance_count),
            'Number of intents': str(intent_count),
        }
    }
Пример #25
0
def test_nlu_interpreter():
    """Train on the ``data`` folder and check that 'hello' is parsed as 'greet'."""
    # Alternative data source used previously: "data/chitchat_nlu.md".
    examples = load_data("data")
    pipeline_config = config.load("config.yml")
    nlu_trainer = Trainer(pipeline_config)

    interpreter = nlu_trainer.train(examples)
    persisted_dir = nlu_trainer.persist("./tests/models", project_name="nlu")
    parsed = interpreter.parse('hello')

    assert parsed['intent']['name'] == 'greet'
    assert persisted_dir
def train_nlu(
    config_file="config_simbert.yml",
    training_data_file="raw_data.json",
    model_directory: Text = "/model",
    model_name: Text = "current",
):
    """Train an NLU model and persist it as "nlu" below ``model_directory/model_name``.

    Args:
        config_file: Configuration passed to ``config.load``.
        training_data_file: Training data source passed to ``load_data``.
        model_directory: Base directory of the persisted model.
        model_name: Sub-folder joined onto ``model_directory``.

    Returns:
        The directory the model was stored in.
    """
    # Load the training data.
    examples = load_data(training_data_file)
    # Load the configuration file and build the Trainer from it.
    pipeline_config = config.load(config_file)
    nlu_trainer = Trainer(pipeline_config)
    # Train each component on the training data.
    nlu_trainer.train(examples)
    print('---------train done!------------')

    # trainer.persist writes the model and all meta data into a folder; the
    # folder itself is not zipped.
    model_path = os.path.join(model_directory, model_name)
    print('model_path:', model_path)
    # Save the model files.
    stored_in = nlu_trainer.persist(model_path, fixed_model_name="nlu")
    logger.info("Model trained. Stored in '{}'.".format(stored_in))
    # Hand back the directory the model was saved to.
    return stored_in
Пример #27
0
def test_train_model_without_data():
    """Show the second test pipeline's settings, then exit.

    The training and inference code after ``exit()`` is kept but is not
    reached as long as that call stays in place.
    """
    td = load_data(DEFAULT_DATA_PATH)
    selected = pipelines_for_tests()[1]
    language, pipeline = selected
    model_config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
    show_dict({"pipeline": pipeline, "language": language})
    # NOTE(review): execution stops here; everything below is unreachable.
    exit()

    trainer = Trainer(model_config)
    trainer.train(td)
    reloaded = Interpreter.load(trainer.persist(model_dir))
    assert reloaded.pipeline

    # Inference
    reloaded.parse("i'm looking for a place in the north of town")
    parsed = reloaded.parse("show me chinese restaurants")
    trimmed = {
        key: value
        for key, value in parsed.items()
        if key not in ["intent_ranking"]
    }
    show_dict(trimmed)
    def train_nlu(data, configuration, model_dir, train):
        """Train an NLU model and persist it under the name 'Intentnlu'.

        Args:
            data: Training data, in JSON format.
            configuration: Configuration file passed to ``config.load``.
            model_dir: Where to save the model after training.
            train: Flag confirming that training is really wanted. It is
                stored on ``rasamodel.Train`` and must compare equal to
                ``True``.

        Returns:
            The directory the output model was saved to.

        Raises:
            AssertionError: If the ``train`` flag does not equal ``True``.
        """
        rasamodel.Train = train
        # Explicit check instead of an `assert` statement: asserts are
        # removed under `python -O`, which would silently disable this
        # guard. The exception type is unchanged for existing callers.
        if not (rasamodel.Train == True):  # noqa: E712 - keep equality semantics
            raise AssertionError("train_nlu requires the train flag to be True")

        training_data = load_data(data)
        trainer = Trainer(config.load(configuration))
        trainer.train(training_data)
        model_directory = trainer.persist(model_dir,
                                          fixed_model_name='Intentnlu')
        return model_directory
Пример #29
0
def train_nlu(config_file="config.yml",
              model_directory="models",
              model_name="current",
              training_data_file="data/nlu.md"):
    """Train an NLU model and persist it as "nlu" below ``model_directory/model_name``.

    Args:
        config_file: Configuration passed to ``config.load``.
        model_directory: Base directory of the persisted model.
        model_name: Sub-folder joined onto ``model_directory``.
        training_data_file: Training data source passed to ``load_data``.

    Returns:
        The directory the model was stored in.
    """
    from rasa.nlu.training_data import load_data
    from rasa.nlu import config
    from rasa.nlu.model import Trainer

    examples = load_data(training_data_file)
    pipeline_config = config.load(config_file)
    nlu_trainer = Trainer(pipeline_config)
    nlu_trainer.train(examples)

    # trainer.persist writes the model and all meta data into a folder; the
    # folder itself is not zipped.
    target = os.path.join(model_directory, model_name)
    model_directory = nlu_trainer.persist(target, fixed_model_name="nlu")

    logger.info(f"Model trained. Stored in '{model_directory}'.")

    return model_directory
Пример #30
0
 def __init__(self):
     # Use the persisted model at ./models/nlu/current when it can be
     # loaded; otherwise train a new one and persist it under that name.
     try:
         test = Interpreter.load("./models/nlu/current")
         self.interpreter = test
     except Exception:
         # Any load failure falls through to a fresh training run; the
         # interpreter returned by train() is used directly.
         training_data = load_data("./data/nlu.md")
         trainer = Trainer(config.load("config.yml"))
         self.interpreter = trainer.train(training_data)
         # The returned directory is not used in the visible code.
         model_directory = trainer.persist("./models/nlu",
                                           fixed_model_name="current")
     # NOTE(review): presumably Italian verbs for starting music playback -
     # confirm against where music_verbs is used.
     self.music_verbs = ['Riproduci', 'Suona', 'Fai partire', 'Avvia']