示例#1
0
def _reader_factory(fformat: Text) -> Optional["TrainingDataReader"]:
    """Generates the appropriate reader class based on the file format."""
    from rasa.nlu.training_data.formats import (
        MarkdownReader,
        WitReader,
        LuisReader,
        RasaReader,
        DialogflowReader,
        NLGMarkdownReader,
    )

    reader = None
    if fformat == LUIS:
        reader = LuisReader()
    elif fformat == WIT:
        reader = WitReader()
    elif fformat in DIALOGFLOW_RELEVANT:
        reader = DialogflowReader()
    elif fformat == RASA:
        reader = RasaReader()
    elif fformat == MARKDOWN:
        reader = MarkdownReader()
    elif fformat == MARKDOWN_NLG:
        reader = NLGMarkdownReader()
    return reader
示例#2
0
def test_dump_trainable_entities(entity_extractor: Optional[Text],
                                 expected_output: Text):
    training_data_json = {
        "rasa_nlu_data": {
            "common_examples": [{
                "text":
                "test",
                "intent":
                "greet",
                "entities": [{
                    "start": 0,
                    "end": 4,
                    "value": "random",
                    "entity": "word"
                }],
            }]
        }
    }
    if entity_extractor is not None:
        training_data_json["rasa_nlu_data"]["common_examples"][0]["entities"][
            0]["extractor"] = entity_extractor

    training_data_object = RasaReader().read_from_json(training_data_json)
    md_dump = MarkdownWriter().dumps(training_data_object)
    assert md_dump.splitlines()[1] == expected_output
示例#3
0
def test_dump_entities(entity: Dict[Text, Any], expected_output: Text):
    training_data_json = {
        "rasa_nlu_data": {
            "common_examples": [
                {"text": "test", "intent": "greet", "entities": [entity]}
            ]
        }
    }
    training_data_object = RasaReader().read_from_json(training_data_json)
    md_dump = MarkdownWriter().dumps(training_data_object)
    assert md_dump.splitlines()[1] == expected_output
示例#4
0
async def test_multi_project_training(trained_async):
    example_directory = "data/test_multi_domain"
    config_file = os.path.join(example_directory, "config.yml")
    domain_file = os.path.join(example_directory, "domain.yml")
    files_of_root_project = os.path.join(example_directory, "data")

    trained_stack_model_path = await trained_async(
        config=config_file,
        domain=domain_file,
        training_files=files_of_root_project,
        force_training=True,
        persist_nlu_training_data=True,
    )

    unpacked = model.unpack_model(trained_stack_model_path)

    domain_file = os.path.join(
        unpacked, DEFAULT_CORE_SUBDIRECTORY_NAME, DEFAULT_DOMAIN_PATH
    )
    domain = Domain.load(domain_file)

    expected_intents = {
        "greet",
        "goodbye",
        "affirm",
        "deny",
        "mood_great",
        "mood_unhappy",
    }

    assert all([i in domain.intents for i in expected_intents])

    nlu_training_data_file = os.path.join(unpacked, "nlu", "training_data.json")
    nlu_training_data = RasaReader().read(nlu_training_data_file)

    assert expected_intents == nlu_training_data.intents

    expected_actions = [
        "utter_greet",
        "utter_cheer_up",
        "utter_did_that_help",
        "utter_happy",
        "utter_goodbye",
    ]

    assert all([a in domain.action_names for a in expected_actions])
示例#5
0
class AsyncTrainer():
    def __init__(self, interpreter_cache=InterpreterCache):
        self.interpreter_cache = interpreter_cache
        self.data_reader = RasaReader()
        self.lock = RLock()
        self.training_status = {}

    def train(self, nlu_data, model_name, config):
        thread = Thread(target=self._async_train, args=(config, nlu_data, model_name))
        thread.start()

    def status(self, model_name):
        with self.lock:
            return self.training_status.get(model_name, {"status": "UNKNOWN"})

    def _async_train(self, config, nlu_data, model_name):
        training_start = timer()
        with self.lock:
            self.training_status[model_name] = {
                "status": "TRAINING",
            }
        
        data = self.data_reader.read_from_json({'rasa_nlu_data': nlu_data})
        with self.interpreter_cache.lock:
            trainer = Trainer(RasaNLUModelConfig(config), self.interpreter_cache.component_builder)
        
        interpreter = trainer.train(data)
        tempdir = tempfile.mkdtemp()
        trainer.persist(tempdir, None, "nlu")
        
        _model_package = create_package_rasa(tempdir, os.path.join("models", model_name))

        self.interpreter_cache.store(model_name, interpreter)
        
        with self.lock:
            training_end = timer()
            self.training_status[model_name] = {
                "status": "READY",
                "training_time": f"{training_end - training_start:.2f}"
            }
示例#6
0
 def __init__(self, interpreter_cache=InterpreterCache):
     self.interpreter_cache = interpreter_cache
     self.data_reader = RasaReader()
     self.lock = RLock()
     self.training_status = {}