def compare_nlu_models(
    configs: List[Text],
    nlu: Text,
    output: Text,
    runs: int,
    exclusion_percentages: List[int],
):
    """Trains multiple models, compares them and saves the results."""
    from rasa.nlu.test import drop_intents_below_freq
    from rasa.nlu.training_data import load_data
    from rasa.nlu.utils import write_json_to_file
    from rasa.utils.io import create_path
    from rasa.nlu.test import compare_nlu
    from rasa.core.test import plot_nlu_results

    data = load_data(nlu)
    data = drop_intents_below_freq(data, cutoff=5)

    create_path(output)

    bases = [os.path.basename(nlu_config) for nlu_config in configs]
    model_names = [os.path.splitext(base)[0] for base in bases]

    f1_score_results = {
        model_name: [[] for _ in range(runs)] for model_name in model_names
    }

    training_examples_per_run = compare_nlu(
        configs,
        data,
        exclusion_percentages,
        f1_score_results,
        model_names,
        output,
        runs,
    )

    f1_path = os.path.join(output, RESULTS_FILE)
    write_json_to_file(f1_path, f1_score_results)

    plot_nlu_results(output, training_examples_per_run)
def test_entities_synonyms(tmp_path):
    data = """
{
  "rasa_nlu_data": {
    "entity_synonyms": [
      {
        "value": "nyc",
        "synonyms": ["New York City", "nyc", "the big apple"]
      }
    ],
    "common_examples" : [
      {
        "text": "show me flights to New York City",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 32,
            "value": "NYC"
          }
        ]
      },
      {
        "text": "show me flights to nyc",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 22,
            "value": "nyc"
          }
        ]
      }
    ]
  }
}"""
    f = tmp_path / "tmp_training_data.json"
    f.write_text(data, io_utils.DEFAULT_ENCODING)
    td = training_data.load_data(str(f))
    assert td.entity_synonyms["New York City"] == "nyc"
def test_run_cv_evaluation(pretrained_embeddings_spacy_config):
    td = training_data.load_data("data/examples/rasa/demo-rasa.json")

    n_folds = 2
    intent_results, entity_results, response_selection_results = cross_validate(
        td, n_folds, pretrained_embeddings_spacy_config
    )

    assert len(intent_results.train["Accuracy"]) == n_folds
    assert len(intent_results.train["Precision"]) == n_folds
    assert len(intent_results.train["F1-score"]) == n_folds
    assert len(intent_results.test["Accuracy"]) == n_folds
    assert len(intent_results.test["Precision"]) == n_folds
    assert len(intent_results.test["F1-score"]) == n_folds

    assert len(entity_results.train["CRFEntityExtractor"]["Accuracy"]) == n_folds
    assert len(entity_results.train["CRFEntityExtractor"]["Precision"]) == n_folds
    assert len(entity_results.train["CRFEntityExtractor"]["F1-score"]) == n_folds
    assert len(entity_results.test["CRFEntityExtractor"]["Accuracy"]) == n_folds
    assert len(entity_results.test["CRFEntityExtractor"]["Precision"]) == n_folds
    assert len(entity_results.test["CRFEntityExtractor"]["F1-score"]) == n_folds
def train_nlu(
    config_file="config.yml",
    model_directory="models",
    model_name="current",
    training_data_file="data/nlu.md",
):
    from rasa.nlu.training_data import load_data
    from rasa.nlu import config
    from rasa.nlu.model import Trainer

    training_data = load_data(training_data_file)
    trainer = Trainer(config.load(config_file))
    trainer.train(training_data)

    # Attention: trainer.persist stores the model and all meta data into a folder.
    # The folder itself is not zipped.
    model_path = os.path.join(model_directory, model_name)
    model_directory = trainer.persist(model_path, fixed_model_name="nlu")

    logger.info(f"Model trained. Stored in '{model_directory}'.")

    return model_directory
def test_spacy_featurizer_casing(spacy_nlp):
    from rasa.nlu.featurizers import spacy_featurizer

    # if this starts failing for the default model, we should think about
    # removing the lower casing the spacy nlp component does when it
    # retrieves vectors. For compressed spacy models (e.g. models
    # ending in _sm) this test will most likely fail.

    td = training_data.load_data("data/examples/rasa/demo-rasa.json")
    for e in td.intent_examples:
        doc = spacy_nlp(e.text)
        doc_capitalized = spacy_nlp(e.text.capitalize())

        vecs = spacy_featurizer.features_for_doc(doc)
        vecs_capitalized = spacy_featurizer.features_for_doc(doc_capitalized)

        assert np.allclose(
            vecs, vecs_capitalized, atol=1e-5
        ), "Vectors are unequal for texts '{}' and '{}'".format(
            e.text, e.text.capitalize()
        )
def test_demo_data(filename):
    td = training_data.load_data(filename)
    assert td.intents == {"affirm", "greet", "restaurant_search", "goodbye"}
    assert td.entities == {"location", "cuisine"}
    assert len(td.training_examples) == 42
    assert len(td.intent_examples) == 42
    assert len(td.entity_examples) == 11
    assert td.entity_synonyms == {
        "Chines": "chinese",
        "Chinese": "chinese",
        "chines": "chinese",
        "vegg": "vegetarian",
        "veggie": "vegetarian",
    }

    assert td.regex_features == [
        {"name": "greet", "pattern": r"hey[^\s]*"},
        {"name": "zipcode", "pattern": r"[0-9]{5}"},
    ]
def train_nlu(data, configuration, model_dir, train):
    '''
    Input:
        data: training data, in JSON format
        configuration: configuration file
        model_dir: where to save the model after training
        train: flag confirming that we really want to train
    Output:
        model_directory: where the trained model is saved
    '''
    rasamodel.Train = train
    assert rasamodel.Train is True

    training_data = load_data(data)
    trainer = Trainer(config.load(configuration))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir, fixed_model_name='Intentnlu')
    return model_directory
def test_interpreter(pipeline_template, component_builder, tmpdir):
    test_data = "data/examples/rasa/demo-rasa.json"
    _conf = utilities.base_test_conf(pipeline_template)
    _conf["data"] = test_data
    td = training_data.load_data(test_data)
    interpreter = utilities.interpreter_for(
        component_builder, "data/examples/rasa/demo-rasa.json", tmpdir.strpath, _conf
    )

    texts = ["good bye", "i am looking for an indian spot"]

    for text in texts:
        result = interpreter.parse(text, time=None)
        assert result["text"] == text
        assert not result["intent"]["name"] or result["intent"]["name"] in td.intents
        assert result["intent"]["confidence"] >= 0
        # Ensure the model doesn't detect entity types that are not present
        # Models on our test data set are not stable enough to
        # require the exact entities to be found
        for entity in result["entities"]:
            assert entity["entity"] in td.entities
def test_convert_featurizer_output_shape():
    from rasa.nlu.featurizers.convert_featurizer import ConveRTFeaturizer

    td = training_data.load_data("data/examples/rasa/demo-rasa.json")
    convert_featurizer = ConveRTFeaturizer()
    convert_featurizer.train(td, config=None)

    text_features_dim = np.array(
        [
            example.get("text_features").shape[0]
            for example in td.intent_examples
            if example.get("text_features") is not None
        ]
    )

    response_features_dim = np.array(
        [
            example.get("response_features").shape[0]
            for example in td.intent_examples
            if example.get("response_features") is not None
        ]
    )

    assert np.all(text_features_dim == 1024)
    assert np.all(response_features_dim == 1024)
def test_composite_entities_data():
    td = training_data.load_data("data/test/demo-rasa-composite-entities.md")
    assert not td.is_empty()
    assert len(td.entity_examples) == 11
    assert len(td.intent_examples) == 45
    assert len(td.training_examples) == 45
    assert td.entity_synonyms == {"SF": "San Fransisco"}
    assert td.intents == {
        "order_pizza",
        "book_flight",
        "chitchat",
        "greet",
        "goodbye",
        "affirm",
    }
    assert td.entities == {"location", "topping", "size"}
    assert td.entity_groups == {"1", "2"}
    assert td.entity_roles == {"to", "from"}
    assert td.number_of_examples_per_entity["entity 'location'"] == 8
    assert td.number_of_examples_per_entity["group '1'"] == 9
    assert td.number_of_examples_per_entity["role 'from'"] == 3
def main():
    parser = create_argument_parser()
    cmdline_args = parser.parse_args()
    utils.configure_colored_logging(cmdline_args.loglevel)

    if cmdline_args.mode == "crossvalidation":

        # TODO: move parsing into sub parser
        # manual check argument dependency
        if cmdline_args.model is not None:
            parser.error("Crossvalidation will train a new model "
                         "- do not specify external model.")

        if cmdline_args.config is None:
            parser.error("Crossvalidation will train a new model "
                         "- you need to specify a model configuration.")

        nlu_config = config.load(cmdline_args.config)
        data = training_data.load_data(cmdline_args.data)
        data = drop_intents_below_freq(data, cutoff=5)

        results, entity_results = cross_validate(
            data, int(cmdline_args.folds), nlu_config)
        logger.info("CV evaluation (n={})".format(cmdline_args.folds))

        if any(results):
            logger.info("Intent evaluation results")
            return_results(results.train, "train")
            return_results(results.test, "test")

        if any(entity_results):
            logger.info("Entity evaluation results")
            return_entity_results(entity_results.train, "train")
            return_entity_results(entity_results.test, "test")

    elif cmdline_args.mode == "evaluation":
        run_evaluation(cmdline_args.data,
                       cmdline_args.model,
                       cmdline_args.report,
                       cmdline_args.successes,
                       cmdline_args.errors,
                       cmdline_args.confmat,
                       cmdline_args.histogram)

    logger.info("Finished evaluation")
async def train(
    nlu_config: Union[Text, Dict, RasaNLUModelConfig],
    data: Union[Text, "TrainingDataImporter"],
    path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    storage: Optional[Text] = None,
    component_builder: Optional[ComponentBuilder] = None,
    training_data_endpoint: Optional[EndpointConfig] = None,
    persist_nlu_training_data: bool = False,
    **kwargs: Any,
) -> Tuple[Trainer, Interpreter, Optional[Text]]:
    """Loads the trainer and the data and runs the training of the model."""
    from rasa.importers.importer import TrainingDataImporter

    if not isinstance(nlu_config, RasaNLUModelConfig):
        nlu_config = config.load(nlu_config)

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(nlu_config, component_builder)
    persistor = create_persistor(storage)
    if training_data_endpoint is not None:
        training_data = await load_data_from_endpoint(
            training_data_endpoint, nlu_config.language
        )
    elif isinstance(data, TrainingDataImporter):
        training_data = await data.get_nlu_data(nlu_config.data)
    else:
        training_data = load_data(data, nlu_config.language)

    training_data.print_stats()

    interpreter = trainer.train(training_data, **kwargs)

    if path:
        persisted_path = trainer.persist(
            path, persistor, fixed_model_name, persist_nlu_training_data
        )
    else:
        persisted_path = None

    return trainer, interpreter, persisted_path
def zipped_nlu_model():
    spacy_config_path = "sample_configs/config_pretrained_embeddings_spacy.yml"

    cfg = config.load(spacy_config_path)
    trainer = Trainer(cfg)
    td = training_data.load_data(DEFAULT_DATA_PATH)

    trainer.train(td)
    trainer.persist("test_models", project_name="test_model_pretrained_embeddings")

    model_dir_list = os.listdir(TEST_MODEL_PATH)

    # directory name of latest model
    model_dir = sorted(model_dir_list)[-1]

    # path of that directory
    model_path = os.path.join(TEST_MODEL_PATH, model_dir)

    zip_path = zip_folder(model_path)

    return zip_path
async def visualize(
    config_path: Text,
    domain_path: Text,
    stories_path: Text,
    nlu_data_path: Text,
    output_path: Text,
    max_history: int,
):
    from rasa.core.agent import Agent
    from rasa.core import config

    policies = config.load(config_path)

    agent = Agent(domain_path, policies=policies)

    # this is optional, only needed if the `/greet` type of
    # messages in the stories should be replaced with actual
    # messages (e.g. `hello`)
    if nlu_data_path is not None:
        from rasa.nlu.training_data import load_data

        nlu_data_path = load_data(nlu_data_path)
    else:
        nlu_data_path = None

    logger.info("Starting to visualize stories...")
    await agent.visualize(stories_path, output_path, max_history,
                          nlu_training_data=nlu_data_path)

    full_output_path = "file://{}".format(os.path.abspath(output_path))
    logger.info(
        "Finished graph creation. Saved into {}".format(full_output_path))

    import webbrowser

    webbrowser.open(full_output_path)
def test_dialogflow_data():
    td = training_data.load_data("data/examples/dialogflow/")
    assert len(td.entity_examples) == 5
    assert len(td.intent_examples) == 24
    assert len(td.training_examples) == 24
    assert len(td.lookup_tables) == 2
    assert td.intents == {"affirm", "goodbye", "hi", "inform"}
    assert td.entities == {"cuisine", "location"}
    non_trivial_synonyms = {k: v for k, v in td.entity_synonyms.items() if k != v}
    assert non_trivial_synonyms == {
        "mexico": "mexican",
        "china": "chinese",
        "india": "indian",
    }
    # The order changes based on different computers hence the grouping
    assert {td.lookup_tables[0]["name"], td.lookup_tables[1]["name"]} == {
        "location",
        "cuisine",
    }
    assert {
        len(td.lookup_tables[0]["elements"]),
        len(td.lookup_tables[1]["elements"]),
    } == {4, 6}
def test_train_model_without_data():
    td = load_data(DEFAULT_DATA_PATH)

    # language, pipeline = pipelines_for_tests()[1]
    # show_dict(pipeline)
    # exit()
    language = "en"
    pipeline = load_json(
        "{}/test_case/test_pipelines/config_pipeline.json".format(prj_dir))
    # exit()

    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
    trainer = Trainer(_config)
    trainer.train(td)
    persisted_path = trainer.persist(model_dir)
    loaded = Interpreter.load(persisted_path)
    assert loaded.pipeline

    # Inference
    # result = loaded.parse("i'm looking for a place in the north of town")
    result = loaded.parse("show me chinese restaurants")
    result = dict(
        filter(lambda item: item[0] not in ["intent_ranking"], result.items()))
    show_dict(result)
def train(nlu_config: Union[Text, RasaNLUModelConfig],
          data: Text,
          path: Optional[Text] = None,
          project: Optional[Text] = None,
          fixed_model_name: Optional[Text] = None,
          storage: Optional[Text] = None,
          component_builder: Optional[ComponentBuilder] = None,
          training_data_endpoint: Optional[EndpointConfig] = None,
          **kwargs: Any
          ) -> Tuple[Trainer, Interpreter, Text]:
    """Loads the trainer and the data and runs the training of the model."""

    if isinstance(nlu_config, str):
        nlu_config = config.load(nlu_config)

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(nlu_config, component_builder)
    persistor = create_persistor(storage)
    if training_data_endpoint is not None:
        training_data = load_data_from_endpoint(training_data_endpoint,
                                                nlu_config.language)
    else:
        training_data = load_data(data, nlu_config.language)

    interpreter = trainer.train(training_data, **kwargs)

    if path:
        persisted_path = trainer.persist(path,
                                         persistor,
                                         project,
                                         fixed_model_name)
    else:
        persisted_path = None

    return trainer, interpreter, persisted_path
def get_nlu_stats() -> None:
    """Creates a temporary file with NLU stats.

    Creates a temporary file with all intents and entities from the NLU data,
    the `./data/nlu.md` file. These values can be used for updating intents and
    entities in the domain file.

    Note:
        It is recommended to use this function for checking new intents &
        entities.
    """
    from tempfile import NamedTemporaryFile
    from rasa.nlu.training_data import load_data

    try:
        make_dir(ai_dir['temp'])
        # Loads NLU data from the `./data/nlu.md` file.
        nlu_data = load_data(str(ai_file['nlu']))
        # Sets of the intents and entities present in the NLU data.
        intents = nlu_data.intents
        entities = nlu_data.entities
        # Creates a temporary file in the `./temp/` directory. The created
        # temporary file does not auto delete.
        named_temp_file = NamedTemporaryFile(dir=ai_dir['temp'], delete=False)
        # Writes all the intents and entities from the sets into the file.
        with open(named_temp_file.name, 'w', encoding=_ENCODING) as temp_file:
            temp_file.write('Intents:\n')
            for index in intents:
                temp_file.write(index + '\n')
            temp_file.write('\nEntities:\n')
            for index in entities:
                temp_file.write(index + '\n')
        show(f'Done. Results are stored in {named_temp_file.name} file.')
    except Exception as error:
        print('An error occurred while performing this operation because of'
              f' {error} in function "{stack()[0][3]}" on line'
              f' {exc_info()[-1].tb_lineno}.')
# import rasa
import json

from rasa.nlu.training_data import load_data
from rasa.nlu.config import RasaNLUModelConfig
from rasa.nlu.model import Trainer
from rasa.nlu import config
from rasa.nlu.model import Metadata, Interpreter

# Import speech_recognition
import speech_recognition as sr

# This will load the nlu data in the md file, train a model and save it as the current model

# loading the nlu training samples
training_data = load_data("./data/nlu.md")

# trainer to educate our pipeline
trainer = Trainer(config.load("config.yml"))

# train the model!
interpreter = trainer.train(training_data)

# store it for future use
model_directory = trainer.persist("./models", fixed_model_name="current")

# Use this line when you already trained a model
# interpreter = Interpreter.load('./models/current')

# small helper to make dict dumps a bit prettier
def pprint(o):
    print(json.dumps(o, indent=2))

r = sr.Recognizer()
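# The helper below is an added sketch, not part of the original script: it shows
# one way the recognizer above could be combined with the freshly trained
# interpreter (capture a phrase from the default microphone, transcribe it with
# recognize_google, then parse the text with the NLU model). The function name
# and the error handling are assumptions.
def listen_and_parse():
    with sr.Microphone() as source:
        r.adjust_for_ambient_noise(source)
        audio = r.listen(source)
    try:
        text = r.recognize_google(audio)
    except sr.UnknownValueError:
        print("Could not understand the audio.")
        return None
    # run the transcribed text through the trained NLU pipeline
    return interpreter.parse(text)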
def train_nlu(data, configs, model_dir):
    training_data = load_data(data)  # load the NLU training samples
    trainer = Trainer(config.load(configs))  # build the trainer from the pipeline config
    interpreter = trainer.train(training_data)  # train the model
    model_directory = trainer.persist("models/nlu", fixed_model_name="chatter")  # store it in the directory
def CV_eval(td_file, config_file, Nfolds=10):
    # trains a model with crossvalidation using the training data and config
    td = load_data(td_file)
    configuration = config.load(config_file)
    cross_validate(td, Nfolds, configuration)
def train_nlu(data, configuration, model_dir):
    training_data = load_data(data)
    trainer = Trainer(config.load(configuration))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir, fixed_model_name='whethernlu')
def training_data():
    return load_data(DEFAULT_DATA_PATH)
def test_section_value_with_delimiter():
    td_section_with_delimiter = training_data.load_data(
        "data/test/markdown_single_sections/section_with_delimiter.md")

    assert td_section_with_delimiter.entity_synonyms == {"10:00 am": "10:00"}
def edit_tf_pt():
    if request.method == 'POST':
        if 'create' in request.form:  # create new data folder
            folder_name = request.form['new_name']
            folder_path = os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(folder_name))
            label_path = os.path.join(folder_path, 'label')
            text_path = os.path.join(folder_path, 'seq.in')
            tags_path = os.path.join(folder_path, 'seq.out')
            if not os.path.exists(folder_path):  # create new folder & files if they don't exist
                os.makedirs(folder_path)
                os.mknod(label_path)
                os.mknod(text_path)
                os.mknod(tags_path)
            else:  # create files in folder if they don't exist
                if not os.path.exists(label_path):
                    os.mknod(label_path)
                if not os.path.exists(text_path):
                    os.mknod(text_path)
                if not os.path.exists(tags_path):
                    os.mknod(tags_path)
            return redirect(url_for('content_tf_pt', path=folder_path))

        elif 'open' in request.form:  # edit existing data folder
            # download multiple files from the folder
            list_folder = request.files.getlist('folder')  # list()
            # check if folder contains correct files
            file_check = {'label': 0, 'seq.in': 0, 'seq.out': 0}
            for file in list_folder:
                if os.path.basename(file.filename) in file_check:
                    file_check[os.path.basename(file.filename)] = file_check[os.path.basename(file.filename)] + 1
            if 0 in file_check.values():  # check if filenames meet requirement
                fail = True
                fail_message = 'Files uploaded do not match filename requirements. Please check if your label, text sequence and BIO-tag sequence files are named as label, seq.in and seq.out respectively for system to recognise.'
                return redirect(url_for('edit_tf_pt', fail=fail, fail_message=fail_message))
            elif not all([False for value in file_check.values() if value > 1]):
                # invalid data folder: contains more than one of each label, seq.in, seq.out files
                fail = True
                fail_message = 'Invalid folder selected! Folder contains more than required number of files (3). Please select the direct parent data folder with only one instance of label, seq.in and seq.out file.'
                return redirect(url_for('edit_tf_pt', fail=fail, fail_message=fail_message))
            else:  # success
                for file in list_folder:
                    file.save(os.path.join(app.config['UPLOAD_FOLDER'], file.filename))  # save files into folder
                folder_path = os.path.join(app.config['UPLOAD_FOLDER'], os.path.dirname(list_folder[0].filename))
                return redirect(url_for('content_tf_pt', path=folder_path))

        elif 'convert_rasa' in request.form:  # convert rasa data file to tf/pt format
            from rasa.nlu import training_data, load_data
            from rasa.nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer

            curr = request.files['convert_rasa_file']
            curr.save(os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(curr.filename)))
            file = os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(curr.filename))
            td = training_data.load_data(file)
            formatted_examples = [example.as_dict_nlu() for example in td.training_examples]
            labels = [ex['intent'] for ex in formatted_examples]

            # Tokenize and clean text
            white_space_tokenizer = WhitespaceTokenizer()
            sentences = list()
            BIO_tagging = list()
            types = dict()
            for ex in formatted_examples:
                # Tokenize by white space
                white_space_tokens = white_space_tokenizer.tokenize(ex['text'])
                tokens = [token.text for token in white_space_tokens]
                # Form into input sentence
                sentence = ' '.join(tokens)
                sentences.append(sentence)  # seq.in
                # Perform entity tagging
                if 'entities' in ex:  # entity exists
                    ent_values = [entity['value'] for entity in ex['entities']]  # entity value
                    ent_length = [len(value.split()) for value in ent_values]  # length of entity word
                    ent_types = [entity['entity'] for entity in ex['entities']]  # entity type
                    # form BI tags
                    for idx, typ in enumerate(ent_types):
                        ent_types[idx] = 'B-' + typ + ''.join([' I-' + typ] * (ent_length[idx] - 1))
                        types['B-' + typ] = True
                        types['I-' + typ] = True
                        # replace sentence with BI
                        sentence = sentence.replace(ent_values[idx].strip(), ent_types[idx].strip())  # and, remove leading and trailing spaces
                    tag_seq = sentence.split()
                    for idx, token in enumerate(tag_seq):  # replace sentence with O
                        if token not in types:
                            tag_seq[idx] = 'O'  # no entity
                else:
                    tag_seq = ['O' for t in tokens]
                tags = ' '.join(tag_seq)
                BIO_tagging.append(tags)

            file_chunk = {
                'folder_name': os.path.splitext(os.path.basename(file))[0],
                'label_name': 'label',
                'text_name': 'seq.in',
                'tags_name': 'seq.out',
                'label_content': '\n'.join([str(i) for i in labels]) + '\n',
                'text_content': '\n'.join([str(i) for i in sentences]) + '\n',
                'tags_content': '\n'.join([str(i) for i in BIO_tagging]) + '\n'
            }
            return render_template('/edit/editor_3.html', **file_chunk)

        else:  # convert tf/pt data file to rasa format
            # download multiple files from the folder
            list_folder = request.files.getlist('convert_tf_pt_folder')  # list()
            # check if folder contains correct files
            file_check = {'label': 0, 'seq.in': 0, 'seq.out': 0}
            for file in list_folder:
                if os.path.basename(file.filename) in file_check:
                    file_check[os.path.basename(file.filename)] = file_check[os.path.basename(file.filename)] + 1
            if 0 in file_check.values():  # check if filenames meet requirement
                fail = True
                fail_message = 'Files uploaded do not match filename requirements. Please check if your label, text sequence and BIO-tag sequence files are named as label, seq.in and seq.out respectively for system to recognise.'
                return redirect(url_for('edit_tf_pt', fail=fail, fail_message=fail_message))
            elif not all([False for value in file_check.values() if value > 1]):
                # invalid data folder: contains more than one of each label, seq.in, seq.out files
                fail = True
                fail_message = 'Invalid folder selected! Folder contains more than required number of files (3). Please select the direct parent data folder with only one instance of label, seq.in and seq.out file.'
                return redirect(url_for('edit_tf_pt', fail=fail, fail_message=fail_message))
            else:  # success
                for file in list_folder:
                    file.save(os.path.join(app.config['UPLOAD_FOLDER'], file.filename))  # save files into folder
                folder_path = os.path.join(app.config['UPLOAD_FOLDER'], os.path.dirname(list_folder[0].filename))
                return redirect(url_for('content_to_rasa', path=folder_path))
    else:
        if 'fail' in request.args:
            fail = request.args.get('fail')
            fail_msg = request.args.get('fail_message')
        else:
            fail = False
            fail_msg = ""
        return render_template('/edit/index_tf-pt.html', fail=fail, fail_message=fail_msg)
def run_evaluation(
    data_path: Text,
    model_path: Text,
    output_directory: Optional[Text] = None,
    successes: bool = False,
    errors: bool = False,
    confmat: Optional[Text] = None,
    histogram: Optional[Text] = None,
    component_builder: Optional[ComponentBuilder] = None,
) -> Dict:  # pragma: no cover
    """
    Evaluate intent classification, response selection and entity extraction.

    :param data_path: path to the test data
    :param model_path: path to the model
    :param output_directory: path to folder where all output will be stored
    :param successes: if true successful predictions are written to a file
    :param errors: if true incorrect predictions are written to a file
    :param confmat: path to file that will show the confusion matrix
    :param histogram: path to file that will show a histogram
    :param component_builder: component builder

    :return: dictionary containing evaluation results
    """

    # get the metadata config from the package data
    interpreter = Interpreter.load(model_path, component_builder)

    interpreter.pipeline = remove_pretrained_extractors(interpreter.pipeline)
    test_data = training_data.load_data(data_path, interpreter.model_metadata.language)

    result = {
        "intent_evaluation": None,
        "entity_evaluation": None,
        "response_selection_evaluation": None,
    }  # type: Dict[Text, Optional[Dict]]

    if output_directory:
        io_utils.create_directory(output_directory)

    intent_results, response_selection_results, entity_results = get_eval_data(
        interpreter, test_data
    )

    if intent_results:
        logger.info("Intent evaluation results:")
        result["intent_evaluation"] = evaluate_intents(
            intent_results, output_directory, successes, errors, confmat, histogram
        )

    if response_selection_results:
        logger.info("Response selection evaluation results:")
        result["response_selection_evaluation"] = evaluate_response_selections(
            response_selection_results, output_directory
        )

    if entity_results:
        logger.info("Entity evaluation results:")
        extractors = get_entity_extractors(interpreter)
        result["entity_evaluation"] = evaluate_entities(
            entity_results, extractors, output_directory, successes, errors
        )

    return result
from rasa.nlu.training_data import load_data

# This re-uses the Rasa NLU converters code to turn a JSON Rasa NLU training
# file into MD format and save it
# Assumes you have Rasa NLU installed :-)
# If you want other options, look at the NLU code to work out how to handle them
# USE AT YOUR OWN RISK

files = {
    './commands.json': '../data/auto-generated/commands.md',
    './clarification.json': '../data/auto-generated/clarification.md',
}

# *******************************************************
# TAKE CARE: output_md_file is overwritten automatically
# *******************************************************

for file in files.keys():
    output_md_file = files[file]
    input_training_file = file

    with open(output_md_file, 'w') as f:
        f.write(load_data(input_training_file).as_markdown())
'''This file converts our NLU md training files to NLU json training files,
which are used to store the data in NoSQL databases.'''
import json
import glob  # used to read all file paths in a specific directory

from rasa.nlu import training_data

nlu_mdfiles_path = '../mdfiles/*.md'
nlu_jsonfiles_path = '../nlu/'

files = glob.glob(nlu_mdfiles_path)  # list of all the md file paths

for f in files:
    td = training_data.load_data(f)
    output = td.as_json()
    json_data = json.loads(output)
    filename_list = f.split('\\')  # split on the Windows path separator
    filename = filename_list[-1].split('.')
    with open(nlu_jsonfiles_path + filename[0] + '.json', 'w') as f:
        json.dump(json_data, f, indent=4)
from rasa.nlu.model import Trainer
from rasa.nlu import config
from rasa.nlu.training_data import load_data

# loading training data
training_data = load_data('./data/nlu.md')

# initialising the trainer
trainer = Trainer(config.load("config.yml"))

# training
trainer.train(training_data)

# saving the model in the specified directory; persist returns the path of the stored model
model_directory = trainer.persist('./models/')
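# Added usage sketch (an assumption, not part of the original script): the
# persisted model can be loaded back and queried. The sample message below is
# arbitrary.
from rasa.nlu.model import Interpreter

interpreter = Interpreter.load(model_directory)
print(interpreter.parse("hello there"))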
def test_markdown_not_existing_section():
    with pytest.raises(ValueError):
        training_data.load_data(
            "data/test/markdown_single_sections/not_existing_section.md")