def test_markdown_single_sections():
    td_regex_only = training_data.load_data(
        'data/test/markdown_single_sections/regex_only.md')
    assert td_regex_only.regex_features == [
        {"name": "greet", "pattern": r"hey[^\s]*"}]

    td_syn_only = training_data.load_data(
        'data/test/markdown_single_sections/synonyms_only.md')
    assert td_syn_only.entity_synonyms == {'Chines': 'chinese',
                                           'Chinese': 'chinese'}

def test_data_merging(files):
    td_reference = training_data.load_data(files[0])
    td = training_data.load_data(files[1])
    assert len(td.entity_examples) == len(td_reference.entity_examples)
    assert len(td.intent_examples) == len(td_reference.intent_examples)
    assert len(td.training_examples) == len(td_reference.training_examples)
    assert td.intents == td_reference.intents
    assert td.entities == td_reference.entities
    assert td.entity_synonyms == td_reference.entity_synonyms
    assert td.regex_features == td_reference.regex_features

def test_nonascii_entities():
    data = """
{
  "luis_schema_version": "2.0",
  "utterances" : [
    {
      "text": "I am looking for a ßäæ ?€ö) item",
      "intent": "unk",
      "entities": [
        {
          "entity": "description",
          "startPos": 19,
          "endPos": 26
        }
      ]
    }
  ]
}"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = training_data.load_data(f.name)
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        entities = example.get("entities")
        assert len(entities) == 1
        entity = entities[0]
        assert entity["value"] == "ßäæ ?€ö)"
        assert entity["start"] == 19
        assert entity["end"] == 27
        assert entity["entity"] == "description"

def test_multiword_entities():
    data = """
{
  "rasa_nlu_data": {
    "common_examples" : [
      {
        "text": "show me flights to New York City",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 32,
            "value": "New York City"
          }
        ]
      }
    ]
  }
}"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = training_data.load_data(f.name)
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        entities = example.get("entities")
        assert len(entities) == 1
        tokens = WhitespaceTokenizer().tokenize(example.text)
        start, end = MitieEntityExtractor.find_entity(entities[0],
                                                      example.text,
                                                      tokens)
        assert start == 4
        assert end == 7

def do_train(cfg,  # type: RasaNLUModelConfig
             data,  # type: Text
             path=None,  # type: Text
             project=None,  # type: Optional[Text]
             fixed_model_name=None,  # type: Optional[Text]
             storage=None,  # type: Text
             component_builder=None,  # type: Optional[ComponentBuilder]
             **kwargs  # type: Any
             ):
    # type: (...) -> Tuple[Trainer, Interpreter, Text]
    """Loads the trainer and the data and runs the training of the model."""

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(cfg, component_builder)
    persistor = create_persistor(storage)
    training_data = load_data(data, cfg.language)
    interpreter = trainer.train(training_data, **kwargs)
    if path:
        persisted_path = trainer.persist(path,
                                         persistor,
                                         project,
                                         fixed_model_name)
    else:
        persisted_path = None
    return trainer, interpreter, persisted_path

def test_repeated_entities():
    data = """
{
  "rasa_nlu_data": {
    "common_examples" : [
      {
        "text": "book a table today from 3 to 6 for 3 people",
        "intent": "unk",
        "entities": [
          {
            "entity": "description",
            "start": 35,
            "end": 36,
            "value": "3"
          }
        ]
      }
    ]
  }
}"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = training_data.load_data(f.name)
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        entities = example.get("entities")
        assert len(entities) == 1
        tokens = WhitespaceTokenizer().tokenize(example.text)
        start, end = MitieEntityExtractor.find_entity(entities[0],
                                                      example.text,
                                                      tokens)
        assert start == 9
        assert end == 10

def test_run_cv_evaluation():
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    nlu_config = config.load(
        "sample_configs/config_pretrained_embeddings_spacy.yml")

    n_folds = 2
    results, entity_results = cross_validate(td, n_folds, nlu_config)

    assert len(results.train["Accuracy"]) == n_folds
    assert len(results.train["Precision"]) == n_folds
    assert len(results.train["F1-score"]) == n_folds
    assert len(results.test["Accuracy"]) == n_folds
    assert len(results.test["Precision"]) == n_folds
    assert len(results.test["F1-score"]) == n_folds
    assert len(
        entity_results.train['CRFEntityExtractor']["Accuracy"]) == n_folds
    assert len(
        entity_results.train['CRFEntityExtractor']["Precision"]) == n_folds
    assert len(
        entity_results.train['CRFEntityExtractor']["F1-score"]) == n_folds
    assert len(
        entity_results.test['CRFEntityExtractor']["Accuracy"]) == n_folds
    assert len(
        entity_results.test['CRFEntityExtractor']["Precision"]) == n_folds
    assert len(
        entity_results.test['CRFEntityExtractor']["F1-score"]) == n_folds

def run_evaluation(data_path, model_path,
                   component_builder=None):  # pragma: no cover
    """Evaluate intent classification and entity extraction."""

    # get the metadata config from the package data
    interpreter = Interpreter.load(model_path, component_builder)
    test_data = training_data.load_data(data_path,
                                        interpreter.model_metadata.language)
    extractors = get_entity_extractors(interpreter)
    entity_predictions, tokens = get_entity_predictions(interpreter,
                                                        test_data)

    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    if is_intent_classifier_present(interpreter):
        intent_targets = get_intent_targets(test_data)
        intent_predictions = get_intent_predictions(interpreter, test_data)
        logger.info("Intent evaluation results:")
        evaluate_intents(intent_targets, intent_predictions)

    if extractors:
        entity_targets = get_entity_targets(test_data)
        logger.info("Entity evaluation results:")
        evaluate_entities(entity_targets, entity_predictions, tokens,
                          extractors)

def test_drop_intents_below_freq():
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    clean_td = drop_intents_below_freq(td, 0)
    assert clean_td.intents == {'affirm', 'goodbye', 'greet',
                                'restaurant_search'}

    clean_td = drop_intents_below_freq(td, 10)
    assert clean_td.intents == {'affirm', 'restaurant_search'}

def test_wit_data():
    td = training_data.load_data('data/examples/wit/demo-flights.json')
    assert len(td.entity_examples) == 4
    assert len(td.intent_examples) == 1
    assert len(td.training_examples) == 4
    assert td.entity_synonyms == {}
    assert td.intents == {"flight_booking"}
    assert td.entities == {"location", "datetime"}

def test_luis_data():
    td = training_data.load_data('data/examples/luis/demo-restaurants.json')
    assert len(td.entity_examples) == 8
    assert len(td.intent_examples) == 28
    assert len(td.training_examples) == 28
    assert td.entity_synonyms == {}
    assert td.intents == {"affirm", "goodbye", "greet", "inform"}
    assert td.entities == {"location", "cuisine"}

def test_lookup_table_md():
    lookup_fname = 'data/test/lookup_tables/plates.txt'
    td_lookup = training_data.load_data(
        'data/test/lookup_tables/lookup_table.md')
    assert td_lookup.lookup_tables[0]['name'] == 'plates'
    assert td_lookup.lookup_tables[0]['elements'] == lookup_fname
    assert td_lookup.lookup_tables[1]['name'] == 'drinks'
    assert td_lookup.lookup_tables[1]['elements'] == [
        'mojito', 'lemonade', 'sweet berry wine', 'tea', 'club mate']

def test_prepare_data():
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    clean_data = prepare_data(td, 0)
    unique_intents = sorted(set([i.data["intent"] for i in clean_data]))
    assert unique_intents == ['affirm', 'goodbye', 'greet',
                              'restaurant_search']

    clean_data = prepare_data(td, 10)
    unique_intents = sorted(set([i.data["intent"] for i in clean_data]))
    assert unique_intents == ['affirm', 'restaurant_search']

def train(cfg_name, project_name):
    from rasa_nlu import training_data

    cfg = config.load(cfg_name)
    trainer = Trainer(cfg, component_builder)
    training_data = training_data.load_data(data)
    trainer.train(training_data)
    trainer.persist("test_projects", project_name=project_name)

def run_evaluation(data_path, model,
                   report_folder=None,
                   successes_filename=None,
                   errors_filename='errors.json',
                   confmat_filename=None,
                   intent_hist_filename=None,
                   component_builder=None):  # pragma: no cover
    """Evaluate intent classification and entity extraction."""

    # get the metadata config from the package data
    if isinstance(model, Interpreter):
        interpreter = model
    else:
        interpreter = Interpreter.load(model, component_builder)
    test_data = training_data.load_data(data_path,
                                        interpreter.model_metadata.language)
    extractors = get_entity_extractors(interpreter)
    entity_predictions, tokens = get_entity_predictions(interpreter,
                                                        test_data)

    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    result = {
        "intent_evaluation": None,
        "entity_evaluation": None
    }

    if report_folder:
        utils.create_dir(report_folder)

    if is_intent_classifier_present(interpreter):
        intent_targets = get_intent_targets(test_data)
        intent_results = get_intent_predictions(
            intent_targets, interpreter, test_data)

        logger.info("Intent evaluation results:")
        result['intent_evaluation'] = evaluate_intents(intent_results,
                                                       report_folder,
                                                       successes_filename,
                                                       errors_filename,
                                                       confmat_filename,
                                                       intent_hist_filename)

    if extractors:
        entity_targets = get_entity_targets(test_data)

        logger.info("Entity evaluation results:")
        result['entity_evaluation'] = evaluate_entities(entity_targets,
                                                        entity_predictions,
                                                        tokens,
                                                        extractors,
                                                        report_folder)

    return result

def test_train_test_split(filename):
    td = training_data.load_data(filename)
    assert td.intents == {"affirm", "greet", "restaurant_search", "goodbye"}
    assert td.entities == {"location", "cuisine"}
    assert len(td.training_examples) == 42
    assert len(td.intent_examples) == 42

    td_train, td_test = td.train_test_split(train_frac=0.8)
    assert len(td_train.training_examples) == 32
    assert len(td_test.training_examples) == 10

def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data('data/nlu_data/')
    trainer = Trainer(config.load("nlu_model_config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu',
                                      fixed_model_name="current")
    return model_directory

def test_dialogflow_data():
    td = training_data.load_data('data/examples/dialogflow/')
    assert len(td.entity_examples) == 5
    assert len(td.intent_examples) == 24
    assert len(td.training_examples) == 24
    assert td.intents == {"affirm", "goodbye", "hi", "inform"}
    assert td.entities == {"cuisine", "location"}
    non_trivial_synonyms = {k: v for k, v in td.entity_synonyms.items()
                            if k != v}
    assert non_trivial_synonyms == {"mexico": "mexican",
                                    "china": "chinese",
                                    "india": "indian"}

def train_nlu_gao():
    from rasa_nlu_gao.training_data import load_data
    from rasa_nlu_gao import config
    from rasa_nlu_gao.model import Trainer

    training_data = load_data('data/rasa_dataset_training.json')
    trainer = Trainer(config.load("config_embedding_bilstm.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu_gao/',
                                      fixed_model_name="current")
    return model_directory

def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data('./data/coco_data.json')
    trainer = Trainer(config.load("./config_spacy.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('./models/nlu/',
                                      fixed_model_name="coconlu")
    return model_directory

def test_training_data_conversion(tmpdir, data_file, gold_standard_file,
                                  output_format, language):
    out_path = tmpdir.join("rasa_nlu_data.json")
    convert_training_data(data_file, out_path.strpath, output_format,
                          language)
    td = training_data.load_data(out_path.strpath, language)
    assert td.entity_examples != []
    assert td.intent_examples != []

    gold_standard = training_data.load_data(gold_standard_file, language)
    cmp_message_list(td.entity_examples, gold_standard.entity_examples)
    cmp_message_list(td.intent_examples, gold_standard.intent_examples)
    assert td.entity_synonyms == gold_standard.entity_synonyms

    # converting the converted file back to original
    # file format and performing the same tests
    rto_path = tmpdir.join("data_in_original_format.txt")
    convert_training_data(out_path.strpath, rto_path.strpath, 'json',
                          language)
    rto = training_data.load_data(rto_path.strpath, language)
    cmp_message_list(gold_standard.entity_examples, rto.entity_examples)
    cmp_message_list(gold_standard.intent_examples, rto.intent_examples)
    assert gold_standard.entity_synonyms == rto.entity_synonyms

def run_evaluation(data_path, model,
                   report_filename=None,
                   successes_filename=None,
                   errors_filename='errors.json',
                   confmat_filename=None,
                   intent_hist_filename=None,
                   component_builder=None):  # pragma: no cover
    """Evaluate intent classification and entity extraction."""

    # get the metadata config from the package data
    if isinstance(model, Interpreter):
        interpreter = model
    else:
        interpreter = Interpreter.load(model, component_builder)
    test_data = training_data.load_data(data_path,
                                        interpreter.model_metadata.language)
    extractors = get_entity_extractors(interpreter)
    entity_predictions, tokens = get_entity_predictions(interpreter,
                                                        test_data)

    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    result = {
        "intent_evaluation": None,
        "entity_evaluation": None
    }

    if is_intent_classifier_present(interpreter):
        intent_targets = get_intent_targets(test_data)
        intent_results = get_intent_predictions(
            intent_targets, interpreter, test_data)

        logger.info("Intent evaluation results:")
        result['intent_evaluation'] = evaluate_intents(intent_results,
                                                       report_filename,
                                                       successes_filename,
                                                       errors_filename,
                                                       confmat_filename,
                                                       intent_hist_filename)

    if extractors:
        entity_targets = get_entity_targets(test_data)

        logger.info("Entity evaluation results:")
        result['entity_evaluation'] = evaluate_entities(entity_targets,
                                                        entity_predictions,
                                                        tokens,
                                                        extractors)

    return result

def train_nlu():
    # pipeline
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    # training_data = load_data('../rasa/data/res_data.json')
    training_data = load_data('./data_loading/test_loading.json')
    trainer = Trainer(config.load("../rasa/config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist("./nlu_model/jarvis_nlu/",
                                      fixed_model_name="current")
    return model_directory

def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data("data/mobile_nlu_data.json")
    trainer = Trainer(config.load("mobile_nlu_model_config.json"))
    trainer.train(training_data)
    model_directory = trainer.persist("models/", project_name="ivr",
                                      fixed_model_name="demo")
    return model_directory

def test_run_cv_evaluation():
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    nlu_config = RasaNLUConfig("sample_configs/config_spacy.json")

    n_folds = 3
    results = run_cv_evaluation(td, n_folds, nlu_config)

    assert len(results.train["Accuracy"]) == n_folds
    assert len(results.train["Precision"]) == n_folds
    assert len(results.train["F1-score"]) == n_folds
    assert len(results.test["Accuracy"]) == n_folds
    assert len(results.test["Precision"]) == n_folds
    assert len(results.test["F1-score"]) == n_folds

def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer
    import jieba

    jieba.load_userdict("jieba_userdict.txt")
    training_data = load_data("data/train_file_new.json")
    trainer = Trainer(config.load("hainan_nlu_model_config.json"))
    trainer.train(training_data)
    model_directory = trainer.persist("models/")
    return model_directory

def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data('data/nlu/')
    trainer = Trainer(config.load("nlu_model_config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu/',
                                      project_name='default',
                                      fixed_model_name="current")
    return model_directory

def test_train():
    print(datetime.datetime.now())
    # data_set generated by Chatito
    training_data = load_data(data_set)
    # load rasa pipeline
    trainer = Trainer(
        config.load(
            "/Users/guolei/Documents/EIT/GUOLEI/ContextManager/sample_configs/config_spacy.yml"
        ))
    trainer.train(training_data)
    # save model
    trainer.persist('./projects/')
    print(datetime.datetime.now())

def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data(
        '../../../models/train_data/nlu_data/training_data.json')
    trainer = Trainer(config.load("../../../models/nlu_config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('../../../models/models/nlu/',
                                      fixed_model_name="current")
    return model_directory

def __init__(self, model_path="config_spacy.yml", data='train.md'):
    # Create a trainer that uses this config
    trainer = Trainer(config.load(model_path))
    # Load the training data
    training_data = load_data(data)
    # Create an interpreter by training the model
    self.interpreter = trainer.train(training_data)

    trainer = Trainer(config.load(model_path))
    self.tieba_interpreter = trainer.train(load_data("tieba_train.md"))
    self.tieba = Tieba()
    self.respond_dict = {
        "TIEBA": self.respond_tieba,
        "default": self.respond_default,
        "get_posts": self.tieba.get_posts,
        "turn_to_post": self.tieba.turn_to_post,
        "LAUNCH": self.launch,
        "QUERY": self.query,
        "ROUTE": self.route
    }
    self.state = State.FREE
    self.message_trace = []

def train_nlu(data, configs, model_dir):
    """
    Train a NLU model
    :param data: path to the training data
    :param configs: path to the model configuration file
    :param model_dir: directory to persist the trained model to
    :return: None
    """
    training_data = load_data(data)
    trainer = Trainer(config.load(configs))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir,
                                      fixed_model_name='weathernlu')

def home():
    train_data = load_data('rasa_dataset.json')
    trainer = Trainer(config.load("config_spacy.yaml"))
    trainer.train(train_data)
    model_directory = trainer.persist('/projects')
    interpreter = Interpreter.load(model_directory)

    if 'text' in request.args:
        txt = request.args['text']
        # id = int(request.args['text'])
        return interpreter.parse(txt)
    else:
        return "Please write any query."

def train_test(td_file, config_file, model_dir):
    # helper function to split into test and train and evaluate on results.
    td = load_data(td_file)
    trainer = Trainer(config.load(config_file))
    train, test = td.train_test_split(train_frac=0.6)
    trainer.train(train)
    model_loc = trainer.persist(model_dir)
    with open('data/tmp/temp_test.json', 'w', encoding="utf8") as f:
        f.write(test.as_json())
    with open('data/temp_train.json', 'w', encoding="utf8") as f:
        f.write(train.as_json())
    evaluate_model('data/tmp/temp_test.json', model_loc)

def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu.model import Trainer
    from rasa_nlu import config

    training_data = load_data('data/testdata.json')
    trainer = Trainer(config.load("nlu_config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist(
        'models/nlu', fixed_model_name="nlu", project_name="current")

    # Returns the directory the model is stored in
    return model_directory

def rasa_train(message):
    training_data = load_data('demo-rasa.json')
    # Create a trainer
    trainer = Trainer(config.load("config_spacy.yml"))
    # Create an interpreter by training the model
    interpreter = trainer.train(training_data)
    response = interpreter.parse(message)

    matched_intent = None
    for intent, pattern in patterns.items():
        if re.search(pattern, message) is not None:
            matched_intent = intent
    response["intent"]["name"] = matched_intent
    return response

def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data('data/intents/')
    trainer = Trainer(config.load('nlu_config.yml'))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu/',
                                      fixed_model_name='current')
    return model_directory

def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu.config import RasaNLUModelConfig
    from rasa_nlu.model import Trainer
    from rasa_nlu import config
    from pathlib import Path

    training_data = load_data('training_data/general-chat.md')
    trainer = Trainer(
        config.load("training_data/config_tensorflow_embedding.yaml"))
    trainer.train(training_data)
    model_directory = trainer.persist(Path('.').parent / "models",
                                      project_name='ctraubot',
                                      fixed_model_name='nlu')

def rasa_base(request):
    if request.method == "POST":
        user = request.POST.get('user')
        training_data = load_data('main/Bank_Data.json')
        trainer = Trainer(config.load('main/config_spacy.yml'))
        trainer.train(training_data)
        model_directory = trainer.persist('main/')
        interpreter = Interpreter.load(model_directory)
        print(user)
        output = interpreter.parse(str(user))
    else:
        return render(request, "CHUG/rasa.html")
    return render(request, 'CHUG/rasa.html', {'data': output})

def train(data_path):
    # sample data
    getcwd = os.getcwd()
    print(getcwd)
    training_data = load_data(data_path)
    # the pipeline is specified in the configuration file
    trainer = Trainer(config.load("./../nlu_config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('./../models_center/intent/fb')
    print(model_directory)

def train_nlu(domain_id="default"):
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data('{}/{}/intellei_rasa.json'.format(
        data_folder, domain_id))
    trainer = Trainer(config.load("intelleibot/nlu_model_config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('{}/{}/nlu/'.format(
        model_folder, domain_id), fixed_model_name="current")
    return model_directory

def initInterPreter():
    # Import necessary modules
    from rasa_nlu.training_data import load_data
    # from rasa_nlu.config import RasaNLUModelConfig
    from rasa_nlu.model import Trainer
    from rasa_nlu import config

    # Create a trainer that uses this config
    trainer = Trainer(config.load("config_spacy.yml"))
    # Load the training data
    training_data = load_data('data/training_dataset14.json')
    # Create an interpreter by training the model
    globleDict['interpreter'] = trainer.train(training_data)

def train_test(td_file, config_file, model_dir, key='company', noise=0.1):
    """trains a model using the training data
    (split into train-test) and config"""
    td = load_data(td_file)
    trainer = Trainer(config.load(config_file))
    train, test = td.train_test_split(train_frac=0.8)
    test = add_noise(test, key, noise=noise)
    trainer.train(train)
    tmp_fname = 'data/tmp/temp_test.json'
    model_loc = trainer.persist(model_dir)
    with open(tmp_fname, 'w', encoding="utf8") as f:
        f.write(test.as_json())
    evaluate_model(tmp_fname, model_loc)

def test_run_cv_evaluation():
    import numpy as np
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    n_folds = 3
    nlu_config = RasaNLUConfig("sample_configs/config_defaults.json")
    np.random.seed(2018)
    results = run_cv_evaluation(td, n_folds, nlu_config)
    rel_tol = 1e-09
    abs_tol = 0.01
    acc = np.mean(results["accuracy"])
    exp_acc = 0.65  # expected result
    np.testing.assert_approx_equal(acc, exp_acc, significant=5)

def run_evaluation(config, model_path,
                   component_builder=None):  # pragma: no cover
    """Evaluate intent classification and entity extraction."""

    # get the metadata config from the package data
    test_data = training_data.load_data(config['data'], config['language'])
    interpreter = Interpreter.load(model_path, config, component_builder)
    intent_targets, entity_targets = get_targets(test_data)
    intent_predictions, entity_predictions, tokens = get_predictions(
        interpreter, test_data)
    extractors = get_entity_extractors(interpreter)

    if extractors.intersection(duckling_extractors):
        entity_predictions = patch_duckling_entities(entity_predictions)
        extractors = patch_duckling_extractors(interpreter, extractors)

    evaluate_intents(intent_targets, intent_predictions)
    evaluate_entities(entity_targets, entity_predictions, tokens, extractors)

def train_model():
    # trains a model and times it
    t = time()
    # training_data = load_data('demo_train.md')
    training_data = load_data('data/company_train_lookup.json')
    td_load_time = time() - t

    trainer = Trainer(config.load('config.yaml'))
    t = time()
    trainer.train(training_data)
    train_time = time() - t

    clear_model_dir()
    t = time()
    # Returns the directory the model is stored in
    model_directory = trainer.persist('./tmp/models')
    persist_time = time() - t
    return td_load_time, train_time, persist_time

def generate_rasa_training_data_and_interpreter(self, bot_directories,
                                                module_id):
    training_data_dir = bot_directories[
        "training_data_directory"] + module_id + ".json"
    config_file_dir = bot_directories[
        "training_data_directory"] + 'config_spacy.yaml'
    model_dir = bot_directories["model_directory"]
    training_data = load_data(training_data_dir)
    trainer = Trainer(config.load(config_file_dir))
    # train the model and save it to a folder
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir)
    print("trained model for module '" + module_id + "'")
    rasa_interpreter = Interpreter.load(model_directory)
    return training_data, rasa_interpreter

def intent_pooled_model(is_train):
    if is_train:
        training_data = load_data('./data/nlpcc_intent/rasa_nlpcc_train.json')
        config_file = './sample_configs/config_bert_intent_classifier_pooled.yml'
        ModelConfig = config.load(config_file)
        trainer = Trainer(ModelConfig)
        interpreter = trainer.train(training_data)
    else:
        model_directory = './models/rasa_bert/nlpcc_pooled'
        interpreter = Interpreter.load(model_directory)

    query = "播放一首歌"
    while query != "Stop":
        print(interpreter.parse(query))
        query = input("input query: (insert Stop to close)\n")
    print('intent classifier close')

def sentiment_analyzer(is_train):
    if is_train:
        training_data = load_data('./data/sentiment_analyzer/trainset.json')
        config_file = './sample_configs/config_bert_sentiment.yml'
        ModelConfig = config.load(config_file)
        trainer = Trainer(ModelConfig)
        interpreter = trainer.train(training_data)
    else:
        model_directory = './models/sentiment/sentiment_demo'
        interpreter = Interpreter.load(model_directory)

    query = "今天好开心呀"
    while query != "Stop":
        print(interpreter.parse(query))
        query = input("input query: (insert Stop to close)\n")
    print('sentiment_analyzer close')

def en_spell_checker_model(is_train):
    if is_train:
        training_data = load_data('./data/examples/rasa/demo-rasa.json')
        config_file = './sample_configs/config_bert_spell_checker_en.yml'
        ModelConfig = config.load(config_file)
        trainer = Trainer(ModelConfig)
        interpreter = trainer.train(training_data)
    else:
        model_directory = './models/spell_checker/rasa_bert_spell_checker_en'
        interpreter = Interpreter.load(model_directory)

    query = "How old aer you?"
    while query != "Stop":
        print(interpreter.parse(query))
        query = input("input query: (insert Stop to close)\n")
    print('spell_checker close')

def NER(is_train):
    if is_train:
        training_data = load_data('./data/ner/bert_ner_train.json')
        config_file = './sample_configs/config_bert_ner.yml'
        ModelConfig = config.load(config_file)
        trainer = Trainer(ModelConfig)
        interpreter = trainer.train(training_data)
    else:
        model_directory = './models/rasa_bert/ner_demo'
        interpreter = Interpreter.load(model_directory)

    query = "这是中国领导人首次在哈佛大学发表演讲。"
    while query != "Stop":
        print(interpreter.parse(query))
        query = input("input query: (insert Stop to close)\n")
    print('Ner close')

def test_demo_data(filename):
    td = training_data.load_data(filename)
    assert td.intents == {"affirm", "greet", "restaurant_search", "goodbye"}
    assert td.entities == {"location", "cuisine"}
    assert len(td.training_examples) == 42
    assert len(td.intent_examples) == 42
    assert len(td.entity_examples) == 11
    assert td.entity_synonyms == {'Chines': 'chinese',
                                  'Chinese': 'chinese',
                                  'chines': 'chinese',
                                  'vegg': 'vegetarian',
                                  'veggie': 'vegetarian'}
    assert td.regex_features == [{"name": "greet", "pattern": r"hey[^\s]*"},
                                 {"name": "zipcode", "pattern": r"[0-9]{5}"}]

def do_train(config,  # type: RasaNLUConfig
             component_builder=None  # type: Optional[ComponentBuilder]
             ):
    # type: (...) -> Tuple[Trainer, Interpreter, Text]
    """Loads the trainer and the data and runs the training of the model."""

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(config, component_builder)
    persistor = create_persistor(config)
    training_data = load_data(config['data'], config['language'])
    interpreter = trainer.train(training_data)
    persisted_path = trainer.persist(config['path'], persistor,
                                     config['project'],
                                     config['fixed_model_name'])
    return trainer, interpreter, persisted_path

def test_interpreter(pipeline_template, component_builder):
    test_data = "data/examples/rasa/demo-rasa.json"
    _conf = utilities.base_test_conf(pipeline_template)
    _conf["data"] = test_data
    td = training_data.load_data(test_data)
    interpreter = utilities.interpreter_for(component_builder, _conf)

    texts = ["good bye", "i am looking for an indian spot"]

    for text in texts:
        result = interpreter.parse(text, time=None)
        assert result['text'] == text
        assert (not result['intent']['name'] or
                result['intent']['name'] in td.intents)
        assert result['intent']['confidence'] >= 0
        # Ensure the model doesn't detect entity types that are not present
        # Models on our test data set are not stable enough to require
        # the exact entities to be found
        for entity in result['entities']:
            assert entity['entity'] in td.entities

def test_dialogflow_data():
    td = training_data.load_data('data/examples/dialogflow/')
    assert len(td.entity_examples) == 5
    assert len(td.intent_examples) == 24
    assert len(td.training_examples) == 24
    assert len(td.lookup_tables) == 2
    assert td.intents == {"affirm", "goodbye", "hi", "inform"}
    assert td.entities == {"cuisine", "location"}
    non_trivial_synonyms = {k: v for k, v in td.entity_synonyms.items()
                            if k != v}
    assert non_trivial_synonyms == {"mexico": "mexican",
                                    "china": "chinese",
                                    "india": "indian"}
    # The lookup table order is not deterministic across machines,
    # hence the comparison as sets
    assert {td.lookup_tables[0]['name'],
            td.lookup_tables[1]['name']} == {'location', 'cuisine'}
    assert {len(td.lookup_tables[0]['elements']),
            len(td.lookup_tables[1]['elements'])} == {4, 6}

def test_run_cv_evaluation():
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    nlu_config = config.load("sample_configs/config_spacy.yml")

    n_folds = 2
    results, entity_results = run_cv_evaluation(td, n_folds, nlu_config)

    assert len(results.train["Accuracy"]) == n_folds
    assert len(results.train["Precision"]) == n_folds
    assert len(results.train["F1-score"]) == n_folds
    assert len(results.test["Accuracy"]) == n_folds
    assert len(results.test["Precision"]) == n_folds
    assert len(results.test["F1-score"]) == n_folds
    assert len(entity_results.train['ner_crf']["Accuracy"]) == n_folds
    assert len(entity_results.train['ner_crf']["Precision"]) == n_folds
    assert len(entity_results.train['ner_crf']["F1-score"]) == n_folds
    assert len(entity_results.test['ner_crf']["Accuracy"]) == n_folds
    assert len(entity_results.test['ner_crf']["Precision"]) == n_folds
    assert len(entity_results.test['ner_crf']["F1-score"]) == n_folds

def test_entities_synonyms():
    data = u"""
{
  "rasa_nlu_data": {
    "entity_synonyms": [
      {
        "value": "nyc",
        "synonyms": ["New York City", "nyc", "the big apple"]
      }
    ],
    "common_examples" : [
      {
        "text": "show me flights to New York City",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 32,
            "value": "NYC"
          }
        ]
      },
      {
        "text": "show me flights to nyc",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 22,
            "value": "nyc"
          }
        ]
      }
    ]
  }
}"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = training_data.load_data(f.name)
        assert td.entity_synonyms["New York City"] == "nyc"

def test_spacy_featurizer_casing(spacy_nlp):
    from rasa_nlu.featurizers import spacy_featurizer

    # if this starts failing for the default model, we should think about
    # removing the lower casing the spacy nlp component does when it
    # retrieves vectors. For compressed spacy models (e.g. models
    # ending in _sm) this test will most likely fail.

    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    for e in td.intent_examples:
        doc = spacy_nlp(e.text)
        doc_capitalized = spacy_nlp(e.text.capitalize())

        vecs = spacy_featurizer.features_for_doc(doc)
        vecs_capitalized = spacy_featurizer.features_for_doc(doc_capitalized)

        assert np.allclose(vecs, vecs_capitalized, atol=1e-5), \
            "Vectors are unequal for texts '{}' and '{}'".format(
                e.text, e.text.capitalize())

def main():
    parser = create_argument_parser()
    cmdline_args = parser.parse_args()
    utils.configure_colored_logging(cmdline_args.loglevel)

    if cmdline_args.mode == "crossvalidation":

        # TODO: move parsing into sub parser
        # manual check argument dependency
        if cmdline_args.model is not None:
            parser.error("Crossvalidation will train a new model "
                         "- do not specify external model.")

        if cmdline_args.config is None:
            parser.error("Crossvalidation will train a new model "
                         "- you need to specify a model configuration.")

        nlu_config = config.load(cmdline_args.config)
        data = training_data.load_data(cmdline_args.data)
        data = drop_intents_below_freq(data, cutoff=5)
        results, entity_results = cross_validate(
            data, int(cmdline_args.folds), nlu_config)
        logger.info("CV evaluation (n={})".format(cmdline_args.folds))

        if any(results):
            logger.info("Intent evaluation results")
            return_results(results.train, "train")
            return_results(results.test, "test")
        if any(entity_results):
            logger.info("Entity evaluation results")
            return_entity_results(entity_results.train, "train")
            return_entity_results(entity_results.test, "test")

    elif cmdline_args.mode == "evaluation":
        run_evaluation(cmdline_args.data,
                       cmdline_args.model,
                       cmdline_args.report,
                       cmdline_args.successes,
                       cmdline_args.errors,
                       cmdline_args.confmat,
                       cmdline_args.histogram)

    logger.info("Finished evaluation")