def intent_pooled_model(is_train):
    """Train or load the pooled BERT intent classifier and run a prompt loop.

    :param is_train: when truthy a model is trained from the NLPCC intent
        data; otherwise a stored model is loaded from
        ``./models/rasa_bert/nlpcc_pooled``.
    """
    if not is_train:
        interpreter = Interpreter.load('./models/rasa_bert/nlpcc_pooled')
    else:
        examples = load_data('./data/nlpcc_intent/rasa_nlpcc_train.json')
        model_config = config.load(
            './sample_configs/config_bert_intent_classifier_pooled.yml')
        nlu_trainer = Trainer(model_config)
        interpreter = nlu_trainer.train(examples)

    # The built-in sample query is parsed first; afterwards every line typed
    # by the user is parsed until "Stop" is entered.
    text = "播放一首歌"
    while text != "Stop":
        print(interpreter.parse(text))
        text = input("input query: (insert Stop to close)\n")
    print('intent classifier close')
def train_nlu(project='simpletalk'):
    """Train the NLU model of a clofusbot project and persist it.

    :param project: name of the project folder below ``clofusbot/projects/``.
    :return: the directory returned by ``Trainer.persist`` for the model
        stored under the fixed name ``"current"``.
    """
    # Function-scope imports: rasa_nlu is only imported when this is called.
    from rasa_nlu.training_data import load_data
    from rasa_nlu.model import Trainer
    from rasa_nlu import config

    # Every path used below lives under the same project folder.
    project_root = 'clofusbot/projects/' + project

    training_data = load_data(project_root + '/intents')
    trainer = Trainer(config.load(project_root + '/config_spacy.yml'))
    trainer.train(training_data)
    return trainer.persist(project_root + '/models/nlu/',
                           fixed_model_name="current")
def en_spell_checker_model(is_train):
    """Train or load the English BERT spell checker and run a prompt loop.

    :param is_train: when truthy a model is trained from the demo data;
        otherwise a stored model is loaded from
        ``./models/spell_checker/rasa_bert_spell_checker_en``.
    """
    if is_train:
        corpus = load_data('./data/examples/rasa/demo-rasa.json')
        pipeline_config = config.load(
            './sample_configs/config_bert_spell_checker_en.yml')
        spell_trainer = Trainer(pipeline_config)
        interpreter = spell_trainer.train(corpus)
    else:
        interpreter = Interpreter.load(
            './models/spell_checker/rasa_bert_spell_checker_en')

    # Parse the sample sentence, then keep parsing user input until "Stop".
    sentence = "How old aer you?"
    while True:
        print(interpreter.parse(sentence))
        sentence = input("input query: (insert Stop to close)\n")
        if sentence == "Stop":
            break
    print('spell_checker close')
def sentiment_analyzer(is_train):
    """Train or load the BERT sentiment analyzer and run a prompt loop.

    :param is_train: when truthy a model is trained from the sentiment
        training set; otherwise a stored model is loaded from
        ``./models/sentiment/sentiment_demo``.
    """
    if not is_train:
        stored_model = './models/sentiment/sentiment_demo'
        interpreter = Interpreter.load(stored_model)
    else:
        samples = load_data('./data/sentiment_analyzer/trainset.json')
        settings = config.load('./sample_configs/config_bert_sentiment.yml')
        interpreter = Trainer(settings).train(samples)

    # First the sample sentence, then whatever the user types, until "Stop".
    utterance = "今天好开心呀"
    while utterance != "Stop":
        print(interpreter.parse(utterance))
        utterance = input("input query: (insert Stop to close)\n")
    print('sentiment_analyzer close')
def NER(is_train):
    """Train or load the BERT NER model and run a prompt loop.

    :param is_train: when truthy a model is trained from the NER training
        file; otherwise a stored model is loaded from
        ``./models/rasa_bert/ner_demo``.
    """
    if is_train:
        ner_examples = load_data('./data/ner/bert_ner_train.json')
        ner_config = config.load('./sample_configs/config_bert_ner.yml')
        ner_trainer = Trainer(ner_config)
        interpreter = ner_trainer.train(ner_examples)
    else:
        interpreter = Interpreter.load('./models/rasa_bert/ner_demo')

    # Parse the sample sentence, then keep parsing user input until "Stop".
    line = "这是中国领导人首次在哈佛大学发表演讲。"
    while True:
        print(interpreter.parse(line))
        line = input("input query: (insert Stop to close)\n")
        if line == "Stop":
            break
    print('Ner close')
def run_cv_evaluation(data, n_folds, nlu_config):
    # type: (List[rasa_nlu.training_data.Message], int, RasaNLUConfig) -> Tuple[Dict[Text, List[float]], Dict[Text, List[float]]]
    """Stratified cross validation on data.

    For every fold a model is trained on the training split, persisted under
    ``projects/``, loaded again and passed to ``compute_metrics`` together
    with the train split and then the test split.

    :param data: list of rasa_nlu.training_data.Message objects
    :param n_folds: integer, number of cv folds
    :param nlu_config: nlu config used to build the trainer and to load the
                       persisted model
    :return: named tuple ``Results(train, test)``; each field is a plain dict
             made from the ``defaultdict(list)`` accumulator that was handed
             to ``compute_metrics`` for that split
    """
    # namedtuple is imported here as well so the block does not rely on a
    # module-level import being present.
    from collections import defaultdict, namedtuple
    from sklearn.model_selection import StratifiedKFold

    trainer = Trainer(nlu_config)
    train_results = defaultdict(list)
    test_results = defaultdict(list)

    # The intent label of every example drives the stratified split.
    y_true = [e.get("intent") for e in data]
    skf = StratifiedKFold(n_splits=n_folds, random_state=11, shuffle=True)

    logger.info("Evaluation started")
    for fold, (train_index, test_index) in enumerate(skf.split(data, y_true),
                                                     start=1):
        train = [data[i] for i in train_index]
        test = [data[i] for i in test_index]

        logger.debug("Fold: {}".format(fold))
        logger.debug("Training ...")
        trainer.train(TrainingData(training_examples=train))
        # persist() returns the directory the model is stored in.
        # NOTE(review): persisted fold models are not removed afterwards.
        model_directory = trainer.persist("projects/")

        logger.debug("Evaluation ...")
        interpreter = Interpreter.load(model_directory, nlu_config)

        # Metrics on the training split, then on the held-out split.
        compute_metrics(interpreter, train, train_results)
        compute_metrics(interpreter, test, test_results)

    Results = namedtuple('Results', 'train test')
    return Results(dict(train_results), dict(test_results))
def train_qa_nlu(path_prefix, slash, time_str):
    """Train the QA NLU model and persist it under a time-stamped name.

    :param path_prefix: base path prepended as-is to every location, so it
        has to end with a path separator already.
    :param slash: separator string placed between path components.
    :param time_str: suffix of the persisted model name (``qa_<time_str>``).
    :return: the directory the model is stored in, as returned by
        ``Trainer.persist``.
    """
    training_data = load_data(path_prefix + 'training' + slash + 'd.json')
    trainer = Trainer(config.load(path_prefix + "nlu_config.yml"))
    trainer.train(training_data)
    return trainer.persist(path=path_prefix + 'old_models' + slash + 'nlu',
                           project_name='qa',
                           fixed_model_name='qa' + "_" + time_str)
def __init__(self, training_data_file="training_data.json", config_file="training_config.json"):
    """Train an interpreter and register the supported intent handlers.

    :param training_data_file: path of the NLU training data.
    :param config_file: path of the NLU configuration passed to
        ``config.load``.
    """
    examples = load_data(training_data_file)
    nlu_trainer = Trainer(config.load(config_file))
    self.interpreter = nlu_trainer.train(examples)
    self.confidence_threshold = 0.7

    # All handlers are constructed with the same context dictionary.
    shared_context = {'confidence_threshold': self.confidence_threshold}
    handler_classes = (
        ("greet", intent.HelloIntent),
        ("get_time", intent.GetTimeIntent),
        ("ask_joke", intent.JokeIntent),
        ("unknown", intent.UnKnownIntent),
    )
    self.intents = {
        name: handler(self, shared_context)
        for name, handler in handler_classes
    }
def train_model():
    """Train a model and time the individual stages.

    Loading the training data, training and persisting are each measured as
    the difference of two ``time()`` calls. Building the trainer and
    ``clear_model_dir()`` are not included in any measurement.

    :return: tuple ``(td_load_time, train_time, persist_time)``.
    """
    load_start = time()
    training_data = load_data("data/company_train_lookup.json")
    td_load_time = time() - load_start

    trainer = Trainer(config.load("config.yaml"))

    train_start = time()
    trainer.train(training_data)
    train_time = time() - train_start

    clear_model_dir()

    persist_start = time()
    # The returned model directory is not needed here; only the duration is.
    trainer.persist("./tmp/models")
    persist_time = time() - persist_start

    return td_load_time, train_time, persist_time
def call():
    """Train the pipeline from ``./rasa_nlu_api/`` and persist it as ``model``.

    :return: the directory returned by ``Trainer.persist``.
    """
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.components import ComponentBuilder
    from rasa_nlu.model import Trainer

    api_dir = './rasa_nlu_api/'

    component_builder = ComponentBuilder(use_cache=True)
    examples = load_data(api_dir + 'dataset.json')
    pipeline_config = config.load(api_dir + "config_pipeline.yml")
    nlu_trainer = Trainer(pipeline_config, component_builder)
    nlu_trainer.train(examples)
    stored_at = nlu_trainer.persist(api_dir, fixed_model_name="model")
    print('done')
    return stored_at
def train(request):
    """Train the demo NLU model, parse ``"hello"`` and return the result.

    :param request: the incoming request object; it is not inspected.
    :return: ``HttpResponse`` constructed from the parse result.
    """
    from rasa_nlu.converters import load_data
    from rasa_nlu.config import RasaNLUConfig
    from rasa_nlu.model import Interpreter, Trainer

    training_data = load_data('data/demo_rasa.json')
    trainer = Trainer(RasaNLUConfig("nlu_model_config.json"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu/',
                                      fixed_model_name="current")

    # Load the model that was just persisted. Previously train_nlu() was
    # called here only to obtain a model path, which trained a second model
    # and left the one above unused.
    interpreter = Interpreter.load(model_directory)

    # Named `parsed` rather than `json` so the stdlib module name is not
    # shadowed inside this function.
    parsed = interpreter.parse("hello")
    print(parsed)
    # NOTE(review): `parsed` is handed to HttpResponse unchanged, as before.
    # If it is a dict the response body may not be the JSON document a
    # client expects - confirm and serialize explicitly if needed.
    return HttpResponse(parsed)
def train_nlu():
    """Delete the existing NLU model folder and train a fresh model.

    Data file, config file, model path and project name all come from the
    ``botpath`` module. Progress is reported with ``print``.
    """
    print("=> Importing rasa_nlu...")
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    # Start from a clean slate; a missing folder is not an error.
    shutil.rmtree(botpath.NLU_MODEL_PATH, ignore_errors=True)

    print("=> Training NLU...%s - %s" % (botpath.NLU_DATA_FILE,
                                         botpath.CONFIG_FILE))
    examples = load_data(botpath.NLU_DATA_FILE)
    nlu_trainer = Trainer(config.load(botpath.CONFIG_FILE),
                          skip_validation=True)
    nlu_trainer.train(examples)

    print("=> Saving Result...%s" % botpath.NLU_MODEL_PATH)
    nlu_trainer.persist(botpath.NLU_MODEL_PATH,
                        fixed_model_name=botpath.PROJECT)
def __init__(self):
    """Build the intent dispatch tables, then train and load the NLU model.

    The model is trained from ``robocup_spr.json`` with ``config_spacy.yml``
    (both in the ``script`` folder of the ``wm_nlu`` ROS package), persisted
    to ``default_spr/`` in the same folder and loaded into
    ``self.interpreter``.
    """
    # Maps an intent name to the method registered for it.
    self.intent_functions = {
        'arena_where_located': self.arena_where_located,
        'arena_count_object': self.arena_count_object,
        'crowd_count': self.crowd_count,
        'crowd_person_gesture': self.crowd_person_gesture,
        'arena_color_object': self.arena_color_object,
        'arena_which_object': self.arena_which_object,
        'object_category': self.object_category,
        'object_adjective': self.object_adjective,
    }

    # Maps words for persons to "female", "male" or "" (for "people").
    self.personType = {
        "girl": "female",
        "girls": "female",
        "women": "female",
        "woman": "female",
        "females": "female",
        "female": "female",
        "men": "male",
        "male": "male",
        "males": "male",
        "boys": "male",
        "boy": "male",
        "man": "male",
        "people": "",
    }

    # NOTE: this replaces sys.stderr for the whole process; the stream is
    # neither restored nor closed in this method.
    sys.stderr = open(os.devnull, 'w')

    # Load the config files
    print("Loading config files...")
    rospack = rospkg.RosPack()
    # Resolve the package location once; all three paths share this folder.
    script_dir = rospack.get_path('wm_nlu') + "/script"
    training_data = load_data(script_dir + "/robocup_spr.json")
    trainer = Trainer(config.load(script_dir + "/config_spacy.yml"))

    # Train the model based on the robocup_spr.json file
    print("Training the model...")
    trainer.train(training_data)

    # persist() returns the directory the model is stored in
    model_directory = trainer.persist(script_dir + '/default_spr/')

    print("Loading the model...")
    self.interpreter = Interpreter.load(model_directory)
    print("RasaNLU init done.")
def retrain_using_new_info(answer, intent='OK', synonym=None):
    """Add a parsed answer to the training data and rebuild the model.

    The answer is copied once per key of ``states``; in each copy the state
    mentioned in the answer is replaced by that key (lower-cased). The copies
    are appended to the training file, the model is retrained, persisted as
    ``my_model`` and a fresh interpreter is returned.

    Note: ``answer`` is modified in place (its ``intent`` is flattened to a
    name and ``intent_ranking`` is removed).

    :param answer: parse result with ``text``, ``intent`` and ``entities``.
    :param intent: intent name to store; ``'OK'`` keeps the parsed intent.
    :param synonym: optional entity-synonym entry to append to the training
        data; nothing is added when it is empty or ``None``.
    :return: interpreter loaded from the newly persisted model.
    """
    data_path = '../Codes and More/question_training_data2.json'
    config_path = "../Codes and More/config_spacy.json"

    entities = answer['entities']
    print(intent)
    if intent == 'OK':
        answer['intent'] = answer['intent']['name']
    else:
        answer['intent'] = intent
    # Tolerate answers that carry no ranking instead of raising KeyError.
    answer.pop('intent_ranking', None)

    with open(data_path) as json_train:
        train = json.load(json_train)

    # Value of the first 'state' entity. The fallback is a placeholder that
    # presumably never occurs in a text, so the replace below is a no-op.
    curr_state = next(
        (e['value'].lower() for e in entities if e['entity'] == 'state'),
        'asdiyvls')

    examples = train['rasa_nlu_data']['common_examples']
    for s in states.keys():
        state_name = s.lower()
        t = deepcopy(answer)
        t['text'] = t['text'].lower().replace(curr_state, state_name)
        # NOTE(review): 'start'/'end' are copied unchanged even though the
        # substituted state name may have a different length - confirm that
        # downstream training tolerates this.
        t['entities'] = [{
            'value': j['value'].replace(curr_state, state_name),
            'entity': j['entity'],
            'start': j['start'],
            'end': j['end']
        } for j in t['entities']]
        examples.append(t)

    if synonym:
        train['rasa_nlu_data']['entity_synonyms'].append(synonym)
        print('*** New Synonym Added')

    with open(data_path, 'w') as json_data:
        json.dump(train, json_data)

    print('*** Re-Creating the Models')
    training_data = load_data(data_path)
    trainer = Trainer(RasaNLUConfig(config_path))
    trainer.train(training_data)
    model_directory = trainer.persist('../Codes and More/',
                                      fixed_model_name='my_model')

    print('*** Building Interpreter')
    interpreter = Interpreter.load(model_directory,
                                   RasaNLUConfig(config_path))
    print('--- DONE ---')
    return interpreter
def run_trial(space):
    """Run one hyperparameter trial and report its loss.

    Per the original note, the objective function is pickled and transferred
    to the workers.

    Settings are read from the environment: ``DATA_DIRECTORY`` (default
    ``./data``), ``MODEL_DIRECTORY`` (default ``./models``) and
    ``TARGET_METRIC`` (default ``f1_score``). The file
    ``template_config.yml`` in the data directory is filled in with the
    values of ``space`` via ``str.format``.

    :param space: dict of sampled parameters; an ``"epochs"`` entry is
        converted to ``int`` in place.
    :return: dict with ``"loss"`` and ``"status"``; loss ``1`` and
        ``STATUS_FAIL`` when the metric is unknown or training/evaluation
        raises.
    """
    data_dir = os.environ.get("DATA_DIRECTORY", "./data")
    model_dir = os.environ.get("MODEL_DIRECTORY", "./models")
    target_metric = os.environ.get("TARGET_METRIC", "f1_score")

    if target_metric not in AVAILABLE_METRICS:
        logger.error("The metric '{}' is not in the available metrics. "
                     "Please use one of the available metrics: {}."
                     "".format(target_metric, AVAILABLE_METRICS))
        return {"loss": 1, "status": STATUS_FAIL}

    logger.debug("Search space: {}".format(space))

    # The epoch has to be an int since `tqdm` otherwise will cause an
    # exception.
    if "epochs" in space:
        space["epochs"] = int(space["epochs"])

    with open(os.path.join(data_dir, "template_config.yml")) as f:
        config_yml = f.read().format(**space)
    # Local name chosen so a module called `config` is not shadowed.
    nlu_config = RasaNLUModelConfig(read_yaml(config_yml))

    trainer = Trainer(nlu_config)
    training_data = load_data(os.path.join(data_dir, "train.md"))
    test_data_path = os.path.join(data_dir, "validation.md")

    # wrap in train and eval in try/except in case
    # nlu_hyperopt proposes invalid combination of params
    try:
        model = trainer.train(training_data)
        model_path = trainer.persist(model_dir)

        # target_metric always has a value here (the environment lookup has
        # a default), so only the metric name needs to be checked.
        if target_metric == "threshold_loss":
            loss = _get_threshold_loss(model, test_data_path)
        else:
            loss = _get_nlu_evaluation_loss(model_path,
                                            target_metric,
                                            test_data_path)
        return {"loss": loss, "status": STATUS_OK}
    except Exception as e:
        # logger.exception keeps the message and adds the traceback.
        logger.exception(e)
        return {"loss": 1, "status": STATUS_FAIL}
def do_train(config,  # type: RasaNLUConfig
             component_builder=None  # type: Optional[ComponentBuilder]
             ):
    # type: (...) -> Tuple[Trainer, Interpreter, Text]
    """Build a trainer, train on the configured data and persist the model.

    Returns the trainer, the interpreter produced by training and the path
    returned by ``Trainer.persist``.
    """

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained concurrently somewhere else
    nlu_trainer = Trainer(config, component_builder)
    storage = create_persistor(config)

    examples = load_data(config['data'], config['language'])
    trained_interpreter = nlu_trainer.train(examples)

    stored_path = nlu_trainer.persist(config['path'],
                                      storage,
                                      config['project'],
                                      config['fixed_model_name'])
    return nlu_trainer, trained_interpreter, stored_path
def train_nlu(train_path, test_path, configs, model_path):
    """Train and persist an NLU model, then print its intent evaluation.

    :param train_path: location of the training data.
    :param test_path: location of the data passed to ``run_evaluation``.
    :param configs: location of the pipeline configuration.
    :param model_path: base directory for the persisted model.
    """
    logging.basicConfig(filename=logfile, level=logging.DEBUG)

    examples = load_data(train_path)
    nlu_trainer = Trainer(config.load(configs))
    nlu_trainer.train(examples)
    stored_at = nlu_trainer.persist(model_path,
                                    project_name='current',
                                    fixed_model_name='nlu')

    result = run_evaluation(test_path, stored_at)

    # One line per prediction: text, predicted intent and confidence.
    for item in result['intent_evaluation']['predictions']:
        print('{}:{}-{}'.format(item['text'], item['intent'],
                                item['confidence']))

    # Summary metrics, printed with their fixed-width labels.
    for label, key in (('Acc', 'accuracy'),
                       ('F1 ', 'f1_score'),
                       ('Pre', 'precision')):
        print('{}: {}'.format(label, result['intent_evaluation'][key]))
def train_nlu():
    """Regenerate the training data and train the NLU model.

    Calls ``convert.read_excel()``, converts ``data/nlu.md`` to JSON inside
    ``nludata/``, trains on that folder and persists the model as
    ``current`` under ``models/nlu/``.

    :return: the directory returned by ``Trainer.persist``.
    """
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer
    from rasa_nlu.convert import convert_training_data
    import convert

    # Refresh the source data before converting it for training.
    convert.read_excel()
    convert_training_data('data/nlu.md',
                          'nludata/train_data_md.json',
                          'json',
                          'ay')

    examples = load_data('nludata/')
    nlu_trainer = Trainer(config.load("nlu_model_config.yml"))
    nlu_trainer.train(examples)
    return nlu_trainer.persist('models/nlu/', fixed_model_name="current")
def __init__(self, training_data_file = "./data/shopping-list/rasa/shopping-list-small.json", config_file = "./config/shopping-list/config_spacy.json"):
    """Train the interpreter and set up an empty shopping list with handlers.

    :param training_data_file: path of the NLU training data.
    :param config_file: path of the configuration given to ``RasaNLUConfig``.
    """
    examples = load_data(training_data_file)
    nlu_trainer = Trainer(RasaNLUConfig(config_file))
    self.interpreter = nlu_trainer.train(examples)
    self.shopping_list = {}

    # Each handler is built with this object, its own intent name and the
    # shared context dictionary.
    context = {'confidence_threshold': 0.8}
    handler_classes = (
        ("greet", HelloIntent),
        ("add_item", AddItemsIntent),
        ("clear_list", ClearListIntent),
        ("show_items", ShowItemsIntent),
        ("_num_items", ShowStatsIntent),
    )
    self.intents = {
        name: handler(self, name, context)
        for name, handler in handler_classes
    }
def startTraining(self):
    """Train on ``self.train_data_path`` and persist the model.

    The model is stored under ``./projects/default/`` with the fixed name
    ``Neo4jNlu``; the returned directory is kept in ``self.model_directory``.
    """
    print("training")
    examples = load_data(self.train_data_path)
    nlu_trainer = Trainer(config.load(self.config_path))
    nlu_trainer.train(examples)
    self.model_directory = nlu_trainer.persist('./projects/default/',
                                               fixed_model_name='Neo4jNlu')


# Example usage (kept from the original, disabled):
# if __name__ == '__main__':
#     training_data = "./nlu.json"
#     conf_path = "./nlu_config.yml"
#     train = trainNluModel(training_data, conf_path)
#
#     # start training
#     train.startTraining()
def cross_validate(data: TrainingData, n_folds: int,
                   nlu_config: Union[RasaNLUModelConfig, Text]
                   ) -> CVEvaluationResult:
    """Stratified cross validation on data.

    Args:
        data: Training Data
        n_folds: integer, number of cv folds
        nlu_config: nlu config object, or the path of a config file which is
            then loaded with ``config.load``

    Returns:
        A pair of ``CVEvaluationResult`` objects: the first holds the intent
        results, the second the entity results. Each is built from a train
        and a test dict filled by ``combine_intent_result`` /
        ``combine_entity_result`` on every fold.
        NOTE(review): the return annotation names a single
        ``CVEvaluationResult`` although a 2-tuple is returned.
    """
    from collections import defaultdict

    if isinstance(nlu_config, str):
        nlu_config = config.load(nlu_config)

    trainer = Trainer(nlu_config)
    train_results = defaultdict(list)
    test_results = defaultdict(list)
    entity_train_results = defaultdict(lambda: defaultdict(list))
    entity_test_results = defaultdict(lambda: defaultdict(list))

    # The freshly trained interpreter is used directly, so nothing has to be
    # persisted and no temporary directory is required.
    for train, test in generate_folds(n_folds, data):
        interpreter = trainer.train(train)

        # intent results on the train and the test split
        train_results = combine_intent_result(train_results, interpreter,
                                              train)
        test_results = combine_intent_result(test_results, interpreter,
                                             test)

        # entity results on the train and the test split
        entity_train_results = combine_entity_result(entity_train_results,
                                                     interpreter, train)
        entity_test_results = combine_entity_result(entity_test_results,
                                                    interpreter, test)

    return (CVEvaluationResult(dict(train_results), dict(test_results)),
            CVEvaluationResult(dict(entity_train_results),
                               dict(entity_test_results)))
def train_nlu_mode(nlu_train_file="/nlu_train_data/testData.json",
                   nlu_config_file="/nlu_model/config_spacy.yml",
                   nlu_persist_dir="/nlu_model",
                   nlu_model_name="evanlu"):
    """Train an NLU model and persist it under a fixed name.

    Every path argument is passed through ``add_cur_dir`` before use.

    :param nlu_train_file: training data file.
    :param nlu_config_file: pipeline configuration file.
    :param nlu_persist_dir: directory the model is persisted to.
    :param nlu_model_name: fixed name of the persisted model.
    :return: the directory returned by ``Trainer.persist``.
    """
    # will cache components between pipelines (where possible)
    builder = ComponentBuilder(use_cache=True)

    training_data = load_data(add_cur_dir(nlu_train_file))
    trainer = Trainer(config.load(add_cur_dir(nlu_config_file)), builder)
    trainer.train(training_data)
    return trainer.persist(add_cur_dir(nlu_persist_dir),
                           fixed_model_name=nlu_model_name)
def train_nlu():
    """Train and persist the NLU model described by the config file.

    Project name, fixed model name, output path, thread count and data
    location are all read from ``config/nlu_model_config.yml``.

    :return: the directory returned by ``Trainer.persist``.
    """
    from rasa_nlu.training_data import load_data
    from rasa_nlu.model import Trainer
    from rasa_nlu import config

    settings = config.load("config/nlu_model_config.yml")
    project_name = settings.get("project")
    model_name = settings.get("fixed_model_name")
    output_path = settings.get("path")
    thread_count = settings.get('num_threads')
    data_location = str(settings.get("data"))

    examples = load_data(data_location)
    nlu_trainer = Trainer(settings)
    nlu_trainer.train(examples, num_threads=thread_count)
    return nlu_trainer.persist(path=output_path,
                               project_name=project_name,
                               fixed_model_name=model_name)
def train_nlu(aggregated=False):
    """Train the NLU module and persist it as ``current`` in ``models/nlu/``.

    aggregated -- bool - when false the model is trained on
                  ``data/train_rasa.json``; when true it is trained on
                  ``data/aggregated.json``.

    Returns the directory returned by ``Trainer.persist``.
    """
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    dataset_name = "aggregated" if aggregated else "train_rasa"
    examples = load_data("data/" + dataset_name + ".json")

    nlu_trainer = Trainer(config.load("nlu_model_config.yml"))
    nlu_trainer.train(examples)
    stored_at = nlu_trainer.persist('models/nlu/',
                                    fixed_model_name="current")
    return stored_at
def train_nlu(project='Lambton'):
    """Train the NLU model and persist it under the project's name.

    Training data and pipeline config come from fixed paths; ``project`` is
    only used as the fixed model name below ``./NLU/models/``.

    :param project: name under which the model is persisted.
    :return: the directory returned by ``Trainer.persist``.
    """
    from rasa_nlu.training_data import load_data
    from rasa_nlu.model import Trainer
    from rasa_nlu import config

    training_data = load_data('Lambton/data/nuRobot-data.json')
    print("*** Training data :" + str(training_data.intents))

    trainer = Trainer(config.load('./NLU/config_spacy.json'))
    print("*** Config :" + str(trainer.config))

    trainer.train(training_data)
    return trainer.persist('./NLU/models/', fixed_model_name=project)
def train_nlu_model(self):
    """Train, persist and load the NLU model unless one already exists.

    Nothing is done when ``self.model_directory`` already exists on disk.
    NOTE(review): on that early-return path neither ``self.interpreter`` nor
    ``self.nlu_model`` is assigned here - confirm they are set elsewhere.
    """
    already_trained = os.path.exists(self.model_directory)
    if already_trained:
        return

    # Load the NLU training samples and build the pipeline trainer.
    samples = load_data(self.nlu_data_filename)
    pipeline_trainer = Trainer(config.load(self.config_filename))

    # Training yields the interpreter for the new model.
    self.interpreter = pipeline_trainer.train(samples)

    # Persist the model for future use and load it back from disk.
    self.model_directory = pipeline_trainer.persist(
        self.nlu_model_dirname,
        fixed_model_name=self.nlu_model_name)
    self.nlu_model = Interpreter.load(self.model_directory)
def train_nlu(project='Lambton'):
    """Train the MITIE-based NLU model of a project and persist it.

    :param project: name of the project folder below
        ``../Chatbots/projects/``.
    :return: the directory returned by ``Trainer.persist`` for the model
        stored under the fixed name ``"current"``.
    """
    from rasa_nlu.training_data import load_data
    from rasa_nlu.model import Trainer
    from rasa_nlu import config

    project_root = '../Chatbots/projects/' + project

    training_data = load_data(project_root + '/intents')
    # The original kept commented-out alternatives using config_spacy.yml,
    # config_spacy_tensor.yml and config_tensorflow.yml from the same
    # folder; the MITIE pipeline is the one in use.
    trainer = Trainer(config.load(project_root + '/config_mitie.yml'))
    trainer.train(training_data)
    return trainer.persist(project_root + '/models/nlu/',
                           fixed_model_name="current")
def auto_trainer():
    """Train a model, upload its files and run the accuracy check.

    Trains on ``training_data.json`` with ``config_mitie.json``, persists the
    model below ``./models``, uploads every entry of the model folder to the
    ``grassroot-nlu`` bucket under ``models/<model folder>/``, removes the
    training file and finally calls ``accuracy_check`` with the model
    directory.
    """
    print('\n\n')
    training_data = load_data('training_data.json')
    trainer = Trainer(RasaNLUConfig('config_mitie.json'))
    trainer.train(training_data)
    model_directory = trainer.persist('./models') + '/'

    # The last path component is the model folder name. Taking it with
    # os.path keeps the upload key correct even if the text 'model_' is
    # absent from the path or appears earlier in it.
    model_name = os.path.basename(os.path.normpath(model_directory))

    for file_name in os.listdir(model_directory):
        client.upload_file(model_directory + file_name,
                           'grassroot-nlu',
                           'models/' + model_name + '/' + file_name)
    print('model upload successful')

    os.remove('training_data.json')
    accuracy_check(dir=model_directory)
def run_cv_evaluation(td, n_folds, nlu_config):
    # type: (TrainingData, int, RasaNLUConfig) -> Tuple[CVEvaluationResult, CVEvaluationResult]
    """Stratified cross validation on data.

    For every fold a model is trained, persisted into a temporary directory,
    loaded again and evaluated on the train and the test split.

    :param td: Training Data
    :param n_folds: integer, number of cv folds
    :param nlu_config: nlu config used for training and for loading the
                       persisted model
    :return: pair of ``CVEvaluationResult`` objects, intent results first and
             entity results second; each is built from a train and a test
             dict filled by ``combine_intent_result`` /
             ``combine_entity_result`` on every fold
    """
    from collections import defaultdict
    import shutil
    import tempfile

    trainer = Trainer(nlu_config)
    train_results = defaultdict(list)
    test_results = defaultdict(list)
    entity_train_results = defaultdict(lambda: defaultdict(list))
    entity_test_results = defaultdict(lambda: defaultdict(list))

    tmp_dir = tempfile.mkdtemp()
    try:
        for train, test in generate_folds(n_folds, td):
            trainer.train(train)
            model_dir = trainer.persist(tmp_dir)
            interpreter = Interpreter.load(model_dir, nlu_config)

            # intent results on the train and the test split
            train_results = combine_intent_result(train_results,
                                                  interpreter, train)
            test_results = combine_intent_result(test_results,
                                                 interpreter, test)

            # entity results on the train and the test split
            entity_train_results = combine_entity_result(
                entity_train_results, interpreter, train)
            entity_test_results = combine_entity_result(
                entity_test_results, interpreter, test)

            # Drop this fold's model right away so folds do not pile up.
            utils.remove_model(model_dir)
    finally:
        # Remove the whole temporary tree even when a fold raised, without
        # depending on the name of the project sub-folder.
        shutil.rmtree(tmp_dir, ignore_errors=True)

    return (CVEvaluationResult(dict(train_results), dict(test_results)),
            CVEvaluationResult(dict(entity_train_results),
                               dict(entity_test_results)))
def train_nlu(self, force=False):
    """Retrain the NLU model when requested or when it is out of date.

    Training runs only if NLU is not disabled (``self.disable_nlu`` is not
    ``"yes"``) and at least one of ``force``, ``self.isNluModified()`` or
    ``self.isNluModelMissing()`` is true.

    :param force: train even when the model is neither modified nor missing.
    :return: the directory returned by ``Trainer.persist`` when a model was
        trained, otherwise ``None``.
    """
    if self.disable_nlu == "yes":
        return None

    # Query the state once and reuse it for the log lines and the decision.
    modified = self.isNluModified()
    missing = self.isNluModelMissing()
    print("TRY NLU TRAIN {} {} {}".format(force, modified, missing))

    if not (force or modified or missing):
        return None

    print("NLU TRAIN {} {} {}".format(force, modified, missing))
    from rasa_nlu.converters import load_data
    from rasa_nlu.config import RasaNLUConfig
    from rasa_nlu.model import Trainer

    training_data = load_data(self.nlu_training_file)
    trainer = Trainer(RasaNLUConfig(self.config_file))
    trainer.train(training_data)
    model_directory = trainer.persist(self.nlu_model_path,
                                      fixed_model_name="current")
    return model_directory