async def test_random_seed(component_builder, tmpdir):
    """Train twice with one fixed seed and expect identical confidences.

    Both runs share a single config. Each run persists into its own
    directory (the ``tmpdir`` path suffixed with ``_a`` / ``_b``) and is
    loaded back from disk before the message ``hello`` is parsed.
    """
    _config = utilities.base_test_conf("supervised_embeddings")
    # Pin the random seed of the embedding intent classifier
    # (pipeline component 6) to 1.
    _config.set_component_attr(6, random_seed=1)

    persisted_paths = []
    for run_suffix in ("_a", "_b"):
        _, _, persisted_path = await train(
            _config,
            path=tmpdir.strpath + run_suffix,
            data=DEFAULT_DATA_PATH,
            component_builder=component_builder,
        )
        persisted_paths.append(persisted_path)

    # Load both models before parsing with either of them.
    interpreters = [
        Interpreter.load(persisted_path, component_builder)
        for persisted_path in persisted_paths
    ]
    confidence_a, confidence_b = [
        interpreter.parse("hello")["intent"]["confidence"]
        for interpreter in interpreters
    ]

    assert confidence_a == confidence_b
def __init__(self, **kwargs):
    """Load the NLU interpreter named by the ``model`` keyword argument.

    ``model`` may be a model directory or a ``.tar.gz`` archive. An archive
    is unpacked with ``unpack_model`` and loaded from its ``nlu``
    sub-directory when that exists, otherwise from the unpacked root. After
    loading, one ``parse("ok")`` call is made, then the unpacked copy and
    any ``tmp*.py`` files found under ``/tmp/`` are removed on a best-effort
    basis (cleanup failures are logged, not raised).

    When no ``model`` is given, ``self.interpreter`` is set to ``None``.

    Raises:
        Exception: whatever loading or the first parse raised. The error is
            logged and ``model_path`` is removed before it is re-raised.
    """
    model_path = kwargs.get("model", None)
    try:
        if model_path is None:
            self.interpreter = None
            return

        logger.info("Loading model " + model_path)
        # Only an unpacked archive sets this. It is bound up front so the
        # cleanup below never hits an unbound local for directory models
        # (which previously aborted the whole cleanup with a NameError).
        model = None
        if os.path.isdir(model_path):
            self.interpreter = Interpreter.load(model_path)
        elif model_path.endswith(".tar.gz"):
            model = unpack_model(model_path)
            if os.path.isdir(model + "/nlu"):
                self.interpreter = Interpreter.load(model + "/nlu")
            else:
                self.interpreter = Interpreter.load(model)
        self.interpreter.parse("ok")

        # Cleanup tmp files and directories.
        try:
            if model is not None:
                remove_file_or_dir(model)
            tmp_dir = "/tmp/"
            for root, _dirs, files in os.walk(tmp_dir):
                for file_name in files:
                    if file_name.startswith("tmp") and file_name.endswith(".py"):
                        # Join with the directory currently being walked so a
                        # match in a sub-directory resolves to the file that
                        # was actually found, not to /tmp/<name>.
                        remove_file_or_dir(os.path.join(root, file_name))
        except Exception:
            logger.error("Exception when cleanup tmp files and directories.", exc_info=True)
    except Exception as e:
        logger.error("Error when loading model {0}, exception {1}".format(model_path, e), exc_info=True)
        remove_file_or_dir(model_path)
        raise
def on_message_user_msg(client, userdata, msg):
    """Handle one user message: a model-load request or text to parse.

    A payload containing ``NLP Up`` is treated as a request to load the
    model named after the first comma of the payload. Any other payload is
    parsed with the current interpreter and the result is published on
    ``cait/nlpResponse`` as JSON with ``topic``, ``confidence`` and
    ``entities`` keys.

    Args:
        client: not used by this handler.
        userdata: not used by this handler.
        msg: message object whose ``payload`` bytes are decoded as text.

    Raises:
        Exception: whatever ``Interpreter.load`` raises; the load-state
            globals are restored before the error propagates.
    """
    global nlpUp
    global interpreter
    global client_response
    global loading_model
    global heartbeat_thread
    global current_model

    data = msg.payload.decode()

    if data.find("NLP Up") == -1:
        # Plain text: parse it and publish the detected intent.
        logging.info("Data: " + data)
        intent = interpreter.parse(data)
        intent_data = {"topic": intent['intent']['name'],
                       "confidence": intent['intent']['confidence'],
                       "entities": intent['entities']}
        intent_data = json.dumps(intent_data)
        logging.info("Intent: " + str(intent_data))
        client_response.publish("cait/nlpResponse", intent_data, qos=0)
        return

    if loading_model:
        # A load is already in progress; ignore further load requests.
        return

    logging.info("User Message: " + data)
    # Everything after the first comma is the model name (the whole payload
    # when there is no comma, because find() returns -1).
    model = data[data.find(",")+1:]
    if current_model != model:
        previous_model = current_model
        previous_up = nlpUp
        current_model = model
        nlpUp = False
        loading_model = True
        logging.info("Loading " + model + " model now...")
        try:
            interpreter = Interpreter.load('/nlp_module/models/' + model)
        except Exception:
            # A failed load must not leave the flags stuck: otherwise
            # loading_model stays True and current_model already names the
            # broken model, so no later "NLP Up" request could ever load.
            current_model = previous_model
            nlpUp = previous_up
            loading_model = False
            raise
        logging.info("Done Loading nlu model!")
        nlpUp = True
        loading_model = False

    if heartbeat_thread is None:
        heartbeat_thread = threading.Thread(target=heartbeat_func, daemon=True)
        heartbeat_thread.start()
def load_interpreter(model_path):
    """Unpack a gzipped model archive and return the interpreter loaded from it.

    The archive is extracted into ``./models/unpack`` and the interpreter is
    loaded from its ``nlu`` sub-directory. The unpack directory is removed
    afterwards, whether or not extraction and loading succeeded.

    Args:
        model_path: path to a ``.tar.gz`` model archive.

    Returns:
        The object returned by ``Interpreter.load``.
    """
    unpack_dir = "./models/unpack"
    try:
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(model_path, "r:gz") as tar:
            # NOTE(review): extractall trusts member paths inside the archive;
            # only use this with archives from a trusted source.
            tar.extractall(path=unpack_dir)
        return Interpreter.load("./models/unpack/nlu")
    finally:
        # Always drop the unpacked copy. Errors are ignored so a failed
        # cleanup cannot hide the original exception from extract or load.
        shutil.rmtree(unpack_dir, ignore_errors=True)
def predict(domain, locale, userUtterance):
    """Parse ``userUtterance`` with the ``<domain>_<locale>`` model.

    The model is loaded from ``models/nlu/<domain>_<locale>`` relative to
    ``scriptDir``. The module-level ``dataFile`` is pointed at the matching
    intents markdown file before ``getUtterance`` is called (presumably
    ``getUtterance`` reads it -- confirm against its definition).

    Args:
        domain: domain part of the model / training-data name.
        locale: locale part of the model / training-data name.
        userUtterance: text to classify.

    Returns:
        A JSON string with ``intent`` (top ranked intent, or ``null`` when
        the ranking is empty), ``entities``, ``intent_ranking`` (up to three
        entries with ``name``, ``confidence`` formatted to two decimals, and
        ``utterance``) and ``text``.
    """
    import json  # local import: the block serializes with json instead of str()

    global dataFile
    model_root = os.path.join(scriptDir, '..', '..', 'models', 'nlu')
    dataFile = os.path.join(scriptDir, '..', '..', '..', 'trainingData', 'intents', domain + '_' + locale + '.md')
    model_name = domain + '_' + locale

    interpreter = Interpreter.load(os.path.join(model_root, model_name))
    data = interpreter.parse(userUtterance)

    # Slice instead of indexing [0], [1], [2] so a ranking with fewer than
    # three intents no longer raises IndexError.
    intent_ranking_ = [
        {
            "name": ranked['name'],
            "confidence": "{:.2f}".format(ranked['confidence']),
            "utterance": getUtterance(ranked['name']),
        }
        for ranked in data['intent_ranking'][:3]
    ]

    if intent_ranking_:
        best = intent_ranking_[0]
        intent_top_ = {"name": best["name"], "confidence": best["confidence"]}
    else:
        intent_top_ = None

    # build JSON response
    response = {
        'intent': intent_top_,
        'entities': data['entities'],
        'intent_ranking': intent_ranking_,
        'text': data['text'],
    }
    log_util.loginfomsg(f"[PREDICT_NLU] prediction: {response}")

    # json.dumps keeps the same key order and separators as the previous
    # str(dict) approach but stays valid JSON when values contain
    # apostrophes, None or booleans. ensure_ascii=False keeps non-ASCII
    # text unescaped, as before.
    return json.dumps(response, ensure_ascii=False)
async def test_train_persist_load_with_composite_entities(
    component_builder: ComponentBuilder, tmp_path: Path
):
    """Train a tokenizer + CRF pipeline, reload it and print both parse results.

    Only the pipelines are asserted on. The parse results of the in-memory
    and the reloaded model are printed for inspection, not compared.
    """
    _config = RasaNLUModelConfig(
        {
            "pipeline": pipeline_from_components(
                "WhitespaceTokenizer", "CRFEntityExtractor"
            ),
            "language": "en",
        }
    )

    trainer, trained, persisted_path = await train(
        _config,
        path=str(tmp_path),
        data="../../../data/test/demo-rasa-composite-entities.md",
        component_builder=component_builder,
    )
    assert trainer.pipeline
    assert trained.pipeline

    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline

    text = "i'm looking for a flight to Berlin"

    # Result of the model still held in memory.
    print("--------------------------------------------------")
    print(trained.parse(text))
    print("++++++++++++++++++++++++++++++++++++++++++++++++++")
    # Result of the model loaded back from disk.
    print("++++++++++++++++++++++++++++++++++++++++++++++++++")
    print(loaded.parse(text))
    print("--------------------------------------------------")
def test_parse():
    """Load the NLU part of the default model and print the parse of a fixed message.

    The model is resolved from ``DEFAULT_MODELS_PATH``. When no model, or no
    NLU sub-model, can be found, an error is printed and the process exits
    with status 1.
    """
    # ``exit`` is injected by the ``site`` module for interactive sessions and
    # is not guaranteed to exist; ``sys.exit`` is the supported way to stop.
    import sys

    model = get_validated_path(None, "model", DEFAULT_MODELS_PATH)
    model_path = get_model(model)
    if not model_path:
        print_error("No model found. Train a model before running the "
                    "server using `rasa train nlu`.")
        sys.exit(1)

    _, nlu_model = get_model_subdirectories(model_path)
    if not os.path.exists(nlu_model):
        print_error("No NLU model found. Train a model before running the "
                    "server using `rasa train nlu`.")
        sys.exit(1)

    print("model_path is {},nlu_model is {}".format(model_path, nlu_model))
    print("please input your text to parse")
    # The message is fixed rather than read from stdin.
    message = "身高170体重140"

    # NOTE(review): ``component_builder`` is not defined in this function;
    # presumably a module-level object -- confirm.
    interpreter = Interpreter.load(nlu_model, component_builder)
    result = interpreter.parse(message)
    print(json.dumps(result, indent=2))
def test():
    """Print the intent accuracy of a trained model on a tab-separated test file.

    Each usable line of the test file is ``text<TAB>label``; lines without a
    tab are skipped. Every text is parsed with the interpreter loaded from
    ``model_dir`` and counted as correct when the predicted intent name
    equals the label. The fraction of correct predictions is printed
    (``0.0`` when the file yields no examples).
    """
    test_data = '/home/li/code/en_intent/en_intent/en_intent/599109455/test.txt'
    test_list = []
    with open(test_data, encoding="utf-8") as f:
        lines = f.readlines()
        for line in tqdm(lines):
            # Split once; a line needs at least text and label columns.
            fields = line.split('\t')
            if len(fields) > 1:
                test_list.append((fields[0].strip(), fields[1].strip()))

    model_dir = './model/raw_data-dcnn/nlu/'
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # Load the trained model and create the Interpreter used for prediction.
    interpreter = Interpreter.load(model_dir)

    corr_num = 0
    all_num = len(test_list)
    for print_num, (text, label) in enumerate(tqdm(test_list), start=1):
        # Extra progress marker every 200 examples.
        if print_num % 200 == 0:
            print(print_num)
        result = interpreter.parse(text)
        # Treat a missing intent as "no prediction" instead of crashing.
        intent = result.get('intent') or {}
        if intent.get('name') == str(label):
            corr_num += 1

    # Guard against an empty test set, which previously divided by zero.
    accuracy = corr_num / all_num if all_num else 0.0
    print(accuracy)
async def test_train_model_no_training_data_persisted(
    component_builder, tmpdir, nlu_as_json_path: Text
):
    """With ``persist_nlu_training_data=False`` the metadata has no training data."""
    keyword_pipeline = [{"name": "KeywordIntentClassifier"}]
    _config = RasaNLUModelConfig({"pipeline": keyword_pipeline, "language": "en"})

    trained, _, persisted_path = await rasa.nlu.train.train(
        _config,
        path=tmpdir.strpath,
        data=nlu_as_json_path,
        component_builder=component_builder,
        persist_nlu_training_data=False,
    )
    assert trained.pipeline

    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    # Nothing was persisted, so the loaded metadata carries no entry for it.
    assert loaded.model_metadata.get("training_data") is None
def interpret_message(self, user_input):
    """Parse ``user_input`` with the registration-form NLU model.

    On first use the model directory is located by descending through
    nested ``<tag_registration_form>`` directories below
    ``self.model_folder`` until one contains ``metadata.json``. The result
    is remembered in ``self.model_path`` / ``self.model_path_found``.

    Args:
        user_input: text to interpret.

    Returns:
        The parse result of the interpreter.

    Raises:
        Exception: when locating, loading or parsing fails. The original
            error is attached as the cause.
    """

    def get_model_path():
        # Descend one nested level at a time until metadata.json shows up.
        m_path = f'{self.model_folder}/{u.tag_registration_form}'
        json_file = 'metadata.json'
        while not path.exists(f'{m_path}/{json_file}'):
            # Stop once there is no directory left to descend into. Without
            # this check the loop never ends when metadata.json is missing.
            # (assumes ``path`` is ``os.path`` -- confirm against the imports)
            if not path.isdir(m_path):
                raise FileNotFoundError(
                    f'{json_file} not found below {self.model_folder}')
            m_path = f'{m_path}/{u.tag_registration_form}'
        return m_path

    try:
        if not self.model_path_found:
            self.model_path = get_model_path()
            self.model_path_found = True
        interpreter = Interpreter.load(self.model_path)
        latest_message = interpreter.parse(user_input)
        if u.DEBUG:
            print(
                f"interpret_message - message: \n{latest_message['intent']}"
            )
        return latest_message
    except Exception as err:
        # ``except Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit pass through untouched.
        problem = (
            "A problem occured while a registration form bot tries to interprete the input <<{}>>"
            .format(user_input))
        print(problem)
        raise Exception(problem) from err
async def test_train_persist_load_with_composite_entities(
    classifier_params, component_builder, tmpdir
):
    """A persisted and reloaded DIET pipeline parses like the in-memory one."""
    pipeline = as_pipeline(
        "WhitespaceTokenizer", "CountVectorsFeaturizer", "DIETClassifier"
    )
    # Apply the parametrized settings to the classifier component.
    diet_component = pipeline[2]
    assert diet_component["name"] == "DIETClassifier"
    diet_component.update(classifier_params)

    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": "en"})
    trainer, trained, persisted_path = await train(
        _config,
        path=tmpdir.strpath,
        data="data/test/demo-rasa-composite-entities.md",
        component_builder=component_builder,
    )
    assert trainer.pipeline
    assert trained.pipeline

    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline

    text = "I am looking for an italian restaurant"
    loaded_result = loaded.parse(text)
    trained_result = trained.parse(text)
    assert loaded_result == trained_result
async def interpreter_for(component_builder, data, path, config):
    """Train with ``config`` on ``data`` and return the interpreter loaded from disk.

    The third element of the ``train`` result is used as the location to
    load the interpreter from.
    """
    _, _, persisted_path = await train(
        config, data, path, component_builder=component_builder
    )
    return Interpreter.load(persisted_path, component_builder)
def run_evaluation(
    data_path: Text,
    model_path: Text,
    out_directory: Optional[Text] = None,
    report: Optional[Text] = None,
    successes: Optional[Text] = None,
    errors: Optional[Text] = "errors.json",
    confmat: Optional[Text] = None,
    histogram: Optional[Text] = None,
    component_builder: Optional[ComponentBuilder] = None,
) -> Dict:  # pragma: no cover
    """
    Evaluate intent classification and entity extraction.

    :param data_path: path to the test data
    :param model_path: path to the model
    :param out_directory: path to folder where all output will be stored
    :param report: path to folder where reports are stored
    :param successes: path to file that will contain success cases
    :param errors: path to file that will contain error cases
    :param confmat: path to file that will show the confusion matrix
    :param histogram: path to file that will show a histogram
    :param component_builder: component builder
    :return: dictionary with the keys ``intent_evaluation`` and
        ``entity_evaluation``; a value stays ``None`` when there were no
        results of that kind to evaluate
    """
    # Load the model and replace its pipeline with the result of
    # ``remove_pretrained_extractors``.
    interpreter = Interpreter.load(model_path, component_builder)
    interpreter.pipeline = remove_pretrained_extractors(interpreter.pipeline)

    # The test data is loaded in the language recorded in the model metadata.
    test_data = training_data.load_data(
        data_path, interpreter.model_metadata.language
    )

    # The report folder lives below the output directory when both are given.
    if report and out_directory:
        report = os.path.join(out_directory, report)
    if report:
        io_utils.create_directory(report)

    intent_results, entity_results = get_eval_data(interpreter, test_data)

    intent_evaluation = None
    if intent_results:
        logger.info("Intent evaluation results:")
        intent_evaluation = evaluate_intents(
            intent_results,
            report,
            successes,
            errors,
            confmat,
            histogram,
            out_directory,
        )

    entity_evaluation = None
    if entity_results:
        logger.info("Entity evaluation results:")
        extractors = get_entity_extractors(interpreter)
        entity_evaluation = evaluate_entities(
            entity_results, extractors, report, out_directory
        )

    result: Dict[Text, Optional[Dict]] = {
        "intent_evaluation": intent_evaluation,
        "entity_evaluation": entity_evaluation,
    }
    return result
async def _train_persist_load_with_different_settings(
    pipeline: List[Dict[Text, Any]],
    component_builder: ComponentBuilder,
    tmp_path: Path,
    should_finetune: bool,
):
    """Train ``pipeline``, reload it and compare parse results.

    When ``should_finetune`` is true the training config is passed to
    ``Interpreter.load`` as ``new_config``; otherwise ``None`` is passed.
    """
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": "en"})

    trainer, trained, persisted_path = await rasa.nlu.train.train(
        _config,
        path=str(tmp_path),
        data="data/examples/rasa/demo-rasa-multi-intent.yml",
        component_builder=component_builder,
    )
    assert trainer.pipeline
    assert trained.pipeline

    if should_finetune:
        config_for_loading = _config
    else:
        config_for_loading = None
    loaded = Interpreter.load(
        persisted_path, component_builder, new_config=config_for_loading
    )
    assert loaded.pipeline

    loaded_result = loaded.parse("Rasa is great!")
    trained_result = trained.parse("Rasa is great!")
    assert loaded_result == trained_result
async def test_train_persist_load_with_composite_entities(
    component_builder: ComponentBuilder, tmp_path: Path
):
    """A persisted and reloaded tokenizer + CRF pipeline parses like the in-memory one."""
    component_names = ("WhitespaceTokenizer", "CRFEntityExtractor")
    pipeline = [{"name": component_name} for component_name in component_names]
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": "en"})

    trainer, trained, persisted_path = await rasa.nlu.train.train(
        _config,
        path=str(tmp_path),
        data="data/test/demo-rasa-composite-entities.yml",
        component_builder=component_builder,
    )
    assert trainer.pipeline
    assert trained.pipeline

    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline

    text = "I am looking for an italian restaurant"
    loaded_result = loaded.parse(text)
    trained_result = trained.parse(text)
    assert loaded_result == trained_result
async def test_train_persist_with_different_configurations(
    config_params: Dict[Text, Any], component_builder: ComponentBuilder, tmp_path: Path
):
    """Train a spaCy + CRF pipeline with extra CRF settings and check the result.

    The reloaded model must parse like the in-memory one and must detect
    exactly one entity, ``cuisine`` = ``italian``, in the sample sentence.
    """
    pipeline = pipeline_from_components(
        "SpacyNLP", "SpacyTokenizer", "CRFEntityExtractor"
    )
    # Apply the parametrized settings to the entity extractor component.
    crf_component = pipeline[2]
    assert crf_component["name"] == "CRFEntityExtractor"
    crf_component.update(config_params)

    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": "en"})
    trainer, trained, persisted_path = await train(
        _config,
        path=str(tmp_path),
        data="data/examples/rasa",
        component_builder=component_builder,
    )
    assert trainer.pipeline
    assert trained.pipeline

    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline

    text = "I am looking for an italian restaurant"
    loaded_result = loaded.parse(text)
    trained_result = trained.parse(text)
    assert loaded_result == trained_result

    detected_entities = loaded.parse(text).get(ENTITIES)
    assert len(detected_entities) == 1
    only_entity = detected_entities[0]
    assert only_entity["entity"] == "cuisine"
    assert only_entity["value"] == "italian"
def run_evaluation(
    data_path,
    model,
    report_folder=None,
    successes_filename=None,
    errors_filename="errors.json",
    confmat_filename=None,
    intent_hist_filename=None,
    component_builder=None,
) -> Dict:  # pragma: no cover
    """Evaluate intent classification and entity extraction.

    Args:
        data_path: path to the test data.
        model: an ``Interpreter`` instance, or something
            ``Interpreter.load`` accepts.
        report_folder: folder for reports; created when given.
        successes_filename: passed through to ``evaluate_intents``.
        errors_filename: passed through to ``evaluate_intents``.
        confmat_filename: passed through to ``evaluate_intents``.
        intent_hist_filename: passed through to ``evaluate_intents``.
        component_builder: used only when ``model`` has to be loaded.

    Returns:
        Dictionary with ``intent_evaluation`` and ``entity_evaluation``;
        a value stays ``None`` when that part was not evaluated.
    """
    # get the metadata config from the package data
    if isinstance(model, Interpreter):
        interpreter = model
    else:
        interpreter = Interpreter.load(model, component_builder)

    test_data = training_data.load_data(data_path, interpreter.model_metadata.language)
    extractors = get_entity_extractors(interpreter)
    has_intent_classifier = is_intent_classifier_present(interpreter)

    if has_intent_classifier:
        intent_targets = get_intent_targets(test_data)
    else:
        # One placeholder target per example. The list must be multiplied by
        # the NUMBER of examples; multiplying by the collection itself
        # raises TypeError.
        intent_targets = [None] * len(test_data.training_examples)

    intent_results, entity_predictions, tokens = get_predictions(
        interpreter, test_data, intent_targets)

    # Duckling output is excluded from the entity evaluation.
    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    result = {"intent_evaluation": None, "entity_evaluation": None}

    if report_folder:
        utils.create_dir(report_folder)

    if has_intent_classifier:
        logger.info("Intent evaluation results:")
        result["intent_evaluation"] = evaluate_intents(
            intent_results,
            report_folder,
            successes_filename,
            errors_filename,
            confmat_filename,
            intent_hist_filename,
        )

    if extractors:
        entity_targets = get_entity_targets(test_data)
        logger.info("Entity evaluation results:")
        result["entity_evaluation"] = evaluate_entities(
            entity_targets, entity_predictions, tokens, extractors, report_folder)

    return result
async def test_eval_data(
    component_builder: ComponentBuilder,
    tmp_path: Path,
    project: Text,
    unpacked_trained_rasa_model: Text,
):
    """``get_eval_data`` yields 46 intent, 0 response-selection and 46 entity results."""
    training_files = [
        "data/examples/rasa/demo-rasa.yml",
        "data/examples/rasa/demo-rasa-responses.yml",
    ]
    data_importer = TrainingDataImporter.load_nlu_importer_from_config(
        os.path.join(project, "config.yml"),
        training_data_paths=training_files,
    )

    # Only the NLU sub-directory of the unpacked model is needed.
    nlu_model_directory = rasa.model.get_model_subdirectories(
        unpacked_trained_rasa_model
    )[1]
    interpreter = Interpreter.load(nlu_model_directory, component_builder)

    data = await data_importer.get_nlu_data()
    intent_results, response_selection_results, entity_results = get_eval_data(
        interpreter, data
    )

    assert len(intent_results) == 46
    assert len(response_selection_results) == 0
    assert len(entity_results) == 46
async def _nlu_model_for_finetuning(
    model_to_finetune: Text,
    file_importer: TrainingDataImporter,
    finetuning_epoch_fraction: float = 1.0,
    called_from_combined_training: bool = False,
) -> Optional[Interpreter]:
    """Load the NLU part of an existing model so it can be finetuned.

    Args:
        model_to_finetune: passed to ``model.get_model_for_finetuning`` to
            resolve the model archive.
        file_importer: source of the new fingerprint and the config.
        finetuning_epoch_fraction: forwarded to ``Interpreter.load``.
        called_from_combined_training: forwarded to ``model.can_finetune``
            as its ``core`` argument.

    Returns:
        The loaded interpreter, or ``None`` when no archive was found or the
        loaded interpreter is falsy.
    """
    path_to_archive = model.get_model_for_finetuning(model_to_finetune)
    if not path_to_archive:
        return None

    rasa.shared.utils.cli.print_info(
        f"Loading NLU model from {path_to_archive} for finetuning..."
    )

    with model.unpack_model(path_to_archive) as unpacked:
        _, old_nlu = model.get_model_subdirectories(unpacked)

        # Compare the fingerprint of the new data with the stored one.
        new_fingerprint = await model.model_fingerprint(file_importer)
        old_fingerprint = model.fingerprint_from_path(unpacked)
        finetunable = model.can_finetune(
            old_fingerprint,
            new_fingerprint,
            nlu=True,
            core=called_from_combined_training,
        )
        if not finetunable:
            rasa.shared.utils.cli.print_error_and_exit(
                "NLU model can not be finetuned.")

        config = await file_importer.get_config()
        loaded_interpreter = Interpreter.load(
            old_nlu,
            new_config=config,
            finetuning_epoch_fraction=finetuning_epoch_fraction,
        )

    # A falsy interpreter is reported as None.
    return loaded_interpreter or None
async def test_softmax_normalization(
    component_builder,
    tmpdir,
    classifier_params,
    data_path,
    output_length,
    output_should_sum_to_1,
):
    """Check length and normalization of the intent ranking of the embedding classifier."""
    pipeline = as_pipeline(
        "WhitespaceTokenizer", "CountVectorsFeaturizer", "EmbeddingIntentClassifier"
    )
    # Apply the parametrized settings to the classifier component.
    classifier = pipeline[2]
    assert classifier["name"] == "EmbeddingIntentClassifier"
    classifier.update(classifier_params)

    _config = RasaNLUModelConfig({"pipeline": pipeline})
    _, _, persisted_path = await train(
        _config,
        path=tmpdir.strpath,
        data=data_path,
        component_builder=component_builder,
    )
    loaded = Interpreter.load(persisted_path, component_builder)

    parse_data = loaded.parse("hello")
    intent_ranking = parse_data.get("intent_ranking")

    # The ranking must be truncated to the expected length after normalization.
    assert len(intent_ranking) == output_length

    # The confidences sum to 1 exactly when normalization is expected.
    confidence_total = sum(
        [ranked.get("confidence") for ranked in intent_ranking]
    )
    output_sums_to_1 = confidence_total == pytest.approx(1)
    assert output_sums_to_1 == output_should_sum_to_1

    # The predicted intent must be the top entry of the ranking.
    assert parse_data.get("intent") == intent_ranking[0]
async def test_margin_loss_is_not_normalized(
    monkeypatch, component_builder, tmpdir, classifier_params, output_length
):
    """With the given classifier params ``train_utils.normalize`` is never called."""
    pipeline = as_pipeline(
        "WhitespaceTokenizer", "CountVectorsFeaturizer", "EmbeddingIntentClassifier"
    )
    # Apply the parametrized settings to the classifier component.
    classifier = pipeline[2]
    assert classifier["name"] == "EmbeddingIntentClassifier"
    classifier.update(classifier_params)

    # Replace normalize with a mock attribute so any call can be detected.
    recorder = Mock()
    monkeypatch.setattr(train_utils, "normalize", recorder.normalize)

    _config = RasaNLUModelConfig({"pipeline": pipeline})
    _, _, persisted_path = await train(
        _config,
        path=tmpdir.strpath,
        data="data/test/many_intents.md",
        component_builder=component_builder,
    )
    loaded = Interpreter.load(persisted_path, component_builder)

    parse_data = loaded.parse("hello")
    intent_ranking = parse_data.get("intent_ranking")

    # The output must not have gone through normalization.
    recorder.normalize.assert_not_called()

    # The ranking must still be truncated to the expected length.
    assert len(intent_ranking) == output_length

    # The predicted intent must be the top entry of the ranking.
    assert parse_data.get("intent") == intent_ranking[0]
async def test_train(component_builder, tmpdir):
    """Train a ConveRT pipeline, reload it and make sure it parses two messages."""
    component_names = (
        "ConveRTTokenizer",
        "CountVectorsFeaturizer",
        "ConveRTFeaturizer",
        "EmbeddingIntentClassifier",
    )
    pipeline = [{"name": component_name} for component_name in component_names]
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": "en"})

    trained, _, persisted_path = await train(
        _config,
        path=tmpdir.strpath,
        data=DEFAULT_DATA_PATH,
        component_builder=component_builder,
    )
    assert trained.pipeline

    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline

    for message in ("hello", "Hello today is Monday, again!"):
        assert loaded.parse(message) is not None
async def test_train(component_builder, tmpdir):
    """Train a ConveRT pipeline on multi-intent data, reload it and parse two messages.

    The tokenizer is configured with intent tokenization enabled and ``+``
    as the intent split symbol.
    """
    tokenizer = {
        "name": "ConveRTTokenizer",
        "intent_tokenization_flag": True,
        "intent_split_symbol": "+",
    }
    remaining_names = (
        "CountVectorsFeaturizer",
        "ConveRTFeaturizer",
        "EmbeddingIntentClassifier",
    )
    pipeline = [tokenizer] + [
        {"name": component_name} for component_name in remaining_names
    ]
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": "en"})

    trained, _, persisted_path = await train(
        _config,
        path=tmpdir.strpath,
        data="data/examples/rasa/demo-rasa-multi-intent.md",
        component_builder=component_builder,
    )
    assert trained.pipeline

    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline

    for message in ("hello", "Hello today is Monday, again!"):
        assert loaded.parse(message) is not None
async def test_train_persist_load_with_composite_entities(
    classifier_params, component_builder, tmpdir
):
    """Train a MITIE + Jieba pipeline, reload it, print and compare the parse results.

    The ``strpath`` of ``tmpdir`` is overwritten, so the model is persisted
    to ``../models`` rather than to the temporary directory.
    """
    tmpdir.strpath = "../models"

    pipeline = as_pipeline(
        "MitieNLP",
        "JiebaTokenizer",
        "MitieEntityExtractor",
        "MitieFeaturizer",
        "SklearnIntentClassifier",
    )
    # Apply the parametrized settings to the intent classifier component.
    intent_classifier = pipeline[4]
    assert intent_classifier["name"] == "SklearnIntentClassifier"
    intent_classifier.update(classifier_params)

    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": "cn"})
    trainer, trained, persisted_path = await train(
        _config,
        path=tmpdir.strpath,
        data="../../../data/test/demo-rasa-zh.json",
        component_builder=component_builder,
    )
    assert trainer.pipeline
    assert trained.pipeline

    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline

    text = "感冒了怎么办"

    # Result of the model still held in memory.
    print("--------------------------------------------------")
    print(trained.parse(text))
    print("++++++++++++++++++++++++++++++++++++++++++++++++++")
    # Result of the model loaded back from disk.
    print("++++++++++++++++++++++++++++++++++++++++++++++++++")
    print(loaded.parse(text))
    print("--------------------------------------------------")

    loaded_result = loaded.parse(text)
    trained_result = trained.parse(text)
    assert loaded_result == trained_result
def rasa_result():
    """Parse the text of a JSON request body with the model it names.

    The body must be a JSON object with ``q`` (the text to parse) and
    ``model`` (a model name appended to ``model_path1``).

    Returns:
        The parse result of the loaded interpreter.

    Raises:
        ValueError: when the model name contains a ``..`` path component.
    """
    comment = request.get_data()
    json_data = json.loads(comment.decode())
    text = json_data["q"]
    model_name = json_data["model"]

    # The model name comes straight from the request body and is turned into
    # a filesystem path, so refuse ".." components that would step outside
    # the model folder.
    if ".." in model_name.replace("\\", "/").split("/"):
        raise ValueError("invalid model name: {!r}".format(model_name))

    model_path = model_path1 + model_name
    # NOTE(review): ``component_builder`` is not defined in this function;
    # presumably a module-level object -- confirm.
    loaded = Interpreter.load(model_path, component_builder)
    print("++++++++++++++++++++++++++++++++++++++++++++++++++")
    return loaded.parse(text)
def test_load_and_persist_without_train(language, pipeline, component_builder, tmpdir):
    """A trainer persisted without any training can be loaded and used for parsing."""
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
    trainer = Trainer(_config, component_builder)

    # Persist right away: no train() call happens before this.
    persisted_path = trainer.persist(tmpdir.strpath, create_persistor(_config))

    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline

    for message in ("hello", "Hello today is Monday, again!"):
        assert loaded.parse(message) is not None
def run_cmdline(model_path, component_builder=None):
    """Read messages from standard input and print their parse results as JSON.

    Args:
        model_path: location of the model handed to ``Interpreter.load``.
        component_builder: optional builder handed to ``Interpreter.load``.

    The loop ends when standard input reaches end of file.
    """
    interpreter = Interpreter.load(model_path, component_builder)

    logger.info("NLU model loaded. Type a message and press enter to parse it.")
    while True:
        try:
            text = input().strip()
        except EOFError:
            # End of input (closed or exhausted stdin): leave the loop
            # instead of ending the session with a traceback.
            break
        r = interpreter.parse(text)
        print(json.dumps(r, indent=2))
        logger.info("Next message:")
def __init__(self):
    """Create the modules, interpreter, database handle and per-user state.

    The statements run in a fixed order: ``lac`` hub module, confirmation
    interpreter, ``senta_gru`` hub module, database, intent model, then the
    two empty per-user dictionaries.
    """
    self.lac = hub.Module(name="lac")

    # The confirmation model lives in the "nlu" folder of its own intent directory.
    confirm_model_dir = "intent/{}/nlu".format(confirm_model_name)
    self.confirm_interpreter = Interpreter.load(confirm_model_dir)

    self.senta_gru = hub.Module(name="senta_gru")
    self.db_obj = Database(database_address, database_name)
    self.intent_model = judge_intent.Intent(intent_model_name)

    # Per-user bookkeeping; both start out empty.
    self.user_dict = {}
    self.user_timeout_recoder = {}
def _load_model(self, model_name):
    """Unpack ``models/<model_name>`` and return the interpreter for its NLU part.

    Only the ``Interpreter.load`` call runs while ``self.lock`` is held.
    """
    archive_path = os.path.join("models", model_name)

    # NOTE(review): the directory created by mkdtemp is not removed in this
    # method -- confirm whether something else cleans it up.
    unpack_target = tempfile.mkdtemp()
    unpacked_model = unpack_model(archive_path, unpack_target)
    nlu_model = get_model_subdirectories(unpacked_model)[1]

    with self.lock:
        return Interpreter.load(nlu_model, self.component_builder)
async def test_set_random_seed(component_builder, tmpdir, nlu_as_json_path: Text):
    """Train twice with one fixed seed and expect identical confidences.

    Both runs share a single config. Each run persists into its own
    directory (the ``tmpdir`` path suffixed with ``_a`` / ``_b``) and is
    loaded back from disk before the message ``hello`` is parsed.
    """
    # The classifier gets a fixed random seed and a single epoch.
    seeded_classifier = {"name": "DIETClassifier", RANDOM_SEED: 1, EPOCHS: 1}
    _config = RasaNLUModelConfig(
        {
            "pipeline": [
                {"name": "WhitespaceTokenizer"},
                {"name": "CountVectorsFeaturizer"},
                seeded_classifier,
            ],
            "language": "en",
        }
    )

    persisted_paths = []
    for run_suffix in ("_a", "_b"):
        _, _, persisted_path = await rasa.nlu.train.train(
            _config,
            path=tmpdir.strpath + run_suffix,
            data=nlu_as_json_path,
            component_builder=component_builder,
        )
        persisted_paths.append(persisted_path)

    # Load both models before parsing with either of them.
    interpreters = [
        Interpreter.load(persisted_path, component_builder)
        for persisted_path in persisted_paths
    ]
    confidence_a, confidence_b = [
        interpreter.parse("hello")["intent"]["confidence"]
        for interpreter in interpreters
    ]

    assert confidence_a == confidence_b