def test_training_data_conversion(tmpdir, data_file, gold_standard_file,
                                  output_format):
    out_path = tmpdir.join("mynlu_data.json")
    convert_training_data(data_file, out_path.strpath, output_format)
    td = load_data(out_path.strpath)
    assert td.entity_examples != []
    assert td.intent_examples != []

    gold_standard = load_data(gold_standard_file)
    cmp_message_list(td.entity_examples, gold_standard.entity_examples)
    cmp_message_list(td.intent_examples, gold_standard.intent_examples)
    assert td.entity_synonyms == gold_standard.entity_synonyms

    # convert the converted file back to the original file format and
    # perform the same tests
    rto_path = tmpdir.join("data_in_original_format.txt")
    convert_training_data(out_path.strpath, rto_path.strpath, 'json')
    rto = load_data(rto_path.strpath)
    cmp_message_list(gold_standard.entity_examples, rto.entity_examples)
    cmp_message_list(gold_standard.intent_examples, rto.intent_examples)
    assert gold_standard.entity_synonyms == rto.entity_synonyms
def train(cfg_name, model_name):
    # trains a model from the given config file and persists it under
    # "test_models" so that other tests can load it
    from mynlu.cli.train import create_persistor
    from mynlu.converters import load_data

    config = MyNLUConfig(cfg_name)
    trainer = Trainer(config)
    training_data = load_data(config['data'])
    trainer.train(training_data)
    persistor = create_persistor(config)
    trainer.persist("test_models", persistor, model_name=model_name)
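# A minimal usage sketch for the train() helper above; the config file path
# and model name here are assumptions for illustration, not fixtures taken
# from this repository.
def test_train_and_persist_model():
    train("sample_configs/config_spacy.json", model_name="test_model_spacy")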
def test_markdown_data():
    td = load_data('data/examples/mynlu/demo-mynlu.md')
    assert len(td.sorted_entity_examples()) >= len(
            [e for e in td.entity_examples if e.get("entities")])
    assert len(td.sorted_intent_examples()) == len(td.intent_examples)
    assert td.entity_synonyms == {
        u'Chines': u'chinese',
        u'Chinese': u'chinese',
        u'chines': u'chinese',
        u'vegg': u'vegetarian',
        u'veggie': u'vegetarian'
    }
def test_entities_synonyms():
    import tempfile

    data = u"""
{
  "mynlu_data": {
    "entity_synonyms": [
      {
        "value": "nyc",
        "synonyms": ["New York City", "nyc", "the big apple"]
      }
    ],
    "common_examples": [
      {
        "text": "show me flights to New York City",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 32,
            "value": "NYC"
          }
        ]
      },
      {
        "text": "show me flights to nyc",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 22,
            "value": "nyc"
          }
        ]
      }
    ]
  }
}"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = load_data(f.name)
        # the synonym lookup maps each surface form to its canonical value
        assert td.entity_synonyms["New York City"] == "nyc"
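# For illustration only: entity_synonyms maps surface forms to a canonical
# value, so a consumer can normalize extracted entities with a lookup like
# the one below. replace_synonyms is a hypothetical helper sketched here,
# not part of the mynlu API.
def replace_synonyms(entities, synonyms):
    # rewrite each entity value to its canonical form, if a synonym is known
    for entity in entities:
        if entity["value"] in synonyms:
            entity["value"] = synonyms[entity["value"]]
    return entities


def test_replace_synonyms_example():
    entities = [{"entity": "destination", "value": "New York City"}]
    synonyms = {"New York City": "nyc", "the big apple": "nyc"}
    assert replace_synonyms(entities, synonyms)[0]["value"] == "nyc"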
"used to insert example messages into the graph") return parser if __name__ == '__main__': parser = create_argparser() args = parser.parse_args() logging.basicConfig(level="DEBUG") domain = TemplateDomain.load(args.domain) story_steps = StoryFileReader.read_from_file(args.stories, domain) # this is optional, only needed if the `_greet` type of # messages in the stories should be replaced with actual # messages (e.g. `hello`) if args.nlu_data is not None: from mynlu.converters import load_data nlu_data = load_data(args.nlu_data) else: nlu_data = None logger.info("Starting to visualize stories...") visualize_stories(story_steps, args.output, args.max_history, training_data=nlu_data) logger.info("Finished graph creation. Saved into {}".format( os.path.abspath(args.output)))
def test_api_data():
    td = load_data('data/examples/api/')
    assert td.entity_examples != []
    assert td.intent_examples != []
    assert td.entity_synonyms != {}


def test_wit_data():
    td = load_data('data/examples/wit/demo-flights.json')
    assert td.entity_examples != []
    assert td.intent_examples != []
    assert td.entity_synonyms == {}


def test_luis_data():
    td = load_data('data/examples/luis/demo-restaurants.json')
    assert td.entity_examples != []
    assert td.intent_examples != []
    assert td.entity_synonyms == {}