def test_training_data_conversion(tmpdir, data_file, gold_standard_file,
                                  output_format):
    """Convert ``data_file`` and compare the result with the gold standard.

    The data is first converted to ``output_format`` and checked against
    the data loaded from ``gold_standard_file``.  The converted file is then
    converted once more (to ``'json'``) and compared against the same gold
    standard.
    """
    converted_file = tmpdir.join("mynlu_data.json").strpath
    convert_training_data(data_file, converted_file, output_format)
    converted = load_data(converted_file)
    assert converted.entity_examples != []
    assert converted.intent_examples != []

    expected = load_data(gold_standard_file)
    cmp_message_list(converted.entity_examples, expected.entity_examples)
    cmp_message_list(converted.intent_examples, expected.intent_examples)
    assert converted.entity_synonyms == expected.entity_synonyms

    # Second pass: feed the converted file through the converter again and
    # run the same comparisons against the gold standard.
    round_trip_file = tmpdir.join("data_in_original_format.txt").strpath
    convert_training_data(converted_file, round_trip_file, 'json')
    round_trip = load_data(round_trip_file)
    cmp_message_list(expected.entity_examples, round_trip.entity_examples)
    cmp_message_list(expected.intent_examples, round_trip.intent_examples)
    assert expected.entity_synonyms == round_trip.entity_synonyms
# Example no. 2
# 0
    def train(cfg_name, model_name):
        """Train a model from the given config and persist it.

        ``cfg_name`` is passed to ``MyNLUConfig``; the training data is
        loaded from the config's ``'data'`` entry.  The trained model is
        persisted under ``"test_models"`` with the given ``model_name``.
        """
        from mynlu.cli.train import create_persistor
        from mynlu.converters import load_data

        nlu_config = MyNLUConfig(cfg_name)
        model_trainer = Trainer(nlu_config)

        model_trainer.train(load_data(nlu_config['data']))

        model_trainer.persist("test_models",
                              create_persistor(nlu_config),
                              model_name=model_name)
def test_markdown_data():
    """Check example counts and synonyms of the markdown demo data."""
    training_data = load_data('data/examples/mynlu/demo-mynlu.md')

    sorted_entity_count = len(training_data.sorted_entity_examples())
    examples_with_entities = [
        example for example in training_data.entity_examples
        if example.get("entities")
    ]
    assert sorted_entity_count >= len(examples_with_entities)

    sorted_intent_count = len(training_data.sorted_intent_examples())
    assert sorted_intent_count == len(training_data.intent_examples)

    expected_synonyms = {
        u'Chines': u'chinese',
        u'Chinese': u'chinese',
        u'chines': u'chinese',
        u'vegg': u'vegetarian',
        u'veggie': u'vegetarian'
    }
    assert training_data.entity_synonyms == expected_synonyms
def test_entities_synonyms():
    """Check that declared entity synonyms are exposed by the loaded data.

    The JSON payload is written to a temporary file which is then loaded by
    name.  The file is closed before it is loaded and removed explicitly
    afterwards: reopening a ``NamedTemporaryFile`` by name while it is
    still open is not possible on every platform (e.g. Windows).
    """
    import os

    data = u"""
{
  "mynlu_data": {
    "entity_synonyms": [
      {
        "value": "nyc",
        "synonyms": ["New York City", "nyc", "the big apple"]
      }
    ],
    "common_examples" : [
      {
        "text": "show me flights to New York City",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 32,
            "value": "NYC"
          }
        ]
      },
      {
        "text": "show me flights to nyc",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 22,
            "value": "nyc"
          }
        ]
      }
    ]
  }
}"""
    # delete=False so the file survives close() and can be reopened by name.
    f = tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json",
                                    delete=False)
    try:
        f.write(data.encode("utf-8"))
        # Closing flushes the content and releases the handle before the
        # loader opens the same path.
        f.close()
        td = load_data(f.name)
        assert td.entity_synonyms["New York City"] == "nyc"
    finally:
        # Closing again is harmless; ensures the handle is released even if
        # the write failed, then cleans up the file we asked to keep.
        f.close()
        os.remove(f.name)
                        "used to insert example messages into the graph")
    return parser


if __name__ == '__main__':
    # Script entry point: parse the command line, load the domain and the
    # stories, then hand everything to `visualize_stories`.
    parser = create_argparser()
    args = parser.parse_args()
    logging.basicConfig(level="DEBUG")

    domain = TemplateDomain.load(args.domain)
    story_steps = StoryFileReader.read_from_file(args.stories, domain)

    # NLU data is optional; it is only needed if the `_greet` type of
    # messages in the stories should be replaced with actual messages
    # (e.g. `hello`).  Without it, `None` is passed on.
    nlu_data = None
    if args.nlu_data is not None:
        from mynlu.converters import load_data

        nlu_data = load_data(args.nlu_data)

    logger.info("Starting to visualize stories...")
    visualize_stories(
        story_steps,
        args.output,
        args.max_history,
        training_data=nlu_data,
    )

    saved_to = os.path.abspath(args.output)
    logger.info("Finished graph creation. Saved into {}".format(saved_to))
def test_api_data():
    """The API example directory yields entity/intent examples and synonyms."""
    api_data = load_data('data/examples/api/')
    assert api_data.entity_examples != []
    assert api_data.intent_examples != []
    assert api_data.entity_synonyms != {}
def test_wit_data():
    """The wit demo file yields entity/intent examples and no synonyms."""
    wit_data = load_data('data/examples/wit/demo-flights.json')
    assert wit_data.entity_examples != []
    assert wit_data.intent_examples != []
    assert wit_data.entity_synonyms == {}
def test_luis_data():
    """The LUIS demo file yields entity/intent examples and no synonyms."""
    luis_data = load_data('data/examples/luis/demo-restaurants.json')
    assert luis_data.entity_examples != []
    assert luis_data.intent_examples != []
    assert luis_data.entity_synonyms == {}