# Training entry point: preprocess data, construct the model and trainer,
# then run the training loop. Assumes `args` / `model_args` were parsed
# earlier in this file and `tm` is the `time` module — confirm at file top.
load_start = tm.time()
schema = Schema(model_args.schema_path, None)
data_generator = get_data_generator(args, model_args, schema)
mappings = data_generator.mappings

# Vocab-only runs stop right after preprocessing has built the mappings.
if args.vocab_only:
    import sys
    sys.exit()

if args.verbose:
    print("Finished loading and pre-processing data, took {:.1f} seconds".format(tm.time() - load_start))

# TODO: load from checkpoint
ckpt = None

# Build the model and persist the run configuration next to it.
model = build_model(model_args, args, mappings, ckpt, model_path=args.agent_checkpoint)
tally_parameters(model)
create_path(args.model_path)
config_path = os.path.join(args.model_path, 'config.json')
write_json(vars(args), config_path)

builder = UtteranceBuilder(mappings['tgt_vocab'], 1, has_tgt=True)

# Build optimizer and trainer.
optim = build_optim(args, model, ckpt)
# vocab is used to make_loss, so use target vocab
trainer = build_trainer(args, model, mappings['tgt_vocab'], optim)
trainer.builder = builder

# Perform actual training.
trainer.learn(args, data_generator, report_func)
# Parse raw transcripts into annotated dialogues, extract response templates,
# train an n-gram intent manager, and sanity-check one retrieval from the
# start state. Assumes `args` was parsed earlier in this file.
examples = read_examples(args.transcripts, args.max_examples, Scenario)

templates = Templates()
# NOTE(review): lexicon entity list is hard-coded to the three object types —
# presumably the deal-or-no-deal items; confirm before reusing elsewhere.
lexicon = Lexicon(['ball', 'hat', 'book'])

# Each example parses to a sequence of utterances (with logical forms).
parsed_dialogues = [parse_example(example, lexicon, templates) for example in examples]

templates.finalize()
templates.save(args.templates_output)
templates.dump(n=10)

if args.transcripts_output:
    write_json([e.to_dict() for e in examples], args.transcripts_output)

# Train n-gram model over the per-dialogue intent sequences.
sequences = [[u.lf.intent for u in d] for d in parsed_dialogues]
manager = Manager.from_train(sequences)
manager.save(args.model_output)

generator = Generator(templates)
action = manager.choose_action(None, context=('<start>', '<start>'))
# FIX: original used Python 2 `print` statements, which are syntax errors on
# Python 3 and inconsistent with the print() calls used elsewhere in this file.
print(action)
print(generator.retrieve('<start>', context_tag='<start>', tag=action).template)