def main():
    """Train a bidirectional-GRU seq2seq model on E2E.

    Uses the inc-freq linearization strategy, lexicalized MRs, and the
    base+phrases training data. Reads the evaluation script path from the
    MRT_EVAL_SCRIPT environment variable.
    """
    # NOTE: the original passed the title positionally, which sets argparse's
    # `prog` (the displayed program name) rather than the description; it also
    # wrongly said "Viggo transformer" for this E2E bi-GRU script.
    parser = argparse.ArgumentParser(
        description="Train E2E bi-GRU model (ls=inc freq, da=base+phrases)")
    parser.add_argument("output_dir", type=Path, help="save directory")
    parser.add_argument("--seed", type=int, default=234222452)
    parser.add_argument("--layers", default=2, type=int, help="num layers")
    parser.add_argument("--ls", default=0.1, type=float, help="label smoothing")
    parser.add_argument("--wd", default=0.0, type=float, help="weight decay")
    parser.add_argument("--opt", choices=["adam", "sgd"], default="sgd",
                        help='optimizer')
    parser.add_argument("--lr", default=0.5, type=float, help="learning rate")
    parser.add_argument("--tie-embeddings", action="store_true",
                        help="share decoder input/output embeddings")
    parser.add_argument("--attn", choices=['bahdanau', 'luong-general'],
                        default='luong-general', help='attention type')
    parser.add_argument("--max-epochs", default=100, type=int,
                        help="max training epochs")
    parser.add_argument("--gpu", default=-1, type=int)
    parser.add_argument("--n-procs", default=2, type=int,
                        help="num data loader processes")
    parser.add_argument("--tr-batch-size", default=128, type=int,
                        help="training batch size")
    parser.add_argument("--va-batch-size", default=1, type=int,
                        help="valid batch size")
    args = parser.parse_args()

    seeds(args.seed)

    # The external evaluation script is required. Fail fast with an explicit
    # error rather than an assert, which is stripped under `python -O`.
    eval_script = os.getenv("MRT_EVAL_SCRIPT")
    if eval_script is None:
        raise RuntimeError(
            "MRT_EVAL_SCRIPT environment variable must be set")

    dataset = "E2E"
    lin_strat = "inc_freq"
    is_delex = False

    mr_vcb, utt_vcb = setup_vocab(dataset, lin_strat, is_delex)
    tr_ds = setup_training_data(dataset, lin_strat, is_delex, mr_vcb, utt_vcb,
                                include_phrases=True)
    va_ds = setup_validation_data(dataset, lin_strat, is_delex, mr_vcb,
                                  utt_vcb)
    tr_batches = make_batches(tr_ds, args.tr_batch_size, args.n_procs,
                              lin_strat, is_delex)
    va_batches = make_batches(va_ds, args.va_batch_size, args.n_procs,
                              lin_strat, is_delex)

    # Bidirectional GRU encoder/decoder with the chosen attention type.
    model = setup_model("gru", "bi", args.layers, args.attn,
                        args.tie_embeddings, mr_vcb, utt_vcb, beam_size=8)
    trainer = setup_trainer(model, args.opt, args.lr, args.wd, args.ls,
                            tr_batches, va_batches, args.max_epochs, utt_vcb,
                            eval_script, mr_utils,
                            lambda: f"E2E/bi-gru/if/base+phrases/{args.seed}")
    env = {'proj_dir': args.output_dir, "gpu": args.gpu}
    trainer.run(env, verbose=True)
def main():
    """Train a transformer seq2seq model on Viggo.

    Uses the inc-freq linearization strategy, delexicalized MRs, and the
    base+phrases training data. Reads the evaluation script path from the
    MRT_EVAL_SCRIPT environment variable.
    """
    # NOTE: pass the title via description=; a positional argument to
    # ArgumentParser sets `prog` (the displayed program name) instead.
    parser = argparse.ArgumentParser(
        description="Train Viggo transformer model (ls=inc freq, da=base+phrases)")
    parser.add_argument("output_dir", type=Path, help="save directory")
    parser.add_argument("--seed", type=int, default=234222452)
    parser.add_argument("--layers", default=2, type=int, help="num layers")
    parser.add_argument("--ls", default=0.1, type=float, help="label smoothing")
    parser.add_argument("--tie-embeddings", action="store_true",
                        help="share decoder input/output embeddings")
    parser.add_argument("--max-epochs", default=100, type=int,
                        help="max training epochs")
    parser.add_argument("--gpu", default=-1, type=int)
    parser.add_argument("--n-procs", default=2, type=int,
                        help="num data loader processes")
    parser.add_argument("--tr-batch-size", default=128, type=int,
                        help="training batch size")
    parser.add_argument("--va-batch-size", default=1, type=int,
                        help="valid batch size")
    args = parser.parse_args()

    seeds(args.seed)

    # The external evaluation script is required. Fail fast with an explicit
    # error rather than an assert, which is stripped under `python -O`.
    eval_script = os.getenv("MRT_EVAL_SCRIPT")
    if eval_script is None:
        raise RuntimeError(
            "MRT_EVAL_SCRIPT environment variable must be set")

    dataset = "Viggo"
    lin_strat = "inc_freq"
    is_delex = True

    mr_vcb, utt_vcb = setup_vocab(dataset, lin_strat, is_delex)
    tr_ds = setup_training_data(dataset, lin_strat, is_delex, mr_vcb, utt_vcb,
                                include_phrases=True)
    va_ds = setup_validation_data(dataset, lin_strat, is_delex, mr_vcb,
                                  utt_vcb)
    tr_batches = make_batches(tr_ds, args.tr_batch_size, args.n_procs,
                              lin_strat, is_delex)
    va_batches = make_batches(va_ds, args.va_batch_size, args.n_procs,
                              lin_strat, is_delex)

    # Transformer model: direction/attention args do not apply, hence None.
    model = setup_model("transformer", None, args.layers, None,
                        args.tie_embeddings, mr_vcb, utt_vcb, beam_size=8)
    # "adamtri" optimizer manages its own schedule, so lr/wd are None here.
    trainer = setup_trainer(model, "adamtri", None, None, args.ls,
                            tr_batches, va_batches, args.max_epochs, utt_vcb,
                            eval_script, mr_utils,
                            lambda: f"Viggo/transformer/if/base+phrases/{args.seed}")
    env = {'proj_dir': args.output_dir, "gpu": args.gpu}
    trainer.run(env, verbose=True)
# --- Experiment setup fragment (per-dataset training-data variants) ---
# NOTE(review): this is a script fragment; `lin_strat`, `is_delex`, `dataset`,
# `work_dir`, `HP`, `setup_vocab`, and `setup_training_data` must be defined
# or imported earlier in the file. The final statement is truncated in this
# view — confirm its remaining arguments against the full file.
eval_script = os.getenv("MRT_EVAL_SCRIPT")  # external eval script path (None if unset)
src_seq = f"{lin_strat}_{'delex' if is_delex else 'lex'}"  # source-sequence tag, e.g. "inc_freq_lex"
work_dir(f"experiments/{dataset}")
# Hyperparameters declared via the HP helper.
WKRS = HP('WKRS', 2, description='data loader processes')
T = HP('T', 500, description='max training epochs')
LR = HP('LR', 0.1, description='learning rate')
WD = HP('WD', 0.0, description='weight decay')
LS = HP('LS', 0.1, description='label smoothing')
L = HP('L', 2, description='num layers')
mr_vcb, utt_vcb = setup_vocab(dataset, lin_strat, is_delex)
# Training-data variants: base, +phrases, +templates, and (truncated —
# presumably +phrases+templates; verify against the full file).
tr_ds = setup_training_data(dataset, lin_strat, is_delex, mr_vcb, utt_vcb)
tr_phrase_ds = setup_training_data(dataset, lin_strat, is_delex, mr_vcb,
                                   utt_vcb, include_phrases=True)
tr_templ_ds = setup_training_data(dataset, lin_strat, is_delex, mr_vcb,
                                  utt_vcb, include_templates=True)
tr_phrase_templ_ds = setup_training_data(dataset, lin_strat, is_delex,
# --- Hyperparameter-search setup fragment ---
# NOTE(review): this is a script fragment; `model_type`, `rnn_dir`,
# `rnn_attn`, `tie_dec_emb`, `no_posemb`, `dataset`, `lin_strat`, `is_delex`,
# `work_dir`, `HP`, `mrt`, and the setup_* / make_batches helpers must be
# defined or imported earlier in the file.
no_encoder = os.getenv("MRT_NO_ENCODER", "false") == "true"  # env flag: drop the encoder when "true"
src_seq = f"{lin_strat}_{'delex' if is_delex else 'lex'}"  # source-sequence tag, e.g. "inc_freq_delex"
work_dir(
    f"experiments/hps/{dataset}/{lin_strat}/{'delex' if is_delex else 'lex'}")
# Hyperparameters declared via the HP helper.
WKRS = HP('WKRS', 2, description='data loader processes')
T = HP('T', 500, description='max training epochs')
LR = HP('LR', 0.1, description='learning rate')
WD = HP('WD', 0.0, description='weight decay')
LS = HP('LS', 0.1, description='label smoothing')
L = HP('L', 2, description='num layers')
mr_vcb, utt_vcb = setup_vocab(dataset, lin_strat, is_delex)
tr_ds = setup_training_data(dataset, lin_strat, is_delex, mr_vcb, utt_vcb)
va_ds = setup_validation_data(dataset, lin_strat, is_delex, mr_vcb, utt_vcb)
# Both loaders use a fixed batch size of 128 with WKRS worker processes.
tr_batches = make_batches(tr_ds, 128, WKRS, lin_strat, is_delex)
va_batches = make_batches(va_ds, 128, WKRS, lin_strat, is_delex)
model = setup_model(model_type, rnn_dir, L, rnn_attn, tie_dec_emb, mr_vcb,
                    utt_vcb, no_posemb=no_posemb, no_encoder=no_encoder)
# Select the MR utility module matching the chosen dataset.
if dataset == 'Viggo':
    mr_utils = mrt.viggo.mr_utils
elif dataset == 'E2E':
    mr_utils = mrt.e2e.mr_utils
else:
    raise Exception(f"Bad dataset: {dataset}")