def main(z, lr, net, sample_size, p, clip, nokl):
    train_db = LmReconstructionDatabase("train", batches_per_epoch=1000, sample_size=sample_size)
    valid_db = LmReconstructionDatabase("valid", batches_per_epoch=100, sample_size=sample_size)

    model = make_model(z, net, sample_size, p, train_db.n_classes)
    if nokl:
        # Push the annealing window far into the future so the KL weight stays at zero.
        model.anneal_start = 1e20
        model.anneal_end = 1e21

    #out = nn.utils.forward(model, train_db, out=model.output(model.input))
    #print(out.shape)
    #return

    print(model.total_params)

    if net == "conv":
        print("not using clipping for conv model")
        clip = 0.0

    name = "vae.%d.%s.%d.%.2f.clip_%d.lr_%.4f" % (z, net, sample_size, p, clip, lr)
    opt = Optimizer(model, train_db, valid_db, Adam(lr),
                    grad_clip=MaxNorm(clip), name=name, print_info=True)
    opt.train(100)
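# The nokl trick above relies on the model weighting its KL term by a schedule driven
# by anneal_start/anneal_end. The exact schedule lives inside the model code; the sketch
# below only illustrates the usual linear ramp (an assumption, not the project's
# implementation). With anneal_start = 1e20 the weight stays at 0 for any realistic
# iteration count, which is what the nokl flag exploits.
def kl_weight_sketch(iteration, anneal_start, anneal_end):
    """Linear KL-weight ramp: 0 before anneal_start, 1 after anneal_end."""
    if iteration <= anneal_start:
        return 0.0
    if iteration >= anneal_end:
        return 1.0
    return (iteration - anneal_start) / (anneal_end - anneal_start)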
def main(z, lr, sample_size, p, encdec_layers, charcnn_size, charcnn_layers, alpha):
    train_db = LmReconstructionDatabase("train", batches_per_epoch=1000, sample_size=sample_size)
    valid_db = LmReconstructionDatabase("valid", batches_per_epoch=100, sample_size=sample_size)

    model = make_model(z, sample_size, p, train_db.n_classes,
                       encdec_layers, charcnn_size, charcnn_layers, alpha)

    #out = nn.utils.forward(model, train_db, out=model.output(model.input))
    #print(out.shape)
    #return

    print(model.total_params)

    name = "lm.charvae.z_%d.len_%d.layers_%d.p_%.2f.alpha_%.2f.charcnnsize_%d.charcnnlayers_%d" % \
        (z, sample_size, encdec_layers, p, alpha, charcnn_size, charcnn_layers)
    opt = Optimizer(model, train_db, valid_db, Adam(lr), name=name, print_info=True)
    opt.train(100, decay_after=20, lr_decay=0.95)
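# decay_after/lr_decay describe the Optimizer's learning-rate schedule. Assuming it
# multiplies the rate by lr_decay once per epoch after decay_after (a guess about the
# Optimizer's internals, not taken from its source), the effective rate would be:
def lr_at_epoch_sketch(base_lr, epoch, decay_after=20, lr_decay=0.95):
    """Illustrative exponential decay: base_lr * lr_decay**(epoch - decay_after)."""
    if epoch <= decay_after:
        return base_lr
    return base_lr * lr_decay ** (epoch - decay_after)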
def main(z, lr, sample_size, p, lstm_size, alpha):
    train_db = TwitterReconstructionDatabase("train", batch_size=32, max_len=sample_size, batches_per_epoch=1000)
    valid_db = TwitterReconstructionDatabase("valid", batch_size=50, max_len=sample_size, batches_per_epoch=100)

    model = make_model(z, sample_size, p, train_db.n_classes, lstm_size, alpha)
    # Ramp the KL weight between iterations 50k and 60k.
    model.anneal_start = 50000.
    model.anneal_end = 60000.

    #out = nn.utils.forward(model, train_db, out=model.output(model.input))
    #print(out.shape)
    #return

    print(model.total_params)

    name = "twittervae.charlevel.z_%d.len_%d.p_%.2f.lstmsz_%d.alpha_%.2f" % (
        z, sample_size, p, lstm_size, alpha)
    opt = Optimizer(model, train_db, valid_db, Adam(lr), name=name, print_info=True)
    opt.train(100, decay_after=70, lr_decay=0.95)
def main(z, lr, sample_size, p, encdec_layers, lstm_size, alpha, anneal):
    train_db = LmReconstructionDatabase("train", batches_per_epoch=1000, sample_size=sample_size)
    valid_db = LmReconstructionDatabase("valid", batches_per_epoch=100, sample_size=sample_size)

    anneal = bool(anneal)
    print(anneal)

    model = make_model(z, sample_size, p, train_db.n_classes, encdec_layers, lstm_size, alpha)
    if not anneal:
        model.anneal = False

    #out = nn.utils.forward(model, train_db, out=model.output(model.input))
    #print(out.shape)
    #return

    print(model.total_params)

    name = "lm.charvae.z_%d.len_%d.layers_%d.p_%.2f.alpha_%.2f.lstmsz_%d" % \
        (z, sample_size, encdec_layers, p, alpha, lstm_size)
    if not anneal:
        name += ".noanneal"
    opt = Optimizer(model, train_db, valid_db, Adam(lr), name=name, print_info=True)
    opt.train(100, decay_after=20, lr_decay=0.95)
def main(z, lr, anneal_start, anneal_end, p, alpha, lstm_size, num_epochs,
         max_len, batch_size, session, dataset, sp_model, resume):
    train_db = TextProjectReconstructionDatabase(dataset=dataset, phase="train", batch_size=batch_size,
                                                 max_len=max_len, sp_model=sp_model or None)
    valid_db = TextProjectReconstructionDatabase(dataset=dataset, phase="valid", batch_size=batch_size,
                                                 max_len=max_len, sp_model=sp_model or None)

    model = make_model(z, max_len, p, train_db.n_classes, lstm_size, alpha)
    model.anneal_start = float(anneal_start)
    model.anneal_end = float(anneal_end)

    vocab = train_db.vocab
    print("Using vocab with %s tokens" % len(vocab))

    if resume:
        model.load("session/%s/model.flt" % session)
        print("Resuming session %s" % session)

    #out = nn.utils.forward(model, train_db, out=model.output(model.input))
    #print(out.shape)
    #return

    print("Total params: %s" % model.total_params)

    opt = Optimizer(model, train_db, valid_db, Adam(lr), name=session, print_info=True, restore=resume)

    # Persist the vocabulary and hyperparameters next to the session so the model
    # can be reconstructed later.
    with open("%s/vocab.pkl" % opt.opt_folder, "wb") as vocab_file:
        pickle.dump(train_db.vocab, vocab_file)

    nn.utils.save_json("%s/hyper_params.json" % opt.opt_folder, {
        "z": z,
        "max_len": max_len,
        "p": p,
        "lstm_size": lstm_size,
        "alpha": alpha,
        "dataset": dataset,
        "vocab": "vocab.pkl",
        "sp_model": sp_model or None
    })

    decay_after_num_epochs = num_epochs * 0.7
    opt.train(epochs=num_epochs, decay_after=decay_after_num_epochs, lr_decay=0.95,
              decay_schedule_in_iters=False)
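# The session folder written above can later be restored for inference. A minimal
# loading sketch using only the standard library (the file names come from this script;
# the helper name and folder argument are made up for illustration):
import json
import pickle

def load_session_sketch(session_folder):
    """Read back the vocabulary and hyperparameters persisted by main()."""
    with open("%s/vocab.pkl" % session_folder, "rb") as vocab_file:
        vocab = pickle.load(vocab_file)
    with open("%s/hyper_params.json" % session_folder) as hp_file:
        hyper_params = json.load(hp_file)
    return vocab, hyper_params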
def main(lr, sample_size, charcnn_size, charcnn_layers):
    train_db = LMDatabase("train", batch_size=64, sample_size=sample_size)
    valid_db = LMDatabase("valid", sample_size=sample_size)

    n_classes = numpy.max(train_db.dataset) + 1
    model = make_model(n_classes, charcnn_size, charcnn_layers)

    out = nn.utils.forward(model, train_db)
    print(out)
    print(model.total_params)

    name = "charcnn.len_%d.charcnnsize_%d.charcnnlayers_%d" % (
        sample_size, charcnn_size, charcnn_layers)
    opt = Optimizer(model, train_db, valid_db, Adam(lr), name=name, print_info=True)
    opt.train(100, decay_after=20, lr_decay=0.95)
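# How these entry points are invoked is not shown here; below is a hedged sketch of a
# command-line wrapper for this last main() using argparse. The flag names and defaults
# are placeholders for illustration, not the project's actual CLI.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Train the character-level CNN language model.")
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--sample-size", type=int, default=128)
    parser.add_argument("--charcnn-size", type=int, default=512)
    parser.add_argument("--charcnn-layers", type=int, default=4)
    args = parser.parse_args()

    main(args.lr, args.sample_size, args.charcnn_size, args.charcnn_layers)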