def run_step(step_num):
    if step_num == 0:
        model = models.RandomModel()
    elif step_num == 1:
        model = models.PriorModel()
    elif step_num == 2:
        model = models.SupModel()
    elif step_num == 3:
        print('Training will take 6-7 minutes')
        model = models.EmbedModel()
    elif step_num == 4:
        print('Training will take 30-40 minutes')
        gru = models.GRU()
        model = models.NeuralModel(gru)
    else:
        raise ValueError('Invalid step number')
    trainset = Dataset.get('train')
    num_train_candidates = Candidate.get_count()
    model.fit(trainset, num_train_candidates)
    print('Training finished!')
    for dsname in Dataset.ds2path.keys():
        ds = Dataset.get(dsname)
        pred_cids = model.predict(ds)
        print(dsname, ds.eval(pred_cids))
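A minimal, hypothetical entry point for run_step; the argparse wiring and the help text are assumptions layered on the step mapping shown above, not part of the original script:

# Hypothetical driver for run_step (assumes models, Dataset and Candidate are importable as above).
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train and evaluate one model variant.')
    parser.add_argument('step_num', type=int, choices=range(5),
                        help='0=Random, 1=Prior, 2=Sup, 3=Embed, 4=Neural (GRU)')
    run_step(parser.parse_args().step_num)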
def load_model(path_model, path_config, vocab):
    config = Config(path_config)
    model_name = config.getstr("model")
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")
    if model_name == "rnn":
        model = models.RNN(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=None,
                           EOS_ID=vocab["<EOS>"])
    elif model_name == "lstm":
        model = models.LSTM(vocab_size=len(vocab),
                            word_dim=word_dim,
                            state_dim=state_dim,
                            initialW=None,
                            EOS_ID=vocab["<EOS>"])
    elif model_name == "gru":
        model = models.GRU(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=None,
                           EOS_ID=vocab["<EOS>"])
    else:
        print "[error] Unknown model name: %s" % model_name
        sys.exit(-1)
    serializers.load_npz(path_model, model)
    return model
def eval_pre_trained():
    model = models.GRU()
    model.load_state_dict(torch.load('../../assign2.model'))
    model.eval()
    net = models.NeuralModel(model)
    ds = Dataset.get('ace')
    pred_cids = net.predict(ds)
    print('ace', ds.eval(pred_cids))
def get_model(args, dm):
    model = None
    if args.model_name == "GCN":
        model = models.GCN(adj=dm.adj, input_dim=args.seq_len, output_dim=args.hidden_dim)
    if args.model_name == "GRU":
        model = models.GRU(input_dim=dm.adj.shape[0], hidden_dim=args.hidden_dim)
    if args.model_name == "TGCN":
        model = models.TGCN(adj=dm.adj, hidden_dim=args.hidden_dim)
    return model
def get_model(args, dm):
    model = None
    if args.model_name == 'GCN':
        model = models.GCN(adj=dm.adj, input_dim=args.seq_len, output_dim=args.hidden_dim)
    if args.model_name == 'GRU':
        model = models.GRU(input_dim=dm.adj.shape[0], hidden_dim=args.hidden_dim)
    if args.model_name == 'TGCN':
        model = models.TGCN(adj=dm.adj, hidden_dim=args.hidden_dim, loss=args.loss)
    return model
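A minimal sketch of how either get_model variant might be called; the args fields, the dummy data module, and the toy adjacency matrix are illustrative assumptions, not taken from the original project:

# Hypothetical call site for get_model; everything below is illustrative.
import argparse
import numpy as np

class _DummyDM:
    adj = np.eye(8, dtype=np.float32)   # stand-in adjacency matrix (num_nodes x num_nodes)

args = argparse.Namespace(model_name='GRU', hidden_dim=64, seq_len=12, loss='mse')
model = get_model(args, _DummyDM())     # -> models.GRU(input_dim=8, hidden_dim=64)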
if continue_with_previous:
    print "Loading previous model state"
    net, state = models.load(model_file, MINIBATCH_SIZE, x)
    gsums, learning_rate, validation_ppl_history, starting_epoch, rng = state
    best_ppl = min(validation_ppl_history)
else:
    rng = np.random
    rng.seed(1)
    print "Building model..."
    net = models.GRU(
        rng=rng,
        x=x,
        minibatch_size=MINIBATCH_SIZE,
        n_hidden=num_hidden,
        x_vocabulary=word_vocabulary,
        y_vocabulary=punctuation_vocabulary
    )
    starting_epoch = 0
    best_ppl = np.inf
    validation_ppl_history = []
    gsums = [theano.shared(np.zeros_like(param.get_value(borrow=True))) for param in net.params]

cost = net.cost(y) + L2_REG * net.L2_sqr
gparams = T.grad(cost, net.params)
updates = OrderedDict()
        fc_path = os.path.join(fc_path, 'eps.pth')
        torch.save(eps, fc_path)
    else:
        fc_path = os.path.join(fc_path, 'eps.pth')
        return torch.load(fc_path), fc_path

fc_dir = './fc_dir/'

## Initialize Generator, RNN, and latent codes
generator = models.Generator(ngpu, z_dim, ngf, ndf, nc)
generator = generator.cuda()
gru = models.GRU(dim_z_motion, 500, gpu=True)
gru.initWeight()
gru = gru.cuda()
z_c = utils.sample_z_content(dim_z_content)

## Start training
for ep in range(num_epoch):
    # Random shuffle data_folder
    np.random.shuffle(data_folders)
    train_tqdm = tqdm(range(100))
    psnr = 0.0
def main(gpu, path_corpus, path_config, path_word2vec):
    MAX_EPOCH = 50
    EVAL = 200
    MAX_LENGTH = 70

    config = utils.Config(path_config)
    model_name = config.getstr("model")
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")
    grad_clip = config.getfloat("grad_clip")
    weight_decay = config.getfloat("weight_decay")
    batch_size = config.getint("batch_size")

    print "[info] CORPUS: %s" % path_corpus
    print "[info] CONFIG: %s" % path_config
    print "[info] PRE-TRAINED WORD EMBEDDINGS: %s" % path_word2vec
    print "[info] MODEL: %s" % model_name
    print "[info] WORD DIM: %d" % word_dim
    print "[info] STATE DIM: %d" % state_dim
    print "[info] GRADIENT CLIPPING: %f" % grad_clip
    print "[info] WEIGHT DECAY: %f" % weight_decay
    print "[info] BATCH SIZE: %d" % batch_size

    path_save_head = os.path.join(
        config.getpath("snapshot"),
        "rnnlm.%s.%s" % (os.path.basename(path_corpus),
                         os.path.splitext(os.path.basename(path_config))[0]))
    print "[info] SNAPSHOT: %s" % path_save_head

    sents_train, sents_val, vocab, ivocab = \
        utils.load_corpus(path_corpus=path_corpus, max_length=MAX_LENGTH)

    if path_word2vec is not None:
        word2vec = utils.load_word2vec(path_word2vec, word_dim)
        initialW = utils.create_word_embeddings(vocab, word2vec, dim=word_dim, scale=0.001)
    else:
        initialW = None

    cuda.get_device(gpu).use()

    if model_name == "rnn":
        model = models.RNN(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=initialW,
                           EOS_ID=vocab["<EOS>"])
    elif model_name == "lstm":
        model = models.LSTM(vocab_size=len(vocab),
                            word_dim=word_dim,
                            state_dim=state_dim,
                            initialW=initialW,
                            EOS_ID=vocab["<EOS>"])
    elif model_name == "gru":
        model = models.GRU(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=initialW,
                           EOS_ID=vocab["<EOS>"])
    elif model_name == "bd_lstm":
        model = models.BD_LSTM(vocab_size=len(vocab),
                               word_dim=word_dim,
                               state_dim=state_dim,
                               initialW=initialW,
                               EOS_ID=vocab["<EOS>"])
    else:
        print "[error] Unknown model name: %s" % model_name
        sys.exit(-1)
    model.to_gpu(gpu)

    opt = optimizers.SMORMS3()
    opt.setup(model)
    opt.add_hook(chainer.optimizer.GradientClipping(grad_clip))
    opt.add_hook(chainer.optimizer.WeightDecay(weight_decay))

    print "[info] Evaluating on the validation sentences ..."
    loss_data, acc_data = evaluate(model, model_name, sents_val, ivocab)
    perp = math.exp(loss_data)
    print "[validation] iter=0, epoch=0, perplexity=%f, accuracy=%.2f%%" \
        % (perp, acc_data*100)

    it = 0
    n_train = len(sents_train)
    vocab_size = model.vocab_size
    for epoch in xrange(1, MAX_EPOCH + 1):
        perm = np.random.permutation(n_train)
        for data_i in xrange(0, n_train, batch_size):
            if data_i + batch_size > n_train:
                break
            words = sents_train[perm[data_i:data_i + batch_size]]
            if model_name == "bd_lstm":
                xs, ms = utils.make_batch(words, train=True, tail=False, mask=True)
                ys = model.forward(xs=xs, ms=ms, train=True)
            else:
                xs = utils.make_batch(words, train=True, tail=False)
                ys = model.forward(ts=xs, train=True)
            ys = F.concat(ys, axis=0)
            ts = F.concat(xs, axis=0)
            ys = F.reshape(ys, (-1, vocab_size))  # (TN, |V|)
            ts = F.reshape(ts, (-1,))  # (TN,)
            loss = F.softmax_cross_entropy(ys, ts)
            acc = F.accuracy(ys, ts, ignore_label=-1)
            model.zerograds()
            loss.backward()
            loss.unchain_backward()
            opt.update()
            it += 1
            loss_data = float(cuda.to_cpu(loss.data))
            perp = math.exp(loss_data)
            acc_data = float(cuda.to_cpu(acc.data))
            print "[training] iter=%d, epoch=%d (%d/%d=%.03f%%), perplexity=%f, accuracy=%.2f%%" \
                % (it, epoch, data_i+batch_size, n_train,
                   float(data_i+batch_size)/n_train*100, perp, acc_data*100)
            if it % EVAL == 0:
                print "[info] Evaluating on the validation sentences ..."
                loss_data, acc_data = evaluate(model, model_name, sents_val, ivocab)
                perp = math.exp(loss_data)
                print "[validation] iter=%d, epoch=%d, perplexity=%f, accuracy=%.2f%%" \
                    % (it, epoch, perp, acc_data*100)
                serializers.save_npz(
                    path_save_head + ".iter_%d.epoch_%d.model" % (it, epoch), model)
                utils.save_word2vec(
                    path_save_head + ".iter_%d.epoch_%d.vectors.txt" % (it, epoch),
                    utils.extract_word2vec(model, vocab))
                print "[info] Saved."

    print "[info] Done."
sys.exit("'Learning rate' argument missing!") model_file_name = "Model_%s_h%d_lr%s.pcl" % (model_name, num_hidden, learning_rate) print(num_hidden, learning_rate, model_file_name) rng = np.random rng.seed(1) print("Building model ...") vocab_len = len(data.read_vocabulary(data.WORD_VOCAB_FILE)) x_len = vocab_len if vocab_len < data.MAX_WORD_VOCABULARY_SIZE else data.MAX_WORD_VOCABULARY_SIZE + data.MIN_WORD_COUNT_IN_VOCAB x = np.ones((x_len, MINIBATCH_SIZE)).astype(int) # Initialize the weights of the model without any real data, comparable to placeholders in earlier Tensorflow version net = models.GRU(rng, x, num_hidden) optimizer = tf.keras.optimizers.Adagrad(learning_rate=learning_rate, initial_accumulator_value=1e-6) starting_epoch = 0 best_ppl = np.inf validation_ppl_history = [] print( f"Total number of trainable parameters: {sum(np.prod([dim for dim in param.get_shape()]) for param in net.params)}" ) print("Training...") for epoch in range(starting_epoch, MAX_EPOCHS): t0 = time() total_neg_log_likelihood = 0
train_loaders = list()
for d in train_datasets:
    temp = DataLoader(dataset=d, batch_size=args.batch_size, shuffle=True)
    train_loaders.append(temp)
# train_loader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True)
validation_loader = DataLoader(dataset=validation_dataset, batch_size=args.batch_size, shuffle=False)

# Model, loss, and optimizer
if args.model == 'lstm':
    model = models.LSTM(args.input_size, args.hidden_size, args.num_layers, args.num_classes, args.noise_std).to(device)
elif args.model == 'gru':
    model = models.GRU(args.input_size, args.hidden_size, args.num_layers, args.num_classes, args.noise_std).to(device)
elif args.model == 'rnn':
    model = models.RNN(args.input_size, args.hidden_size, args.num_layers, args.num_classes, args.noise_std).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

# Train the model
# total_step = len(train_loader)
# total_train_step = len(train_loader)
# total_val_step = len(validation_loader)

def evaluate(dataloader):
    total_loss = 0.0
                       batch_size=10, vocab_size=10000,
                       num_layers=2, dp_keep_prob=0.35)
elif (model_type == 'GRU'):
    # Generate samples using RNN
    dir = 'models/gru/best_params.pt'
    print("GRU model loaded.")
    model = models.GRU(emb_size=200, hidden_size=1500, seq_len=35, batch_size=10,
                       vocab_size=10000, num_layers=2, dp_keep_prob=0.35)
    model.load_state_dict(torch.load(dir))

# To remove the dropout
model.eval()

# Size of vocabulary
vocab = 10000

# Sample of size batch_size from the vocab using a uniform distribution
# Take a random word as input to create the samples
inp = np.random.choice(vocab, size=10, replace=True, p=None)
        f_.write(log_str + '\n')

# SAVE LEARNING CURVES
lc_path = os.path.join(save_dir, 'learning_curves.npy')
print('\nDONE\n\nSaving learning curves to ' + lc_path)
np.save(lc_path, {'train_ppls': train_ppls,
                  'val_ppls': val_ppls,
                  'train_losses': train_losses,
                  'val_losses': val_losses,
                  'times': times})

if __name__ == '__main__':
    gru = models.GRU(batch_size=20, seq_len=35, hidden_size=1500, num_layers=2,
                     vocab_size=10000, dp_keep_prob=.35, emb_size=200)
    valid_data, word_to_id, id_2_word = ptb_valid_data(data_path='data/')
    tokens = get_distribution(valid_data, 20, 35)
    generate_sequence(gru, word_to_id, id_2_word, tokens, sequence_length=35)
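If the generated sequences are meant to come from trained weights, a state-dict load like the one in the previous PTB snippet would precede generate_sequence; a minimal sketch, assuming the same checkpoint path:

# Hedged sketch: restore trained parameters before sampling. The checkpoint path is an assumption,
# mirrored from the 'models/gru/best_params.pt' path used in the snippet above.
gru.load_state_dict(torch.load('models/gru/best_params.pt'))
gru.eval()   # disable dropout for generation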