def test_running():
    epochs = 100000
    seq_batch_size = 100
    print_yes = 100
    loss_func = torch.nn.functional.nll_loss

    # create network and optimizer
    net = RNN(100, 120, 150, 2)
    net.to(device)  # move the model to the device (GPU if available)
    optim = torch.optim.Adam(net.parameters(), lr=3e-5)

    # main training loop:
    for epoch in range(epochs):
        dat = get_batch(train_data, seq_batch_size)
        dat = torch.LongTensor([vocab.find(item) for item in dat])

        # pull x and y, initialize the hidden state
        x_t = dat[:-1]
        y_t = dat[1:]
        hidden = net.init_hidden()

        # move inputs, targets and the hidden state to the device
        x_t, y_t, hidden = x_t.to(device), y_t.to(device), hidden.to(device)

        # forward pass
        logprob, hidden = net(x_t, hidden)
        loss = loss_func(logprob, y_t)

        # update
        optim.zero_grad()
        loss.backward()
        optim.step()

        # print the loss every print_yes iterations
        if epoch % print_yes == 0:
            print('*' * 100)
            print('\n epoch {}, loss: {} \n'.format(epoch, loss.item()))
            # make sure to pass the True flag when running on cuda
            print('sample speech:\n', run_words(net, vocab, 500, True))
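# The run_words sampler called above is not defined in this snippet. The sketch
# below is an assumption about how such a helper could look (the signature and
# the use of torch.multinomial are guesses, not the original implementation):
# it seeds the network with one character index and samples the next `length`
# characters from the predicted distribution.
import torch

def run_words(net, vocab, length, iscuda=False):
    idx = torch.LongTensor([0])  # seed with the first vocabulary entry
    hidden = net.init_hidden()
    if iscuda:
        idx, hidden = idx.cuda(), hidden.cuda()
    out = []
    with torch.no_grad():
        for _ in range(length):
            logprob, hidden = net(idx, hidden)
            # sample the next character id from the exponentiated log-probabilities
            idx = torch.multinomial(logprob[-1].exp(), 1).view(1)
            out.append(vocab[idx.item()])
    return ''.join(out)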
def main():
    epochs = 301
    seq_batch_size = 200
    print_yes = 100
    iscuda = False

    # create our network, optimizer and loss function
    net = RNN(len(chars), 100, 150, 2)  # instantiate an RNN object
    optim = torch.optim.Adam(net.parameters(), lr=6e-4)
    loss_func = torch.nn.functional.nll_loss
    if iscuda:
        net = net.cuda()

    # main training loop:
    for epoch in range(epochs):
        dat = getSequence(book, seq_batch_size)
        # find the vocabulary index of each character and store them in a tensor
        dat = torch.LongTensor([chars.find(item) for item in dat])

        # pull x, y and initialize the hidden state
        if iscuda:
            x_t = dat[:-1].cuda()
            y_t = dat[1:].cuda()
            hidden = net.init_hidden().cuda()
        else:
            x_t = dat[:-1]
            y_t = dat[1:]
            hidden = net.init_hidden()

        # forward pass
        logprob, hidden = net(x_t, hidden)
        loss = loss_func(logprob, y_t)

        # update
        optim.zero_grad()
        loss.backward()
        optim.step()

        # print the loss every print_yes iterations
        if epoch % print_yes == 0:
            print('*' * 60)
            print('\n epoch {}, loss: {} \n'.format(epoch, loss.item()))
            print('sample speech:\n', test_words(net, chars, seq_batch_size))

    torch.save(net.state_dict(), 'trainedBook_v2.pt')
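# A minimal usage sketch (not part of the original script) for reloading the
# checkpoint saved by main() above. It assumes the same RNN constructor
# arguments and reuses the chars vocabulary and test_words helper from this file.
net = RNN(len(chars), 100, 150, 2)
net.load_state_dict(torch.load('trainedBook_v2.pt', map_location='cpu'))
net.eval()
print(test_words(net, chars, 200))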
def no_test_forward():
    loss_func = torch.nn.functional.nll_loss
    net = RNN(100, 100, 100)
    net.to(device)  # move the model to the device (GPU if available)
    optim = torch.optim.Adam(net.parameters(), lr=1e-4)

    # step 2: create a training batch of data, size 101, format it and convert it to a pytorch long tensor
    dat = get_batch(train_data, 100)
    dat = torch.LongTensor([vocab.find(item) for item in dat])

    # step 3: split dat into input/output
    x_t = dat[:-1]
    y_t = dat[1:]
    ho = net.init_hidden()

    # remember to load every variable used by the model onto the device,
    # meaning the input and target as well as the hidden state
    x_t, y_t, ho = x_t.to(device), y_t.to(device), ho.to(device)

    # test forward pass
    log_prob, hidden = net.forward(x_t, ho)
    # check whether the hidden state returned by the forward pass is already on cuda
    # log_prob2, hidden2 = net.forward(x_t, hidden)
    loss = loss_func(log_prob, y_t)
    optim.zero_grad()
    loss.backward()
    optim.step()
            p.data.add_(p.grad.data, alpha=-lr)  # manual SGD update: p <- p - lr * grad

        if step % (epoch_size // 10) == 10:
            print('step: ' + str(step) + '\t'
                  + 'loss (sum over all examples seen this epoch): ' + str(costs) + '\t'
                  + 'speed (wps): ' + str(iters * model.batch_size / (time.time() - start_time)))

    return np.exp(costs / iters), losses


###############################################################################
#
# RUN MAIN LOOP (TRAIN AND VAL)
#
###############################################################################

print("\n########## Running Main Loop ##########################")
print("\n########## Generating Samples #########################")

hidden_zero = model.init_hidden()
# hidden_zero.to(torch.long)
hidden_zero = hidden_zero.to(device)
# print(type(hidden_zero))
# samples = model.generate(torch.cuda.LongTensor([0 + 3 * i for i in range(20)]), hidden_zero, 10)

# seed the generator with a short slice of the training data
train_slice = train_data[5355:5365]
train_slice = np.array(train_slice, dtype=np.int32)
inputs = torch.from_numpy(train_slice.astype(np.int64)).contiguous().to(device)
samples = model.generate(inputs, hidden_zero, 70)
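# A small decoding sketch (an addition, not in the original): it assumes the
# samples tensor returned by model.generate has shape
# (generated_seq_len, batch_size) and that an id_2_word mapping is available,
# as in the later snippets.
samples = samples.to('cpu')
for column in samples.transpose(0, 1):
    print(' '.join(id_2_word[int(idx)] for idx in column))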
# SAVE LEARNING CURVES
lc_path = os.path.join(args.save_dir, 'learning_curves.npy')
print('\nDONE\n\nSaving learning curves to ' + lc_path)
np.save(lc_path, {'train_ppls': train_ppls,
                  'val_ppls': val_ppls,
                  'train_losses': train_losses,
                  'val_losses': val_losses})
# NOTE ==============================================
# To load these, run
# >>> x = np.load(lc_path)[()]
# You will need these values for plotting learning curves (Problem 4)

hidden = model.init_hidden()
hidden = hidden.to(device)

# seed the generator with a handful of common starting words
start = [word_to_id[w] for w in
         ["the", "a", "an", "he", "she", "it", "they", "why", "how", "to"]]

hidden = repackage_hidden(hidden)
short_samples = model.generate(start, hidden, args.seq_len)
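# A short plotting sketch for the NOTE above (an addition, not in the original
# script). Recent numpy versions require allow_pickle=True to load the saved dict.
import numpy as np
import matplotlib.pyplot as plt

curves = np.load(lc_path, allow_pickle=True)[()]
plt.plot(curves['train_ppls'], label='train')
plt.plot(curves['val_ppls'], label='valid')
plt.xlabel('epoch')
plt.ylabel('perplexity')
plt.legend()
plt.savefig(os.path.join(args.save_dir, 'learning_curves.png'))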
              hidden_size=argsdict["GRU_hidden_size"],
              seq_len=argsdict["seq_len"],
              batch_size=argsdict["batch_size"],
              vocab_size=vocab_size,
              num_layers=argsdict["GRU_num_layers"],
              dp_keep_prob=1)

    # Load the model weights
    rnn.load_state_dict(torch.load(args.RNN_path))
    gru.load_state_dict(torch.load(args.GRU_path))
    rnn.eval()
    gru.eval()

    # Initialize the hidden states
    hidden = [rnn.init_hidden(), gru.init_hidden()]

    # Set the random seed manually for reproducibility.
    torch.manual_seed(args.seed)

    # Generate the word seed using random words
    # among the 100 most common words.
    input = torch.randint(0, 100, (args.batch_size, 1)).squeeze()

    for name_model, model, init_hidden in zip(["RNN", "GRU"], [rnn, gru], hidden):
        print("------------------------------------")
        print(name_model)
        print("------------------------------------")
        print_sentence(model.generate(input, init_hidden, args.seq_len))
        print_sentence(model.generate(input, init_hidden, 2 * args.seq_len))
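# print_sentence is used above but not defined in this snippet. A possible
# implementation is sketched below as an assumption: it expects samples of
# shape (generated_seq_len, batch_size) and decodes each column with id_2_word.
def print_sentence(samples):
    samples = samples.detach().cpu()
    for column in samples.transpose(0, 1):
        print(' '.join(id_2_word[int(idx)] for idx in column))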
model = model.to(device)
print("device is =", device)
print("Load model parameters, best_params.pt")

save_dir = args.save_dir
bp_path = os.path.join(save_dir, 'best_params.pt')
model.load_state_dict(torch.load(bp_path))
txt_path = os.path.join(save_dir, '_generate.txt')
model.eval()

nb_sentence = 20
inputs = torch.LongTensor(nb_sentence).random_(0, model.vocab_size).to(device)
generated_seq_len = model.seq_len

f = open(txt_path, "w+")
f.write("==============seq_len===============\n")
hidden = repackage_hidden(model.init_hidden())
samples = model.generate(inputs, hidden.to(device), generated_seq_len)
# import pdb; pdb.set_trace()

for i in range(nb_sentence):
    sentence = ""
    for t in range(generated_seq_len):
        sentence += id_2_word[samples[t][i].data.item()] + ' '
    f.write("---------------------This is line %d\r\n" % (i + 1))
    f.write(sentence + '\n')

# repeat the generation with sequences twice as long
f.write("==============seq_len * 2===============\n")
inputs = torch.LongTensor(nb_sentence).random_(0, model.vocab_size).to(device)
generated_seq_len = generated_seq_len * 2
print('generated_seq_len', generated_seq_len)
f.write("---------------------This is line %d\r\n" % (i + 1))
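# repackage_hidden is used above but not shown here. A common implementation
# (an assumption in this context) simply detaches the hidden state from its
# computation history so gradients cannot flow back through previous batches.
def repackage_hidden(h):
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)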
# np.save(lc_path, {'val_ts_losses': timestep_loss.numpy(),
#                   'val_ppls': val_ppls,
#                   # 'train_losses': train_losses,
#                   'val_losses': val_losses,
#                   'times': times})
# NOTE ==============================================
# To load these, run
# >>> x = np.load(lc_path)[()]
# You will need these values for plotting learning curves (Problem 4)

val_loss = []
val_ppl = []
timestep_loss = []
# val_ppl, val_loss, timestep_loss = run_epoch(model, valid_data)
# print(val_ppl)

input = torch.tensor([1]).to(device)
hidden = model.init_hidden().to(device)
# hidden = torch.zeros([args.num_layers, model.batch_size, args.hidden_size]).to(device)
seq_length = args.seq_len

# generate ten samples each of length 35 and 70, each seeded with a random token id
for i in (35, 70):
    for j in range(10):
        input = torch.randint(low=0, high=999, size=(1,), dtype=torch.long).to(device)
        sequence = model.generate(input, hidden, i)
        phrase = "\n" + str(input.item()) + ": "
        for token_id in sequence:
            phrase += id_2_word[token_id.item()] + " "
        print(phrase)
# GENERATING
#
###############################################################################

print("\n########## Running Main Loop ##########################")

model.load_state_dict(
    torch.load(os.path.join(args.save_dir, 'best_params.pt'),
               map_location=lambda storage, location: storage))
model = model.to(device)
print(model)

# seed the generation with ten word ids taken from the vocabulary
given = np.array(list(id_2_word.keys())[10:20], dtype=np.int64)
print([id_2_word[id] for id in given])
given = torch.from_numpy(given).to(device)

hidden = model.init_hidden().to(device)
model.eval()

res = model.generate(given, hidden, args.gen_length).transpose(0, 1)
print(res.shape)
res = res.to('cpu').numpy()
res = [' '.join([id_2_word[id] for id in sent]) for sent in res]

with open('{}_{}.txt'.format(args.model, args.gen_length), 'w') as writer:
    for i, sent in enumerate(res):
        writer.write('{}. {}\n\n'.format(
            i + 1,
            sent.replace('<unk>', '!unk!').replace('<eos>', '!eos!').replace('$', '\\$')))
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)

model.load_state_dict(torch.load(args.load_model))  # , map_location='cpu'))
# model.to(device) ???
model.eval()
print(model)

input = torch.LongTensor(args.batch_size).random_(0, vocab_size)
generated_seq_len = 70
results = model.generate(input, model.init_hidden(), generated_seq_len)
# print(results)

# map the generated token ids back to words and print one sample per line
results = results.numpy().transpose().tolist()
for l in results:
    for i in range(len(l)):
        l[i] = id_2_word[l[i]]
    print(" ".join(l))
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=1)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=1)
else:
    raise Exception("Unknown model")

model.load_state_dict(
    torch.load('{}/best_params.pt'.format(output_dir), map_location=device))
model.to(device)

# start with zeros as the seed tokens
seed = torch.zeros(args.batch_size, dtype=torch.long)
samples = model.generate(seed, model.init_hidden(), seq_len)
samples = samples.transpose(0, 1)  # shape (batch_size, generated_seq_len)

with open(os.path.join(output_dir, "samples.{}.txt".format(seq_len)), "w") as of:
    for sample in samples:
        print(" ".join([id_2_word[idx] for idx in sample.numpy()]), file=of)
                        n_blocks=args.num_layers,
                        dropout=1. - args.dp_keep_prob)
    # these 3 attributes don't affect the Transformer's computations;
    # they are only used in run_epoch
    model.batch_size = args.batch_size
    model.seq_len = args.seq_len
    model.vocab_size = vocab_size
else:
    print("Model type not recognized.")

if args.model_path is not None and args.generate:
    model.load_state_dict(torch.load(args.model_path))
    model = model.to(device)
    gen = model.generate(
        torch.LongTensor(args.batch_size).random_(0, model.vocab_size).cuda(),
        model.init_hidden().cuda(), 10)
    gen = [[id_2_word[w] for w in seq.data.cpu().numpy()] for seq in gen]
    for i in range(10):
        print(gen[i])

# LOSS FUNCTION
loss_fn = nn.CrossEntropyLoss(reduction='none')
if args.optimizer == 'ADAM':
    optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr)

# LEARNING RATE SCHEDULE
lr = args.initial_lr
lr_decay_base = 1 / 1.15
m_flat_lr = 14.0  # we will not touch lr for the first m_flat_lr epochs

###############################################################################
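# A sketch of how the schedule constants above are typically applied (the epoch
# loop itself is not shown here, so args.num_epochs and this exact placement
# are assumptions): the learning rate stays at initial_lr for the first
# m_flat_lr epochs and then decays geometrically by lr_decay_base per epoch.
for epoch in range(args.num_epochs):
    lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0)
    lr = args.initial_lr * lr_decay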