def speak(length):
    dataset = torch.load('./outputs/secondexperiment/saved_dataset.dataset')
    device = torch.device('cpu')
    model = TextGenerationModel(64, 90, dataset.vocab_size, 128, 2, 0, device)
    model.load_state_dict(
        torch.load('./outputs/secondexperiment/saved_model.pt', map_location='cpu'))
    _input = open('sample_start.txt', 'r', encoding='utf-8').read()
    idxs = dataset.convert_from_string(_input)
    text = generate_sentence(idxs, model, dataset, 0.5, length)
    print(text)
def load_model(model_name):
    """ Loads LSTM model """
    with open(model_name, 'rb') as f:
        checkpoint = torch.load(f)
    loaded_model = TextGenerationModel(
        checkpoint['batch_size'],
        checkpoint['seq_length'],
        checkpoint['vocabulary_size'],
        checkpoint['lstm_num_hidden'],
        checkpoint['lstm_num_layers'],
        checkpoint['drop_prob'],
        checkpoint['device']).to(checkpoint['device'])
    loaded_model.load_state_dict(checkpoint['state_dict'])
    return loaded_model
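# A minimal sketch of the saving side that load_model() above assumes: the checkpoint
# stores the constructor arguments alongside the weights so the model can be rebuilt
# before load_state_dict(). The key names mirror those read in load_model(); the
# function name save_model and its argument list are assumptions, not part of the original.
def save_model(model_name, model, batch_size, seq_length, vocabulary_size,
               lstm_num_hidden, lstm_num_layers, drop_prob, device):
    checkpoint = {
        'batch_size': batch_size,
        'seq_length': seq_length,
        'vocabulary_size': vocabulary_size,
        'lstm_num_hidden': lstm_num_hidden,
        'lstm_num_layers': lstm_num_layers,
        'drop_prob': drop_prob,
        'device': device,
        'state_dict': model.state_dict(),
    }
    with open(model_name, 'wb') as f:
        torch.save(checkpoint, f)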
def generate_sequence(config, seed=0, temp=0, seq_length=30,
                      model_path='output_dir/kant_100_4.pt', init_char='t'):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Initialize the device which to run the model on
    device = torch.device(config.device)
    print(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)

    # Initialize the model that we are going to use
    model = TextGenerationModel(1, 1, dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, config.device).to(device)
    model.load_state_dict(torch.load(model_path, map_location=config.device))
    model.eval()

    word_list = [dataset._char_to_ix[char] for char in init_char]
    state = model.init_state()
    for step in range(seq_length):
        last = torch.tensor([[word_list[step]]]).long().to(device)
        output, state = model.predict(last, state, temp=temp)
        # Only extend the sequence once the warm-up characters have been consumed
        if step + 1 >= len(word_list):
            if temp > 0:
                # sample from the temperature-scaled distribution
                word_list.append(torch.multinomial(output.squeeze(), 1).item())
            else:
                # greedy decoding
                word_list.append(torch.argmax(output).item())

    sequence = ''.join([dataset._ix_to_char[ix] for ix in word_list])
    return sequence
def train(config):

    # Initialize the text dataset
    dataset = TextDataset(config.txt_file)

    # Initialize the model
    model = TextGenerationModel(
        batch_size=config.batch_size,
        seq_length=config.seq_length,
        vocabulary_size=dataset.vocab_size,
        lstm_num_hidden=config.lstm_num_hidden,
        lstm_num_layers=config.lstm_num_layers
    )

    ###########################################################################
    # Implement code here.
    ###########################################################################

    # Define the optimizer and a global step counter
    optimizer = tf.train.RMSPropOptimizer(config.learning_rate)
    global_step = tf.Variable(0, trainable=False, name='global_step')

    # Compute the gradients for each variable, clip them by their global norm and
    # apply only the clipped gradients (applying the raw gradients as well, as the
    # original draft did, would update the weights twice per step).
    grads_and_vars = optimizer.compute_gradients(model.loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(zip(grads_clipped, variables),
                                                   global_step=global_step)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    for train_step in range(int(config.train_steps)):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################################
        # Implement code here.
        #######################################################################
        # sess.run ( .. )

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Output the training progress
        if train_step % config.print_every == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Loss = XX".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), train_step + 1,
                      int(config.train_steps), config.batch_size,
                      examples_per_second))
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size)

    # Initialize the model that we are going to use.
    # The FIXMEs of the template are filled with one reasonable completion here;
    # the constructor signature follows the one used elsewhere in this assignment.
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device).to(device)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # batch comes as a list of per-time-step tensors; stack to (seq_length, batch_size)
        batch_inputs = torch.stack(batch_inputs).to(device)
        batch_targets = torch.stack(batch_targets).to(device)

        optimizer.zero_grad()
        logits = model(batch_inputs)  # (seq_length, batch_size, vocab_size)
        loss = criterion(logits.view(-1, dataset.vocab_size), batch_targets.view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        optimizer.step()

        accuracy = (logits.argmax(dim=-1) == batch_targets).float().mean().item()
        loss = loss.item()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if (step + 1) % config.print_every == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      int(config.train_steps), config.batch_size,
                      examples_per_second, accuracy, loss))

        if (step + 1) % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            pass

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error,
            # check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def eval():
    # Torch settings
    torch.set_default_tensor_type(torch.FloatTensor)

    # Initialize the dataset
    dataset = TextDataset(config.txt_file, config.seq_length)

    # Get temperature
    temp = config.temperature

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size)

    # Load model, if there's any model to load
    model, steps = load_model(model)
    print("Model trained for", steps, "steps")
    model.eval()

    try:
        while True:
            # Get input for the start of the sentence
            start = input("\nStart: ")

            # Convert input to one-hot representation (length x vocab_size)
            try:
                start_oh = get_one_hot(start, dataset)
            except KeyError:
                print("One or more characters were not recognized. Try again!")
                continue

            # Generate the rest of the sentence
            sentence = dataset.convert_to_string(
                model.cmd_generate(start_oh, temp, config.seq_length))

            print("Model says:\n")
            print(start + sentence)
    except KeyboardInterrupt:
        print("\n\n" + random.choice(quit_msgs))
def generate_sentences(config, sentence): state = torch.load('checkpoints/{}'.format( config.txt_file.split("/", 1)[1].replace('.txt', ''))) device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length, config.batch_size, config.train_steps) # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size).to(device=device) model.load_state_dict(state['state_dict']) char_list = dataset.convert_to_ix(sentence) return_list = [[torch.tensor(char)] for char in char_list] for i in range(len(sentence) + 50): tensor = [torch.tensor([char_list[i]])] tensor = torch.unsqueeze(torch.unsqueeze(tensor[-1], 0), 0).float().to(device=config.device) if i == 0: predictions = model(tensor, 1) else: predictions = model(tensor) out = torch.max(predictions, 1)[1] char_list.append(out) return_v = int(out.cpu().numpy()[0]) return_list.append([torch.tensor(return_v)]) indices = [] for char in return_list: indices.append(int(char[0].numpy())) generated_sentence = dataset.convert_to_string(indices) return generated_sentence
def generate(config):
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, drop_last=True)

    # Initialize the model that we are going to use.
    # The vocabulary size must match the dataset the model was trained on,
    # so take it from the dataset instead of hard-coding it.
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=config.device).to(device)
    model.load_state_dict(torch.load(config.model))

    for l in ["In 1776 ", "Liberty is ", "Democracy is "]:
        char_id = torch.tensor([dataset._char_to_ix[ch] for ch in l]).reshape(-1, 1).to(device)
        hidden = (torch.zeros((config.lstm_num_layers, 1, config.lstm_num_hidden)).to(device),
                  torch.zeros((config.lstm_num_layers, 1, config.lstm_num_hidden)).to(device))
        sequence = sample(model=model, dataset=dataset, init_seq=char_id,
                          init_hidden=hidden, seq_length=200, device=device,
                          temp=config.temp)
        print(dataset.convert_to_string(char_id.cpu().numpy().reshape(-1)) + sequence)
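# A minimal sketch of the sample() helper that generate() above calls; the real
# implementation is not shown in this file, so the model interface (forward returns
# (logits, hidden)) and the greedy/temperature split are assumptions.
def sample(model, dataset, init_seq, init_hidden, seq_length, device, temp=0.0):
    model.eval()
    generated = []
    with torch.no_grad():
        # feed the warm-up characters to build up the hidden state
        logits, hidden = model(init_seq, init_hidden)
        last = logits[-1]  # logits for the next character, shape (1, vocab_size)
        for _ in range(seq_length):
            if temp > 0:
                # temperature sampling: rescale logits, then draw from the softmax
                probs = torch.softmax(last / temp, dim=-1)
                idx = torch.multinomial(probs.view(-1), 1).item()
            else:
                # greedy decoding
                idx = torch.argmax(last).item()
            generated.append(idx)
            next_input = torch.tensor([[idx]], device=device)
            logits, hidden = model(next_input, hidden)
            last = logits[-1]
    return dataset.convert_to_string(generated)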
def eval(config):
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')

    # Initialize the device which to run the model on
    device = torch.device(device)
    dtype = torch.cuda.LongTensor if use_cuda else torch.LongTensor

    # Initialize the dataset and data loader (note the +1)
    dataset = pickle.load(open(config.dataset_path, 'rb'))

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.dropout_keep_prob,
                                config.lstm_num_layers).to(device)
    model.load_state_dict(torch.load(config.model_path))
    model.eval()

    print('Evaluating: ')
    num_summaries = 5

    # get random initial chars
    rand_chars = [
        dataset._char_to_ix[random.choice(dataset._chars)]
        for i in range(num_summaries)
    ]

    # to tensor
    prev_pred = torch.Tensor(rand_chars).type(dtype)
    prev_pred_one_hot = to_one_hot(prev_pred, dataset.vocab_size, dtype)

    predictions = []
    for i in range(config.sample_length):
        # batch size 1
        prev_pred_one_hot = torch.unsqueeze(prev_pred_one_hot, 1)

        # use "==" rather than "is" for integer comparison
        if i == 0:
            y_pred, hidden = model(prev_pred_one_hot.float())
        else:
            y_pred, hidden = model(prev_pred_one_hot.float(), hidden)

        # Sample from the network as a multinomial distribution
        if config.sampling_method == 'temp':
            output_dist = y_pred.data.div(config.temperature).exp()
            y_pred_batch_idx = output_dist.squeeze(1).multinomial(1).type(dtype)
        else:
            # greedy: take the argmax over the vocabulary
            y_pred_batch_idx = y_pred.argmax(2).type(dtype)

        # to one hot
        prev_pred_one_hot = to_one_hot(y_pred_batch_idx.flatten(),
                                       dataset.vocab_size, dtype)
        predictions.append(y_pred_batch_idx.flatten().cpu().detach().numpy())

    predictions = np.asarray(predictions).T
    summaries = [dataset.convert_to_string(pred) for pred in list(predictions)]
    print("{} \n".format('\n'.join(summaries)))
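# A minimal sketch of the to_one_hot() helper assumed by eval() above: it turns a
# 1-D tensor of character indices into a (length x vocab_size) one-hot matrix of the
# requested tensor type. The signature is inferred from the call sites; the exact
# original implementation is not part of this file.
def to_one_hot(indices, vocab_size, dtype):
    indices = indices.long().view(-1, 1)
    one_hot = torch.zeros(indices.size(0), vocab_size, device=indices.device)
    one_hot.scatter_(1, indices, 1.0)
    return one_hot.type(dtype)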
def train(config): def acc(predictions, targets): hotvec = predictions.argmax(-2) == targets accuracy = torch.mean(hotvec.float()) return accuracy # Initialize the device which to run the model on device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print(device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) data_loader = DataLoader(dataset, config.batch_size, num_workers=0) print('batch', config.batch_size) vocabulary_size = dataset.vocab_size print('vocab', vocabulary_size) # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, vocabulary_size=vocabulary_size, lstm_num_hidden=config.lstm_num_hidden, lstm_num_layers=config.lstm_num_layers, dropout=1 - config.dropout_keep_prob, device=device) model = model.to(device) # Setup the loss and optimizer criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=1e-5) gamma = 1 - config.learning_rate_decay lr_optim = torch.optim.lr_scheduler.StepLR(optimizer, config.learning_rate_step, gamma=gamma, last_epoch=-1) print('Hi') acc_list = [] loss_list = [] step_list = [] text_list = [] epoch = 100 offset = 2380 temperature = 1 policy = 'greedy' for e in range(epoch): torch.save(model.state_dict(), str(e + 1) + 'tunedmodel.pt') for step, (batch_inputs, batch_targets) in enumerate(data_loader): # Only for time measurement of step through network lr_optim.step() optimizer.zero_grad() t1 = time.time() inputs = torch.stack([*batch_inputs], dim=1) targets = torch.stack([*batch_targets], dim=1) inputs = inputs.to(device) targets = targets.to(device) out = model.forward(inputs)[0] out = out.permute(0, 2, 1) loss = criterion(out, targets) accuracy = acc(out, targets) torch.nn.utils.clip_grad_norm(model.parameters(), max_norm=config.max_norm) loss.backward() optimizer.step() # Just for time measurement t2 = time.time() examples_per_second = config.batch_size / float(t2 - t1) if step % config.print_every == 0: print('accuracy, loss, step: \n', np.around(accuracy.item(), 4), np.around(loss.item(), 4), step, '\n') acc_list.append(accuracy.item()) loss_list.append(loss.item()) step_list.append(step + offset * e) if step % config.sample_every == 0: # Generate some sentences by sampling from the model generator = torch.randint(low=0, high=vocabulary_size, size=(1, 1)).to(device) hidden = None char_list = [] for _ in range(config.seq_length): generator, hidden = model.forward(generator, hidden) if policy == 'greedy': idx = torch.argmax(generator).item() else: pass generator = torch.Tensor([idx]).unsqueeze(-1) generator = generator.to(device) char_list.append(idx) char = dataset.convert_to_string(char_list) with open("MyTunedBook.txt", "a") as text_file: print('Epoch. ', e, 'Stahp: ', step, '\n Output: ', char, file=text_file) print('Epoch. 
', e, 'Stahp: ', step, '\n Output: ', char) text_list.append((str((step + offset * e)) + '\n' + char)) pass if step == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break print('Done training.') with open('FinalTunedBook.txt', 'w+') as f: for item in text_list: f.write("%s\n" % item) # save with pandas header = ['accuracy', 'length', 'loss', 'step'] savefiles = zip(acc_list, [config.seq_length] * len(acc_list), loss_list, step_list) df = pd.DataFrame(list(savefiles), columns=header) df.to_csv('GEN' + str(config.seq_length) + 'tunedlstm.csv') print('I am Loaded') temp_list = [0., 0.5, 1., 2.] policy_list = ['greedy', 'temp'] seq_length = 111 alice_string = list('Alice') # Generate some sentences by sampling from the model for policy in policy_list: for temperature in temp_list: char_list = [] hidden = None for alice in alice_string: idx = dataset.convert_to_idx(alice) char_list.append(idx) generator = torch.tensor([idx]).unsqueeze(-1) generator = generator.to(device) generator, hidden = model.forward(generator, hidden) for _ in range(seq_length): if policy == 'greedy': idx = torch.argmax(generator).item() else: temp = generator.squeeze() / temperature soft = torch.softmax(temp, dim=0) idx = torch.multinomial(soft, 1)[-1].item() generator = torch.tensor([idx]).unsqueeze(-1) generator = generator.to(device) generator, hidden = model.forward(generator, hidden) char_list.append(idx) char = dataset.convert_to_string(char_list) with open( "BonusTemp" + str(int(np.floor(temperature))) + "Book.txt", "w+") as text_file: print(policy + ': ', temperature, '\n Output: ', char, file=text_file) print(policy + ': ', temperature, '\n Output: ', char) print('Finito!')
def train(config, CHOICES): # Initialize the device which to run the model on #device = torch.device(config.device)# fix this! device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print(device) # Initialize the model that we are going to use # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length ); # fixme model = TextGenerationModel( config.batch_size, config.seq_length, dataset.vocab_size, config.temperature).cuda(); if (CHOICES['LOAD_BEST_MODEL']): model.load_state_dict(torch.load('./model_parameter.txt')); #print(model.state_dict()); data_loader = DataLoader(dataset, config.batch_size, num_workers=1) # Setup the loss and optimizer criterion = nn.CrossEntropyLoss(); optimizer = torch.optim.RMSprop(model.parameters(),lr=config.learning_rate); if (CHOICES['LOAD_BEST_MODEL']): optimizer.load_state_dict(torch.load('./model_optimizer.txt')); accuracy_list = []; loss_list = []; string_list = []; tmp_accuracy = 0; a = 76; while (tmp_accuracy == 0) or (accuracy_list[-1] >0.85): for step, (batch_inputs, batch_targets) in enumerate(data_loader): # Only for time measurement of step through network t1 = time.time() batch_inputs = torch.stack(batch_inputs)[:,:, None].view(config.seq_length, -1).to(device); # sequ_length * batch_size batch_targets = torch.stack(batch_targets)[:,:, None].view(config.seq_length, -1).to(device); # sequ_length * batch_size if not((int(batch_inputs.size()[1])) == config.batch_size): continue; #print(dataset.convert_to_string(batch_inputs[:, 0].cpu().numpy())); batch_inputs_onehot = one_hot(batch_inputs, dataset.vocab_size); # seq_length * batch_size * vacab_size; optimizer.zero_grad(); torch.nn.utils.clip_grad_norm(model.parameters(), max_norm=config.max_norm); out = model(batch_inputs_onehot); values, indices = torch.max(out, 1); loss_criterion = criterion(out,batch_targets); loss_criterion.backward(); optimizer.step(); loss = loss_criterion.data[0]/(config.seq_length); values, indices = torch.max(out, 1); accuracy = ((indices[indices == batch_targets].size())[0])/(config.batch_size*config.seq_length); # Just for time measurement t2 = time.time() examples_per_second = config.batch_size/float(t2-t1) if step % config.print_every == 0: print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, " "Accuracy = {:.2f}, Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), step, int(config.train_steps), config.batch_size, examples_per_second, accuracy, loss)) # generate sentences if step % 50000 == 0 and CHOICES['GENERATE_FIVE_SENTENCES']: model.eval(); test_input = (torch.Tensor(batch_inputs.size())).type(torch.LongTensor).to(device); a = a + 1; test_input = test_input.fill_(a); output_string = generate_new_stings(model, test_input, dataset.vocab_size, config.seq_length); tmp = dataset.convert_to_string(output_string.cpu().numpy().tolist()); string_list += [tmp]; print(tmp); print('---') model.train(); # save parameter torch.save(model.state_dict(), './model_parameter{:d}.txt'.format(step)); torch.save(optimizer.state_dict(), './model_optimizer{:d}.txt'.format(step)); if (CHOICES['DRAW_ACCURACY_PLOT']): accuracy_list += [accuracy]; loss_list += [loss]; if step == config.sample_every: # Generate some sentences by sampling from the model pass if step == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break if (CHOICES['GENERATE_FIVE_SENTENCES']) and (len(string_list) == 5): break; if 
(CHOICES['GENERATE_FIVE_SENTENCES']) and (len(string_list) == 5): break; print("============ finish {} epoch ============ ".format(len(accuracy_list))); torch.save(model.state_dict(), './model_parameter.txt'); torch.save(optimizer.state_dict(), './model_optimizer.txt'); print('Done training.'); if (CHOICES['GENERATE_FIVE_SENTENCES']): if (CHOICES['DRAW_ACCURACY_PLOT']): fig, ax = plt.subplots(); ax.plot(np.arange(len(accuracy_list)), accuracy_list, 'r', label = 'accuracy'); ax.plot(np.arange(len(accuracy_list)), loss_list, 'b', label = 'loss'); legend = ax.legend(loc='upper center'); plt.xlabel('Steps'); plt.title('loss and accuracy of LSTM in 2000 steps'); plt.show(); for idx in range(5): print('====') print(string_list[idx]);
def train(config): # Initialize the device which to run the model on device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) # should we do +1?? torch.save(dataset, config.save_dataset) data_loader = DataLoader(dataset, config.batch_size, num_workers=1) # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, config.lstm_num_hidden, config.lstm_num_layers, 1 - config.dropout_keep_prob, device) criterion = nn.CrossEntropyLoss() optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate) losses = [] accuracies = [] # run through the dataset several times till u reach max_steps step = 0 while step < config.train_steps: for (batch_inputs, batch_targets) in data_loader: step += 1 # Only for time measurement of step through network t1 = time.time() batch_inputs = torch.stack(batch_inputs).to(device) batch_targets = torch.stack(batch_targets, dim=1).to( device) #dim=1 to avoid transposing batch_predictions, (_, _) = model.forward(batch_inputs) batch_predictions = batch_predictions.permute(1, 2, 0) loss = criterion(batch_predictions, batch_targets) losses.append(loss.item()) model.zero_grad() # should we do this?? loss.backward() torch.nn.utils.clip_grad_norm( model.parameters(), max_norm=config.max_norm) # prevents maximum gradient problem optimizer.step() accuracy = accuracy_(batch_predictions, batch_targets) accuracies.append(accuracy) # Just for time measurement t2 = time.time() examples_per_second = config.batch_size / float(t2 - t1) if step % config.print_every == 0: print( "[{}] Train Step {}/{}, Batch Size = {}, Examples/Sec = {:.2f}, " "Accuracy = {:.2f}, Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), int(step), int(config.train_steps), config.batch_size, examples_per_second, accuracy, loss)) if step % config.sample_every == 0: for temperature in [0]: for length in [30, 60, 90, 120]: sentence = generate_sentence(model, dataset, temperature, length, device) with open(config.save_generated_text, 'a', encoding='utf-8') as file: file.write("{};{};{};{}\n".format( step, temperature, length, sentence)) if step % config.save_every == 0: torch.save(model.state_dict(), config.save_model) if step == config.train_steps: # save only the model parameters torch.save(model.state_dict(), config.save_model) # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break # revive the model # model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size(), # config.lstm_num_hidden, config.lstm_num_layers, device) # model.load_state_dict(torch.load(config.save_model)) print('Done training.')
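# A minimal sketch of the accuracy_() helper used in the training loop above; the name
# and argument order come from the call site, and the shape handling (predictions as
# (batch, vocab_size, seq_length) after the permute) is an assumption.
def accuracy_(predictions, targets):
    # predictions: (batch, vocab_size, seq_length), targets: (batch, seq_length)
    predicted_chars = predictions.argmax(dim=1)
    return (predicted_chars == targets).float().mean().item()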
def train(config):
    writer = torch.utils.tensorboard.SummaryWriter()

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1).
    # DataLoader's third positional argument is `shuffle`, so only the dataset and
    # batch size are passed here; the sequence length is handled by the dataset.
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size)

    # Initialize the model that we are going to use
    vocabulary_size = dataset.vocab_size
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=vocabulary_size)
    model.to(device)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    accuracies = []
    losses = []
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move to GPU
        batch_inputs = to_tensor_rep(batch_inputs).to(device)
        batch_targets = to_tensor_rep(batch_targets).to(device)

        # Reset for next iteration
        model.zero_grad()

        model_output = model(batch_inputs,
                             c_0=torch.zeros(config.lstm_num_layers,
                                             batch_inputs.shape[1],
                                             config.lstm_num_hidden, device=device),
                             h_0=torch.zeros(config.lstm_num_layers,
                                             batch_inputs.shape[1],
                                             config.lstm_num_hidden, device=device))

        # for each timestep, the cross-entropy loss is computed and subsequently averaged
        batch_losses = torch.zeros(config.seq_length, device=device)
        for i in range(config.seq_length):
            batch_losses[i] = criterion(model_output[i], batch_targets[i])
        loss = (1 / config.seq_length) * torch.sum(batch_losses)

        # compute the gradients, clip them to prevent exploding gradients and backpropagate
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        optimizer.step()

        # calculate accuracy
        predictions = torch.argmax(model_output, dim=2)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / (model_output.size(0) * model_output.size(1))

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if (step + 1) % config.print_every == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      int(config.train_steps), config.batch_size,
                      examples_per_second, accuracy, loss))

            # save loss and accuracy (store plain floats, not graph-attached tensors)
            accuracies.append(accuracy)
            losses.append(loss.item())
            writer.add_scalar("loss", loss.item(), step)
            writer.add_scalar("accuracy", accuracy, step)

        if (step + 1) % config.sample_every == 0:
            model.eval()
            generate_sequence(model, 62, dataset)
            model.train()

        if step == config.train_steps:
            break

    print('Done training.')

    # make loss and accuracy plots
    x = np.arange(len(accuracies)) * config.print_every
    plot_curve(x, accuracies, "Accuracy", "Training accuracy")
    plot_curve(x, losses, "Loss", "Training Loss")
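# A minimal sketch of the to_tensor_rep() helper assumed above: the assignment's
# TextDataset yields a batch as a list of per-time-step tensors, and this stacks them
# into a single (seq_length, batch_size) LongTensor. The helper name comes from the
# call site; the list-of-tensors input format is an assumption.
def to_tensor_rep(batch):
    return torch.stack(batch, dim=0).long()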
def train(config): # determine the filename (to be used for saving results, checkpoints, models, etc.) filename = Path(config.txt_file).stem # Initialize the device which to run the model on if config.device == 'cuda': if torch.cuda.is_available(): device = torch.device(config.device) else: device = torch.device('cpu') else: device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset( filename=config.txt_file, seq_length=config.seq_length ) data_loader = DataLoader(dataset, config.batch_size, num_workers=1) # get the vocabulary size and int2char and char2int dictionaries for use later VOCAB_SIZE = dataset.vocab_size # Initialize the model that we are going to use model = TextGenerationModel( batch_size=config.batch_size, seq_length=config.seq_length, vocabulary_size=VOCAB_SIZE, lstm_num_hidden=config.lstm_num_hidden, lstm_num_layers=config.lstm_num_layers, device=device, batch_first=config.batch_first, dropout=1.0-config.dropout_keep_prob ) # Setup the loss and optimizer and learning rate scheduler criterion = nn.CrossEntropyLoss() optimizer = optim.Adam( model.parameters(), config.learning_rate ) # Load the latest checkpoint, if any exist checkpoints = list(CHECKPOINTS_DIR.glob(f'{model.__class__.__name__}_{filename}_checkpoint_*.pt')) if len(checkpoints) > 0: # load the latest checkpoint checkpoints.sort(key=os.path.getctime) latest_checkpoint_path = checkpoints[-1] start_step, results, sequences = load_checkpoint(latest_checkpoint_path, model, optimizer) else: # initialize the epoch, results and best_accuracy start_step = 0 results = { 'step': [], 'accuracy': [], 'loss': [], } sequences = { 'step': [], 't': [], 'temperature': [], 'sequence': [] } for step in range(start_step, int(config.train_steps)): # reinitialize the data_loader iterater if we have iterated over all available mini-batches if step % len(data_loader) == 0 or step == start_step: data_iter = iter(data_loader) # get the mini-batch batch_inputs, batch_targets = next(data_iter) # Only for time measurement of step through network t1 = time.time() ####################################################### # Add more code here ... 
####################################################### # put the model in training mode model.train() # convert the data and send to device X = torch.stack(batch_inputs, dim=1) X = X.to(device) Y = torch.stack(batch_targets, dim=1) Y = Y.to(device) # forward pass the mini-batch Y_out, _ = model.forward(X) Y_pred = Y_out.argmax(dim=-1) # (re)set the optimizer gradient to 0 optimizer.zero_grad() # compute the accuracy and the loss accuracy = get_accuracy(Y_pred, Y) loss = criterion.forward(Y_out.transpose(2, 1), Y) # backwards propogate the loss loss.backward() # clip the gradients (to preven them from exploding) torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm) # tune the model parameters optimizer.step() # Just for time measurement t2 = time.time() examples_per_second = config.batch_size/float(t2-t1) if step % config.print_every == 0: print(f'[{datetime.now().strftime("%Y-%m-%d %H:%M")}], Train Step {step:04d}/{int(config.train_steps):04d}, Batch Size = {config.batch_size}, Examples/Sec = {examples_per_second:.2f}, Accuracy = {accuracy:.2f}, Loss = {loss:.3f}') # append the accuracy and loss to the results results['step'].append(step) results['accuracy'].append(accuracy.item()) results['loss'].append(loss.item()) if step % config.sample_every == 0: for T in [20, 30, 60, 120]: for temperature in [0.0, 0.5, 1.0, 2.0]: # Generate some sentences by sampling from the model sequence = sample_sequence( model=model, vocab_size=VOCAB_SIZE, T=T, char=None, temperature=temperature, device=device ) sequence_str = dataset.convert_to_string(sequence) print(f'Generated sample sequence (T={T}, temp={temperature}): {sequence_str}') # append the generated sequence to the sequences sequences['step'].append(step) sequences['t'].append(T) sequences['temperature'].append(temperature) sequences['sequence'].append(sequence_str) if step % config.checkpoint_every == 0: # create a checkpoint create_checkpoint(CHECKPOINTS_DIR, filename, step, model, optimizer, results, sequences) # save the results save_results(RESULTS_DIR, filename, results, sequences, model) # save the model save_model(MODELS_DIR, filename, model) if step == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break print('Done training.')
# Initialize the device which to run the model on device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print(device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) data_loader = DataLoader(dataset, config.batch_size, num_workers=0) print('batch', config.batch_size) vocabulary_size = dataset.vocab_size print('vocab', vocabulary_size) # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, vocabulary_size=vocabulary_size, lstm_num_hidden=config.lstm_num_hidden, lstm_num_layers=config.lstm_num_layers, dropout=1-config.dropout_keep_prob, device=device ) model = model.to(device) # Setup the loss and optimizer criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate ) print('Hi') acc_list = [] loss_list = [] step_list = [] text_list = [] epoch = 50
def train(config): def compute_accuracy(outputs, targets): """ Compute the accuracy of the predicitions. """ outputs = torch.argmax(outputs, -1) return (outputs == targets).float().mean() # Initialize the device which to run the model on device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) # fixme data_loader = DataLoader(dataset, config.batch_size, num_workers=4) # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, config.lstm_num_hidden, config.lstm_num_layers, device, config.dropout_keep_prob).to(device) learning_rate = config.learning_rate # Setup the loss and optimizer criterion = nn.CrossEntropyLoss() # fixme optimizer = optim.Adam(model.parameters(), learning_rate) # fixme x_onehot = torch.FloatTensor(config.seq_length, config.batch_size, dataset.vocab_size).to(device) y_onehot = torch.FloatTensor(config.seq_length, config.batch_size, dataset.vocab_size).to(device) # HACK: config.train_steps seems to be of type 'float' instead of 'int'. config.train_steps = int(config.train_steps) step = 0 loss_list = [] accuracy_list = [] while step < config.train_steps: for batch_inputs, batch_targets in data_loader: # Only for time measurement of step through network t1 = time.time() ####################################################### # Add more code here ... ####################################################### optimizer.zero_grad() batch_inputs = torch.stack(batch_inputs).to(device) batch_targets = torch.stack(batch_targets).to(device) # print(dataset.convert_to_string(batch_inputs.t()[0].cpu().numpy())) try: x_onehot.zero_() x_onehot.scatter_(2, batch_inputs.unsqueeze(-1), 1) except RuntimeError: continue y = model(x_onehot) loss = criterion(y.view(-1, dataset.vocab_size), batch_targets.view(-1)) loss.backward() optimizer.step() loss = loss.item() # fixme accuracy = compute_accuracy(y, batch_targets) # fixme # Just for time measurement t2 = time.time() examples_per_second = config.batch_size / float(t2 - t1) loss_list.append(loss) accuracy_list.append(accuracy) if step % config.learning_rate_step == 0: learning_rate = config.learning_rate_decay * learning_rate print(learning_rate) optimizer = optim.Adam(model.parameters(), learning_rate) if step % config.print_every == 0: print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, " "Examples/Sec = {:.2f}, Accuracy = {:.2f}, " "Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), step, config.train_steps, config.batch_size, examples_per_second, accuracy, loss)) # Save an image of loss and accuracy during training. plt.figure() plt.subplot(121) plt.plot(loss_list) plt.xlabel("Steps") plt.ylabel("Loss") plt.subplot(122) plt.plot(accuracy_list) plt.xlabel("Steps") plt.ylabel("Accuracy") plt.tight_layout() plt.savefig('loss.png') plt.close() if step % config.sample_every == 0: # Generate some sentences by sampling from the model inputs = sample_text(dataset, x_onehot) output = sample_text(dataset, y) sample = sample_text(dataset, model.sample()) for idx in range(5): print(f"{inputs[idx]} | {output[idx]} | {sample[idx]}") # Save some sampled sequences. 
with open('samples.csv', 'a') as file: for line in sample[:5]: file.write(f"{step};'{line}'\n") torch.save( { 'step': step + 1, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, os.path.join(config.summary_path, f"model_{step}.pth.tar")) if step == config.train_steps: # If you receive a PyTorch data-loader error, check this # bug report: https://github.com/pytorch/pytorch/pull/9655 break else: step += 1 print('Done training.')
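# A minimal sketch of how training could be resumed from the checkpoints written above;
# load_checkpoint is a hypothetical helper (not part of the original) that mirrors the
# dictionary keys used in the torch.save() call in the training loop.
def load_checkpoint(path, model, optimizer):
    checkpoint = torch.load(path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint['step']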
    # (continuation of one of the generate_* helpers called below; at this point the
    #  warm-up characters are already in `chars` and the hidden state in h1, h2)
    y_pred, (h1, h2) = model.generate(chars, h1, h2)
    chars.append(torch.max(y_pred, 1)[1][y_pred.size(0) - 1])
    for i in range(sentence_len):
        y_pred, (h1, h2) = model.generate([chars[-1]], h1, h2)
        chars.append(torch.max(y_pred, 1)[1][y_pred.size(0) - 1])
    chars = [int(char.cpu().numpy()[0]) for char in chars]
    return dataset.convert_to_string(chars)


batch_size = 64
seq_length = 30
vocabulary_size = 87
lstm_num_hidden = 128
lstm_num_layers = 2
device = 'cpu'

model = TextGenerationModel(batch_size, seq_length, vocabulary_size,
                            lstm_num_hidden, lstm_num_layers, device).to(device)
model.load_state_dict(
    torch.load('results_grim/model_final.pickle', map_location='cpu'))
dataset = TextDataset(filename='assets/book_EN_grimms_fairy_tails.txt',
                      seq_length=30)

print(generate_greedy(model, dataset, 30))
print(generate_temperature(model, dataset, 30, 2))
print(generate_greedy_given(model, dataset, 180))
print(generate_temperature_given(model, dataset, 180, T=0.5))
def train(config): # Initialize the device which to run the model on device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) data_loader = DataLoader(dataset, config.batch_size, num_workers=1, drop_last=True) vocab_size = dataset.vocab_size # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, vocab_size, config.lstm_num_hidden, config.lstm_num_layers, config.device) model = model.to(device) print(model) # Setup the loss and optimizer criterion = torch.nn.CrossEntropyLoss() # if pickle file is available, load steps and use index -1 to get last step + get lists of values, to continue training # where we left off if os.path.isfile("steps.p"): print('Pre-trained model available...') print('Resuming training...') # load lists step_intervals = pickle.load(open("steps.p", "rb")) all_sentences = pickle.load(open("sentences.p", "rb")) accuracy_list = pickle.load(open("accuracies.p", "rb")) loss_list = pickle.load(open("loss.p", "rb")) model_info = pickle.load(open("model_info.p", "rb")) # start where we left off all_steps = step_intervals[-1] # load model Modelname = 'TrainIntervalModel' + model_info[0] + 'acc:' + model_info[ 1] + '.pt' model = torch.load(Modelname) model = model.to(device) # otherwise start training from a clean slate else: print('No pre-trained model available...') print('Initializing training...') # create lists to keep track of data while training all_sentences = [] step_intervals = [] accuracy_list = [] loss_list = [] # initialize total step counter all_steps = 0 # initialize optimizer with starting learning rate optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate) # initialize optimizer with previous learning rate. (extract from pickle then use scheduler) scheduler = torch.optim.lr_scheduler.StepLR( optimizer, step_size=config.learning_rate_step, gamma=config.learning_rate_decay) # since the nested for loop stops looping after a complete iteration through the data_loader, add for loop for epochs for epoch in range(config.epochs): print(model) for step, (batch_inputs, batch_targets) in enumerate(data_loader): # Only for time measurement of step through network t1 = time.time() # apply scheduler scheduler.step() # create 2D tensor instead of list of 1D tensors #batch_inputs = torch.stack(batch_inputs) batch_inputs = batch_inputs.to(device) h, c = model.init_hidden() out, (h, c) = model(batch_inputs, h, c) # transpose to match cross entropy input dimensions out.transpose_(1, 2) batch_targets = batch_targets.to(device) ####################################################### # Add more code here ... 
####################################################### loss = criterion(out, batch_targets) max = torch.argmax(out, dim=1) correct = (max == batch_targets) accuracy = torch.sum( correct).item() / correct.size()[0] / correct.size()[1] # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() # Just for time measurement t2 = time.time() examples_per_second = config.batch_size / float(t2 - t1) if step % config.print_every == 0: print( "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, " "Accuracy = {:.2f}, Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), step, int(config.train_steps), config.batch_size, examples_per_second, accuracy, loss)) if all_steps % config.sample_every == 0: ############################### # Generate generated sequence # ############################### # do not keep track of gradients during model evaluation with torch.no_grad(): # create random character to start sentence with random_input = torch.randint(0, vocab_size, (config.batch_size, ), dtype=torch.long).view(-1, 1) x_input = random_input.to(device) # initialize hidden state and cell state h, c = model.init_hidden() h = h.to(device) c = c.to(device) sentences = x_input # loop through sequence length to set generated output as input for next sequence for i in range(config.seq_length): # get randomly generated sentence out, (h, c) = model(x_input, h, c) #################### # Temperature here # #################### # check whether user wants to apply temperature sampling if config.temperature: # apply temperature sampling out = out / config.tempvalue out = F.softmax(out, dim=2) # create a torch distribution of the calculated softmax probabilities and sample from that distribution distribution = torch.distributions.categorical.Categorical( out.view(config.batch_size, vocab_size)) out = distribution.sample().view(-1, 1) # check whether user wants to apply greedy sampling else: # load new datapoint by taking the predicted previous letter using greedy approach out = torch.argmax(out, dim=2) # append generated character to total sentence sentences = torch.cat((sentences, out), 1) x_input = out # pick a random sentence (from the batch of created sentences) index = np.random.randint(0, config.batch_size, 1) sentence = sentences[index, :] # squeeze sentence into 1D sentence = sentence.view(-1).cpu() # print sentence print(dataset.convert_to_string(sentence.data.numpy())) # save sentence all_sentences.append(sentence.data.numpy()) ########################## # Save loss and accuracy # ########################## # save loss value loss = loss.cpu() loss_list.append(loss.data.numpy()) # save accuracy value accuracy_list.append(accuracy) # save step interval step_intervals.append(all_steps) if step == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break # counter of total amounts of steps (keep track over multiple training sessions) all_steps += 1 if config.savefiles: # pickle sentences and steps pickle.dump(all_sentences, open('sentences.p', 'wb')) pickle.dump(step_intervals, open('steps.p', 'wb')) # pickle accuracy and loss pickle.dump(accuracy_list, open('accuracies.p', 'wb')) pickle.dump(loss_list, open('loss.p', 'wb')) # save model Modelname = 'TrainIntervalModel' + str(epoch) + 'acc:' + str( accuracy) + '.pt' torch.save(model, Modelname) model_info = [str(epoch), str(accuracy)] pickle.dump(model_info, open('model_info.p', 'wb')) print('Done training.')
def train(config): seed = config.seed np.random.seed(seed) torch.manual_seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed(seed) torch.cuda.manual_seed_all(seed) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False # Initialize the device which to run the model on device = torch.device(config.device) #device = torch.device('cpu') # Initialize the dataset and data loader (note the +1) dataset = TextDataset(filename=config.txt_file, seq_length=config.seq_length) data_loader = DataLoader(dataset, config.batch_size, num_workers=0) # Initialize the model that we are going to use model = TextGenerationModel(config, dataset._vocab_size, device).to(device) print('device:', device.type) print('Model defined. Number of trainable params:', model.numTrainableParameters()) print(model) model.numTrainableParameters() testLSTM(dataset, data_loader, model, config, device) # Setup the loss and optimizer criterion = torch.nn.NLLLoss() optimizer = optim.AdamW(model.parameters(), config.learning_rate) scheduler = torch.optim.lr_scheduler.StepLR( optimizer, step_size=config.learning_rate_step, gamma=config.learning_rate_decay) selfGenTHRES = 0 maxTrainAcc = 0 acc_plt = [] loss_plt = [] for step, (batch_inputs, batch_targets) in enumerate(data_loader): # Only for time measurement of step through network t1 = time.time() ####################################################### # Add more code here ... ####################################################### X = torch.stack(batch_inputs).to( device) # (seq_len,bsize), input sequence T = torch.stack(batch_targets).to( device) # (seq_len,bsize), ground truth sequence model.zero_grad() h, C = model.init_cell(config.batch_size) logprobs, _, _ = model(X, h, C) # (seq_len,bsize,voc_size) loss = criterion( logprobs.reshape(config.seq_length * config.batch_size, dataset.vocab_size), T.reshape(-1)) loss.backward() torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm) optimizer.step() scheduler.step() predchar = torch.argmax( logprobs, dim=2 ) # (seq_len,bsize) the predicted characters: selected highest logprob for each sequence and example in the mini batch accuracy = torch.sum(predchar == T).item() / (config.batch_size * config.seq_length) loss_plt.append(loss) acc_plt.append(accuracy) # Save model with max train accuracy (I will use this for this toy example with batch_size*seq_len character predictions. 
# Of course this should be based on a larger test dataset if accuracy > maxTrainAcc: maxTrainAcc = accuracy torch.save( { 'step': step, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'loss': loss, 'accuracy': accuracy }, "saved_model.tar") # If a new accuracy level (steps of 0.1) is reached, print five self-generated sentences if accuracy > selfGenTHRES: selfGenTHRES += 0.1 print( '\n#################################### SAMPLE SELF GENERATED SEQUENCES #######################################' ) print('# Step:', step, ', loss:', loss, 'accuracy', accuracy) print('# ') print('# Greedy sampling [a...]:', generateSequenceGreedy(dataset, model, device, 70, 'a')) print('# Greedy sampling [b...]:', generateSequenceGreedy(dataset, model, device, 70, 'b')) print('# Greedy sampling [c...]:', generateSequenceGreedy(dataset, model, device, 70, 'c')) print('# Greedy sampling [d...]:', generateSequenceGreedy(dataset, model, device, 70, 'd')) print('# Greedy sampling [e...]:', generateSequenceGreedy(dataset, model, device, 70, 'e')) print('#') print('# Output of last training example:') print('# INPUT....: ', end="") printSequence(X, 0, dataset) print('# TARGET...: ', end="") printSequence(T, 0, dataset) print('# PREDICTED: ', end="") printSequence(predchar, 0, dataset) print('#') print( '############################################################################################################\n' ) # Just for time measurement t2 = time.time() examples_per_second = config.batch_size / float(t2 - t1) if (step + 1) % config.print_every == 0: # Print training update print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, \ Examples/Sec = {:.2f}, " "Accuracy = {:.2f}, Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), step, config.train_steps, config.batch_size, examples_per_second, accuracy, loss)) print('best training acc', maxTrainAcc) if (step + 1) % (config.train_steps // 3) == 0: # Generate some sentences by sampling from the model print( '\n#################################### SAMPLE SELF GENERATED SEQUENCES #######################################' ) print('# Step:', step, ', loss:', loss, 'accuracy', accuracy) print('# Greedy sampling [a...]:', generateSequenceGreedy(dataset, model, device, 30, 'a')) print( '############################################################################################################\n' ) if step == config.train_steps: # If you receive a PyTorch data-loader error, # check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break print('Done training.') Testaccuracy = getTestAccuracy(dataset, data_loader, model, config, device, 200) pltLossAcc(loss_plt, acc_plt, config)
def train(config):
    # Initialize the device which to run the model on
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Initialize the dataset and data loader (note the +1)
    abs_path = os.path.abspath(config.txt_file)
    dataset = TextDataset(abs_path, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=device)

    experiment_label = "{}_".format(datetime.now().strftime("%Y-%m-%d %H:%M"))
    for key, value in vars(config).items():
        experiment_label += "{}={}_".format(key, value)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    # TODO: configure learning rate scheduler

    for epoch in range(1, config.epochs + 1):
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            X = torch.stack(batch_inputs, dim=1)
            X = one_hot(X, dataset.vocab_size)
            Y = torch.stack(batch_targets, dim=1)
            X, Y = X.to(device), Y.to(device)

            # forward pass
            outputs, _ = model(X)

            # compute training metrics
            loss = criterion(outputs.transpose(2, 1), Y)
            predictions = get_predictions(outputs)
            accuracy = (Y == predictions).sum().item() / reduce(lambda x, y: x * y, Y.size())

            # backward pass: clip the gradients to prevent them from exploding
            # *before* the optimizer step, otherwise clipping has no effect
            model.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
            optimizer.step()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:
                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), epoch * step,
                          int(config.train_steps), config.batch_size,
                          examples_per_second, accuracy, loss))

        torch.save(model, 'grimm/grimm_epoch_{}.pt'.format(epoch))
        # _ = xp.to_zip(experiment_label + ".zip")

    print('Done training.')
def train(config):
    if config.tensorboard:
        writer = SummaryWriter(config.summary + datetime.now().strftime("%Y%m%d-%H%M%S"))

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=config.device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    for epoch in range(config.epochs):
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            optimizer.zero_grad()

            # one_hot returns a LongTensor, which the LSTM forward pass rejects,
            # so cast the inputs to float
            batch_inputs = torch.nn.functional.one_hot(
                batch_inputs, num_classes=dataset.vocab_size).float().to(device)
            batch_targets = batch_targets.to(device)

            out, _ = model(batch_inputs)

            # CrossEntropyLoss expects (batch, vocab_size, seq_length),
            # so permute the (batch, seq_length, vocab_size) output
            loss = criterion(out.permute(0, 2, 1), batch_targets)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
            optimizer.step()

            predictions = out.argmax(dim=-1)
            accuracy = (predictions == batch_targets).float().mean()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:
                print("[{}] Train Step {:04d}/{:04d}, Epoch {:d} Batch Size = {}, Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                          int(config.train_steps), epoch, config.batch_size,
                          examples_per_second, accuracy, loss))

                if config.tensorboard:
                    writer.add_scalar('training_loss', loss, step)
                    writer.add_scalar('accuracy', accuracy, step)

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                generate_sentence(step, model, config, dataset)

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                print('Done training.')
                break
def train(config): tf.reset_default_graph() # Initialize the text dataset dataset = TextDataset(config.txt_file, config.clean_data) # Initialize the model model = TextGenerationModel(batch_size=config.batch_size, seq_length=config.seq_length, vocabulary_size=dataset.vocab_size, lstm_num_hidden=config.lstm_num_hidden, lstm_num_layers=config.lstm_num_layers, embed_dim=config.embed_dim, decoding_model=config.decoding_mode) ########################################################################### # Implement code here. ########################################################################### warmup_seq = tf.placeholder(dtype=tf.int32, shape=(None, 1), name='warmup_decoding_sequences') warmup_decodes = model.decode_warmup(warmup_seq, config.decode_length) init_decode_char = tf.placeholder(dtype=tf.int32, shape=(config.num_rand_samples), name='rand_init_decoding') random_decodes = model.decode(decode_batch_size=config.num_rand_samples, init_input=init_decode_char, decode_length=config.decode_length, init_state=None) # Reproducibility # tf.set_random_seed(42) # np.random.seed(42) # Utility vars and ops gpu_opts = tf.GPUOptions( per_process_gpu_memory_fraction=config.gpu_mem_frac, allow_growth=True) session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts)) global_step = tf.Variable(0, trainable=False, name='global_step') # logging train_logdir = os.path.join(config.summary_path, '{}_train'.format(config.model_name)) train_log_writer = init_summary_writer(session, train_logdir) # Define the optimizer if config.optimizer.lower() == 'rmsprop': optimizer = tf.train.RMSPropOptimizer( learning_rate=config.learning_rate, decay=config.learning_rate_decay) elif config.optimizer.lower() == 'adam': optimizer = tf.train.AdamOptimizer(config.learning_rate) # Compute the gradients for each variable grads_and_vars = optimizer.compute_gradients(model.loss) grads, variables = zip(*grads_and_vars) grads_clipped, _ = tf.clip_by_global_norm( grads, clip_norm=config.max_norm_gradient) apply_gradients_op = optimizer.apply_gradients(zip(grads_clipped, variables), global_step=global_step) saver = tf.train.Saver(max_to_keep=50) save_path = os.path.join(config.checkpoint_path, '{}/model.ckpt'.format(config.model_name)) _ensure_path_exists(save_path) # Summaries summary_op = tf.summary.merge_all() session.run(fetches=[ tf.global_variables_initializer(), tf.local_variables_initializer() ]) for train_step in range(int(config.train_steps)): # dim: [batch_size, time_step] batch_inputs, batch_labels = dataset.batch( batch_size=config.batch_size, seq_length=config.seq_length) # Time-major: [time_step, batch_size] batch_inputs = batch_inputs.T # Only for time measurement of step through network t1 = time.time() ####################################################################### # Implement code here ####################################################################### train_feed = {model.inputs: batch_inputs, model.labels: batch_labels} fetches = [model.loss, apply_gradients_op] if train_step % config.print_every == 0: fetches += [summary_op] loss, _, summary = session.run(feed_dict=train_feed, fetches=fetches) train_log_writer.add_summary(summary, train_step) else: loss, _ = session.run(feed_dict=train_feed, fetches=fetches) # Only for time measurement of step through network t2 = time.time() examples_per_second = config.batch_size / float(t2 - t1) # Output the training progress if train_step % config.print_every == 0: print( "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, Loss = {}" 
.format(datetime.now().strftime("%Y-%m-%d %H:%M"), train_step + 1, int(config.train_steps), config.batch_size, examples_per_second, loss)) # Decode if train_step % config.sample_every == 0: # warmup_seq = tf.placeholder(dtype=tf.int32, shape=(None, 5), name='warmup_decoding_sequences') # decoded_seqs = model.decode_warmup(warmup_seq, config.decode_length) # # init_decode_char = tf.placeholder(dtype=tf.int32, shape=(config.num_rand_samples), # name='rand_init_decoding') # random_decodes = model.decode(decode_batch_size=config.num_rand_samples, init_input=init_decode_char, # decode_length=config.decode_length, init_state=None) # random character sampling print('Random character sampling') rand_chars = np.random.choice(a=dataset.vocab_size, size=(config.num_rand_samples)) decode_feed = {init_decode_char: rand_chars} decoded_tokens = session.run(fetches=[random_decodes], feed_dict=decode_feed)[0] decoded_tokens = np.array(decoded_tokens).T for i in range(decoded_tokens.shape[0]): print('{}|{}'.format( dataset._ix_to_char[rand_chars[i]], dataset.convert_to_string(decoded_tokens[i, :]))) print('Warmup sequence sampling') warmups = [ 'Welcome to the planet Earth ', 'Human beings grew up in forests ', 'Satan said ', 'God is not ', 'theory of evolution ', 'whole groups of species ' ] for warmup in warmups: warmup_tokens = np.array([ dataset._char_to_ix[x] for x in warmup.lower() if x in dataset._char_to_ix ]).reshape((-1, 1)) feed = {warmup_seq: warmup_tokens} decoded_tokens = session.run(fetches=[warmup_decodes], feed_dict=feed)[0] print('{}|{}'.format( warmup, dataset.convert_to_string( decoded_tokens.squeeze().tolist()))) if train_step % config.checkpoint_every == 0: saver.save(session, save_path=save_path) train_log_writer.close()
def train(config): # Print all configs to confirm parameter settings print_flags() assert config.sampling_method in ('greedy', 'random') assert config.generate_mode in ('generate', 'finish') # Initialize the device which to run the model on device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(filename=config.txt_file, seq_length=config.seq_length) data_loader = DataLoader(dataset, config.batch_size, num_workers=1) # Initialize the model that we are going to use model = TextGenerationModel(batch_size=config.batch_size, seq_length=config.seq_length, vocabulary_size=dataset.vocab_size, dropout=1-config.dropout_keep_prob, lstm_num_hidden=config.lstm_num_hidden, lstm_num_layers=config.lstm_num_layers, device=device) model.to(device) # Setup the loss and optimizer criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=config.learning_rate) epoch = 10 # Store some measures los = list() iteration = list() acc = list() max_step = 0 for i in range(epoch): for step, (batch_inputs, batch_targets) in enumerate(data_loader): # Only for time measurement of step through network t1 = time.time() model.train() optimizer.zero_grad() batch_inputs = torch.stack(batch_inputs).to(device) batch_targets = torch.stack(batch_targets).to(device) h_0 = torch.zeros(config.lstm_num_layers, batch_inputs.shape[1], config.lstm_num_hidden).to(device) c_0 = torch.zeros(config.lstm_num_layers, batch_inputs.shape[1], config.lstm_num_hidden).to(device) pred, _, _ = model(batch_inputs, h_0, c_0) accuracy = compute_accuracy(pred, batch_targets) pred = pred.permute(1, 2, 0) batch_targets = batch_targets.permute(1, 0) loss = criterion(pred, batch_targets) loss.backward() torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm) optimizer.step() # Just for time measurement t2 = time.time() examples_per_second = config.batch_size/float(t2-t1) if (step + i * max_step) % config.print_every == 0: print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, " "Accuracy = {:.2f}, Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), step + i * max_step, int(config.train_steps), config.batch_size, examples_per_second, accuracy, loss )) iteration.append(step + i * max_step) acc.append(accuracy) los.append(loss) if max_step < step: max_step = step if (step + i * max_step) % config.sample_every == 0: model.eval() batch_sample = 5 if config.generate_mode == 'finish': generated = [dataset._char_to_ix[c] for c in config.input_seq] generated = torch.LongTensor(generated).view(-1, 1).to(device) for l in range(config.generate_length): if l == 0: h_s = torch.zeros(config.lstm_num_layers, 1, config.lstm_num_hidden).to(device) c_s = torch.zeros(config.lstm_num_layers, 1, config.lstm_num_hidden).to(device) gen, h_s, c_s = model(generated, h_s, c_s) gen = torch.unsqueeze(gen[-1], 0) else: gen, h_s, c_s = model(gen, h_s, c_s) if config.sampling_method == 'greedy': gen = gen.argmax(dim=2) else: gen = nn.functional.softmax(gen/config.temperature, dim=2) dist = torch.distributions.categorical.Categorical(gen) gen = dist.sample() generated = torch.cat((generated, gen)) else: generated = [dataset._char_to_ix[random.choice(dataset._chars)] for c in range(batch_sample)] generated = torch.LongTensor(generated).view(-1, batch_sample).to(device) for l in range(config.generate_length - 1): if l == 0: h_s = torch.zeros(config.lstm_num_layers, batch_sample, config.lstm_num_hidden).to(device) c_s = torch.zeros(config.lstm_num_layers, 
batch_sample, config.lstm_num_hidden).to(device) gen, h_s, c_s = model(generated, h_s, c_s) else: gen, h_s, c_s = model(gen, h_s, c_s) if config.sampling_method == 'greedy': gen = gen.argmax(dim=2) else: gen = nn.functional.softmax(gen/config.temperature, dim=2) dist = torch.distributions.categorical.Categorical(gen) gen = dist.sample() generated = torch.cat((generated, gen)) generated = generated.t() sentence = [dataset.convert_to_string(idx) for idx in generated.tolist()] if config.sampling_method == 'random': with open('{}/{}_{}_{}_{}.txt'.format(config.summary_path, config.generate_mode, datetime.now().strftime("%Y-%m-%d"), config.sampling_method, config.temperature), 'a', encoding='utf-8') as file: file.write('--------------\n') file.write('Training Step: {}\n'.format(step + i * max_step)) file.write('--------------\n') for sen in sentence: file.write('{}\n'.format(sen)) file.write('\n') file.close() else: with open('{}/{}_{}_{}.txt'.format(config.summary_path, config.generate_mode, datetime.now().strftime("%Y-%m-%d"), config.sampling_method), 'a', encoding='utf-8') as file: file.write('--------------\n') file.write('Training Step: {}\n'.format(step + i * max_step)) file.write('--------------\n') for sen in sentence: file.write('{}\n'.format(sen)) file.write('\n') file.close() if (step + i * max_step) == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break if (step + i * max_step) == config.train_steps: break print('Done training.') fig, axs = plt.subplots(1, 2, figsize=(10,5)) axs[0].plot(iteration, acc) axs[0].set_xlabel('Iteration') axs[0].set_ylabel('Accuracy') axs[1].plot(iteration, los) axs[1].set_xlabel('Iteration') axs[1].set_ylabel('Loss') fig.tight_layout() plt.show()
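# The sampling branch above switches between a greedy argmax and a
# temperature-scaled categorical draw. A minimal standalone sketch of that
# choice, with a random logits tensor standing in for the model output:
import torch
import torch.nn.functional as F

def pick_next(logits, sampling_method="greedy", temperature=1.0):
    # logits: tensor of shape (vocab_size,) with unnormalized scores.
    if sampling_method == "greedy":
        return logits.argmax().item()
    probs = F.softmax(logits / temperature, dim=0)
    return torch.distributions.Categorical(probs).sample().item()

logits = torch.randn(87)                     # assumed vocabulary size
print(pick_next(logits, "random", temperature=0.5))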
def train(config): # Initialize the device which to run the model on device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) # fixme data_loader = DataLoader(dataset, config.batch_size, num_workers=1) test_loader = DataLoader(dataset, config.test_size, num_workers=1) results = open(config.out_file, "w+") results.write( "#model_type : {}-layer LSTM\n#seq_length : {}\n#input_dim : {}\n#num_classes : {}\n#num_hidden :\ {}\n#batch_size : {}\n#learn_rate : {}\n#train_steps : {}\n#max_norm : {}\n#lr_decay : {}\n#lr_step :\ {}\n".format(config.lstm_num_layers, config.seq_length, dataset.vocab_size, dataset.vocab_size, config.lstm_num_hidden, config.batch_size, config.learning_rate, config.train_steps, config.max_norm, config.learning_rate_decay, config.learning_rate_step)) results.write("#train_step accuracy loss\n") gen_text = open(config.out_file[:-4] + ".txt", 'w+', encoding="utf-8") # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, lstm_num_hidden=config.lstm_num_hidden, lstm_num_layers=config.lstm_num_layers, device=device).to(device) # Setup the loss and optimizer criterion = torch.nn.CrossEntropyLoss() #train optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate) prevstep = 0 while True: #otherwise it stop after 1 epoch for step, (batch_inputs, batch_targets) in enumerate(data_loader): step = prevstep + step batch_inputs = torch.nn.functional.one_hot( batch_inputs.type(torch.LongTensor), dataset.vocab_size).type(torch.FloatTensor).to(device) batch_targets = batch_targets.to(device) optimizer.zero_grad() batch_y = model(batch_inputs) #without softmax, dim: B x T x C #prevent gradients from exploding, not sure if still necessary torch.nn.utils.clip_grad_norm(model.parameters(), max_norm=config.max_norm) # Only for time measurement of step through network t1 = time.time() loss = criterion(batch_y.transpose(1, 2), batch_targets) loss.backward() optimizer.step( ) if step > 0 else 0 #to be able to test initial model # Just for time measurement t2 = time.time() examples_per_second = config.batch_size / float(t2 - t1) if step % config.print_every == 0: predictions = torch.argmax(torch.softmax(batch_y, 2), 2) accuracy = torch.sum(predictions == batch_targets).type( torch.FloatTensor) / config.batch_size / config.seq_length # #uncomment for printing # print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, " # "Accuracy = {:.2f}, Loss = {:.3f}".format( # datetime.now().strftime("%Y-%m-%d %H:%M"), step, # int(config.train_steps), config.batch_size, examples_per_second, # accuracy, loss)) #writing results results.write("%d %.3f %.3f\n" % (step, accuracy, loss)) optimizer.step() if step == 0 else 0 if np.round(accuracy, 2) == 1.00: print("Achieved >99.95% accuracy.") break if step % config.sample_every == 0: gen_text.write("--- Step: {} ---\n".format(step)) with torch.no_grad(): #get random char from alphabet rnd_char = np.random.choice(list(map(chr, range(97, 123)))).upper() prev = torch.zeros(dataset.vocab_size).to(device) prev[dataset._chars.index(rnd_char)] = 1 prev = prev.view(1, 1, -1) #dim: B x T x D #feed to network, maybe a bit redundant for i in range(config.out_seq - 1): gen_y = model(prev) #dim: B x T x C char = torch.zeros(dataset.vocab_size).to(device) softm = torch.softmax( config.temp * gen_y[0, -1, :], 0).squeeze() #temperature included # 
char[np.random.choice(np.arange(dataset.vocab_size),p=np.array(softm.cpu()))] = 1 char[torch.argmax( softm )] = 1 #greedy, uncomment prev line for random prev = torch.cat([prev, char.view(1, 1, -1)], 1) txt = dataset.convert_to_string( torch.argmax(prev, 2).squeeze().cpu()) gen_text.write(txt + "\n\n") if step == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break prevstep = step if np.round(accuracy, 2) == 1.00 or step == config.train_steps: break print('Done training.') #Saving model doesn't work #hard-coding temperatures as solution with torch.no_grad(): length = 500 gen_text.write("--- Greedy ---\n") #get random char from alphabet rnd_char = np.random.choice(list(map(chr, range(97, 123)))).upper() prev = torch.zeros(dataset.vocab_size).to(device) prev[dataset._chars.index(rnd_char)] = 1 prev = prev.view(1, 1, -1) #dim: B x T x D #feed to network, maybe a bit redundant for i in range(length - 1): gen_y = model(prev) #dim: B x T x C char = torch.zeros(dataset.vocab_size).to(device) softm = torch.softmax(config.temp * gen_y[0, -1, :], 0).squeeze() #temperature included # char[np.random.choice(np.arange(dataset.vocab_size),p=np.array(softm.cpu()))] = 1 char[torch.argmax(softm)] = 1 #greedy prev = torch.cat([prev, char.view(1, 1, -1)], 1) txt = dataset.convert_to_string(torch.argmax(prev, 2).squeeze().cpu()) gen_text.write(txt + "\n\n") for t in [0.5, 1.0, 2.0]: gen_text.write("--- Temperature: {} ---\n".format(t)) #get random char from alphabet rnd_char = np.random.choice(list(map(chr, range(97, 123)))).upper() prev = torch.zeros(dataset.vocab_size).to(device) prev[dataset._chars.index(rnd_char)] = 1 prev = prev.view(1, 1, -1) #dim: B x T x D #feed to network, maybe a bit redundant for i in range(length - 1): gen_y = model(prev) #dim: B x T x C char = torch.zeros(dataset.vocab_size).to(device) softm = torch.softmax(t * gen_y[0, -1, :], 0).squeeze() #temperature included char[np.random.choice(np.arange(dataset.vocab_size), p=np.array(softm.cpu()))] = 1 # char[torch.argmax(softm)] = 1 #greedy prev = torch.cat([prev, char.view(1, 1, -1)], 1) txt = dataset.convert_to_string( torch.argmax(prev, 2).squeeze().cpu()) gen_text.write(txt + "\n\n") gen_text.write("--- Temperature: {}. Finish ---\n".format(t)) finish = "Sleeping beauty is " prev = torch.zeros(1, len(finish), dataset.vocab_size).to(device) for i, s in enumerate(finish): prev[0, i, dataset._chars.index(s)] = 1 for i in range(length - len(finish)): gen_y = model(prev) #dim: B x T x C char = torch.zeros(dataset.vocab_size).to(device) softm = torch.softmax(t * gen_y[0, -1, :], 0).squeeze() #temperature included char[np.random.choice(np.arange(dataset.vocab_size), p=np.array(softm.cpu()))] = 1 # char[torch.argmax(softm)] = 1 #greedy prev = torch.cat([prev, char.view(1, 1, -1)], 1) txt = dataset.convert_to_string( torch.argmax(prev, 2).squeeze().cpu()) gen_text.write(txt + "\n\n") results.close() gen_text.close()
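# The generation loops above re-feed the entire growing one-hot sequence
# `prev` to the model at every step, which costs O(T^2) over the generated
# length. A sketch of the alternative that carries the LSTM state forward;
# it assumes a model whose forward accepts and returns a state, which the
# snippet above does not expose, so a tiny stand-in module is used here.
import torch
import torch.nn as nn

class TinyCharLSTM(nn.Module):
    # Stand-in model; the real TextGenerationModel would need a similar
    # (input, state) -> (logits, state) signature for this to apply.
    def __init__(self, vocab_size, hidden=64):
        super().__init__()
        self.lstm = nn.LSTM(vocab_size, hidden, batch_first=True)
        self.out = nn.Linear(hidden, vocab_size)

    def forward(self, x, state=None):
        h, state = self.lstm(x, state)
        return self.out(h), state

def sample_incremental(model, first_ix, length, vocab_size):
    chars = [first_ix]
    state = None
    x = torch.zeros(1, 1, vocab_size)
    x[0, 0, first_ix] = 1
    for _ in range(length - 1):
        logits, state = model(x, state)          # carry (h, c) instead of re-feeding the prefix
        next_ix = logits[0, -1].argmax().item()  # greedy choice
        chars.append(next_ix)
        x = torch.zeros(1, 1, vocab_size)
        x[0, 0, next_ix] = 1
    return chars

print(sample_incremental(TinyCharLSTM(87), first_ix=5, length=20, vocab_size=87))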
def train(config): # Initialize the device which to run the model on device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) # fixme data_loader = DataLoader(dataset, batch_size = config.batch_size, shuffle=True, num_workers=1) vocab_size = dataset.vocab_size # char2i = dataset._char_to_ix # i2char = dataset._ix_to_char # ---------------------------------------- # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, vocab_size, \ config.lstm_num_hidden, config.lstm_num_layers, device) # fixme model.to(device) # Setup the loss and optimizer criterion = nn.NLLLoss() # fixme optimizer = optim.RMSprop(model.parameters(), lr = config.learning_rate) # fixme logSoftmax = nn.LogSoftmax(dim=2) # Learning rate scheduler lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, \ step_size=config.learning_rate_step, gamma=config.learning_rate_decay) step = 1 if config.resume: if os.path.isfile(config.resume): print("Loading checkpoint '{}'".format(config.resume)) checkpoint = torch.load(config.resume) step = checkpoint['step'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) print("Checkpoint loaded '{}', steps {}".format(config.resume, checkpoint['step'])) if not os.path.isdir(config.summary_path): os.makedirs(config.summary_path) if config.sampling =="greedy": f = open(os.path.join(config.summary_path,"sampled_"+config.sampling+".txt"), "w+") else: f = open(os.path.join(config.summary_path,"sampled_"+config.sampling+"_"+str(config.temp)+".txt"), "w+") best_accuracy = 0.0 pl_loss =[] average_loss =[] acc =[] for epochs in range(30): if step == config.train_steps: print('Done training.') break for (batch_inputs, batch_targets) in data_loader: if config.batch_size!=batch_inputs.size()[0]: print("batch mismatch") break # Only for time measurement of step through network t1 = time.time() model.hidden = model.init_hidden(config.batch_size) model.zero_grad() ####################################################### # Add more code here ... 
#convert batch inputs to one-hot vector batch_inputs= torch.zeros(config.batch_size, config.seq_length, vocab_size).scatter_(2,batch_inputs.unsqueeze(-1),1.0) batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device) predictions, _ = model(batch_inputs) if config.sampling=="greedy": predictions = logSoftmax(predictions) else: predictions = logSoftmax(predictions/config.temp) loss = criterion(predictions.transpose(2,1), batch_targets) # fixme _, predictions = torch.max(predictions, dim=2, keepdim=True) predictions = (predictions.squeeze(-1) == batch_targets).float() accuracy = torch.mean(predictions) loss.backward() torch.nn.utils.clip_grad_norm(model.parameters(), max_norm=config.max_norm) optimizer.step() lr_scheduler.step() ####################################################### # Just for time measurement t2 = time.time() examples_per_second = config.batch_size/float(t2-t1) pl_loss.append(loss.item()) average_loss.append(np.mean(pl_loss[:-100:-1])) acc.append(accuracy) if step % config.print_every == 0: print("[{}] Train Step {}/{}, Batch Size = {}, Examples/Sec = {:.2f}, " "Accuracy = {:.2f}, Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), step, config.train_steps, config.batch_size, examples_per_second, accuracy, loss.item() )) if step % config.sample_every == 0: model.eval() with torch.no_grad(): char_ix = generate_sample(model, vocab_size, config.seq_length, device, config) sentence = dataset.convert_to_string(char_ix) f.write("--------------"+str(step)+"----------------\n") f.write(sentence+"\n") print(sentence) print() model.train() # ########################################################################### # save training loss plt.plot(pl_loss,'r-', label="Batch loss", alpha=0.5) plt.plot(average_loss,'g-', label="Average loss", alpha=0.5) plt.legend() plt.xlabel("Iterations") plt.ylabel("Loss") plt.title("Training Loss") plt.grid(True) # plt.show() if config.sampling == "greedy": plt.savefig("loss_"+config.sampling+".png") else: plt.savefig("loss_"+config.sampling+"_"+str(config.temp)+".png") plt.close() ################################training################################################## plt.plot(acc,'g-', alpha=0.5) plt.xlabel("Iterations") plt.ylabel("Accuracy") plt.title("Train Accuracy") plt.grid(True) if config.sampling == "greedy": plt.savefig("accuracy_"+config.sampling+".png") else: plt.savefig("accuracy_"+config.sampling+"_"+str(config.temp)+".png") plt.close() if step == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break step+=1 save_checkpoint({ 'epoch': epochs + 1, 'step': step, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), 'lr_scheduler':lr_scheduler.state_dict(), 'accuracy': accuracy }, config) f.close()
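# The snippet above pairs LogSoftmax with NLLLoss, while other snippets in
# this file use CrossEntropyLoss directly. The two are equivalent, since
# CrossEntropyLoss fuses the log-softmax; the check below illustrates this on
# random data. Note that dividing the logits by a temperature before the
# log-softmax, as done above for the non-greedy setting, also changes the
# training loss, not just the sampling behaviour.
import torch
import torch.nn as nn

logits = torch.randn(8, 87, 30)              # (batch, vocab, seq_len) scores
targets = torch.randint(0, 87, (8, 30))      # (batch, seq_len) character indices

ce = nn.CrossEntropyLoss()(logits, targets)
nll = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), targets)
print(torch.allclose(ce, nll))               # True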
def train(config):
    if not os.path.isdir(CHECKPOINTS_FOLDER):
        os.mkdir(CHECKPOINTS_FOLDER)

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length, config.batch_size, config.train_steps)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size).to(device=device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    generated_sentences = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        optimizer.zero_grad()

        batch_inputs = torch.unsqueeze(torch.stack(batch_inputs), 2).float().to(device=device)
        batch_targets = torch.cat(batch_targets).to(device=device)

        predictions = model(batch_inputs, config.batch_size)
        loss = criterion(predictions, batch_targets)
        accuracy = get_accuracy(predictions, batch_targets)

        loss.backward()
        # Clip gradients after backward() so that gradients exist to be clipped.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      int(config.train_steps), config.batch_size,
                      examples_per_second, accuracy, loss))

        if step % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            sentence = generate_sentence(model, dataset, config)
            generated_sentences.append(sentence)

            state = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(state, 'checkpoints/{}'.format(
                config.txt_file.split("/", 1)[1].replace('.txt', '')))

    filename = config.txt_file.replace('.txt', '') + 'generated_sentences.txt'
    with open(filename, 'w') as f:
        f.write('\n'.join(generated_sentences))

    print('Done training.')
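# get_accuracy is called above but not defined in this file. A minimal
# version consistent with how criterion is called here (flat (N, vocab_size)
# logits against (N,) targets) might look like this; it is an assumption
# about the intended shapes, not the original helper.
import torch

def get_accuracy(predictions, targets):
    # predictions: (N, vocab_size) logits; targets: (N,) character indices.
    return (predictions.argmax(dim=1) == targets).float().mean().item()

print(get_accuracy(torch.randn(12, 87), torch.randint(0, 87, (12,))))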
def train(config): # Initialize the device which to run the model on device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) # fixme data_loader = DataLoader(dataset, config.batch_size, num_workers=1) # Save the instantiated dataset. with open('model_ckpt/train.dataset', 'wb') as dataset_file: pickle.dump(dataset, dataset_file) # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, config.lstm_num_hidden, config.lstm_num_layers, device, config.dropout_keep_prob) # fixme # Setup the loss and optimizer criterion = nn.CrossEntropyLoss() # reduction='mean'(default) - average over all timesteps and all batches as they are merged. optimizer = optim.RMSprop(model.parameters(), config.learning_rate) # fixme # optimizer = optim.Adam(model.parameters(), config.learning_rate) # Create a tensor to hold the one-hot encoding for the batch inputs. onehot_batch_inputs = torch.FloatTensor(config.seq_length, config.batch_size, dataset.vocab_size) onehot_batch_inputs = onehot_batch_inputs.to(device) h_init = torch.zeros(config.lstm_num_layers, config.batch_size, config.lstm_num_hidden, device=device) c_init = torch.zeros(config.lstm_num_layers, config.batch_size, config.lstm_num_hidden, device=device) # Record the learning rate steps individually for learning rate decay. lr_step = 0 lr = 1 for epoch in np.arange(config.epochs): losses = [] accs = [] for step, (batch_inputs, batch_targets) in enumerate(data_loader): # Only for time measurement of step through network t1 = time.time() ####################################################### # Add more code here ... ####################################################### model.train() # Convert the DataLoader output from list of tensors to tensors. batch_inputs = torch.stack(batch_inputs) batch_inputs = batch_inputs.to(device) # If the epoch is finished and there is not enough character to extract, break the loop if batch_inputs.shape[0] * batch_inputs.shape[1] != onehot_batch_inputs.shape[0] * onehot_batch_inputs.shape[1]: break # Zero the one-hot encoding and encode according to batch_inputs. onehot_batch_inputs.zero_() onehot_batch_inputs.scatter_(2, batch_inputs.unsqueeze_(-1), 1) # Convert the DataLoader output from list of tensors to tensors. batch_targets = torch.stack(batch_targets) batch_targets = batch_targets.to(device) # Learning rate decay. if lr_step % config.learning_rate_step == 0: optimizer = optim.RMSprop(model.parameters(), config.learning_rate * lr) lr *= config.learning_rate_decay optimizer.zero_grad() logits, _, _ = model(onehot_batch_inputs, h_init, c_init) # The seq_length dimension and batch_size dimension of the logits and batch_targets are merged together, and the mean is computed over this new dimension. 
loss = criterion(logits.view(-1, dataset.vocab_size), batch_targets.view(-1)) # fixme loss.backward() torch.nn.utils.clip_grad_norm(model.parameters(), max_norm=config.max_norm) accuracy = accuracy_fn(logits.view(-1, dataset.vocab_size), batch_targets.view(-1)) # fixme optimizer.step() losses.append(loss.item()) accs.append(accuracy) # Just for time measurement t2 = time.time() examples_per_second = config.batch_size/float(t2-t1) if step % config.print_every == 0: print("[{}] Epoch {}/{}, Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, " "Accuracy = {:.2f}, Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), epoch + 1, config.epochs, step, config.train_steps, config.batch_size, examples_per_second, accuracy, loss )) if step % config.sample_every == 0: # Generate some sentences by sampling from the model model.eval() # Create tensor to hold the generated samples. samples = torch.zeros((5, config.sample_length), dtype=torch.int, device=device) # Initialize the first characters for the samples. start_chars = torch.randint(dataset.vocab_size, size=(1, 5, 1), dtype=torch.long, device=device) samples[:, 0] = start_chars.squeeze() # Create a tensor to hold the one-hot encoding for the output characters of the LSTM network (one per each time step). onehot_chars = torch.zeros((1, 5, dataset.vocab_size), device=device) onehot_chars.scatter_(2, start_chars, 1) last_h = torch.zeros(config.lstm_num_layers, 5, config.lstm_num_hidden, device=device) last_c = torch.zeros(config.lstm_num_layers, 5, config.lstm_num_hidden, device=device) for t in np.arange(config.sample_length - 1): logits, last_h, last_c = model(onehot_chars, last_h, last_c) next_chars = logits.squeeze().argmax(-1) onehot_chars.zero_() onehot_chars.scatter_(2, next_chars.view(1, 5, 1), 1) samples[:, t + 1] = next_chars samples = samples.tolist() samples = [dataset.convert_to_string(sample) for sample in samples] # Output the samples into a text file. with open(config.summary_path + 'samples.txt', 'a') as txt_file: txt_file.write('Epoch: {}\nStep: {}\n'.format(epoch + 1, step)) txt_file.writelines(map(lambda x: x + '\n', samples)) if step == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break lr_step += 1 # After each training epoch, save the model and the training loss and accuracy. model.train() torch.save(model.state_dict(), 'model_ckpt/lstm_gen_epoch{}.ckpt'.format(epoch + 1)) with open(config.summary_path + 'train_epoch{}.csv'.format(epoch + 1), 'w', newline='') as csv_file: csv_writer = csv.writer(csv_file) csv_writer.writerow(losses) csv_writer.writerow(accs) print('Done training.')
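# The learning-rate decay above recreates the RMSprop optimizer every
# config.learning_rate_step steps, which also resets its running statistics.
# A common alternative is torch.optim.lr_scheduler.StepLR; a minimal sketch
# with a stand-in linear model (not the training loop above):
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

model = nn.Linear(10, 10)                              # stand-in for the LSTM model
optimizer = optim.RMSprop(model.parameters(), lr=2e-3)
scheduler = StepLR(optimizer, step_size=5000, gamma=0.96)

for step in range(3):
    optimizer.zero_grad()
    loss = model(torch.randn(4, 10)).sum()
    loss.backward()
    optimizer.step()
    scheduler.step()       # multiplies the lr by gamma once every step_size steps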
def evaluate(config): # Initialize the device which to run the model on device = torch.device(config.device) # Load the dataset with open(config.dataset, 'rb') as dataset_file: dataset = pickle.load(dataset_file) # Initialize the model that we are going to use model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, config.lstm_num_hidden, config.lstm_num_layers, device, config.dropout_keep_prob) # fixme model.load_state_dict(torch.load(config.ckpt)) # Generate some sentences by sampling from the model model.eval() # Create tensor to hold the generated samples. samples = torch.zeros((config.sample_batch_size, config.sample_length), dtype=torch.int, device=device, requires_grad=False) last_h = torch.zeros(config.lstm_num_layers, config.sample_batch_size, config.lstm_num_hidden, device=device, requires_grad=False) last_c = torch.zeros(config.lstm_num_layers, config.sample_batch_size, config.lstm_num_hidden, device=device, requires_grad=False) if config.pre_text: pre_input = torch.tensor( [dataset._char_to_ix[ch] for ch in config.pre_text] * 10, device=device, requires_grad=False).view(config.sample_batch_size, -1).t().unsqueeze(-1) onehot_pre_input = torch.zeros( (pre_input.shape[0], pre_input.shape[1], dataset.vocab_size), device=device, requires_grad=False) onehot_pre_input.scatter_(2, pre_input, 1) logits, last_h, last_c = model(onehot_pre_input, last_h, last_c) logits = nn.functional.softmax(logits[-1, :, :].unsqueeze(-1) / config.temperature, dim=1) start_chars = logits.squeeze().argmax(-1) samples[:, 0] = start_chars onehot_chars = torch.zeros( (1, config.sample_batch_size, dataset.vocab_size), device=device, requires_grad=False) onehot_chars.scatter_(2, start_chars.view(1, config.sample_batch_size, 1), 1) else: # Initialize the first characters for the samples. start_chars = torch.randint(dataset.vocab_size, size=(1, config.sample_batch_size, 1), dtype=torch.long, device=device, requires_grad=False) samples[:, 0] = start_chars.squeeze() # Create a tensor to hold the one-hot encoding for the output characters of the LSTM network (one per each time step). onehot_chars = torch.zeros( (1, config.sample_batch_size, dataset.vocab_size), device=device, requires_grad=False) onehot_chars.scatter_(2, start_chars, 1) for t in np.arange(config.sample_length - 1): logits, last_h, last_c = model(onehot_chars, last_h, last_c) logits = nn.functional.softmax(logits / config.temperature, dim=2) next_chars = logits.squeeze().argmax(-1) onehot_chars.zero_() onehot_chars.scatter_(2, next_chars.view(1, config.sample_batch_size, 1), 1) samples[:, t + 1] = next_chars samples = samples.tolist() samples = [dataset.convert_to_string(sample) for sample in samples] # Output the samples into a text file. with open(config.summary_path + 'samples.txt', 'a') as txt_file: txt_file.write('Temperature: {}\nSample length: {}\n'.format( config.temperature, config.sample_length)) txt_file.writelines(map(lambda x: config.pre_text + x + '\n', samples)) print('Done evaluation.')
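# Two small notes on the sampling above: dividing by a temperature before the
# softmax only changes the outcome if the result is then sampled from (e.g.
# with torch.multinomial); taking the argmax afterwards picks the same
# character regardless of temperature. Separately, loading a GPU-trained
# checkpoint on a CPU-only machine needs map_location; a minimal sketch with
# a stand-in module (the path and module here are placeholders):
import torch
import torch.nn as nn

model = nn.LSTM(input_size=87, hidden_size=128, num_layers=2)   # stand-in module
torch.save(model.state_dict(), "model.ckpt")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
state_dict = torch.load("model.ckpt", map_location=device)
model.load_state_dict(state_dict)
model.to(device)
model.eval()   # switch to eval mode (disables dropout in the real model)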
def train(config, lr): # Initialize the device which to run the model on #device = torch.device(config.device) # Initialize the dataset and data loader (note the +1) dataset = TextDataset(config.txt_file, config.seq_length) # fixme data_loader = DataLoader(dataset, config.batch_size, num_workers=1) # Initialize the model that we are going to use model = TextGenerationModel( batch_size=config.batch_size, seq_length=config.seq_length, vocabulary_size=dataset.vocab_size, lstm_num_hidden=config.lstm_num_hidden, lstm_num_layers=config.lstm_num_layers) # fixme if torch.cuda.is_available(): device = 'cuda' else: device = 'cpu' print('Currently using: ', device) model = model.to(device) # Setup the loss and optimizer criterion = torch.nn.CrossEntropyLoss() # fixme #optimizer = torch.optim.Adam(model.parameters(), lr = config.learning_rate, amsgrad = True) # fixme #optimizer = torch.optim.Adam(model.parameters(), lr = lr, amsgrad = True) acc_list = [] loss_list = [] test_batches_in = [] test_batches_ta = [] test_acc = [] best_accuracy = 0 ### Flag for temperature temp = True temp_value = 2 for runs in range(3): optimizer = torch.optim.RMSprop(model.parameters(), lr=lr) for step, (batch_inputs, batch_targets) in enumerate(data_loader): if step % config.print_every != 0 or step == 0: t1 = time.time() #print(type(step)) #model.train() ####################################################### torch.nn.utils.clip_grad_norm(model.parameters(), max_norm=config.max_norm) zerox = create_zerox(batch_inputs, dataset.vocab_size, device) output, _ = model.forward(zerox) #.to(device) targets = torch.stack(batch_targets).to(device) output_indices = torch.argmax(output, dim=2).to(device) output = output.transpose(0, 1).transpose(1, 2).to(device) #print(output.shape, targets.shape) #return 'a' #print(output.transpose(0,2).shape, targets.t().shape) #return 'a' loss_for_backward = criterion(output.transpose(0, 2), targets.t()).to(device) optimizer.zero_grad() loss_for_backward.backward() optimizer.step() correct_indices = output_indices == targets.transpose( 0, 1).to(device) #return correct_indices ####################################################### #loss = criterion.forward(output, targets) #accuracy = int(sum(sum(correct_indices)))/int(correct_indices.shape[0]* #correct_indices.shape[1]) #print(type(accuracy),type(loss)) # Just for time measurement t2 = time.time() examples_per_second = config.batch_size / float(t2 - t1) if step % config.print_every == 0 and step != 0: #model.eval() zerox = create_zerox(batch_inputs, dataset.vocab_size, device) output, _ = model.forward(zerox) output_indices = torch.argmax(output, dim=2).to(device) output = output.transpose(0, 1).transpose(1, 2).to(device) targets = torch.stack(batch_targets).to(device) #loss_for_backward = criterion(output,targets).to(device) loss_for_backward = criterion(output.transpose(0, 2), targets.t()).to(device) correct_indices = output_indices == targets.transpose( 0, 1) #.to(device) #return output_indices, targets.transpose(0,1) #print(correct_indices.shape) #accuracy = sum(acc_list) / len(acc_list) #accuracy = int(sum(sum(correct_indices)))/int(correct_indices.numel()) accuracy = np.array(correct_indices.detach().cpu()).mean() #print("[{}] Train Step {:04d}/{:f}, Batch Size = {}, Examples/Sec = {:.2f}, " # "Accuracy = {:.2f}, Loss = {:.3f}".format( # datetime.now().strftime("%Y-%m-%d %H:%M"), step, # config.train_steps, config.batch_size, examples_per_second, # accuracy, # loss_for_backward #)) acc_list.append(accuracy) 
loss_list.append(float(loss_for_backward)) if accuracy > best_accuracy: torch.save( { 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict() }, 'model.pth') if step % config.sample_every == 0: # Generate some sentences by sampling from the model ## Generate a good sample instead of the same one over and over again #model.eval() ### Append every modulo batch to a list of test batches and run ### over that list to test zerox = create_zerox(batch_inputs, dataset.vocab_size, device) test_batches_in.append(zerox) targets = torch.stack(batch_targets).to(device) test_batches_ta.append(targets) batch_inputz = torch.stack(batch_inputs).to(device) batch_input = batch_inputz.transpose(1, 0).to(device) output, _ = model.forward(zerox) #.to(device) output_indices = torch.argmax(output, dim=2).to(device) output = output.transpose(0, 1).transpose(1, 2).to(device) loss_for_backward = criterion(output, targets).to(device) correct_indices = output_indices == targets.transpose( 0, 1).to(device) best_sample = np.argmax( np.asarray(sum(correct_indices.t().detach().cpu()))) print( 'Real: ', dataset.convert_to_string( np.asarray(batch_input[best_sample].cpu()))) output, _ = model.forward(zerox) #.to(device) output_indices = torch.argmax(output, dim=2).to(device) print( 'prediction: ', dataset.convert_to_string( np.asarray(output_indices[best_sample].cpu()))) bc = int(sum(correct_indices.t().detach().cpu()) [best_sample]) / config.seq_length print('This sample had:', bc, 'characters right') output = np.random.randint(dataset.vocab_size) letters = [output] greedy_output = np.random.randint(dataset.vocab_size) greedy_letters = [greedy_output] Temperature_time(runs, step, dataset, device, model) for i in range(config.seq_length - 1): #if temp: # ============================================================================= # # soft = torch.nn.Softmax(dim=2) # # # # # zerol = torch.zeros([1,1,dataset.vocab_size]) # one_hot_letter = torch.tensor(output).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1) # zerol.scatter_(2,one_hot_letter,1) # zerol = zerol.to(device) # if i == 0: # output, h = model.forward(zerol) # # else: # output, h = model.forward(zerol, h) # # tempered = soft(output/temp_value) # #print(tempered) # output = int(torch.multinomial(tempered[0][0],1).detach().cpu()) # #print(output) # letters.append(output) # ============================================================================= greedy_zerol = torch.zeros([1, 1, dataset.vocab_size]) greedy_one_hot_letter = torch.tensor( greedy_output).unsqueeze(-1).unsqueeze(-1).unsqueeze( -1) greedy_zerol.scatter_(2, greedy_one_hot_letter, 1) greedy_zerol = greedy_zerol.to(device) if i == 0: greedy_output, greedy_h = model.forward(greedy_zerol) else: greedy_output, greedy_h = model.forward( greedy_zerol, greedy_h) greedy_output = int( torch.argmax(greedy_output, dim=2).detach().cpu()) greedy_letters.append(greedy_output) print('Greedy Generation ', dataset.convert_to_string(greedy_letters)) abs_step = (runs * 10000) + step line = ' '.join(('Step:', str(abs_step), dataset.convert_to_string(letters))) with open('GreedyGeneration.txt', 'a') as file: file.write(line + '\n') # ============================================================================= # if step % (config.sample_every*1000) ==0: # avg = [] # print('Testing over ', len(test_batches_in), 'batches') # for z in range(len(test_batches_in)): # ##OUTPUT # output,_ = model.forward(test_batches_in[z]) # output_indices = torch.argmax(output, dim=2).to(device) # output = 
output.transpose(0,1).transpose(1,2).to(device) # # ##LOSS AND ACCURACY # loss_for_backward = criterion(output,targets).to(device) # correct_indices = output_indices == test_batches_ta[z].transpose(0,1).to(device) # # accuracy = int(sum(sum(correct_indices)))/int(correct_indices.shape[0]* # correct_indices.shape[1]) # # avg.append(accuracy) # # this_test_acc = sum(avg)/len(avg) # print('The test accuracy over ',len(test_batches_in), 'is: ', this_test_acc) # test_acc.append(this_test_acc) # #if bc > 0.8: # # print(bc) # # #return correct_indices # # ============================================================================= if step == config.train_steps: # If you receive a PyTorch data-loader error, check this bug report: # https://github.com/pytorch/pytorch/pull/9655 break print('Done training.') line = ' '.join( ('Test accuracy:', str(test_acc.append), 'Learning rate:', str(lr), 'Accuracy:', str(acc_list), 'Loss:', str(loss_list))) with open('textresults.txt', 'a') as file: file.write(line + '\n') #hiddenstates = [None]*30 output = np.random.randint(dataset.vocab_size) letters = [output] for i in range(400): zerol = torch.zeros([1, 1, dataset.vocab_size]) one_hot_letter = torch.tensor(output).unsqueeze(-1).unsqueeze( -1).unsqueeze(-1) zerol.scatter_(2, one_hot_letter, 1) zerol = zerol.to(device) if i == 0: output, h = model.forward(zerol) output = int(torch.argmax(output, dim=2).detach().cpu()) letters.append(output) #hiddenstates[i] = h else: output, h = model.forward(zerol, h) output = int(torch.argmax(output, dim=2).detach().cpu()) letters.append(output) #hiddenstates[i % 30] = h print('Final generation: ', dataset.convert_to_string(letters)) line = ' '.join(('Accuracy:', str(acc_list), 'Loss', str(loss_list))) with open('PrideAndPrejudice2.txt', 'a') as file: file.write(line + '\n')
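# The loop above calls the deprecated torch.nn.utils.clip_grad_norm before
# loss.backward(), at which point no gradients exist yet to clip. A minimal
# sketch of the usual ordering, on a stand-in linear model rather than the
# LSTM above:
import torch
import torch.nn as nn

model = nn.Linear(10, 2)                                  # stand-in model
optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

x, y = torch.randn(4, 10), torch.randint(0, 2, (4,))
optimizer.zero_grad()
loss = criterion(model(x), y)
loss.backward()                                           # gradients now exist ...
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)   # ... so clip them
optimizer.step()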
def train(config):
    # Initialize the device which to run the model on
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    # Path to save the model
    path = "results/"

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    # print("Data file:", dataset._data[0:5])
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, dataset,
                                config.lstm_num_hidden, config.lstm_num_layers, device)
    # model = torch.load("results/book_EN_grimms_fairy_tails_final_model.pt")

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    # Store accuracy and losses:
    results = {'accuracy': [], 'loss': []}

    # Training:
    total_steps = 0
    while total_steps <= config.train_steps:
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            optimizer.zero_grad()

            # Stacking and one-hot encoding:
            batch_inputs = torch.stack(batch_inputs, dim=1).to(device)
            batch_targets = torch.stack(batch_targets, dim=1).to(device)

            # Forward inputs to the model:
            pred_targets, _ = model.forward(index_to_onehot(batch_inputs, dataset.vocab_size))
            loss = criterion(pred_targets.transpose(2, 1), batch_targets)

            # Backward pass
            loss.backward(retain_graph=True)
            optimizer.step()

            # Accuracy: argmax along the vocab dimension
            accuracy = (pred_targets.argmax(dim=2) == batch_targets).float().mean().item()

            # Update the accuracy and losses for visualization:
            results['accuracy'].append(accuracy)
            results['loss'].append(loss.item())

            # Just for time measurement
            t2 = time.time()
            # examples_per_second = config.batch_size / float(t2 - t1)

            total_steps += 1

            if step % config.print_every == 0:
                print("[{}] Train Step {:07d}/{:07d}, Batch Size = {}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), step, total_steps,
                          config.batch_size, results['accuracy'][-1], results['loss'][-1]))

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                print('GENERATED NO TEMP:')
                print(model.generate_sentence(100))
                print('__________________')
                print('GENERATED 0.5 TEMP:')
                print(model.generate_sentence(100, 0.5))
                print('__________________')
                print('GENERATED 1 TEMP:')
                print(model.generate_sentence(100, 1))
                print('__________________')
                print('GENERATED 2 TEMP:')
                print(model.generate_sentence(100, 2))

                # Save model for individual timesteps
                torch.save(model, path + config.txt_file.split('/')[1].split('.')[0]
                           + str(step) + "_model.pt")

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    print('Done training.')

    # Save the final model
    torch.save(model, path + config.txt_file.split('/')[1].split('.')[0] + "_final_model.pt")

    print("saving results in folder...")
    np.save(path + "loss_train", results['loss'])
    np.save(path + "accuracy_train", results['accuracy'])
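# index_to_onehot is used above but not defined in this file. One plausible
# implementation uses torch.nn.functional.one_hot; this is an assumption
# about its behaviour, not the original helper.
import torch
import torch.nn.functional as F

def index_to_onehot(batch, vocab_size):
    # batch: LongTensor of character indices, any shape; returns a float
    # tensor with a trailing one-hot dimension of size vocab_size.
    return F.one_hot(batch, num_classes=vocab_size).float()

batch = torch.randint(0, 87, (64, 30))       # (batch, seq_len) indices
print(index_to_onehot(batch, 87).shape)      # torch.Size([64, 30, 87])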