def train(config):
    """Train a VanillaRNN on PalindromeDataset and print loss/accuracy.

    Args:
        config: Object providing ``input_length``, ``input_dim``,
            ``num_hidden``, ``num_classes``, ``batch_size``,
            ``learning_rate``, ``max_norm`` and ``train_steps``.
    """
    # Initialize the model that we are going to use
    model = VanillaRNN(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size)

    # Initialize the dataset and data loader (leave the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # The model is fed the batch with its first two dimensions swapped.
        logits = model(torch.transpose(batch_inputs, 0, 1))
        loss = criterion(logits, batch_targets)

        optimizer.zero_grad()
        loss.backward()
        # Clipping must happen after backward() (so there are fresh gradients
        # to clip) and before step() (so the clipped values are applied).
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        if step % 10 == 0:
            # Accuracy in percent over the current batch.
            hits = (logits.argmax(dim=1) == batch_targets).sum().item()
            accuracy = hits / batch_inputs.size(0) * 100
            print("loss: ", loss.item())
            print("accuracy: ", accuracy)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):
    """Train a VanillaRNN on PalindromeDataset and plot accuracy and loss.

    Every 25 steps the step index, loss and accuracy (in percent) are appended
    to ``plot_step``, ``plot_loss`` and ``plot_accuracy``. Those lists are not
    defined in this function and are expected to exist in the enclosing scope.

    Args:
        config: Object providing ``input_length``, ``input_dim``,
            ``num_hidden``, ``num_classes``, ``batch_size``,
            ``learning_rate``, ``max_norm`` and ``train_steps``.
    """
    print('Vanilla RNN is WORKING...')
    model = VanillaRNN(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size)

    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), config.learning_rate,
                                    alpha=0.99, eps=1e-08, weight_decay=0,
                                    momentum=0, centered=False)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        optimizer.zero_grad()

        batch_inputs = batch_inputs.unsqueeze(0)
        output = model(batch_inputs)[0]
        loss = criterion(output, batch_targets)
        loss.backward()

        # Clip between backward() and step(); clipping before backward() acts
        # on stale gradients that zero_grad() then discards.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        _, pred = torch.max(output, 1)
        correct = (pred == batch_targets).sum().item()
        accuracy = correct / len(pred)

        if step % 25 == 0:
            plot_step.append(step)
            # Store a plain float: keeping the tensor would retain its autograd
            # graph and cannot be converted for plotting.
            plot_loss.append(loss.item())
            plot_accuracy.append(accuracy * 100)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    accuracy_axes = plt.subplot(2, 1, 1)
    loss_axes = plt.subplot(2, 1, 2)
    accuracy_axes.plot(plot_step, plot_accuracy, c='red', label='accuracy')
    accuracy_axes.legend()
    loss_axes.plot(plot_step, plot_loss, c='green', label='loss')
    loss_axes.legend()
    plt.show()
    print('Done training.')
def train(config, input_length):
    """Train a VanillaRNN on palindromes of a given length.

    Args:
        config: Object providing ``input_dim``, ``num_hidden``,
            ``num_classes``, ``batch_size``, ``learning_rate``, ``max_norm``
            and ``train_steps``.
        input_length: Sequence length passed to the model; the dataset is
            built with ``input_length + 1``.

    Returns:
        Tuple ``(model, losses, accuracies)``. ``losses`` holds the mean loss
        (as floats) of the steps since the previous log point, and
        ``accuracies`` holds the value returned by ``accuracy(...)`` at each
        log point (every 10 steps).
    """
    # Initialize the model that we are going to use
    model = VanillaRNN(input_length, config.input_dim, config.num_hidden,
                       config.num_classes, config.batch_size)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Initialize the dataset and data loader (leave the +1)
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

    losses = []
    accuracies = []
    # Running sum kept separate from the loss tensor so that neither
    # overwrites the other.
    running_loss = 0.0
    steps_in_window = 0

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        optimizer.zero_grad()
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_targets)
        loss.backward()
        # Clip before step(); clipping after the update has no effect.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        running_loss += loss.item()
        steps_in_window += 1

        if step % 10 == 0:
            mean_loss = running_loss / steps_in_window
            print('[step: %5d] loss: %.4f' % (step, mean_loss))
            losses.append(mean_loss)
            running_loss = 0.0
            steps_in_window = 0

            accu = accuracy(outputs, batch_targets)
            accuracies.append(accu)
            print('Accuracy on training dataset: %.3f %%' % (accu))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    return model, losses, accuracies
def train(config):
    """Train an RNN or LSTM on PalindromeDataset and write logged accuracies.

    The accuracy is logged every 10 steps and, after training, written to
    ``results/<model_type><input_length + 1>.txt`` preceded by a header line.

    Args:
        config: Object providing ``model_type`` ('RNN' or 'LSTM'), ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``max_norm`` and
            ``train_steps``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device=config.device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, device=config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    header = 'palindrome length:' + str(config.input_length + 1) + '\n'
    logged_accuracies = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        nn_out = model(batch_inputs)
        loss = criterion(nn_out, batch_targets)
        loss.backward()

        # Rescale the gradients so their total norm does not exceed max_norm;
        # this guards against exploding gradients.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        optimizer.step()

        correct = torch.sum(nn_out.argmax(dim=1) == batch_targets)
        accuracy = correct.to(torch.float) / config.batch_size

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            # Adjacent string literals instead of a backslash continuation
            # inside the literal, which embedded source indentation in the
            # printed message.
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy.item(), loss.item()))
            logged_accuracies.append(str(accuracy.item()) + ", ")

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    results_path = ('results/' + str(config.model_type)
                    + str(config.input_length + 1) + '.txt')
    with open(results_path, 'w') as f:
        f.write(header + "".join(logged_accuracies))
def train(config):
    """Train an RNN or LSTM on PalindromeDataset and plot accuracy and loss.

    Accuracy and loss are recorded every 100 steps and plotted once training
    stops.

    Args:
        config: Object providing ``model_type`` ('RNN' or 'LSTM'), ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``max_norm`` and
            ``train_steps``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device=device)
    else:
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, device=device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    accuracies = []
    losses = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        out = model(batch_inputs)
        loss = criterion(out, batch_targets)

        optimizer.zero_grad()
        loss.backward()
        # Clipping gradients helps prevent exploding gradients. It does
        # nothing against vanishing gradients in RNNs; LSTMs are useful there.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 100 == 0:
            matches = out.argmax(dim=1) == batch_targets
            accuracy = matches.sum().item() / matches.size()[0]
            # Plain float: a graph-attached tensor cannot be plotted and would
            # keep the whole autograd graph alive.
            loss_value = loss.item()
            accuracies.append(accuracy)
            losses.append(loss_value)

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy, loss_value))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    plt.plot(accuracies, label='accuracies')
    plt.plot(losses, label='losses')
    plt.tight_layout()
    plt.legend()
    plt.show()
    print('Done training.')
def train(config):
    """Train a VanillaRNN on PalindromeDataset using an NLL loss.

    The model output is passed to ``torch.nn.NLLLoss``, so it is treated as
    log-probabilities.

    Args:
        config: Object providing ``device``, ``input_length``, ``input_dim``,
            ``num_hidden``, ``num_classes``, ``batch_size``,
            ``learning_rate``, ``max_norm`` and ``train_steps``.
    """
    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Setup the model that we are going to use
    print("Initializing Vanilla RNN model...")
    model = VanillaRNN(
        seq_length=config.input_length,
        input_dim=config.input_dim,
        num_hidden=config.num_hidden,
        num_classes=config.num_classes,
        batch_size=config.batch_size,
        device=device,
    )

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    loss_function = torch.nn.NLLLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # Add a trailing input dimension, then move the batch to the device.
        batch_inputs = batch_inputs.unsqueeze(-1).to(device)
        batch_targets = batch_targets.to(device)

        # Reset gradients for this iteration
        model.zero_grad()

        # Forward pass
        log_probs = model(batch_inputs)

        # Compute the loss and gradients
        loss = loss_function(log_probs, batch_targets)
        loss.backward()

        # Rescale gradients whose total norm exceeds max_norm to prevent
        # exploding gradients (in-place variant; the un-suffixed function is
        # deprecated).
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss.item()))

        # Check if training is finished
        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):
    """Train an RNN or LSTM with TensorFlow 1.x graph-mode ops.

    Batches come from ``get_batch``. Every ``config.print_every`` steps the
    accuracy op is evaluated on a held-out batch drawn once before training,
    progress is printed, and merged summaries are written to
    ``config.summary_path``.

    Args:
        config: Object providing ``model_type`` ('RNN' or 'LSTM'),
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``train_steps``, ``learning_rate``,
            ``max_norm_gradient``, ``summary_path`` and ``print_every``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Setup the model that we are going to use
    if config.model_type == 'RNN':
        print("Initializing Vanilla RNN model...")
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size)
    else:
        print("Initializing LSTM model...")
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size)

    # Load test data
    test_size = int(config.batch_size * config.train_steps / 3)
    x_test, y_test = get_batch(test_size, config.input_length)

    input_placeholder = tf.placeholder(
        tf.float32,
        shape=(config.input_length - 1, None, config.input_dim))
    labels_placeholder = tf.placeholder(
        tf.int32, shape=(None, config.num_classes))

    logits = model.compute_logits(input_placeholder)
    logits_test = model.compute_logits_test(input_placeholder, test_size)

    # Define the optimizer
    optimizer = tf.train.RMSPropOptimizer(config.learning_rate)

    # Instead of optimizer.minimize(), compute the gradients explicitly so
    # they can be rescaled when their global norm exceeds max_norm_gradient
    # (protects against exploding gradients), then apply the clipped values.
    loss = model.compute_loss(logits, labels_placeholder)
    grads_and_vars = optimizer.compute_gradients(loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(
        zip(grads_clipped, variables))

    accuracy = model.accuracy(logits_test, labels_placeholder)

    summary = tf.summary.merge_all()
    init = tf.global_variables_initializer()
    # NOTE(review): the saver is created but never used in this function.
    saver = tf.train.Saver()

    # The context manager closes the session even when training raises.
    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter(config.summary_path, sess.graph)
        try:
            sess.run(init)

            for train_step in range(config.train_steps):
                # Only for time measurement of step through network
                t1 = time.time()

                # Load palindromes
                x, y = get_batch(config.batch_size, config.input_length)
                feed_dict = {
                    input_placeholder: x,
                    labels_placeholder: y,
                }
                _, loss_value = sess.run([apply_gradients_op, loss],
                                         feed_dict=feed_dict)

                # Only for time measurement of step through network
                t2 = time.time()
                examples_per_second = config.batch_size / float(t2 - t1)

                # Print the training progress
                if train_step % config.print_every == 0:
                    feed_dict = {
                        input_placeholder: x_test,
                        labels_placeholder: y_test,
                    }
                    accuracy_value = sess.run(accuracy, feed_dict=feed_dict)
                    print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                          "Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.2f}".
                          format(datetime.now().strftime("%Y-%m-%d %H:%M"),
                                 train_step, config.train_steps,
                                 config.batch_size, examples_per_second,
                                 accuracy_value, loss_value))

                    # Update the events file.
                    summary_str = sess.run(summary, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, train_step)
                    summary_writer.flush()
        finally:
            summary_writer.close()
def train(config):
    """Train an RNN or LSTM on PalindromeDataset with early stopping.

    Training stops when ``config.train_steps`` is reached or when the sum of
    the most recent (up to 50) batch accuracies divided by 50 equals 1.

    Args:
        config: Object providing ``model_type`` ('RNN' or 'LSTM'), ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``max_norm`` and
            ``train_steps``.

    Returns:
        Tuple ``(best_accuracy, step)``: the maximum per-batch accuracy seen
        (a tensor) and the index of the last executed step.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == "RNN":
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           batch_size=config.batch_size,
                           num_classes=config.num_classes,
                           device=device)
    elif config.model_type == "LSTM":
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     device=device,
                     batch_size=config.batch_size)

    # send model to device
    model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    # Track training statistics. Accuracies are kept both as tensors (for the
    # return value) and as floats (for the moving average, which must not
    # depend on the tensors' device).
    train_accuracies = []
    accuracy_values = []
    train_losses = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # Move and cast in one step; Tensor.to avoids the copy-construct
        # warning that torch.tensor(<tensor>) triggers.
        input_sequences = batch_inputs.to(device=device, dtype=torch.float)
        targets = batch_targets.to(device=device, dtype=torch.long)

        # reset gradients
        optimizer.zero_grad()

        # Forward pass
        predictions = model(input_sequences)

        accuracy = torch.div(
            torch.sum(targets == predictions.argmax(dim=1)).to(torch.float),
            config.batch_size)

        # Backward pass
        loss = criterion(predictions, targets)
        loss.backward()

        # Gradients can grow very large when propagated through many time
        # steps; rescaling them to a maximum norm avoids unstable updates.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)

        # update weights according to optimizer
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # save stats for each step
        accuracy_value = accuracy.item()
        loss_value = loss.item()
        train_accuracies.append(accuracy)
        accuracy_values.append(accuracy_value)
        # Float only: storing the tensor would keep every step's graph alive.
        train_losses.append(loss_value)

        if step % 10 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy_value, loss_value))

        # If the last 50 accuracies are all 1 (avg == 1), stop training:
        # convergence is reached and further computation is unnecessary.
        # Dividing by 50 even with fewer entries keeps the average below 1
        # until 50 steps have run.
        avg_accuracies = sum(accuracy_values[-50:]) / 50
        print(avg_accuracies)
        if avg_accuracies == 1:
            print(
                "\nTraining finished for length: {} after {} steps".format(
                    config.input_length, step))
            print("Avg Accuracy : {:.3f}".format(avg_accuracies))
            break

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    return max(train_accuracies), step
def train(config):
    """Train an RNN or LSTM on PalindromeDataset with early stopping.

    Loss and accuracy are averaged over the steps since the previous log
    point (every 10 steps). Training stops at ``config.train_steps`` or when
    the mean of the last 100 logged accuracies is not higher than the mean of
    the 100 before them.

    Args:
        config: Object providing ``model_type`` ('RNN' or 'LSTM'), ``device``
            (``'best'`` selects CUDA when available and is written back to
            ``config.device``), ``embed_dim``, ``num_hidden``,
            ``num_classes``, ``input_length``, ``batch_size``,
            ``learning_rate``, ``max_norm``, ``train_steps`` and ``log_path``
            (empty string disables file logging).

    Returns:
        Tuple ``(losses, accuracies)`` of the logged averages.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    if config.device == 'best':
        config.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.embed_dim, config.num_hidden,
                           config.num_classes, device)
    else:
        model = LSTM(config.embed_dim, config.num_hidden,
                     config.num_classes, device)

    # Initialize the dataset and data loader
    dataset = PalindromeDataset(config.input_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = F.cross_entropy
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    # Track metrics
    losses = []
    losses_last10 = []
    accuracies = []
    accuracies_last10 = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # The batch is transposed before being fed to the model.
        batch_inputs = batch_inputs.t().to(device=device, dtype=torch.long)
        batch_targets = batch_targets.to(device=device, dtype=torch.long)

        # Only for time measurement of step through network
        t1 = time.time()

        # forward pass
        logits = model(batch_inputs)

        # backprop
        optimizer.zero_grad()
        loss = criterion(logits, batch_targets)
        loss.backward()

        # Rescale gradients whose total norm exceeds max_norm to prevent
        # exploding gradients (in-place variant; the un-suffixed function is
        # deprecated).
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Compute metrics
        accuracy = (logits.cpu().argmax(dim=1)
                    == batch_targets.cpu()).numpy().mean()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # track metrics
        accuracies_last10.append(accuracy.tolist())
        losses_last10.append(loss.item())

        if step % 10 == 0:
            message = "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.3f}".format(
                datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                config.train_steps, config.batch_size, examples_per_second,
                accuracy, loss.item())
            print(message)
            if config.log_path != "":
                with open(config.log_path, "a") as f:
                    f.write(message + "\n")

            accuracies.append(np.mean(accuracies_last10))
            losses.append(np.mean(losses_last10))
            accuracies_last10 = []
            losses_last10 = []

        # Early stopping criterion: average accuracy over the last 1000 iters
        # was not higher than over the 1000 before that.
        stopping_criterion = len(accuracies) > 200 and \
            np.mean(accuracies[-100:]) <= np.mean(accuracies[-200:-100])

        if step == config.train_steps or stopping_criterion:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    # Returning after the loop (instead of from inside it) also covers the
    # case where the loader is exhausted before a stop condition fires.
    print('Done training.')
    return losses, accuracies
def train(config):
    """Train an RNN or LSTM on PalindromeDataset, printing progress.

    The model output is transposed before being passed to the loss, and the
    accuracy is computed by the ``accuracy_`` helper defined elsewhere.

    Args:
        config: Object providing ``model_type`` ('RNN' or 'LSTM'), ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``max_norm`` and
            ``train_steps``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, device)
    print(model)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # Keep the batch on the same device the model was built for.
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        model_outputs = model(batch_inputs)
        loss = criterion(torch.t(model_outputs), batch_targets)
        accuracy = accuracy_(model_outputs, batch_targets)

        optimizer.zero_grad()
        loss.backward()
        # Limit the total gradient norm to max_norm, which bounds the size of
        # the parameter update. This has to run after backward() and before
        # step(); anywhere else it has no effect on the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):
    """Train an RNN or LSTM on palindromes with TensorFlow 1.x graph-mode ops.

    Batches are produced by ``utils.generate_palindrome_batch`` and one-hot
    encoded here. Every ``config.print_every`` steps the merged summaries are
    written and progress is printed.

    Args:
        config: Object providing ``model_type`` ('RNN' or 'LSTM'),
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``summary_path``, ``model_name``, ``optimizer``
            ('rmsprop' or 'adam', case-insensitive), ``learning_rate``,
            ``max_norm_gradient``, ``train_steps`` and ``print_every``.

    Raises:
        ValueError: If ``config.optimizer`` is neither 'rmsprop' nor 'adam'.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Reproducibility. The graph-level seed belongs to the current default
    # graph, so it must be set after the graph is reset, not before.
    tf.reset_default_graph()
    tf.set_random_seed(42)
    np.random.seed(42)

    # Setup the model that we are going to use
    if config.model_type == 'RNN':
        print("Initializing Vanilla RNN model...")
        model = VanillaRNN(config.input_length - 1, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size)
    else:
        print("Initializing LSTM model...")
        model = LSTM(config.input_length - 1, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size)

    # Utility vars and ops
    gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.99,
                             allow_growth=True)
    session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts))
    train_log_writer = None
    try:
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # logging
        train_logdir = os.path.join(config.summary_path,
                                    '{}_train'.format(config.model_name))
        train_log_writer = utils.init_summary_writer(session, train_logdir)

        # Define the optimizer
        optimizer_name = config.optimizer.lower()
        if optimizer_name == 'rmsprop':
            optimizer = tf.train.RMSPropOptimizer(config.learning_rate)
        elif optimizer_name == 'adam':
            optimizer = tf.train.AdamOptimizer(config.learning_rate)
        else:
            raise ValueError(
                "Unsupported optimizer: {!r}".format(config.optimizer))

        # Instead of calling optimizer.minimize(..) as usual, compute the
        # gradients, rescale them jointly when their global norm exceeds
        # max_norm_gradient, and apply the result. This prevents overly large
        # updates caused by exploding gradients; it does not help with
        # vanishing gradients.
        grads_and_vars = optimizer.compute_gradients(model.loss_op)
        grads, variables = zip(*grads_and_vars)
        grads_clipped, _ = tf.clip_by_global_norm(
            grads, clip_norm=config.max_norm_gradient)
        grads_and_vars = list(zip(grads_clipped, variables))
        apply_gradients_op = optimizer.apply_gradients(
            grads_and_vars, global_step=global_step)

        # Initialize variables
        summary_op = tf.summary.merge_all()
        session.run(fetches=[
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])

        for train_step in range(config.train_steps):
            # Get data and convert to one-hot; the last column is the label.
            data = utils.generate_palindrome_batch(
                batch_size=config.batch_size, length=config.input_length)
            inputs, labels = data[:, :-1], data[:, -1]
            class_ids = np.arange(config.num_classes)
            inputs = (class_ids == inputs[..., None]).astype(int)
            labels = (class_ids == labels[..., None]).astype(int)
            # [time, batch_size, input_dim]
            inputs = np.transpose(inputs, axes=(1, 0, 2))

            # Only for time measurement of step through network
            t1 = time.time()

            train_feed = {model.inputs: inputs, model.labels: labels}
            fetches = [model.loss_op, model.accuracy_op, apply_gradients_op]
            log_now = train_step % config.print_every == 0

            if log_now:
                loss, accuracy, _, summary = session.run(
                    fetches=fetches + [summary_op], feed_dict=train_feed)
                train_log_writer.add_summary(summary, train_step)
            else:
                loss, accuracy, _ = session.run(fetches=fetches,
                                                feed_dict=train_feed)

            # Only for time measurement of step through network
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            # Print the training progress
            if log_now:
                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                      "Examples/Sec = {:.2f}, Accuracy = {:.2f}%, Loss = {:.4f}".
                      format(datetime.now().strftime("%Y-%m-%d %H:%M"),
                             train_step, config.train_steps,
                             config.batch_size, examples_per_second,
                             accuracy * 100, loss))
    finally:
        # Release the writer and the session even when training fails.
        if train_log_writer is not None:
            train_log_writer.close()
        session.close()
def train(config):
    """Train an RNN, LSTM or RRN on PalindromeDataset with early stopping.

    Training stops at ``config.train_steps`` or once the mean accuracy of the
    last five steps equals 1.0. Afterwards the model is saved to
    ``<model_type>_model.pt`` and the accuracy buffer to
    ``train_acc_<model_type><input_length>`` via ``np.save``.

    Args:
        config: Object providing ``model_type`` ('rnn', 'lstm' or 'rrn',
            case-insensitive; the lower-cased value is written back),
            ``device``, ``input_length``, ``input_dim``, ``num_hidden``,
            ``num_classes``, ``batch_size``, ``learning_rate``, ``max_norm``,
            ``train_steps``, ``print_every`` and ``experiment``.

    Returns:
        A dict of statistics when ``config.experiment`` is truthy, otherwise
        ``None``.
    """
    # print parameters
    print_config(config)

    config.model_type = config.model_type.lower()
    assert config.model_type in ('rnn', 'lstm', 'rrn')

    # Initialize the device which to run the model on
    wanted_device = config.device.lower()
    if wanted_device == 'cuda':
        # check if cuda is available
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        # cpu is the standard option
        device = torch.device('cpu')

    # Initialize the model that we are going to use
    if config.model_type == 'rnn':
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           num_classes=config.num_classes,
                           batch_size=config.batch_size,
                           device=device)
    elif config.model_type == 'lstm':
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     batch_size=config.batch_size,
                     device=device)
    elif config.model_type == 'rrn':
        model = RRN(seq_length=config.input_length,
                    input_dim=config.input_dim,
                    num_hidden=config.num_hidden,
                    num_classes=config.num_classes,
                    batch_size=config.batch_size,
                    device=device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=0)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    # keep stats; the buffer is pre-sized, so entries past the last executed
    # step stay zero.
    train_acc = np.zeros(config.train_steps + 1)
    steps_run = 0

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # Move and cast the batch; Tensor.to avoids the copy-construct
        # warning that torch.tensor(<tensor>) triggers.
        x = batch_inputs.to(device=device, dtype=torch.float)
        y_true = batch_targets.to(device=device, dtype=torch.long)

        # Forward pass
        y_pred = model(x)
        loss = criterion(y_pred, y_true)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()

        # clip_grad_norm_() avoids exploding gradients by rescaling gradients
        # whose total norm exceeds max_norm.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        train_acc[step] = accuracy(y_pred, y_true, config)
        steps_run = step + 1

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / (float(t2 - t1) + 1e-6)

        if step % config.print_every == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, train_acc[step], loss))
            print(f"x: {x[0,:]}, y_pred: {y_pred[0,:].argmax()}, y_true: {y_true[0]}")

        # Moving average over the last five steps. Always dividing by five
        # keeps it below 1.0 until five steps have run.
        acc_MA = train_acc[max(0, step - 4):step + 1].sum() / 5
        if step == config.train_steps or acc_MA == 1.0:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    # Save the final model
    torch.save(model, config.model_type + "_model.pt")
    np.save("train_acc_" + config.model_type + str(config.input_length),
            train_acc)

    if config.experiment:
        first_best_acc = np.argmax(train_acc)
        stats = {}
        # Use the last executed step, not the end of the zero-padded buffer,
        # so an early stop does not report 0 accuracy or an inflated count.
        stats["last acc"] = train_acc[steps_run - 1] if steps_run else 0.0
        stats["best acc"] = train_acc[first_best_acc]
        stats["step best acc"] = first_best_acc
        stats["num steps"] = steps_run
        stats["accs"] = train_acc
        return stats
# Fragment of an experiment routine whose enclosing definition is not visible
# here; model_type, input_length, input_dim, num_hidden, num_classes and
# batch_size are expected to be defined by that enclosing scope.
lr = config.learning_rate
train_steps = config.train_steps
max_norm = config.max_norm
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
acc_list = []
loss_list = []
epoch_list = []
# Repeat the experiment (the loop below runs 3 times) for more reliable results
for _ in range(3):
    # Initialize the model that we are going to use
    if model_type == 'RNN':
        model = VanillaRNN(input_length, input_dim, num_hidden, num_classes, batch_size, device=device)
        model.to(device)
    elif model_type =='LSTM':
        model = LSTM(input_length, input_dim, num_hidden, num_classes, batch_size, device=device)
        model.to(device)
    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(input_length+1)
    data_loader = DataLoader(dataset, batch_size, num_workers=0)
    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=lr, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False )
def train(config):
    """Train a VanillaRNN or LSTM on the palindrome task, printing progress.

    Args:
        config: Settings object. Reads ``model_type``, ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``max_norm`` and
            ``train_steps``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    settings = [config.input_length, config.input_dim, config.num_hidden,
                config.num_classes, config.batch_size, device]
    model = VanillaRNN(*settings) if config.model_type == 'RNN' else LSTM(*settings)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # backward() accumulates into .grad, so clear the previous step first.
        optimizer.zero_grad()
        predictions = model(batch_inputs)
        loss = criterion(predictions, batch_targets)
        loss.backward()

        # Clip the freshly computed gradients (after backward, before step)
        # to guard against exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        correct = (predictions.argmax(dim=1) == batch_targets.long()).sum()
        accuracy = float(correct) / float(batch_targets.shape[0])

        # Just for time measurement; the epsilon avoids division by zero
        # when the timer resolution is too coarse.
        t2 = time.time()
        examples_per_second = config.batch_size / (float(t2 - t1) + 1e-6)

        if step % 10 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss.item()))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):
    """Train a TensorFlow 1.x VanillaRNN or LSTM on generated palindromes.

    Builds the clipped-gradient training op, then runs ``config.train_steps``
    steps, writing summaries and printing progress every
    ``config.print_every`` steps.

    Args:
        config: Settings object. Reads ``model_type``, ``input_length``,
            ``input_dim``, ``num_hidden``, ``num_classes``, ``batch_size``,
            ``optimizer``, ``learning_rate``, ``max_norm_gradient``,
            ``summary_path``, ``name``, ``train_steps`` and ``print_every``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # The graph-level seed belongs to the current default graph, so it must
    # be set after the reset; setting it before would discard it.
    tf.reset_default_graph()
    tf.set_random_seed(42)
    np.random.seed(42)

    # Setup the model that we are going to use
    if config.model_type == 'RNN':
        print("Initializing Vanilla RNN model...")
        model = VanillaRNN(config.input_length - 1, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size)
    else:
        print("Initializing LSTM model...")
        model = LSTM(config.input_length - 1, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size)

    # The context manager closes the session even if training raises.
    with tf.Session() as sess:
        # Setup global step
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Define the optimizer
        assert config.optimizer in ('adam', 'rmsprop')
        if config.optimizer == "adam":
            optimizer = tf.train.AdamOptimizer(config.learning_rate)
        elif config.optimizer == "rmsprop":
            optimizer = tf.train.RMSPropOptimizer(config.learning_rate)

        # Define summary operation
        summary_op = tf.summary.merge_all()

        # Compute the gradients and rescale them so that their global norm
        # does not exceed config.max_norm_gradient; this avoids unstable
        # updates caused by exploding gradients.
        grads_and_vars = optimizer.compute_gradients(model._loss)
        grads, variables = zip(*grads_and_vars)
        grads_clipped, _ = tf.clip_by_global_norm(
            grads, clip_norm=config.max_norm_gradient)
        apply_gradients_op = optimizer.apply_gradients(
            zip(grads_clipped, variables), global_step=global_step)

        init_op = tf.global_variables_initializer()
        local_init_op = tf.local_variables_initializer()
        sess.run(fetches=[init_op, local_init_op])

        train_log_path = os.path.join(config.summary_path,
                                      '{}'.format(config.name))
        _check_path(train_log_path)
        train_log_writer = tf.summary.FileWriter(train_log_path,
                                                 graph=sess.graph)

        palindrome_length = config.input_length
        class_ids = np.arange(model._num_classes)
        base_fetches = [apply_gradients_op, model._loss, model._accuracy]

        try:
            for train_step in range(config.train_steps):
                # Only for time measurement of step through network
                t1 = time.time()

                palindrome_batch = utils.generate_palindrome_batch(
                    config.batch_size, palindrome_length)
                x_dense = palindrome_batch[:, :-1]
                y_dense = palindrome_batch[:, -1]

                # One-hot encode by comparing against the class ids; the
                # inputs are transposed so the first two axes are swapped.
                x = np.transpose(
                    (class_ids == x_dense[..., None]).astype(int), [1, 0, 2])
                y = (class_ids == y_dense[..., None]).astype(int)
                tr_feed = {model._inputs: x, model._targets: y}

                log_step = train_step % config.print_every == 0
                if log_step:
                    _, train_loss, train_accuracy, train_summary = sess.run(
                        fetches=base_fetches + [summary_op],
                        feed_dict=tr_feed)
                    train_log_writer.add_summary(train_summary, train_step)
                else:
                    _, train_loss, train_accuracy = sess.run(
                        fetches=base_fetches, feed_dict=tr_feed)

                # Only for time measurement of step through network
                t2 = time.time()
                examples_per_second = config.batch_size / float(t2 - t1)

                # Print the training progress
                if log_step:
                    print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                          "Examples/Sec = {:.2f}, Accuracy = {:.3f}, Loss = {:.4f}".
                          format(datetime.now().strftime("%Y-%m-%d %H:%M"),
                                 train_step, config.train_steps,
                                 config.batch_size, examples_per_second,
                                 train_accuracy, train_loss))
        finally:
            train_log_writer.close()
def train(config):
    """Train a VanillaRNN or LSTM with Adam and pickle the loss/accuracy history.

    The history is written to
    ``'Result_<model_type>_inputlen_<input_length>.p'`` as
    ``[loss_list, accuracy_list]``.

    Args:
        config: Settings object. Reads ``model_type``, ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``max_norm`` and
            ``train_steps``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use; the assert above
    # guarantees that anything other than 'RNN' is 'LSTM'.
    model_cls = VanillaRNN if config.model_type == 'RNN' else LSTM
    model = model_cls(config.input_length, config.input_dim,
                      config.num_hidden, config.num_classes,
                      config.batch_size, config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), config.learning_rate)

    # keep track of variables
    accuracy_list = []
    loss_list = []

    # loop through data (get batches)
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        predictions = model(batch_inputs)
        loss = criterion(predictions, batch_targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()

        # Clip after backward so the gradients of THIS step are limited
        # before the update; this counters exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Fraction of samples whose arg-max class equals the target
        targets = batch_targets.detach().cpu().numpy()
        predictions_np = predictions.detach().cpu().numpy()
        correct = int(np.sum(np.argmax(predictions_np, axis=1) == targets))
        accuracy = correct / len(targets)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy, loss.item()))

        # keep track of losses and accuracies
        loss_list.append(loss.detach().cpu().numpy())
        accuracy_list.append(accuracy)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    # save variables; the context manager closes the file handle
    data = [loss_list, accuracy_list]
    filename = 'Result_' + config.model_type + '_inputlen_' + str(
        config.input_length) + '.p'
    with open(filename, 'wb') as handle:
        pickle.dump(data, handle)

    print('Done training.')
def train(config):
    """Train one model per input length for 30 consecutive lengths.

    For ``i`` in ``0..29`` a fresh model is trained on palindromes of length
    ``config.input_length + i``. Training for a length stops early once the
    mean accuracy over the last 100 steps is exactly 100%.

    Args:
        config: Settings object. Reads ``model_type``, ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``max_norm`` and
            ``train_steps``.

    Returns:
        Tuple ``(final_accuracy, seq_list)``: the last computed 100-step
        average accuracy (in percent) per length, and the matching lengths.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    final_accuracy = []
    seq_list = []

    for i in range(30):
        input_length = config.input_length + i

        # Initialize the model that we are going to use
        if config.model_type == 'RNN':
            model = VanillaRNN(input_length, config.input_dim,
                               config.num_hidden, config.num_classes,
                               device=device)
        elif config.model_type == 'LSTM':
            model = LSTM(input_length, config.input_dim, config.num_hidden,
                         config.num_classes, device=device)

        # Initialize the dataset and data loader (note the +1)
        dataset = PalindromeDataset(input_length + 1)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

        # Setup the loss and optimizer
        optimizer = optim.RMSprop(model.parameters(), config.learning_rate)
        criterion = nn.CrossEntropyLoss()

        accuracies = []
        losses = []

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            # Only for time measurement of step through network
            t1 = time.time()

            batch_targets = batch_targets.to(device)

            optimizer.zero_grad()
            prediction = model(batch_inputs.to(device))
            loss = criterion(prediction, batch_targets)
            loss.backward()

            # Limit the gradient norm before the update so that exploding
            # gradients cannot destabilise training.
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            _, predicted = torch.max(prediction, 1)
            accuracy = (predicted == batch_targets).sum().item() / len(predicted)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            accuracies.append(accuracy * 100)
            # Store a plain float so the autograd graph is not kept alive.
            loss_value = loss.item()
            losses.append(loss_value)

            if step % 10 == 0:
                print(
                    "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                        config.train_steps, config.batch_size,
                        examples_per_second, accuracy, loss_value))

            if step % 100 == 0:
                # Stop once the last 100 steps were all 100% accurate.
                # Dividing by 100 (not by the window length) means the
                # condition cannot hold with fewer than 100 recorded steps.
                accuracy_average = sum(accuracies[-100:]) / 100
                if accuracy_average == 100:
                    break

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

        final_accuracy.append(accuracy_average)
        seq_list.append(input_length)

    print('Done training.')
    return final_accuracy, seq_list
def _evaluate_accuracy(model, data_loader):
    """Return the accuracy of ``model`` on ``TEST_SIZE`` batches, rounded to 4 places."""
    correct = 0
    total = 0
    with torch.no_grad():
        for i, (X, y) in enumerate(data_loader):
            X, y = X.cuda().long(), y.cuda()
            y_pred = model(X)
            total += y.size(0)
            _, predicted = torch.max(y_pred.data, 1)
            correct += (predicted == y).sum().item()
            if (i + 1) % TEST_SIZE == 0:
                break
    return round(correct / total, 4)


def train(input_length, print_log):
    """Train a VanillaRNN on CUDA for palindromes of the given length.

    Args:
        input_length: Value passed to the model; the dataset is built with
            ``input_length + 1``.
        print_log: When truthy, evaluate and print every 10 steps and record
            the results in the returned history lists.

    Returns:
        Tuple ``(record_epochs, accs, losses, acc)``: the logged steps, their
        test accuracies, their rounded training losses, and the final test
        accuracy after training.
    """
    # Initialize the model that we are going to use
    model = VanillaRNN(input_length, INPUT_DIM, NUM_HIDDEN, OUTPUT_DIM,
                       NUM_BATCH_SIZE).cuda()

    # Initialize the dataset and data loader (leave the +1)
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, NUM_BATCH_SIZE, num_workers=1)

    # Setup the loss and optimizer
    cross_entropy = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=LEARNING_RATE)

    record_epochs, accs, losses = [], [], []

    for step, (X, y) in enumerate(data_loader):
        X, y = X.cuda().long(), y.cuda()

        y_pred = model(X)
        loss = cross_entropy(y_pred, y)

        optimizer.zero_grad()
        loss.backward()
        # Clipping must happen between backward() and step(); once the
        # update has been applied it no longer has any effect.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=MAX_NORM)
        optimizer.step()

        if step % 10 == 0 and print_log:
            acc = _evaluate_accuracy(model, data_loader)
            avg_loss = round(loss.item(), 6)
            print('step: {}, loss: {}, test acc: {}'.format(
                step, avg_loss, acc))
            record_epochs.append(step)
            accs.append(acc)
            losses.append(avg_loss)

        if step == TRAIN_STEPS:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    # Final evaluation of the trained model
    acc = _evaluate_accuracy(model, data_loader)

    print('Done training.')
    return record_epochs, accs, losses, acc
def train(config):
    """Train a model and plot the gradient norms of its stored hidden states.

    After training, the norms of ``t.grad`` for each entry of
    ``model.hiddenActivity`` from the last processed step are passed to
    ``drawPlotMagn``.

    Args:
        config: Settings object. Reads ``model_type``, ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``max_norm`` and
            ``train_steps``.
    """
    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length + 1, config.input_dim,
                           config.num_hidden, config.num_classes, device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length + 1, config.input_dim,
                     config.num_hidden, config.num_classes, device)
    else:
        print("Unknown model type, please use RNN or LSTM")
        exit()

    model.store_hidden = True

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    accuracies = []
    # Defined up front so the plot call below cannot hit an unbound name.
    grads = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_targets)
        loss.backward()

        # Clip the gradients just computed by backward() so the following
        # update cannot be blown up by exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)

        loss = loss.data.item()
        optimizer.step()

        outputs = outputs.cpu().detach().numpy()
        acc = accuracy(outputs, batch_targets.cpu().detach().numpy())
        accuracies.append(acc)

        grads = [
            torch.norm(t.grad).cpu().detach() for t in model.hiddenActivity
        ]

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, acc, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    drawPlotMagn(
        grads, './' + str(config.model_type) + '_len:' +
        str(config.input_length) + '_lr:' + str(config.learning_rate) +
        '_grads_over_time.jpg',
        "Gradients over time steps with " + str(config.model_type), 1)
def train(config):
    """Train a TensorFlow 1.x VanillaRNN or LSTM with clipped RMSProp updates.

    Runs ``config.train_steps + 1`` training steps on freshly generated
    palindrome batches. Every ``config.print_every`` steps a separate batch
    is generated to evaluate accuracy, a summary is written under
    ``config.summary_path + '/test'`` and progress is printed.

    Args:
        config: Settings object. Reads ``model_type``, ``input_length``,
            ``input_dim``, ``num_hidden``, ``num_classes``, ``batch_size``,
            ``learning_rate``, ``max_norm_gradient``, ``summary_path``,
            ``train_steps`` and ``print_every``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Setup the model that we are going to use
    if config.model_type == 'RNN':
        print("Initializing Vanilla RNN model...")
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size)
    else:
        print("Initializing LSTM model...")
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size)

    num_inputs = config.input_length - 1
    sequence_shape = [config.batch_size, num_inputs]
    label_shape = [config.batch_size]

    # Placeholders for the training batch and the evaluation batch
    with tf.name_scope('input'):
        inputs = tf.placeholder(tf.int32, shape=sequence_shape, name='inputs')
        labels = tf.placeholder(tf.int32, shape=label_shape, name='labels')
        test_inputs = tf.placeholder(tf.int32, shape=sequence_shape,
                                     name='test_inputs')
        test_labels = tf.placeholder(tf.int32, shape=label_shape,
                                     name='test_labels')

    # Logits of the training batch
    with tf.name_scope('logits'):
        logits = model.compute_logits(inputs)

    # Training loss
    with tf.name_scope('loss'):
        loss = model.compute_loss(logits, labels)
    tf.summary.scalar('loss', loss)

    # Define the optimizer
    optimizer = tf.train.RMSPropOptimizer(config.learning_rate)

    # Gradient clipping: the gradients are rescaled so that their global
    # norm stays below the threshold, which avoids exploding gradients.
    global_step = tf.Variable(0, trainable=False, name='global_step')
    grads, variables = zip(*optimizer.compute_gradients(loss))
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(
        zip(grads_clipped, variables), global_step=global_step)

    # Accuracy on the evaluation batch
    with tf.name_scope('accuracy'):
        with tf.name_scope('predictions'):
            predictions = model.compute_logits(test_inputs)
        with tf.name_scope('accuracy'):
            accuracy = model.accuracy(predictions, test_labels)
    tf.summary.scalar('accuracy', accuracy)

    merged = tf.summary.merge_all()
    test_writer = tf.summary.FileWriter(config.summary_path + '/test',
                                        graph=tf.get_default_graph())

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    def split_batch(batch):
        # The first T-1 digits are the input, the last digit is the label.
        return batch[:, 0:num_inputs], batch[:, -1]

    for train_step in range(config.train_steps + 1):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_x, batch_y = split_batch(
            utils.generate_palindrome_batch(config.batch_size,
                                            config.input_length))
        train_feed = {inputs: batch_x, labels: batch_y}
        sess.run(apply_gradients_op, feed_dict=train_feed)

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if train_step % config.print_every != 0:
            continue

        # Evaluate on a freshly generated batch and report progress
        batch_x_test, batch_y_test = split_batch(
            utils.generate_palindrome_batch(config.batch_size,
                                            config.input_length))
        eval_feed = {
            inputs: batch_x,
            labels: batch_y,
            test_inputs: batch_x_test,
            test_labels: batch_y_test
        }
        l, acc, summary = sess.run([loss, accuracy, merged],
                                   feed_dict=eval_feed)
        test_writer.add_summary(summary, train_step)

        print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
              "Examples/Sec = {:.2f}, Accuracy = {}%, Loss = {}".format(
                  datetime.now().strftime("%Y-%m-%d %H:%M"), train_step,
                  config.train_steps, config.batch_size,
                  examples_per_second, acc, l))

    test_writer.close()
    sess.close()
def train(config):
    """Train a double-precision VanillaRNN or LSTM and append results to a file.

    Training stops at ``config.train_steps`` or, once more than 10 steps have
    been recorded, as soon as the mean accuracy of the last 3 steps is 1.0.
    One line with the accuracy and loss history is appended to
    ``'LSTMMMMM.txt'``.

    Args:
        config: Settings object. Reads ``model_type``, ``input_length``,
            ``input_dim``, ``num_classes``, ``num_hidden``, ``batch_size``,
            ``learning_rate``, ``max_norm`` and ``train_steps``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('Currently using: ', device)

    # Initialize the model that we are going to use
    input_length = config.input_length
    input_dim = config.input_dim
    num_classes = config.num_classes
    num_hidden = config.num_hidden
    batch_size = config.batch_size
    learning_rate = config.learning_rate

    if config.model_type == 'RNN':
        model = VanillaRNN(input_length, input_dim, num_hidden, num_classes,
                           batch_size, device).double()
    if config.model_type == 'LSTM':
        model = LSTM(input_length, input_dim, num_hidden, num_classes,
                     batch_size, device).double()
    model = model.to(device)

    # Initialize the dataset and data loader (note the +1).
    # The original referenced an undefined name here (NameError).
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

    accuracy_list = []
    loss_list = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        output = model.forward(batch_inputs.transpose(0, 1).double())

        optimizer.zero_grad()
        output_indices = torch.argmax(output.transpose(0, 1), dim=0)
        loss_for_backward = criterion(output, batch_targets).to(device)
        loss_for_backward.backward()

        # Limit the gradient norm before the update to avoid exploding
        # gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        correct_indices = output_indices == batch_targets
        accuracy = int(correct_indices.sum()) / int(len(correct_indices))

        # Plain float: keeps the history free of autograd graphs and makes
        # the written results readable numbers.
        loss_value = loss_for_backward.item()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss_value))

        accuracy_list.append(accuracy)
        loss_list.append(loss_value)

        recent = accuracy_list[-3:]
        converged = (len(accuracy_list) > 10
                     and (sum(recent) / len(recent)) == 1.0)
        if step == config.train_steps or converged:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    line = ' '.join((str(config.model_type), 'Palindrome length:',
                     str(input_length), 'Accuracy:', str(accuracy_list),
                     'Loss', str(loss_list)))
    with open('LSTMMMMM.txt', 'a') as file:
        file.write(line + '\n')
def train(config):
    """Train a VanillaRNN or LSTM with Adam or RMSprop and print progress.

    Both NumPy and PyTorch are seeded with 42 before anything else happens.

    Args:
        config: Settings object. Reads ``model_type``, ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``optimizer``, ``learning_rate``, ``max_norm``
            and ``train_steps``.
    """
    np.random.seed(42)
    torch.manual_seed(42)

    assert config.model_type in ('RNN', 'LSTM')

    # Device the model and the batches are moved to
    device = torch.device(config.device)
    print(device)

    # Pick the architecture, announce it, then build it
    if config.model_type == "RNN":
        model_cls, banner = VanillaRNN, "Training VanillaRNN"
    else:
        model_cls, banner = LSTM, "Training LSTM"
    print(banner)
    print()
    model = model_cls(config.input_length, config.input_dim,
                      config.num_hidden, config.num_classes,
                      config.batch_size, config.device)
    model = model.to(device)

    # Dataset and loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    if config.optimizer == "adam":
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    else:
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    pl_loss = []
    average_loss = []
    acc = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_targets = torch.LongTensor(batch_targets)
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Reset the parameter gradients, then forward and backward
        model.zero_grad()
        output = model(batch_inputs)
        out_loss = criterion(output, batch_targets)
        out_loss.backward()

        # Clipping the gradient norm helps prevent the exploding gradient
        # problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        loss = out_loss.item()

        # Count the samples whose predicted class differs from the target;
        # everything else in the batch counts as correct.
        softmax = torch.nn.Softmax(dim=1)
        predicted_classes = torch.argmax(softmax(output), dim=1)
        mismatches = torch.nonzero(predicted_classes - batch_targets)
        num_correct = config.batch_size - len(mismatches)
        accuracy = num_correct / config.batch_size

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        pl_loss.append(loss)
        # Running mean over the most recent losses (reverse slice)
        average_loss.append(np.mean(pl_loss[:-100:-1]))
        acc.append(accuracy)

        if step % 10 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):
    """Train a VanillaRNN or LSTM, log the best accuracies and plot curves.

    After training, the mean of the (up to) 50 best per-step accuracies is
    printed and appended to ``'result/<model_type>_acc.txt'``. Accuracy and
    loss sampled every 10 steps are then plotted.

    Args:
        config: Settings object. Reads ``model_type``, ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``weight_decay``,
            ``momentum``, ``max_norm`` and ``train_steps``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Print all configs to confirm parameter settings
    print_flags()

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, device)
    model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(),
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay,
                              momentum=config.momentum)

    # Store some measures
    best_acc = 0.
    los = list()
    iteration = list()
    tmp_acc = list()
    acc = list()
    # Fallback in case the very first step measures a zero time delta.
    examples_per_second = 0.0

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        optimizer.zero_grad()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        pred = model(batch_inputs)
        accuracy = compute_accuracy(pred, batch_targets)
        tmp_acc.append(accuracy)

        loss = criterion(pred, batch_targets)
        loss.backward()

        # Limit the gradient norm before the update to avoid exploding
        # gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        if not float(t2 - t1) == 0:
            examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            # Plain float: detaches from the autograd graph so the value
            # can be stored and plotted.
            loss_value = loss.item()
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy, loss_value))
            iteration.append(step)
            acc.append(accuracy)
            los.append(loss_value)
            if accuracy > best_acc:
                best_acc = accuracy

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    # Average over the best accuracies actually available (at most 50).
    tmp_acc.sort(reverse=True)
    top_accs = tmp_acc[:50]
    avg_acc = sum(top_accs) / len(top_accs) if top_accs else 0.0
    print('Average of 50 best accuracies: {}'.format(avg_acc))

    with open('result/{}_acc.txt'.format(config.model_type), 'a') as file:
        file.write('{} {}\n'.format(config.input_length, avg_acc))

    fig, axs = plt.subplots(1, 2, figsize=(10, 5))
    axs[0].plot(iteration, acc)
    axs[0].set_xlabel('Iteration')
    axs[0].set_ylabel('Accuracy')
    axs[1].plot(iteration, los)
    axs[1].set_xlabel('Iteration')
    axs[1].set_ylabel('Loss')
    fig.tight_layout()
    plt.show()
def train(config):
    """Train on one-hot palindromes and write test metrics to a results file.

    Every ``config.eval_freq`` steps a test batch of ``config.test_size``
    samples is evaluated and ``"<step> <accuracy> <loss>"`` is written to
    ``config.out_file``. The very first evaluation happens before any
    parameter update so that the initial model is measured. Training stops
    when the rounded test accuracy reaches 1.00 or at ``config.train_steps``.

    Args:
        config: Settings object. Reads ``model_type``, ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``test_size``, ``learning_rate``, ``max_norm``,
            ``train_steps``, ``eval_freq`` and ``out_file``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == "RNN":
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           device=device)
    elif config.model_type == "LSTM":
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes, device=device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)
    test_loader = iter(DataLoader(dataset, config.test_size, num_workers=1))

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    def to_one_hot(digits):
        # A fixed class count keeps the encoding width identical for the
        # training and test batches, whatever digits a batch contains.
        return torch.nn.functional.one_hot(
            digits.type(torch.LongTensor),
            config.input_dim).type(torch.FloatTensor).to(device)

    # The context manager closes the results file even if training raises.
    with open(config.out_file, "w+") as results:
        results.write(
            "#model_type : {}\n#input_length : {}\n#input_dim : {}\n#num_classes : {}\n#num_hidden : {}\n#batch_size : {}\n#learn_rate : {}\n#train_steps : {}\n#max_norm : {}\n"
            .format(config.model_type, config.input_length, config.input_dim,
                    config.num_classes, config.num_hidden, config.batch_size,
                    config.learning_rate, config.train_steps,
                    config.max_norm))
        results.write("#train_step accuracy loss\n")

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            batch_inputs = to_one_hot(batch_inputs)
            batch_targets = batch_targets.to(device)

            optimizer.zero_grad()
            batch_y = model(batch_inputs)  # without softmax
            loss = criterion(batch_y, batch_targets)
            loss.backward()

            # Clip the gradients produced by backward() before any update
            # to prevent them from exploding.
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)

            # On step 0 the update is postponed until after the evaluation
            # below, so that the untrained model is tested first.
            if step > 0:
                optimizer.step()

            if step % config.eval_freq == 0:
                with torch.no_grad():
                    test_inputs, test_targets = next(test_loader)
                    test_inputs = to_one_hot(test_inputs)
                    test_targets = test_targets.to(device)

                    test_y = model(test_inputs)
                    test_loss = criterion(test_y, test_targets).item()
                    test_predictions = torch.argmax(test_y, 1)
                    test_accuracy = (torch.sum(
                        test_predictions == test_targets).type(
                            torch.FloatTensor) / config.test_size).item()

                results.write("%d %.3f %.3f\n" %
                              (step, test_accuracy, test_loss))

                if step == 0:
                    optimizer.step()

                if np.round(test_accuracy, 2) == 1.00:
                    print("Achieved >99.95% accuracy.")
                    break

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

        print('Done training.')
def train(config):
    """Train an RNN/LSTM on the palindrome task and save the training curves.

    Loss and accuracy of the current training batch are sampled every 10
    steps and printed every 100 steps.  After the loop the sampled curves are
    written to ``<model_type>_<input_length>.npz`` in the working directory.

    Args:
        config: Namespace providing ``model_type``, ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``train_steps`` and
            ``max_norm``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Resolve the device, falling back to CPU when a GPU was requested but
    # CUDA is not available.
    if str(config.device) != "cpu":
        if not torch.cuda.is_available():
            print('\n* GPU was selected but CUDA is not available.\nTraining on CPU ...')
            device = torch.device("cpu")
        else:
            print('\nCUDA is available! Training on GPU ...')
            device = torch.device(config.device)
    else:
        print('\nTraining on CPU ...')
        device = torch.device(config.device)

    # Initialize the model that we are going to use
    model_cls = VanillaRNN if config.model_type == 'RNN' else LSTM
    model = model_cls(config.input_length, config.input_dim,
                      config.num_hidden, config.num_classes,
                      config.batch_size, device)

    # Print Configuration
    print("Model Type: {!s:5} Input Length: {!s:5} Learning Rate: {}\n"
          .format(config.model_type, config.input_length,
                  config.learning_rate))

    model = torch.nn.DataParallel(model).to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    train_loss, train_accuracy, train_steps = [], [], []

    # Enable train mode
    model.train()

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # Move tensors to the selected device
        batch_targets = batch_targets.long().to(device)
        batch_inputs = batch_inputs.to(device)

        optimizer.zero_grad()
        predictions = model(batch_inputs)
        loss = criterion(predictions, batch_targets)
        loss.backward()

        # Rescale the gradient norm to at most max_norm to guard against
        # exploding gradients (clip_grad_norm without the trailing
        # underscore is deprecated).
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            # Store accuracy and loss
            train_steps.append(step)
            train_loss.append(loss.item())
            train_accuracy.append(accuracy(predictions, batch_targets))

        # step % 100 == 0 implies step % 10 == 0, so the [-1] entries below
        # always belong to the current step.
        if step % 100 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, train_accuracy[-1],
                      train_loss[-1]))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug
            # report: https://github.com/pytorch/pytorch/pull/9655
            break

    # Saved after the loop so the curves are kept even if the data loader is
    # exhausted before train_steps is reached.  train_loss is stored in
    # addition to the previously saved arrays.
    file_name = str(config.model_type) + '_' + str(config.input_length) + '.npz'
    np.savez(file_name,
             train_steps=train_steps,
             train_accuracy=train_accuracy,
             train_loss=train_loss,
             model_type=config.model_type,
             input_length=config.input_length)

    print('Done training.')
def train(config):
    """Train an RNN/LSTM on the palindrome task, printing progress.

    Loss and accuracy of the current training batch are printed every 10
    steps; training stops when ``config.train_steps`` is reached.

    Args:
        config: Namespace providing ``model_type``, ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``train_steps`` and
            ``max_norm``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # The loader yields CPU tensors; move them to the device the model
        # was built for (a no-op when training on CPU).
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        # Call the module rather than .forward() so registered hooks run.
        output = model(batch_inputs)
        loss = criterion(output, batch_targets)
        loss.backward()

        # Rescale the gradient norm to at most max_norm so a single step
        # cannot explode (clip_grad_norm without the underscore is
        # deprecated).
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        loss = loss.item()
        accuracy = (torch.max(output, 1)[1] == batch_targets).float().mean()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug
            # report: https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config, device="cpu"):
    """Train an RNN/LSTM on the palindrome task with TensorBoard logging.

    Per-step accuracy and loss are written to a ``SummaryWriter`` under
    ``tensorboard/<model_type>/<run_id>`` and collected in lists that are
    handed to ``save_results`` after the loop.  Training stops at
    ``config.train_steps`` or when the mean accuracy over the last 200 steps
    changes by less than 1e-4 between two consecutive checks.

    Args:
        config: Namespace providing ``model_type``, ``input_length``,
            ``input_dim``, ``num_hidden``, ``num_classes``, ``batch_size``,
            ``learning_rate``, ``train_steps`` and ``max_norm``.
        device: Device string.  ``'cpu'`` and ``'cuda:0'`` additionally set
            the default tensor type; other values leave it untouched.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Tensorboard summary writer
    run_id = datetime.now().strftime(
        "%Y-%m-%d_%H-%M-%S_" + config.model_type.lower() + '_'
        + str(config.input_length))
    log_dir = 'tensorboard/' + config.model_type.lower() + '/' + run_id
    writer = SummaryWriter(log_dir=log_dir)

    # Torch settings
    if device == 'cpu':
        torch.set_default_tensor_type(torch.FloatTensor)
    elif device == 'cuda:0':
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device=device).to(device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, device=device).to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    # Accuracy and loss to be saved
    accuracies = []
    losses = []

    # Useful for convergence check
    avg_range = 200
    last_accuracy = 0
    convergence_threshold = 1e-4
    # Bound up front so the final report cannot hit an unbound name when the
    # loop ends before the first convergence check (train_steps < avg_range).
    avg_accuracy = last_accuracy

    model.train()

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # Load batches onto the target device
        batch_inputs = batch_inputs.to(device=device)
        batch_targets = batch_targets.to(device=device)

        predictions = model.forward(batch_inputs)
        loss = criterion(predictions, batch_targets)

        optimizer.zero_grad()
        loss.backward()

        # Clipping gradients to avoid the exploding gradient problem
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        accuracy = get_accuracy(predictions, batch_targets)
        # Detach before logging/storing so the history does not keep every
        # step's autograd graph alive.
        loss = loss.detach()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Add accuracy and loss to the writer
        writer.add_scalars('accuracy_and_loss', {
            'acc': accuracy,
            'loss': loss
        }, step)

        # Store accuracy and loss
        accuracies.append(accuracy)
        losses.append(loss)

        # Print information
        if step % 100 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        # Check for convergence
        if step % avg_range == 0 and step != 0:
            avg_accuracy = np.mean(accuracies[-avg_range:])
            if np.abs(avg_accuracy - last_accuracy) < convergence_threshold:
                print("The model has converged with accuracy", avg_accuracy,
                      "(" + ("+" if avg_accuracy > last_accuracy else "-")
                      + str(np.abs(avg_accuracy - last_accuracy)) + ")")
                break
            last_accuracy = avg_accuracy

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug
            # report: https://github.com/pytorch/pytorch/pull/9655
            break

    save_results(accuracies, losses, run_id, config.model_type,
                 config.input_length, last_accuracy)
    writer.close()
    print('Done training. Accuracy:', avg_accuracy)
def train(config, n_run):
    """Train an RNN/LSTM on the palindrome task and save model and curves.

    The network is trained on the first ``config.input_length - 1`` digits.
    Loss and accuracy are averaged over windows of 100 steps; training stops
    at ``config.train_steps`` or when two consecutive window losses differ by
    less than 1e-6.  The trained model and both curves are written below
    ``./Results/<model_type>/``.

    ``config`` is not modified: the shortened sequence length is kept in a
    local variable so that repeated calls with the same ``config`` train on
    the same length.

    Args:
        config: Namespace providing ``model_type``, ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``train_steps`` and
            ``max_norm``.
        n_run: Index of the current run; not used by the active code path.
    """
    import os

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Train on T-1 first digits
    input_length = config.input_length - 1

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(input_length, config.input_dim, config.num_hidden,
                           config.num_classes, config.batch_size,
                           device=device)
    elif config.model_type == 'LSTM':
        model = LSTM(input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device=device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)
    model.to(device)

    train_loss = []   # per-window mean loss
    train_acc = []    # per-window mean accuracy
    window_loss = []
    window_acc = []

    # Convergence condition
    eps = 1e-6

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # .to() moves the existing tensors; torch.tensor(tensor) would
        # copy-construct and emit a UserWarning.
        x = batch_inputs.to(device)
        y = batch_targets.to(device)

        # Forward pass
        pred = model.forward(x)
        loss = criterion(pred, y)
        window_loss.append(loss.item())

        # Backward pass
        optimizer.zero_grad()
        loss.backward()

        # Rescale the gradient norm to at most max_norm, limiting the size of
        # a single parameter update and preventing exploding gradients
        # (clip_grad_norm without the underscore is deprecated).
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        accuracy = get_accuracy(pred, y, config.batch_size)
        window_acc.append(accuracy.item())

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 1000 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        if step % 100 == 0:
            # Get loss and accuracy averages over the last window
            train_loss.append(np.mean(window_loss))
            train_acc.append(np.mean(window_acc))
            window_loss = []
            window_acc = []
            if step > 0 and abs(train_loss[-1] - train_loss[-2]) < eps:
                break

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug
            # report: https://github.com/pytorch/pytorch/pull/9655
            break

    print('\nDone training.\n')

    # Save trained model and results; the directory is created on demand so
    # a finished run is not lost to a missing folder.
    out_dir = "./Results/" + config.model_type + "/"
    os.makedirs(out_dir, exist_ok=True)
    prefix = out_dir + str(input_length) + "_" + config.model_type
    torch.save(model, prefix + "_model")
    np.save(prefix + "_accuracy", train_acc)
    np.save(prefix + "_loss", train_loss)
def train(config):
    """Train an RNN/LSTM on the palindrome task and return sampled metrics.

    Args:
        config: Namespace providing ``model_type``, ``input_length``,
            ``input_dim``, ``num_hidden``, ``num_classes``, ``batch_size``,
            ``learning_rate``, ``train_steps``, ``max_norm`` and
            ``eval_freq``.

    Returns:
        A list of ``[step, accuracy, loss]`` entries, one for every step that
        is a multiple of ``config.eval_freq``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Run on the GPU whenever one is available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Both model classes are built from the same keyword arguments
    model_kwargs = dict(seq_length=config.input_length,
                        input_dim=config.input_dim,
                        num_hidden=config.num_hidden,
                        num_classes=config.num_classes,
                        batch_size=config.batch_size,
                        device=device)
    if config.model_type == 'RNN':
        model = VanillaRNN(**model_kwargs)
    elif config.model_type == 'LSTM':
        model = LSTM(**model_kwargs)
    model.to(device)

    # Dataset and loader (note the +1)
    palindromes = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(palindromes, config.batch_size, num_workers=1)

    # Loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(params=model.parameters(),
                                    lr=config.learning_rate)

    # Evaluation metrics collected during training
    history = []
    print_setting(config)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        tic = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Forward pass and loss
        logits = model.forward(batch_inputs)
        loss = criterion(logits, batch_targets)

        # Backward pass: reset gradients, back-propagate, then rescale the
        # gradient norm to at most max_norm before the parameter update
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        n_correct = (logits.argmax(dim=1) == batch_targets).sum().float()
        accuracy = n_correct / config.batch_size

        # Just for time measurement
        toc = time.time()
        examples_per_second = config.batch_size / float(toc - tic)

        if step % config.eval_freq == 0:
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      timestamp, step, config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))
            history.append([step, accuracy.item(), loss.float().item()])

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug
            # report: https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training. \n')
    return history
def train(config):
    """Train an RNN/LSTM on the palindrome task and append metrics to a CSV.

    Per-step accuracy and loss are collected and, once ``config.train_steps``
    is reached, written to ``results/<ModelClassName>.csv`` in the current
    working directory (created if missing; appended to without a header if
    the file already exists).

    Args:
        config: Namespace providing ``model_type``, ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``train_steps`` and
            ``max_norm``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Fall back to the CPU only when plain 'cuda' was requested without CUDA
    cuda_missing = config.device == 'cuda' and not torch.cuda.is_available()
    device = torch.device('cpu' if cuda_missing else config.device)

    # Both model classes are built from the same keyword arguments
    model_kwargs = dict(seq_length=config.input_length,
                        input_dim=config.input_dim,
                        num_hidden=config.num_hidden,
                        num_classes=config.num_classes,
                        batch_size=config.batch_size,
                        device=device)
    if config.model_type == 'RNN':
        model = VanillaRNN(**model_kwargs)
    elif config.model_type == 'LSTM':
        model = LSTM(**model_kwargs)

    # Make the results directory (if it doesn't exist)
    results_dir = Path.cwd() / 'results'
    results_dir.mkdir(parents=True, exist_ok=True)
    results_filepath = results_dir / (model.__class__.__name__ + '.csv')

    # Dataset and loader (note the +1)
    palindromes = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(palindromes, config.batch_size, num_workers=1)

    # Loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    results = {key: [] for key in ('T', 'step', 'accuracy', 'loss')}

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # Send the mini-batch to the device
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Reset gradients, run the forward pass and back-propagate the loss
        optimizer.zero_grad()
        pred_targets = model.forward(batch_inputs)
        loss = criterion.forward(pred_targets, batch_targets)
        loss.backward()

        # Rescale the gradient norm to at most max_norm before updating
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        is_correct = pred_targets.argmax(dim=1) == batch_targets
        accuracy = is_correct.float().mean()

        # Record this step
        for key, value in (('T', config.input_length),
                           ('step', step),
                           ('accuracy', accuracy.item()),
                           ('loss', loss.item())):
            results[key].append(value)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 1000 == 0:
            print(
                f'[{datetime.now().strftime("%Y-%m-%d %H:%M")}] Train Step {step:04d}/{config.train_steps:04d}, Batch Size = {config.batch_size}, Examples/Sec = {examples_per_second:.2f}, Accuracy = {accuracy:.2f}, Loss = {loss:.3f}'
            )

        if step == config.train_steps:
            results_df = df.from_dict(results)
            # A new file gets a header row; an existing one is appended to
            already_there = results_filepath.exists()
            results_df.to_csv(results_filepath,
                              sep=';',
                              mode='a' if already_there else 'w',
                              header=not already_there,
                              encoding='utf-8',
                              index=False)
            # If you receive a PyTorch data-loader error, check this bug
            # report: https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')