def train(config):
    """Train an RNN or LSTM on the palindrome task and print its accuracy.

    Args:
        config: Namespace-like object. The attributes read here are
            ``model_type`` ('RNN' or 'LSTM'), ``device``, ``input_length``,
            ``input_dim``, ``num_hidden``, ``num_classes``, ``batch_size``,
            ``learning_rate``, ``max_norm`` and ``train_steps``.

    Returns:
        None. Training progress and the final accuracy are printed.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'LSTM':
        model_cls = LSTM
    elif config.model_type == 'RNN':
        model_cls = VanillaRNN
    else:
        # Only reachable when assertions are disabled (python -O).
        print('Your model type input is neither \'RNN\' or \'LSTM\'')
        return

    model = model_cls(
        config.input_length,
        config.input_dim,
        config.num_hidden,
        config.num_classes,
        config.device,
    )
    # The batches are moved to `device` below, so the parameters must live
    # there too; this is a no-op if the model already placed them there.
    model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Only for time measurement of step through network
        t1 = time.time()

        optimizer.zero_grad()
        output = model(batch_inputs)
        loss = criterion(output, batch_targets)
        loss.backward()

        # Gradient clipping: rescale all gradients by a common factor so that
        # their global norm does not exceed `max_norm`. In practice this
        # places a limit on the size of the parameter updates. It has to run
        # after backward() (gradients exist) and before step() (they are used).
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        with torch.no_grad():
            # The predicted class is the arg-max over the class axis (dim 1)
            # of the logits; a softmax is monotonic per row and not needed.
            pred = output.argmax(dim=1)
            accuracy = (pred == batch_targets).sum().item() / len(batch_targets)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy, loss.item()))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    # Test memory capacity: evaluate once on a fresh batch of 1000 examples.
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, 1000, num_workers=1)
    batch_inputs, batch_targets = next(iter(data_loader))
    batch_inputs = batch_inputs.to(device)
    batch_targets = batch_targets.to(device)

    # No gradients are needed for evaluation.
    with torch.no_grad():
        output = model(batch_inputs)
        pred = output.argmax(dim=1)
        accuracy = (pred == batch_targets).sum().item() / len(batch_targets)

    print('T:', config.input_length + 1)
    print("Final accuracy:", accuracy)
def run(model_type, input_length, input_dim, num_classes, num_hidden,
        batch_size, learning_rate, train_steps, max_norm, device):
    """Train an RNN or LSTM on the palindrome task.

    Args:
        model_type: Either 'RNN' or 'LSTM'.
        input_length: Passed to the model; the dataset is built with
            ``input_length + 1``.
        input_dim: Passed to the model constructor.
        num_classes: Passed to the model constructor.
        num_hidden: Passed to the model constructor.
        batch_size: Batch size for the data loader (also passed to the model).
        learning_rate: Learning rate for the RMSprop optimizer.
        train_steps: Step index at which the training loop stops.
        max_norm: Maximum global gradient norm used for clipping.
        device: Anything accepted by ``torch.device``.

    Returns:
        The accuracy (a Python float) on the last training batch.
    """
    assert model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(device)

    # Initialize the model that we are going to use
    model_pars = [
        input_length, input_dim, num_hidden, num_classes, batch_size, device
    ]
    model = LSTM(*model_pars) if model_type == 'LSTM' else VanillaRNN(*model_pars)
    model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # The model lives on `device`, so the batch has to be there as well.
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        ys = model(batch_inputs)
        loss = criterion(ys, batch_targets)
        loss.backward()

        # Clip the global gradient norm to `max_norm`. This must happen after
        # backward() has produced the gradients and before step() uses them;
        # clipping right after zero_grad() would have no effect.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_norm)
        optimizer.step()

        with torch.no_grad():
            predictions = ys.argmax(dim=-1)
            accuracy = (batch_targets == predictions).float().mean()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = batch_size / float(t2 - t1)

        if step % 10 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    train_steps, batch_size, examples_per_second,
                    accuracy.item(), loss.item()))

        if step == train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    return accuracy.item()
def train(config):
    """Plot hidden-state gradient norms of an RNN and an LSTM for one batch.

    Both models take a single optimisation step on the first batch of the
    palindrome data loader. The norm of ``.grad`` of every entry in each
    model's ``all_h`` is then plotted against the position index
    ``1 .. config.input_length``.

    Args:
        config: Namespace-like object. The attributes read here are
            ``model_type``, ``input_length``, ``input_dim``, ``num_hidden``,
            ``num_classes``, ``batch_size`` and ``learning_rate``.

    Returns:
        None. A matplotlib figure is shown.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss, the two models and one optimizer per model
    criterion = torch.nn.CrossEntropyLoss()
    lstm = LSTM(config.input_length, config.input_dim, config.num_hidden,
                config.num_classes)
    rnn = VanillaRNN(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes, device)
    optimizer_lstm = torch.optim.RMSprop(lstm.parameters(),
                                         lr=config.learning_rate)
    optimizer_rnn = torch.optim.RMSprop(rnn.parameters(),
                                        lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        print("step", step)

        lstm_out = lstm(batch_inputs)
        optimizer_lstm.zero_grad()
        loss_lstm = criterion(lstm_out, batch_targets)
        loss_lstm.backward()
        optimizer_lstm.step()

        rnn_out = rnn(batch_inputs)
        optimizer_rnn.zero_grad()
        loss_rnn = criterion(rnn_out, batch_targets)
        loss_rnn.backward()
        optimizer_rnn.step()

        # NOTE(review): presumably the models call retain_grad() on every
        # tensor stored in `all_h`; otherwise `.grad` is None here - confirm.
        lstm_norms = [h.grad.norm().item() for h in lstm.all_h]
        rnn_norms = [h.grad.norm().item() for h in rnn.all_h]

        sequence = list(range(1, config.input_length + 1))
        plt.figure(figsize=(15, 6))
        plt.plot(sequence, rnn_norms, label="rnn")
        plt.plot(sequence, lstm_norms, label="lstm")
        plt.legend()
        plt.xlabel("sequence value")
        plt.ylabel("gradient norm")
        plt.show()

        # Only the first batch is analysed.
        break

    print('Done training.')
class PalindromeExperiment(PytorchExperiment):
    """Palindrome-task training organised as a ``PytorchExperiment``."""

    def setup(self):
        """Create the model, data loader, loss and optimizer from the config.

        The settings are read from ``self.config`` in the order given by the
        module-level ``flags`` sequence.
        """
        self.save_checkpoint(name='setup')

        (model_type, input_length, input_dim, num_classes, num_hidden,
         batch_size, learning_rate, train_steps, max_norm,
         wanted_device) = itemgetter(*flags)(vars(self.config))
        assert model_type in ('RNN', 'LSTM')

        # Initialize the device which to run the model on
        # TODO: debug CUDA issues
        device = torch.device(wanted_device)

        # Initialize the model that we are going to use
        model_pars = [input_length, input_dim, num_hidden, num_classes,
                      batch_size, device]
        model_cls = LSTM if model_type == 'LSTM' else VanillaRNN
        self.model = model_cls(*model_pars)
        self.model.to(device)

        # Initialize the dataset and data loader (note the +1)
        dataset = PalindromeDataset(input_length + 1)
        self.data_loader = DataLoader(dataset, batch_size, num_workers=1)

        # Setup the loss and optimizer
        self.criterion = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.RMSprop(self.model.parameters(),
                                             lr=learning_rate)

        # TODO: plot accuracy over input_length
        # TODO: increase learning_rate over input_length
        # TODO: compare result with RNN

    def train(self, epoch):
        """Run the training loop, logging metrics and writing results to CSV.

        Args:
            epoch: Not used by this method.
        """
        (model_type, input_length, input_dim, num_classes, num_hidden,
         batch_size, learning_rate, train_steps, max_norm,
         device) = itemgetter(*flags)(vars(self.config))

        with SummaryWriter('part1/train') as w:
            results = []
            for step, (batch_inputs, batch_targets) in enumerate(self.data_loader):
                # Only for time measurement of step through network
                t1 = time.time()

                self.optimizer.zero_grad()

                # Move to device. `.to` converts dtype and device without the
                # copy-construct warning raised by torch.tensor(<tensor>).
                inputs = batch_inputs.to(device=device, dtype=torch.float)
                targets = batch_targets.to(device=device, dtype=torch.long)

                ys = self.model(inputs)
                loss = self.criterion(ys, targets)
                loss.backward()

                # Clip the gradients so gradient explosion won't let us
                # overshoot the minimum. This must run after backward(), when
                # the gradients exist, and before the optimizer step.
                # https://www.quora.com/What-is-gradient-clipping-and-why-is-it-necessary
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               max_norm=max_norm)
                self.optimizer.step()

                with torch.no_grad():
                    predictions = ys.argmax(dim=-1)
                    accuracy = (targets == predictions).float().mean()

                # Just for time measurement
                t2 = time.time()
                examples_per_second = batch_size / float(t2 - t1)

                # Convert to plain floats once; reused for logging and CSV.
                stats = {'loss': loss.item(), 'accuracy': accuracy.item()}
                results.append({'step': step, **stats})

                # NOTE(review): the periodic block is reconstructed as
                # "every 100 steps: log, print, checkpoint, write CSV" -
                # confirm the intended nesting of print/checkpoint.
                if step % 100 == 0:
                    w.add_scalars('metrics', stats, step // 10)
                    # TODO: check why self.add_result(...) per stat is slow!
                    self.elog.print(
                        "elog [{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                        "Accuracy = {:.2f}, Loss = {:.3f}".format(
                            datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                            train_steps, batch_size, examples_per_second,
                            stats['accuracy'], stats['loss']
                        ))
                    self.save_checkpoint(name='train', n_iter=step)
                    # write_csv's return value replaces the pending results.
                    results = write_csv(results, self.config)

                if step == train_steps:
                    # If you receive a PyTorch data-loader error, check this bug report:
                    # https://github.com/pytorch/pytorch/pull/9655
                    break

            print('Done training.')
            results = write_csv(results, self.config)

    def validate(self, epoch):
        """No validation phase is implemented."""
        pass