Example #1
def train(config):
    # Initialize the model that we are going to use
    model = VanillaRNN(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size)  # fixme

    # Initialize the dataset and data loader (leave the +1)
    dataset = PalindromeDataset(config.input_length + 1)

    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()  # fixme
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)  # fixme

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Add more code here ...
        hit = 0
        n, dim = batch_inputs.size()
        batch_inputs_T = torch.transpose(batch_inputs, 0, 1)
        # print(batch_inputs_T.size())
        y_hat_oh = model.forward(batch_inputs_T)
        for i in range(n):
            y_pre, _ = max(enumerate(y_hat_oh[i]), key=itemgetter(1))
            y = batch_targets[i].item()
            # print(y_pre, y)
            if y_pre == y:
                hit += 1
        # print("/////////")

        # Add more code here ...
        loss = criterion(y_hat_oh, batch_targets)  # fixme
        accuracy = hit / n * 100  # fixme

        optimizer.zero_grad()
        loss.backward()

        # The following line deals with exploding gradients; it must run after
        # loss.backward() (so the gradients exist) and before optimizer.step().
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)

        optimizer.step()

        if step % 10 == 0:
            print("loss: ", loss.item())
            print("accuracy: ", accuracy)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
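The per-sample Python loop above computes batch accuracy correctly, but the same quantity can be obtained in one vectorized call. A minimal sketch, assuming logits of shape (batch_size, num_classes) as in the example (the tensors here are stand-ins, not the real model output):

import torch

# Minimal sketch: vectorized batch accuracy.
logits = torch.randn(128, 10)             # plays the role of y_hat_oh
targets = torch.randint(0, 10, (128,))    # plays the role of batch_targets

predictions = logits.argmax(dim=1)                  # predicted class per sample
accuracy = (predictions == targets).float().mean()  # fraction of correct predictions
print("accuracy: {:.1f}%".format(accuracy.item() * 100))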
Example #2
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use


    if (config.model_type == 'RNN'):
        model = VanillaRNN(config.input_length,
                           config.input_dim,
                           config.num_hidden,
                           config.num_classes,
                           config.batch_size,
                           device=device)
        # model = model.to(device)
    else:
        model = LSTM(config.input_length,
                     config.input_dim,
                     config.num_hidden,
                     config.num_classes,
                     config.batch_size,
                     device=device)
        # model = model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(
        model.parameters(),
        lr=config.learning_rate)  #, weight_decay=1/(200*9))
    # optimizer = optim.Adam(model.parameters(), lr=config.learning_rate) #, weight_decay=1/(200*9))
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.learning_rate)

    accuracies = []
    losses = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Add more code here ...
        ############################################################################
        # QUESTION: what happens here and why?
        # Clipping gradients helps prevent exploding gradients (hence "clipping").
        # However, it does nothing against vanishing gradients in RNNs;
        # for vanishing gradients, LSTMs are the useful remedy.
        ############################################################################

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        out = model.forward(batch_inputs)
        # Add more code here ...

        # print(out.argmax(dim=1).shape, batch_targets.shape)

        loss = criterion(out, batch_targets)
        optimizer.zero_grad()
        loss.backward()
        # if (config.model_type == 'RNN'):
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 100 == 0:
            compare = (out.argmax(dim=1) == batch_targets)
            summed = compare.sum().item()
            accuracy = summed / compare.size()[0]
            accuracies.append(accuracy)
            losses.append(loss.item())
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    plt.plot(accuracies, label='accuracies')
    plt.plot(losses, label='losses')
    plt.tight_layout()
    plt.legend()
    plt.show()
    print('Done training.')
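One detail worth noting in the plotting code above: values collected for plotting should be plain Python floats, since tensors that still require grad keep the autograd graph alive and may not convert cleanly to NumPy. A minimal sketch of the safe pattern, with a stand-in loss:

import torch
import matplotlib.pyplot as plt

# Minimal sketch: store detached floats, then plot them.
losses = []
for _ in range(5):
    loss = (torch.randn(3, requires_grad=True) ** 2).sum()  # stand-in training loss
    losses.append(loss.item())                              # plain float, no graph attached

plt.plot(losses, label='loss')
plt.legend()
plt.show()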
Example #3
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    print('Currently using: ', device)
    # Initialize the model that we are going to use
    input_length = config.input_length
    input_dim = config.input_dim
    num_classes = config.num_classes
    num_hidden = config.num_hidden
    batch_size = config.batch_size
    learning_rate = config.learning_rate
    
    if config.model_type == 'RNN':
        model = VanillaRNN(input_length, input_dim, num_hidden, num_classes,
                           batch_size, device).double()
    if config.model_type == 'LSTM':
        model = LSTM(input_length, input_dim, num_hidden, num_classes,
                     batch_size, device).double()

    model = model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()  # fixme
    optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate)  # fixme
    accuracy_list = []
    loss_list = []

## first 100 steps are to generate the test set
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        output = model.forward(batch_inputs.transpose(0,1).double())

        optimizer.zero_grad()

        output_indices = output.argmax(dim=1)
        loss_for_backward = criterion(output, batch_targets).to(device)
        loss_for_backward.backward()
        
        ############################################################################
        # QUESTION: what happens here and why?
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        correct_indices = output_indices == batch_targets
        accuracy = correct_indices.float().mean().item()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % 10 == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss_for_backward
            ))
            accuracy_list.append(accuracy)
            loss_list.append(loss_for_backward)

        if step == config.train_steps or (len(accuracy_list) > 10 and
                                          sum(accuracy_list[-3:]) / len(accuracy_list[-3:]) == 1.0):
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    line = ' '.join((str(config.model_type), 'Palindrome length:', str(input_length),
                     'Accuracy:', str(accuracy_list), 'Loss', str(loss_list)))
    with open('LSTMMMMM.txt', 'a') as file:
        file.write(line + '\n')
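For reference on the shapes used above: nn.CrossEntropyLoss expects logits of shape (batch, num_classes) and integer class targets of shape (batch,). A minimal self-contained sketch with stand-in tensors:

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()

batch_size, num_classes = 4, 10
logits = torch.randn(batch_size, num_classes, requires_grad=True)  # (batch, classes)
targets = torch.randint(0, num_classes, (batch_size,))             # class indices, shape (batch,)

loss = criterion(logits, targets)
loss.backward()                    # gradients flow back into the logits
print(loss.item())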
Example #4
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == "RNN":
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           batch_size=config.batch_size,
                           num_classes=config.num_classes,
                           device=device)

    elif config.model_type == "LSTM":
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     device=device,
                     batch_size=config.batch_size)
    # send model to device
    model.to(device)
    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    # track training statistics
    train_accuracies = []
    train_losses = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # batch inputs  to device for cuda
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # convert input batches to float/long tensors on the device
        input_sequences = batch_inputs.to(device=device, dtype=torch.float)
        targets = batch_targets.to(device=device, dtype=torch.long)

        # reset gradients
        optimizer.zero_grad()

        # Forward pass: predict classes for the input sequences
        predictions = model.forward(input_sequences)
        # accuracy
        accuracy = torch.div(
            torch.sum(targets == predictions.argmax(dim=1)).to(torch.float),
            config.batch_size)
        # print(accuracy)
        # backpropagate loss
        # compute loss per batch
        loss = criterion(predictions, targets)
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # ANSWER: Gradients are multiplied through every layer during backpropagation,
        # so they can grow very large and destabilize learning. Clipping the gradient
        # norm to a limit (max_norm) prevents this exploding-gradient problem.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################
        # update weights according to optimizer
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)
        # save stats for each step (as plain floats)
        train_accuracies.append(accuracy.item())
        train_losses.append(loss.item())

        if step % 10 == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

            # If the last 50 accuracies are already 1 (avg = 1), stop training:
            # convergence is reached and further computation is unnecessary.
            avg_accuracies = np.sum(train_accuracies[-50:]) / 50
            print(avg_accuracies)
            if avg_accuracies == 1:
                print(
                    "\nTraining finished for length: {} after {} steps".format(
                        config.input_length, step))
                print("Avg Accuracy : {:.3f}".format(avg_accuracies))
                break

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    return max(train_accuracies), step
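The batches coming out of the DataLoader above are already tensors, so wrapping them in torch.tensor() triggers a copy-construct warning; Tensor.to() is the cleaner way to change device and dtype. A minimal sketch with stand-in data:

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

batch_inputs = torch.rand(128, 10)            # stand-in for a loaded input batch
batch_targets = torch.randint(0, 10, (128,))  # stand-in for a loaded target batch

# Preferred: .to() changes device/dtype without the torch.tensor() copy warning.
inputs = batch_inputs.to(device=device, dtype=torch.float)
targets = batch_targets.to(device=device, dtype=torch.long)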
Example #5
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    if config.device == 'cuda':
        if torch.cuda.is_available():
            device = torch.device(config.device)
        else:
            device = torch.device('cpu')
    else:
        device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           num_classes=config.num_classes,
                           batch_size=config.batch_size,
                           device=device)
    elif config.model_type == 'LSTM':
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     batch_size=config.batch_size,
                     device=device)

    # make the results directory (if it doesn't exist)
    RESULTS_DIR = Path.cwd() / 'results'
    RESULTS_DIR.mkdir(parents=True, exist_ok=True)
    results_filepath = RESULTS_DIR / (model.__class__.__name__ + '.csv')

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    results = {
        'T': [],
        'step': [],
        'accuracy': [],
        'loss': [],
    }

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Add more code here ...
        # send the data to device
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # (re)set the optimizer gradient to 0
        optimizer.zero_grad()

        # forward pass the mini-batch
        pred_targets = model.forward(batch_inputs)
        loss = criterion.forward(pred_targets, batch_targets)

        # backpropagate the loss
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # The gradient norm is clipped to max_norm to prevent exploding gradients.
        # Note: clip_grad_norm is deprecated, use clip_grad_norm_ instead.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################

        # Add more code here ...
        optimizer.step()

        accuracy = (pred_targets.argmax(dim=1) == batch_targets).float().mean()

        # append the results
        results['T'].append(config.input_length)
        results['step'].append(step)
        results['accuracy'].append(accuracy.item())
        results['loss'].append(loss.item())

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 1000 == 0:
            print(
                f'[{datetime.now().strftime("%Y-%m-%d %H:%M")}] Train Step {step:04d}/{config.train_steps:04d}, Batch Size = {config.batch_size}, Examples/Sec = {examples_per_second:.2f}, Accuracy = {accuracy:.2f}, Loss = {loss:.3f}'
            )

        if step == config.train_steps:
            results_df = df.from_dict(results)

            if not results_filepath.exists():
                results_df.to_csv(results_filepath,
                                  sep=';',
                                  mode='w',
                                  encoding='utf-8',
                                  index=False)
            else:
                results_df.to_csv(results_filepath,
                                  sep=';',
                                  mode='a',
                                  header=False,
                                  encoding='utf-8',
                                  index=False)
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
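As the comment in the example notes, clip_grad_norm is deprecated in favor of clip_grad_norm_, which clips in place and returns the total gradient norm measured before clipping. A small sketch of its effect on a single parameter:

import torch

w = torch.nn.Parameter(torch.randn(3))
(w * 100).sum().backward()            # produce a deliberately large gradient

total_norm = torch.nn.utils.clip_grad_norm_([w], max_norm=10.0)
print(total_norm)                     # gradient norm before clipping (large)
print(w.grad.norm())                  # at most 10.0 after clipping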
Example #6
def train(config,n_run):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Train on T-1 first digits
    config.input_length = config.input_length - 1

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim, config.num_hidden, config.num_classes, config.batch_size, device=device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length, config.input_dim, config.num_hidden, config.num_classes, config.batch_size, device=device)


    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length+1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    model.to(device)

    train_loss = []
    train_acc = []
    t_loss = []
    t_acc = []

    #Convergence condition
    eps = 1e-6

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Clear stored gradient
        model.zero_grad()

        # Only for time measurement of step through network
        t1 = time.time()

        # Add more code here ...

        # Move inputs and labels to the device
        x = batch_inputs.to(device)
        y = batch_targets.to(device)


        #Forward pass
        pred = model.forward(x)
        loss = criterion(pred, y)
        t_loss.append(loss.item())
        optimizer.zero_grad()

        #Backward pass
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?

        # ANSWER : the function torch.nn.utils.clip_grad_norm() is used to prevent
        # exploding gradients by ‘clipping’ the norm of the gradients, to restrain
        # the gradient values to a certain threshold. This essentially acts as a
        # limit to the size of the updates of the parameters of every layer, ensuring
        # that the parameter values don't change too much from their previous values.

        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        # Add more code here ...

        optimizer.step()
        accuracy = get_accuracy(pred,y, config.batch_size)
        t_acc.append(accuracy.item())

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % 1000 == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss
            ))

        if step % 100 == 0:
            #Get loss and accuracy averages over 100 steps
            train_loss.append(np.mean(t_loss))
            train_acc.append(np.mean(t_acc))
            t_loss = []
            t_acc = []

            if step > 0 and abs(train_loss[-1] - train_loss[-2]) < eps:
                break


        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break


    print('\nDone training.\n')
    #Save trained model and results
    if config.model_type == 'RNN':
        #save model
        torch.save(model, "./Results/RNN/" + str(config.input_length) + "_RNN_model")
        #save train accuracy and loss
        np.save("./Results/RNN/" + str(config.input_length) + "_RNN_accuracy", train_acc)
        np.save("./Results/RNN/" + str(config.input_length) + "_RNN_loss", train_loss)

        # #save model ####################################################################### For SURFsara
        # torch.save(model, str(config.input_length+1) + "_RNN_model_" + str(n_run))
        # #save train accuracy and loss
        # np.save(str(config.input_length+1) + "_RNN_accuracy_" + str(n_run), train_acc)
        # np.save(str(config.input_length+1) + "_RNN_loss_" + str(n_run), train_loss)

    elif config.model_type == 'LSTM':
        #save model
        torch.save(model, "./Results/LSTM/" + str(config.input_length) + "_LSTM_model")
        #save train accuracy and loss
        np.save("./Results/LSTM/" + str(config.input_length) + "_LSTM_accuracy", train_acc)
        np.save("./Results/LSTM/" + str(config.input_length) + "_LSTM_loss", train_loss)
Example #7
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        output = model.forward(batch_inputs)
        loss = criterion(output, batch_targets)

        optimizer.zero_grad()
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # ANSWER: The gradient norm is clipped to max_norm so that we do not get
        # exploding gradients.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################

        optimizer.step()
        loss = loss.item()
        accuracy = (torch.max(output, 1)[1] == batch_targets).float().mean()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
Example #8
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if (config.model_type == 'RNN'):
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(
        model.parameters(), lr=config.learning_rate
    )  #, alpha=0.99, eps=1e-8, weight_decay=0, momentum=0, centered=False)

    accuracies = []
    losses = []
    old_loss = float('inf')
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # add a feature dimension: each input digit is a scalar, but the model
        # expects shape (batch, seq_length, input_dim)
        batch_inputs = batch_inputs[..., None]
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        batch_predictions = model.forward(batch_inputs)
        loss = criterion(batch_predictions, batch_targets)
        losses.append(loss.item())
        model.zero_grad()  # equivalent to optimizer.zero_grad() here: clears old gradients
        loss.backward()

        torch.nn.utils.clip_grad_norm_(
            model.parameters(),
            max_norm=config.max_norm)  # prevents exploding gradients

        optimizer.step()  # must come after clipping, so the clipped gradients are applied

        accuracy = accuracy_(batch_predictions, batch_targets)
        accuracies.append(accuracy)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            with open(config.save_logs, 'a') as file:
                file.write(
                    "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.3f}"
                    .format(datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                            config.train_steps, config.batch_size,
                            examples_per_second, accuracy, loss) + '\n')

        # stop if two consecutive losses are identical
        if step == config.train_steps or old_loss == loss.item():
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break
        old_loss = loss.item()

    print('Done training.')
    return losses, accuracies
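The batch_inputs[..., None] line above exists because the palindrome dataset yields one scalar digit per time step, while the model expects an explicit feature dimension. A minimal sketch of the shapes involved (the batch here is random stand-in data):

import torch

batch_inputs = torch.rand(128, 9)           # (batch_size, seq_length): one scalar digit per step
print(batch_inputs.shape)                   # torch.Size([128, 9])

with_feature_dim = batch_inputs[..., None]  # same as unsqueeze(-1)
print(with_feature_dim.shape)               # torch.Size([128, 9, 1]) -> (batch, seq, input_dim)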
Example #9
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device)  # fixme
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device)
    print(model)
    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer

    criterion = torch.nn.CrossEntropyLoss()  # fixme
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    config.learning_rate)  # fixme
    optimizer.zero_grad()
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()
        # Add more code here ...
        model_outputs = model.forward(batch_inputs)

        loss = criterion(torch.t(model_outputs), batch_targets)  # fixme
        accuracy = accuracy_(model_outputs, batch_targets)  # fixme

        optimizer.zero_grad()

        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # This function clips the norm of the gradient to an acceptable level,
        # which effectively puts a limit on the size of the parameter updates.
        # It must be called after loss.backward() and before optimizer.step().
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        #        writer.add_scalar('accuracy',accuracy,step)
        #        writer.add_scalar('loss',loss,step)

        #        if loss < 0.001:
        #            writer.add_scalar('loss',loss,10000)
        #            writer.add_scalar('accuracy',accuracy,10000)
        #            break

        if step % 10 == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
Example #10
def train(config):
    
    #print parameters
    print_config(config)
    
    config.model_type = config.model_type.lower()
    assert config.model_type in ('rnn', 'lstm', 'rrn')
    
    # Initialize the device which to run the model on
    wanted_device = config.device.lower()
    if wanted_device == 'cuda':
        #check if cuda is available
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        #cpu is the standard option
        device = torch.device('cpu')
        
    
    # Initialize the model that we are going to use    
    if config.model_type == 'rnn':
        model = VanillaRNN(seq_length = config.input_length,
                           input_dim = config.input_dim,
                           num_hidden = config.num_hidden,
                           num_classes = config.num_classes,
                           batch_size = config.batch_size,
                           device = device)
    elif config.model_type == 'lstm':
        model = LSTM(seq_length = config.input_length,
                       input_dim = config.input_dim,
                       num_hidden = config.num_hidden,
                       num_classes = config.num_classes,
                       batch_size = config.batch_size,
                       device = device)
    elif config.model_type == 'rrn':
        model = RRN(seq_length = config.input_length,
                       input_dim = config.input_dim,
                       num_hidden = config.num_hidden,
                       num_classes = config.num_classes,
                       batch_size = config.batch_size,
                       device = device)
        

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length+1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=0)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()

    optimizer = torch.optim.RMSprop(model.parameters(), 
                                        lr=config.learning_rate)
    
    #keep stats
    train_acc = np.zeros(config.train_steps+1)
    first_best_acc = 0
    acc_MA = 0
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # move batches to the device as float inputs and long targets
        x = batch_inputs.to(device=device, dtype=torch.float)
        y_true = batch_targets.to(device=device, dtype=torch.long)

        #Forward pass
        y_pred = model.forward(x)
        loss = criterion(y_pred, y_true)
        
        #Backward pass
        optimizer.zero_grad()
        loss.backward()
        
        ############################################################################
        # QUESTION: what happens here and why?
        # clip_grad_norm_() is a method to avoid exploding gradients: it rescales
        # gradients whose total norm exceeds max_norm back down to max_norm.
        # (The un-suffixed clip_grad_norm() is deprecated.)
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()
        
        train_acc[step] = accuracy(y_pred, y_true, config)        

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/(float(t2-t1) + 1e-6)

        if step % config.print_every == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    train_acc[step], loss
            ))
            print(f"x: {x[0,:]}, y_pred: {y_pred[0,:].argmax()}, y_true: {y_true[0]}")
            
        acc_MA = train_acc[step-4:step+1].sum()/5
        if step == config.train_steps or acc_MA == 1.0:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    #Save the final model
    torch.save(model, config.model_type + "_model.pt")
    np.save("train_acc_" + config.model_type + str(config.input_length), train_acc)
    
    if config.experiment:
        stats = {}
        stats["last acc"] = train_acc[-1]
        first_best_acc = np.argmax(train_acc)
        stats["best acc"] = train_acc[first_best_acc]
        stats["step best acc"] = first_best_acc
        stats["num steps"] = len(train_acc)
        stats["accs"] = train_acc
        return stats
Example #11
        optimizer = torch.optim.RMSprop(model.parameters(), lr=lr, alpha=0.99,
                                        eps=1e-08, weight_decay=0, momentum=0,
                                        centered=False
                                        )
        print('start training')

        for step, tpl in enumerate(data_loader):
            (batch_inputs, batch_targets) = tpl
            # Only for time measurement of step through network
            t1 = time.time()
            # Add more code here ...

            #tensor_input = torch.Tensor(batch_inputs, dtype=torch.float, device=device)
            #tensor_targets = torch.Tensor(batch_targets, dtype=torch.long, device=device)
            batch_targets = batch_targets.to(device)
            output = model.forward(batch_inputs)
            loss = criterion(output, batch_targets)
            accuracy = acc(output, batch_targets)

            optimizer.zero_grad()
            loss.backward()

            ############################################################################
            # QUESTION: what happens here and why?
            # ANSWER:   It rescales the gradient. With each layer backpropagated
            #           through, the gradient can be amplified, which may result in
            #           exploding gradients. To avoid this, the gradient norm is
            #           clipped to max_norm.
            ############################################################################
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_norm)
            ############################################################################
Example #12
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    settings = [config.input_length, config.input_dim, config.num_hidden, config.num_classes, config.batch_size, device]
    model = VanillaRNN(*settings) if config.model_type=='RNN' else LSTM(*settings)
    # print("model params:", list(model.parameters()))

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length+1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)
    

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Add more code here ...
        predictions = model.forward(batch_inputs)
        loss = criterion(predictions, batch_targets)
        accuracy = float((predictions.argmax(dim=1) == batch_targets.long()).sum()) / float(batch_targets.shape[0])
        # print("acc", accuracy)

        optimizer.zero_grad()
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # - Gradients are clipped according to the given threshold to prevent
        # exploding gradients. Clipping must happen after loss.backward() and
        # before optimizer.step().
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()
        

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss.item()
            ))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
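The ordering used above matters: gradients only exist after loss.backward(), so clipping has to sit between backward() and optimizer.step(), and zero_grad() must clear the previous step's gradients before the new backward pass. A minimal sketch of the canonical step with a stand-in model:

import torch

model = torch.nn.Linear(4, 2)                                  # stand-in model
optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()

inputs = torch.randn(8, 4)
targets = torch.randint(0, 2, (8,))

optimizer.zero_grad()                       # 1. clear old gradients
loss = criterion(model(inputs), targets)    # 2. forward pass
loss.backward()                             # 3. compute gradients
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # 4. clip
optimizer.step()                            # 5. apply the update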
Example #13
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    tol = 0.

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    accuracies = [0, 1]
    losses = [0, 1]

    if config.quite:
        bar = tqdm(total=config.train_steps)
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs[..., None]   # add the feature dimension
        batch_inputs = batch_inputs.to(device)   # .to() is not in-place, reassign
        batch_targets = batch_targets.to(device)

        # FORWARD, BACKWARD, AND STEP
        out = model.forward(batch_inputs)
        model.zero_grad()
        loss = criterion(out, batch_targets)
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # The gradient norm is clipped to max_norm to prevent exploding gradients.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################
        optimizer.step()

        # Add more code here ...
        accuracy = (out.argmax(dim=1) == batch_targets.long()).float().mean()
        losses.append(loss.item())
        accuracies.append(accuracy.item())

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0 and not config.quite:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracies[-1], losses[-1]))
        if config.quite:
            bar.update()
        if step == config.train_steps or np.isclose(losses[-1], losses[-2], tol):
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break
    print('Done training.')
    return accuracies[2:], losses[2:]
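The device-transfer fix in this example comes down to one rule: Tensor.to() returns a new tensor instead of modifying the original, so its result has to be reassigned. A tiny sketch (using a dtype change so it runs without a GPU):

import torch

x = torch.zeros(3)

x.to(torch.float64)        # result discarded: x is still float32
print(x.dtype)             # torch.float32

x = x.to(torch.float64)    # reassign to actually keep the converted tensor
print(x.dtype)             # torch.float64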
Example #14
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the model that we are going to use
    device = torch.device(config.device)  # fixme

    if config.model_type == 'RNN':
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           num_classes=config.num_classes,
                           batch_size=config.batch_size,
                           device=config.device).to(config.device)  # fixme
    elif config.model_type == 'LSTM':
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     batch_size=config.batch_size,
                     device=config.device).to(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()  # fixme
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)  # fixme

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Add more code here ...
        batch_inputs = batch_inputs.to(config.device)
        batch_targets = batch_targets.to(config.device)

        y_pred = model.forward(batch_inputs)
        loss = criterion(y_pred, batch_targets)
        optimizer.zero_grad()
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        """This call bounds the norm of the gradient to a threshold (specified by
        the max_norm argument). This technique helps against exploding gradients and
        makes training stable by avoiding overly large parameter updates. It has to
        run after loss.backward() and before optimizer.step()."""
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        accuracy = acc(y_pred, batch_targets)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    acc_test = []
    for i in range(10):
        (tr, te) = next(iter(data_loader))
        tr, te = tr.to(config.device), te.to(config.device)
        y_pred = model.forward(tr)
        acc_test.append(acc(y_pred, te))
    print('FINAL TEST ACCURACY: ', np.mean(acc_test))

    print('Done training.')
Example #15
def train(config, device="cpu"):
    assert config.model_type in ('RNN', 'LSTM')

    # Tensorboard summary writer
    run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_" +
                                     config.model_type.lower() + '_' +
                                     str(config.input_length))
    log_dir = 'tensorboard/' + config.model_type.lower() + '/' + run_id
    writer = SummaryWriter(log_dir=log_dir)

    # Torch settings
    if device == 'cpu':
        torch.set_default_tensor_type(torch.FloatTensor)
    elif device == 'cuda:0':
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    dtype = torch.float

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length,
                           config.input_dim,
                           config.num_hidden,
                           config.num_classes,
                           config.batch_size,
                           device=device).to(device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length,
                     config.input_dim,
                     config.num_hidden,
                     config.num_classes,
                     config.batch_size,
                     device=device).to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    # Accuracy and loss to be saved
    accuracies = []
    losses = []

    # Useful for convergence check
    avg_range = 200
    last_accuracy = 0
    convergence_threshold = 1e-4

    model.train()
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Load batches in the GPU
        batch_inputs = batch_inputs.to(device=device)
        batch_targets = batch_targets.to(device=device)

        # Forward pass
        predictions = model.forward(batch_inputs)

        # Compute loss
        loss = criterion(predictions, batch_targets)

        # Reset gradients before backwards pass
        optimizer.zero_grad()

        # Backward pass
        loss.backward()

        # Clipping gradients to avoid exploding gradient problem
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)

        # Update weights
        optimizer.step()

        # Compute accuracy
        accuracy = get_accuracy(predictions, batch_targets)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Add accuracy and loss to the writer
        writer.add_scalars('accuracy_and_loss', {
            'acc': accuracy,
            'loss': loss
        }, step)

        # Store accuracy and loss (as plain floats)
        accuracies.append(float(accuracy))
        losses.append(loss.item())

        # Print information
        if step % 100 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

        # Check for convergence
        if step % avg_range == 0 and step != 0:
            avg_accuracy = np.mean(accuracies[-avg_range:])
            if np.abs(avg_accuracy - last_accuracy) < convergence_threshold:
                print(
                    "The model has converged with accuracy", avg_accuracy,
                    "(" + ("+" if avg_accuracy > last_accuracy else "-") +
                    str(np.abs(avg_accuracy - last_accuracy)) + ")")
                break
            last_accuracy = avg_accuracy

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    save_results(accuracies, losses, run_id, config.model_type,
                 config.input_length, last_accuracy)
    writer.close()
    print('Done training. Accuracy:', avg_accuracy)
Example #16
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    if config.device == 'best':
        config.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.embed_dim, config.num_hidden, \
            config.num_classes, device)
    else:
        model = LSTM(config.embed_dim, config.num_hidden, \
            config.num_classes, device)

    # Initialize the dataset and data loader
    dataset = PalindromeDataset(config.input_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = F.cross_entropy
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    # Track metrics 
    losses = []
    losses_last10 = []
    accuracies = []
    accuracies_last10 = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Transform input to RNN input format (sequence, batch, input)
        # batch_inputs = batch_inputs.t().unsqueeze(2).to(device=device, dtype=torch.long)
        batch_inputs = batch_inputs.t().to(device=device, dtype=torch.long)
        batch_targets = batch_targets.to(device=device, dtype=torch.long)

        # Only for time measurement of step through network
        t1 = time.time()

        # forward pass
        logits = model.forward(batch_inputs)

        # backprop
        optimizer.zero_grad()
        loss = criterion(logits, batch_targets)
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # The gradient norm is clipped to max_norm to prevent exploding gradients.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        # Compute metrics
        accuracy = (logits.cpu().argmax(dim=1) == batch_targets.cpu()).numpy().mean()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        # track metrics
        accuracies_last10.append(accuracy.tolist())
        losses_last10.append(loss.tolist())

        if step % 10 == 0:

            message = "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss)
            print(message)
            if config.log_path != "":
                with open(config.log_path, "a") as f:
                    f.write(message + "\n")
            accuracies.append(np.mean(accuracies_last10))
            losses.append(np.mean(losses_last10))
            accuracies_last10 = []
            losses_last10 = []

        # Early stopping criterion: average accuracy over last 1000 iters was lower than the 1000 before that
        stopping_criterion =  len(accuracies) > 200 and \
            np.mean(accuracies[-100:]) <= np.mean(accuracies[-200:-100])

        if step == config.train_steps or stopping_criterion:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            print('Done training.')
            return losses, accuracies
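The early-stopping criterion above compares the mean accuracy of the most recent logging window with the window before it. A minimal standalone sketch of the same idea (the helper name and window size are illustrative):

import numpy as np

def should_stop(accuracies, window=100):
    """Stop when the mean accuracy of the latest window no longer improves."""
    if len(accuracies) < 2 * window:
        return False
    recent = np.mean(accuracies[-window:])
    previous = np.mean(accuracies[-2 * window:-window])
    return recent <= previous

history = [0.5] * 100 + [1.0] * 200      # stand-in accuracy curve that has flattened
print(should_stop(history, window=100))  # True: the last two windows have equal means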
Example #17
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           num_classes=config.num_classes,
                           batch_size=config.batch_size,
                           device=device)
    elif config.model_type == 'LSTM':
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     batch_size=config.batch_size,
                     device=device)

    model.to(device)
    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(params=model.parameters(),
                                    lr=config.learning_rate)

    # evaluation metrics
    results = []

    print_setting(config)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        s_inputs = batch_inputs.shape
        s_targets = batch_targets.shape

        #forward pass
        predictions = model.forward(batch_inputs)

        #compute loss
        loss = criterion(predictions, batch_targets)

        #backward pass & updates
        # set gradients to zero
        optimizer.zero_grad()
        loss.backward()
        ############################################################################
        # QUESTION: what happens here and why?
        # Prevents exploding gradients by rescaling to a limit specified by config.max_norm
        # Forcing gradients to be within a certain norm to ensure reasonable updates
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        accuracy = (predictions.argmax(dim=1)
                    == batch_targets).sum().float() / (config.batch_size)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.eval_freq == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

            #l = loss.float().item()
            results.append([step, accuracy.item(), loss.float().item()])

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training. \n')

    return results
Example #18
def train(config):

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device).to(device)
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device).to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss().to(device)  # fixme
    optimizer = optim.RMSprop(model.parameters(),
                              lr=config.learning_rate)  # fixme

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Reshape the tensors to their correct shape, cast to a device
        # (cpu/gpu) and compute the model's output.
        batch_inputs = batch_inputs.unsqueeze(-1).to(device)
        batch_targets = batch_targets.to(device)
        output = model.forward(batch_inputs)

        # Compute the loss and the gradients.
        loss = criterion(output, batch_targets)

        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # The gradient norm is clipped to max_norm to prevent exploding gradients.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################

        # Add more code here ...
        optimizer.step()
        optimizer.zero_grad()

        loss = loss.item()  # fixme
        accuracy = (
            output.argmax(1) == batch_targets).float().mean().item()  # fixme

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

        if step == config.train_steps or f"{loss:.3f}" == "0.000":
            with open("results.csv", 'a') as file:
                file.write(
                    f"{config.input_length};{accuracy};{config.model_type}\n")

            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')