Exemplo n.º 1
0
def train():
    """
    Performs training and evaluation of MLP model.
  
    Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
    """
    
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
        torch.cuda.manual_seed_all(42)
    
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    # print("Device", device)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []
    

    # DNN_HIDDEN_UNITS_DEFAULT = '100'
    # LEARNING_RATE_DEFAULT = 1e-3
    # MAX_STEPS_DEFAULT = 1400
    # BATCH_SIZE_DEFAULT = 200
    # EVAL_FREQ_DEFAULT = 100
    
    data = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
    train = data['train']
    print(train.images.shape)
    test = data['test']
    n_inputs = train.images[0].flatten().shape[0]
    n_classes = train.labels[0].shape[0]

    mlp = MLP(n_inputs, dnn_hidden_units, n_classes)
    loss_mod = nn.CrossEntropyLoss()
    if FLAGS.optimizer == 'SGD':
        optimizer = torch.optim.SGD(mlp.parameters(), lr=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'AdamW':
        optimizer = torch.optim.AdamW(mlp.parameters(), lr=FLAGS.learning_rate)
    
    mlp.to(device)

    loss_history = []
    acc_history = []
    for step in range(FLAGS.max_steps): #FLAGS.max_steps
        mlp.train()
        x, y = train.next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x.reshape(x.shape[0], n_inputs)).to(device)
        y = torch.from_numpy(np.argmax(y, axis=1)).to(device) # converts onehot to dense

        out = mlp(x)
        loss = loss_mod(out, y)
        loss_history.append(loss)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if step == 0 or (step + 1) % FLAGS.eval_freq == 0:
            mlp.eval()
            with torch.no_grad():
                x, y = test.images, test.labels
                x = torch.from_numpy(x.reshape(x.shape[0], n_inputs)).to(device)
                y = torch.from_numpy(y).to(device)
                test_out = mlp.forward(x)
                acc = accuracy(test_out, y)
                print('Accuracy:', acc)
                acc_history.append(acc)
    print('Final loss:', loss_history[-1])
    print('Final acc:', acc_history[-1])

    plt.plot(loss_history)
    plt.step(range(0, FLAGS.max_steps + 1, FLAGS.eval_freq), acc_history) # range(0, FLAGS.max_steps, FLAGS.eval_freq)
    plt.legend(['loss', 'accuracy'])
    plt.show()
Exemplo n.º 2
0
def train():
    """
    Performs training and evaluation of MLP model.

    TODO:
    Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
    """

    # DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # -------------------------- UNCKECKED -------------------
    # initialize tensorboard
    run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_mlp")
    if batchnorm:
        run_id = run_id + '_batchnorm'
    log_dir = 'tensorboard/' + run_id
    writer = SummaryWriter(log_dir=log_dir)

    # get the dataset
    data_set = cifar10_utils.get_cifar10(FLAGS.data_dir)

    # get dataset information
    n_batches = {
        'train': int(data_set['train']._num_examples / FLAGS.batch_size),
        'validation':
        int(data_set['validation']._num_examples / FLAGS.batch_size),
        'test': int(data_set['test']._num_examples / FLAGS.batch_size)
    }
    image_shape = data_set['train'].images[0].shape
    n_inputs = image_shape[0] * image_shape[1] * image_shape[2]
    n_classes = data_set['train'].labels[0].shape[0]

    # get the necessary components
    classifier = MLP(n_inputs, dnn_hidden_units, n_classes, dropout,
                     batchnorm).to(device)
    loss_function = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(classifier.parameters(),
                                 lr=FLAGS.learning_rate,
                                 weight_decay=weight_decay)

    # list of training accuracies and losses
    train_accuracies = []
    train_losses = []

    # list of test accuracies and losses
    test_accuracies = []
    test_losses = []

    epoch_test_accuracy = 0
    epoch_test_loss = 0

    # training loop
    for step in range(FLAGS.max_steps):

        # get current batch...
        images, labels = data_set['train'].next_batch(FLAGS.batch_size)
        images = images.reshape(FLAGS.batch_size, n_inputs)

        # ...in the gpu
        images = torch.from_numpy(images).type(dtype).to(device=device)
        labels = torch.from_numpy(labels).type(dtype).to(device=device)

        # forward pass
        classifier.train()
        predictions = classifier.forward(images)

        # compute loss
        class_labels = labels.argmax(dim=1)
        loss = loss_function(predictions, class_labels)

        # reset gradients before backwards pass
        optimizer.zero_grad()

        # backward pass
        loss.backward()

        # update weights
        optimizer.step()

        # get accuracy and loss for the batch
        train_accuracy = accuracy(predictions, labels)
        train_accuracies.append(train_accuracy)

        writer.add_scalar("Training accuracy vs steps", train_accuracy, step)

        train_losses.append(loss.item())
        writer.add_scalar("Training loss vs steps", loss.item(), step)

        if ((step + 1) % 100) == 0 or step == 0:
            print("\nStep", step + 1)
            print("\tTRAIN:", round(train_accuracy * 100, 1), "%")

        # run evaluation every eval_freq epochs
        if (step + 1) % FLAGS.eval_freq == 0 or (step + 1) == FLAGS.max_steps:

            # list of test batch accuracies and losses for this step
            step_test_accuracies = []
            step_test_losses = []

            # get accuracy on the test set
            classifier.eval()
            for batch in range(n_batches['test']):
                # get current batch...
                images, labels = data_set['test'].next_batch(FLAGS.batch_size)
                images = images.reshape(FLAGS.batch_size, n_inputs)

                # ...in the gpu
                images = torch.from_numpy(images).type(dtype).to(device=device)
                labels = torch.from_numpy(labels).type(dtype).to(device=device)

                # forward pass
                predictions = classifier(images)

                # compute loss
                class_labels = labels.argmax(dim=1)
                loss = loss_function(predictions, class_labels)

                # get accuracy and loss for the batch
                step_test_accuracies.append(accuracy(predictions, labels))
                step_test_losses.append(loss.item())

            # store accuracy and loss
            epoch_test_accuracy = np.mean(step_test_accuracies)
            test_accuracies.append(epoch_test_accuracy)

            epoch_test_loss = np.mean(step_test_losses)
            test_losses.append(epoch_test_loss)

            print("\tTEST:", round(epoch_test_accuracy * 100, 1), "%")

        writer.add_scalar("Test accuracy vs epochs", epoch_test_accuracy, step)
        writer.add_scalar("Test loss vs epochs", epoch_test_loss, step)

    print("\nBest TEST:", round(max(test_accuracies) * 100, 1), "%")

    # save results
    results = {
        'train_accuracies': train_accuracies,
        'train_losses': train_losses,
        'test_accuracies': test_accuracies,
        'test_losses': test_losses,
        'eval_freq': FLAGS.eval_freq
    }

    if not os.path.exists("results/"):
        os.makedirs("results/")
    with open("results/" + run_id + "_results.pkl", "wb") as file:
        pkl.dump(results, file)

    writer.close()
Exemplo n.º 3
0
def train(n_hidden_1, dropout, lr, wdecay, _run):
    """
  Performs training and evaluation of MLP model. 

  Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
  """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    def get_xy_tensors(batch):
        x, y = batch
        x = torch.tensor(x.reshape(-1, 3072), dtype=torch.float32).to(device)
        y = torch.tensor(y, dtype=torch.long).to(device)
        return x, y

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    datasets = cifar10_utils.read_data_sets(DATA_DIR_DEFAULT, one_hot=False)
    train_data = datasets['train']
    test_data = datasets['test']
    model = MLP(n_inputs=3072,
                n_hidden=[n_hidden_1, 400],
                n_classes=10,
                dropout=dropout).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wdecay)

    log_every = 50
    avg_loss = 0
    avg_acc = 0
    for step in range(FLAGS.max_steps):
        x, y = get_xy_tensors(train_data.next_batch(FLAGS.batch_size))

        # Forward and backward passes
        optimizer.zero_grad()
        out = model.forward(x)
        loss = loss_fn(out, y)
        loss.backward()

        # Parameter updates
        optimizer.step()

        avg_loss += loss.item() / log_every
        avg_acc += accuracy(out, y) / log_every
        if step % log_every == 0:
            print('[{}/{}] train loss: {:.6f}  train acc: {:.6f}'.format(
                step, FLAGS.max_steps, avg_loss, avg_acc))
            _run.log_scalar('train-loss', avg_loss, step)
            _run.log_scalar('train-acc', avg_acc, step)
            avg_loss = 0
            avg_acc = 0

        # Evaluate
        if step % FLAGS.eval_freq == 0 or step == (FLAGS.max_steps - 1):
            x, y = get_xy_tensors(test_data.next_batch(test_data.num_examples))
            model.eval()
            out = model.forward(x)
            model.train()
            test_loss = loss_fn(out, y).item()
            test_acc = accuracy(out, y)
            print('[{}/{}]  test accuracy: {:6f}'.format(
                step, FLAGS.max_steps, test_acc))

            _run.log_scalar('test-loss', test_loss, step)
            _run.log_scalar('test-acc', test_acc, step)
def train():
    """
    Performs training and evaluation of MLP model.
  
    TODO:
    Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # select which device to train the model on
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    # compute the input size of the MLP
    input_size, n_classes = 3 * 32 * 32, 10

    # init model, define the dataset, loss function and optimizer
    model = MLP(input_size, dnn_hidden_units, n_classes, FLAGS.b).to(device)
    dataset = cifar10_utils.get_cifar10(FLAGS.data_dir)
    loss_fn = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=FLAGS.learning_rate)

    for step in range(FLAGS.max_steps):
        X_train, y_train = dataset['train'].next_batch(FLAGS.batch_size)
        optimizer.zero_grad()

        # move to correct device and shape for MLP
        X_train, y_train = torch.tensor(X_train).reshape(
            FLAGS.batch_size, input_size).float().to(device), torch.tensor(
                y_train).float().to(device)

        predictions = model(X_train)
        train_loss = loss_fn(predictions, y_train.argmax(1).long())

        train_loss.backward()
        optimizer.step()

        # add the loss and accuracy to the lists for plotting
        train_overall_loss.append(train_loss.cpu().detach().sum())
        train_overall_accuracy.append(
            accuracy(predictions.cpu().detach(),
                     y_train.cpu().detach()))
        train_x_axis.append(step)

        # test the model when eval freq is reached or if it is the last step
        if not step % FLAGS.eval_freq or step + 1 == FLAGS.max_steps:
            model.eval()
            test_accuracies, test_losses_list = [], []

            # test batchwise since it doesnot fit my gpu
            for X_test, y_test in cifar_test_generator(dataset):
                X_test, y_test = torch.tensor(X_test).reshape(
                    FLAGS.batch_size, input_size).float().to(
                        device), torch.tensor(y_test).float().to(device)

                predictions = model(X_test)
                test_loss = loss_fn(predictions, y_test.argmax(1).long())
                test_accuracy = accuracy(predictions, y_test)

                # add the values to compute the average loss and accuracy for the entire testset
                test_accuracies.append(test_accuracy.cpu().detach())
                test_losses_list.append(test_loss.cpu().detach().sum())

            print(
                "[{:5}/{:5}] Train loss {:.5f} Test loss {:.5f} Test accuracy {:.5f}"
                .format(step, FLAGS.max_steps, train_loss, test_loss,
                        sum(test_accuracies) / len(test_accuracies)))

            test_overall_accuracy.append(
                sum(test_accuracies) / len(test_accuracies))
            test_overall_loss.append(
                sum(test_losses_list) / len(test_losses_list))
            test_x_axis.append(step)

            model.train()

    plt.plot(train_x_axis, train_overall_loss, label="Avg Train loss")
    plt.plot(test_x_axis, test_overall_loss, label="Avg Test loss")
    plt.legend()
    plt.savefig("pytorch_loss_curve")
    plt.show()

    plt.plot(train_x_axis,
             train_overall_accuracy,
             label="Train batch accuracy")
    plt.plot(test_x_axis, test_overall_accuracy, label="Test set accuracy")
    plt.legend()
    plt.savefig("pytorch_accuracy_curve")
    plt.show()
def train():
  """
  Performs training and evaluation of MLP model. 

  TODO:
  Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)
  torch.manual_seed(42)
  
  ## Prepare all functions
  # Get number of units in each hidden layer specified in the string such as 100,100
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
    dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
  else:
    dnn_hidden_units = []

  ########################
  # PUT YOUR CODE HERE  #
  #######################
  net = MLP(3072, dnn_hidden_units, 10)
  net.to(device)
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(net.parameters(), lr = FLAGS.learning_rate)

  #Load cifar10
  cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
  print()
  print()
  print("----------------------------------------------")
  print("\t \t Training")
  print("----------------------------------------------\n")
  pl_loss =[]
  average_loss =[]
  moving_average=0.0
  acc =[]
  count = 1
  acc =[]
  check =0
  for iter_ in np.arange(0, FLAGS.max_steps):

    #Load batches 
    x , y = cifar10['train'].next_batch(FLAGS.batch_size)
    
    labels = np.argmax(y, axis=1)
    
    #reshape x into vectors
    x = np.reshape(x, (200, 3072))
    inputs, labels = torch.from_numpy(x), torch.LongTensor(torch.from_numpy(labels))
    
    inputs, labels = inputs.to(device), labels.to(device)

    # # labels = torch.LongTensor(labels)
    
    # # zero the parameter gradients
    optimizer.zero_grad()

    # # forward + backward + optimize
    outputs = net(inputs)
    print("output: {}, labels:{}".format(outputs.size(),labels.size()))
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # # print statistics
    running_loss = loss.item()
    pl_loss.append(running_loss)
    moving_average+=running_loss
    average_loss.append(np.mean(np.mean(pl_loss[:-100:-1])))
    print("iter: {} | training loss: {} ".format(iter_,"%.3f"%running_loss))

    
    if (iter_+1)%FLAGS.eval_freq==0:
      net.eval()
      acc.append(evaluate(net, cifar10, FLAGS.batch_size))

  #######################
  # END OF YOUR CODE    #
  #######################
  
  plt.plot(pl_loss,'r-', label="Batch loss", alpha=0.5)
  plt.plot(average_loss,'g-', label="Average loss", alpha=0.5)
  plt.legend()
  plt.xlabel("Iterations")
  plt.ylabel("Loss")
  plt.title("Training Loss")
  plt.grid(True)
  plt.show()
  plt.close()

  plt.plot(acc,'g-', alpha=0.5)
  plt.xlabel("Iterations")
  plt.ylabel("Accuracy")
  plt.title("Test Accuracy")
  plt.grid(True)
  plt.show()
  plt.close()
  print()
  print("TRAINING COMPLETED") 
Exemplo n.º 6
0
def train():
    """
    Performs training and evaluation of MLP model.
  
    TODO:
    Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    loss_list = []
    batch_list = []
    accuracy_list = []
    # load the batches and reshape the samples

    for iter in range(FLAGS.max_steps):
        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        y = np.argmax(y, axis=1)
        # transform sample into vector
        x = np.reshape(x, (FLAGS.batch_size, -1))
        batch_list.append((x, y))
    print('Batch list completed')
    in_features = batch_list[0][0].shape[1]
    out_features = 10  #num_classes

    x_test, y_test = cifar10['test'].images, cifar10['test'].labels
    x_test = np.reshape(x_test, (x_test.shape[0], -1))
    y_test = np.argmax(y_test, axis=1)
    print(y_test.shape)
    x_test = torch.from_numpy(x_test)
    y_test = torch.from_numpy(y_test).long()
    print(y_test)
    net = MLP(in_features, dnn_hidden_units, out_features)
    #var_init(net, sd=0.0001)
    lossfunc = nn.CrossEntropyLoss()
    optimiser = optim.SGD(net.parameters(), lr=FLAGS.learning_rate)
    print(net)
    net.train()
    for i in range(FLAGS.max_steps):
        inputs, labels = batch_list[i]
        #inputs = torch.from_numpy(inputs)
        inputs = torch.tensor(inputs)
        labels = torch.from_numpy(labels).long()

        optimiser.zero_grad()
        outputs = net.forward(inputs.float())

        loss = lossfunc(outputs, labels)

        loss_list.append(loss)

        loss.backward()
        optimiser.step()

        if (i + 1) % FLAGS.eval_freq == 0:
            net.eval()
            predicted = net.forward(x_test)
            accuracy_val = accuracy(predicted, y_test)
            accuracy_list.append(accuracy_val)
            print('Accuracy on test set at step {} is {}'.format(
                i, accuracy_val))
            print('Loss of training is {}'.format(loss.item()))

    plt.subplot(2, 1, 1)
    plt.plot(
        np.arange(len(accuracy_list) * FLAGS.eval_freq, step=FLAGS.eval_freq),
        accuracy_list, 'o-')
    plt.xlabel('Step')
    plt.ylabel('Accuracy')
    #
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(len(loss_list)), loss_list)
    plt.xlabel('Step')
    plt.ylabel('Loss')
def train():
    """
    Performs training and evaluation of MLP model.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################

    # load the test daa
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir, one_hot=False)
    test_images, test_labels = torch.from_numpy(cifar10['test'].images).to(device), \
                               torch.from_numpy(cifar10['test'].labels).to(device)

    # flatten the images for the MLP
    test_vectors = reshape_images(test_images)

    # set up the model
    mlp_model = MLP(3072, dnn_hidden_units, 10)
    mlp_model.to(device)
    loss_module = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(mlp_model.parameters(),
                                 lr=FLAGS.learning_rate)

    accuracies = []
    losses = []
    mlp_model.train()
    for i in range(FLAGS.max_steps):

        # load data
        images, labels = cifar10['train'].next_batch(FLAGS.batch_size)
        image_vectors = reshape_images(images)
        image_vectors, labels = torch.from_numpy(
            image_vectors), torch.from_numpy(labels)
        image_vectors, labels = image_vectors.to(device), labels.to(device)
        labels.to(device)

        # forward pass
        model_pred = mlp_model(image_vectors)

        # calculate the loss
        loss = loss_module(model_pred, labels)

        # backward pass
        optimizer.zero_grad()
        loss.backward()

        # update the parameters
        optimizer.step()

        # evaluate the model on the data set every eval_freq steps
        mlp_model.eval()
        if i % FLAGS.eval_freq == 0:
            with torch.no_grad():
                test_pred = mlp_model(test_vectors)
                test_accuracy = accuracy(test_pred, test_labels)
                accuracies.append(test_accuracy)
                losses.append(loss)

        mlp_model.train()

    plot_curve(accuracies, 'Accuracy')
    plot_curve(losses, 'Loss')
def train():
    """
    Performs training and evaluation of MLP model. 

    TODO:
    Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    #######################
    # PUT YOUR CODE HERE  #
    #######################
    model = MLP(32 ** 2 * 3, dnn_hidden_units, 10)
    print(model)

    cv_size = 10000
    cifar10 = cifar10_utils.get_cifar10('cifar10/cifar-10-batches-py',
                                        validation_size=cv_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=FLAGS.learning_rate,
                          momentum=0.9, weight_decay=0.003)

    log = defaultdict(list)

    for step in range(FLAGS.max_steps):
        optimizer.zero_grad()

        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x.reshape(FLAGS.batch_size, -1))
        y = torch.from_numpy(y)

        h = model.forward(x)

        loss = criterion(h, y.argmax(1))
        loss.backward()
        
        optimizer.step()

        if step % FLAGS.eval_freq == 0:
            log['train_loss'].append(loss.item())
            log['train_acc'].append(accuracy(h, y))

            model.eval()

            x, y = cifar10['validation'].next_batch(cv_size)
            x = torch.from_numpy(x.reshape(-1, 32 ** 2 * 3))
            y = torch.from_numpy(y)

            h = model.forward(x)

            loss = criterion(h, y.argmax(1))

            log['cv_loss'].append(loss.item())
            log['cv_acc'].append(accuracy(h, y))

            model.train()

            print(
                f"Step {step} | "
                f"Training loss: {log['train_loss'][-1]:.5f}, "
                f"accuracy: {100 * log['train_acc'][-1]:.1f}% | "
                f"CV loss: {log['cv_loss'][-1]:.5f}, "
                f"accuracy: {100 * log['cv_acc'][-1]:.1f}%")

    model.eval()
    x, y = cifar10['test'].next_batch(cifar10['test'].num_examples)
    x = torch.from_numpy(x.reshape(-1, 32 ** 2 * 3))
    y = torch.from_numpy(y)

    h = model.forward(x)

    loss = criterion(h, y.argmax(1))

    print(f"Test loss: {loss.item()}, accuracy: {100 * accuracy(h, y):.1f}%")

    # Plot loss and accuracy.
    plt.subplot(121)
    plt.title("Loss")
    plt.plot(log['train_loss'], label="Training")
    plt.plot(log['cv_loss'], label="Cross Validation")
    plt.xlabel("Step")
    plt.legend()

    plt.subplot(122)
    plt.title("Accuracy")
    plt.plot(log['train_acc'], label="Training")
    plt.plot(log['cv_acc'], label="Cross Validation")
    plt.xlabel("Step")
    plt.legend()

    plt.legend()
    plt.show()
Exemplo n.º 9
0
def train():
    """
  Performs training and evaluation of MLP model.

  TODO:
  Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
  """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # Device configuration
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        print('Running in GPU model')

    device = torch.device('cuda' if use_cuda else 'cpu')
    dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

    # load dataset
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    # get batches
    batches = []

    # initializing loss and accuracy arrays
    accuracies = []
    losses = []

    for i in range(FLAGS.max_steps):
        x, y = cifar10['train'].next_batch(
            FLAGS.batch_size)  # (batch_size, 3, 32, 32) (batch_size, 10)
        x = x.reshape(FLAGS.batch_size, -1)
        batches.append((x, y))
    # get output size
    out_size = batches[-1][1].shape[1]
    # get intput size
    in_size = batches[-1][0].shape[1]
    # initialize network
    net = MLP(in_size, dnn_hidden_units, out_size, FLAGS.batch_norm,
              FLAGS.dropout).to(device)

    # initialize l1 regularization
    reg_factor = 1e-6

    # intialize loss function
    criterion = nn.CrossEntropyLoss()
    if FLAGS.l2:
        optimizer = torch.optim.Adam(net.parameters(),
                                     lr=FLAGS.learning_rate,
                                     weight_decay=1e-5)
    else:
        optimizer = torch.optim.Adam(net.parameters(), lr=FLAGS.learning_rate)
    # make steps
    for s in range(FLAGS.max_steps):
        net.train()
        x, t = batches[s]
        # Forward pass
        y = net(torch.from_numpy(x).type(dtype))
        t = torch.from_numpy(t).type(dtype)
        t = torch.max(t, 1)[1]

        loss = criterion(y, t)

        if FLAGS.l1:
            l1_loss = torch.autograd.Variable(torch.FloatTensor(1),
                                              requires_grad=True)
            for name, param in net.named_parameters():
                if 'bias' not in name and isinstance(param, nn.Linear):
                    loss = loss + (reg_factor * torch.sum(torch.abs(param)))
        losses.append(loss.cpu().detach().numpy())

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if s % FLAGS.eval_freq == 0:
            net.eval()
            x, t = cifar10['test'].images, cifar10['test'].labels
            x = x.reshape(x.shape[0], -1)
            y = net(torch.from_numpy(x).type(dtype))
            acc = accuracy(y.cpu().detach().numpy(), t)
            print('accuracy at step', s, ': ', acc)
            print('loss at step', s, ': ', loss.cpu().detach().numpy())
            accuracies.append(acc * 100)
Exemplo n.º 10
0
def train():
    """
    Performs training and evaluation of MLP model.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    opt = 'Adam'
    decay = 0

    # Import data
    cifar10_data = cifar10_utils.get_cifar10(FLAGS.data_dir)
    train_data = cifar10_data['train']
    test_data = cifar10_data['test']
    validation_data = cifar10_data['validation']

    input_size = np.prod(np.array([train_data.images[0].shape]))
    output_size = train_data.labels.shape[1]

    # Create model and optimizer
    model = MLP(input_size, dnn_hidden_units, output_size)
    criterion = nn.CrossEntropyLoss()
    if opt is 'Adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=FLAGS.learning_rate,
                               weight_decay=decay)
    elif opt is 'SGD':
        optimizer = optim.SGD(model.parameters(),
                              lr=FLAGS.learning_rate,
                              weight_decay=decay)
    model.train()

    # Train & evaluate
    eval_loss = 0.0
    full_loss = []
    lossv = []
    accv = []

    for step in range(1, FLAGS.max_steps + 1):
        data, target = train_data.next_batch(FLAGS.batch_size)
        data, target = Variable(torch.from_numpy(data).float()), Variable(
            torch.from_numpy(target))

        optimizer.zero_grad()

        prediction = model(data)
        loss = criterion(prediction, torch.argmax(target, dim=1))
        loss.backward()
        optimizer.step()

        full_loss.append(loss.item())

        # Accuracy evaluation
        eval_loss += loss.item()
        if step % FLAGS.eval_freq == 0:
            model.eval()

            # test_x, test_y = test_data.next_batch(FLAGS.batch_size)
            test_x, test_y = test_data.images, test_data.labels
            test_x = Variable(torch.from_numpy(test_x).float())

            predicted_y = model(test_x)
            accuracy_result = accuracy(predicted_y.detach().numpy(), test_y)

            lossv.append(eval_loss / FLAGS.eval_freq)
            accv.append(accuracy_result)
            print('Step %d  -  accuracy: %.4f  -  loss: %.3f' %
                  (step, accuracy_result, eval_loss / FLAGS.eval_freq))
            eval_loss = 0.0

            model.train()

    print("Training Done")