Example #1
def train():
    """
    Performs training and evaluation of MLP model.
  
    Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
    """
    
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
        torch.cuda.manual_seed_all(42)
    
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    # print("Device", device)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []
    

    # DNN_HIDDEN_UNITS_DEFAULT = '100'
    # LEARNING_RATE_DEFAULT = 1e-3
    # MAX_STEPS_DEFAULT = 1400
    # BATCH_SIZE_DEFAULT = 200
    # EVAL_FREQ_DEFAULT = 100
    
    data = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
    train_set = data['train']
    print(train_set.images.shape)
    test = data['test']
    n_inputs = train_set.images[0].flatten().shape[0]
    n_classes = train_set.labels[0].shape[0]

    mlp = MLP(n_inputs, dnn_hidden_units, n_classes)
    loss_mod = nn.CrossEntropyLoss()
    if FLAGS.optimizer == 'SGD':
        optimizer = torch.optim.SGD(mlp.parameters(), lr=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'AdamW':
        optimizer = torch.optim.AdamW(mlp.parameters(), lr=FLAGS.learning_rate)
    
    mlp.to(device)

    loss_history = []
    acc_history = []
    for step in range(FLAGS.max_steps): #FLAGS.max_steps
        mlp.train()
        x, y = train_set.next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x.reshape(x.shape[0], n_inputs)).to(device)
        y = torch.from_numpy(np.argmax(y, axis=1)).to(device) # converts onehot to dense

        out = mlp(x)
        loss = loss_mod(out, y)
        loss_history.append(loss.item())  # store the scalar value, not the graph-attached tensor

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if step == 0 or (step + 1) % FLAGS.eval_freq == 0:
            mlp.eval()
            with torch.no_grad():
                x, y = test.images, test.labels
                x = torch.from_numpy(x.reshape(x.shape[0], n_inputs)).to(device)
                y = torch.from_numpy(y).to(device)
                test_out = mlp.forward(x)
                acc = accuracy(test_out, y)
                print('Accuracy:', acc)
                acc_history.append(acc)
    print('Final loss:', loss_history[-1])
    print('Final acc:', acc_history[-1])

    plt.plot(loss_history)
    plt.step(range(0, FLAGS.max_steps + 1, FLAGS.eval_freq), acc_history) # range(0, FLAGS.max_steps, FLAGS.eval_freq)
    plt.legend(['loss', 'accuracy'])
    plt.show()
def train():
  """
  Performs training and evaluation of MLP model. 

  TODO:
  Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)
  torch.manual_seed(42)
  
  ## Prepare all functions
  # Get number of units in each hidden layer specified in the string such as 100,100
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
    dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
  else:
    dnn_hidden_units = []

  ########################
  # PUT YOUR CODE HERE  #
  #######################
  net = MLP(3072, dnn_hidden_units, 10)
  net.to(device)
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(net.parameters(), lr = FLAGS.learning_rate)

  #Load cifar10
  cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
  print()
  print()
  print("----------------------------------------------")
  print("\t \t Training")
  print("----------------------------------------------\n")
  pl_loss = []
  average_loss = []
  acc = []
  for iter_ in np.arange(0, FLAGS.max_steps):

    #Load batches 
    x , y = cifar10['train'].next_batch(FLAGS.batch_size)
    
    labels = np.argmax(y, axis=1)
    
    #reshape x into vectors
    x = np.reshape(x, (FLAGS.batch_size, 3072))
    inputs, labels = torch.from_numpy(x), torch.from_numpy(labels).long()
    
    inputs, labels = inputs.to(device), labels.to(device)

    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # print statistics
    running_loss = loss.item()
    pl_loss.append(running_loss)
    # moving average over the last 100 batch losses
    average_loss.append(np.mean(pl_loss[-100:]))
    print("iter: {} | training loss: {} ".format(iter_,"%.3f"%running_loss))

    
    if (iter_+1)%FLAGS.eval_freq==0:
      net.eval()
      acc.append(evaluate(net, cifar10, FLAGS.batch_size))
      net.train()  # switch back to training mode after evaluation

  #######################
  # END OF YOUR CODE    #
  #######################
  
  plt.plot(pl_loss,'r-', label="Batch loss", alpha=0.5)
  plt.plot(average_loss,'g-', label="Average loss", alpha=0.5)
  plt.legend()
  plt.xlabel("Iterations")
  plt.ylabel("Loss")
  plt.title("Training Loss")
  plt.grid(True)
  plt.show()
  plt.close()

  plt.plot(acc,'g-', alpha=0.5)
  plt.xlabel("Iterations")
  plt.ylabel("Accuracy")
  plt.title("Test Accuracy")
  plt.grid(True)
  plt.show()
  plt.close()
  print()
  print("TRAINING COMPLETED") 
Example #3
def train():
    """
  Performs training and evaluation of MLP model. 

  TODO:
  Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
  """
    
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    
    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []
    
    ########################
    # PUT YOUR CODE HERE  #
    #######################

    ############################## VARIABLES ##############################
    
    SAVE_PLOTS = False
    SAVE_LOGS = False
    
    img_size = 32
    n_classes = 10
    input_size = img_size * img_size * 3
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    n_iterations = FLAGS.max_steps
    lr_rate = FLAGS.learning_rate

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device:", device)

    ############################## METHODS ##############################
    
    # fp = open('memory_profiler_basic_mean.log', 'w+')
    # @profile(stream=fp)
    def test():
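        # evaluate on the full test set; x_t, y_t and y_t_onehot are prepared once in the MAIN section below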
        net.eval()
        
        output_t = net(x_t)
        loss_t = criterion(output_t, y_t).detach()
        acc_t = accuracy(output_t.detach(), y_t_onehot)
        
        return acc_t, loss_t
    
    def plot(iteration):
        idx_test = list(range(0, iteration + 1, eval_freq))
        idx = list(range(0, iteration + 1))
        
        plt.clf()
        plt.cla()
        plt.subplot(1, 2, 1)
        plt.plot(idx_test, test_accuracies, "k-", linewidth=1, label="test")
        plt.plot(idx, accuracies, "r-", linewidth=0.5, alpha=0.5, label="train")
        plt.xlabel('iteration')
        plt.ylabel('accuracy')
        plt.legend()
        
        plt.subplot(1, 2, 2)
        plt.plot(idx_test, test_losses, "k-", linewidth=1, label="test")
        plt.plot(idx, losses, "r-", linewidth=0.5, alpha=0.5, label="train")
        plt.xlabel('iteration')
        plt.ylabel('loss')
        plt.legend()
        plt.savefig("./out/plot/plot_pytorch_" + str(batch_size) + "_" + str(lr_rate) + ".png", bbox_inches='tight')
        return
    
    def to_label(tensor):
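        # convert one-hot targets to class-index labels (argmax over the class dimension)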
        _, tensor = tensor.max(1)
        return tensor

    ############################## MAIN ##############################
    
    cifar10 = cifar10_utils.get_cifar10('cifar10/cifar-10-batches-py')
    
    net = MLP(input_size, dnn_hidden_units, n_classes)
    net.to(device)
    
    criterion = nn.CrossEntropyLoss()
    
    # optimizer = optim.SGD(net.parameters(), lr=lr_rate, momentum=0.8, nesterov=False)
    optimizer = optim.Adam(net.parameters(), lr=lr_rate)
    
    losses = []
    accuracies = []
    test_accuracies = []
    test_losses = []
    alpha = 0.0001

    x_t = cifar10['test'].images
    y_t = cifar10['test'].labels
    x_t = torch.from_numpy(x_t.reshape(-1, input_size))
    y_t_onehot = torch.from_numpy(y_t).type(torch.LongTensor)
    y_t = to_label(y_t_onehot)
    x_t, y_t = x_t.to(device), y_t.to(device)
    y_t_onehot = y_t_onehot.to(device)

    plt.figure(figsize=(10, 4))
    
    for i in range(n_iterations):
        
        x, y = cifar10['train'].next_batch(batch_size)
        x = torch.from_numpy(x.reshape(-1, input_size))
        y_onehot = torch.from_numpy(y).type(torch.LongTensor)
        y = to_label(y_onehot)
        x, y = x.to(device), y.to(device)
        y_onehot = y_onehot.to(device)
        
        optimizer.zero_grad()
        output = net(x)
        train_loss = criterion(output, y)

        reg_loss = 0
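        # manually accumulate the L2 norm of every parameter; alpha scales this penalty in the total loss below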
        for param in net.parameters():
            reg_loss += param.norm(2)
            
        loss = train_loss + alpha * reg_loss
        loss.backward()
        optimizer.step()
        
        losses.append(loss.item())
        accuracies.append(accuracy(output.detach().data, y_onehot.detach()))
        
        del x, y
        
        if i % eval_freq == 0:
            acc_t, loss_t = test()
            test_accuracies.append(acc_t)
            test_losses.append(loss_t.item())
            
            log_string = "[{:5d}/{:5d}] Test Accuracy: {:.4f} | Batch Accuracy: {:.4f} | Batch Loss: {:.6f} | Train/Reg: {:.6f}/{:.6f}\n".format(
                i, n_iterations, test_accuracies[-1], accuracies[-1], loss, train_loss, reg_loss * alpha
            )
            print(log_string)
            
            if SAVE_LOGS:
                with open("./out/log/pytorch_log_" + str(batch_size) + "_" + str(lr_rate) + ".txt", "a") as myfile:
                    myfile.write(log_string)

            if SAVE_PLOTS:
                plot(i)

            net.train()
Example #4
def train():
    """
    Performs training and evaluation of MLP model. 

    TODO:
    Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
    """

    # DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    def reshape_cifar10_mlp(x):
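        # flatten each image to a vector while keeping the batch dimension first: (N, C, H, W) -> (N, H*W*C)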
        batch_size = x.shape[0]
        x = x.transpose([2, 3, 1, 0])
        x = x.reshape([-1, batch_size])
        x = x.transpose()
        return x

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    x_train, y_train = cifar10['train'].next_batch(FLAGS.batch_size)
    x_train = reshape_cifar10_mlp(x_train)
    x_train = torch.from_numpy(x_train).to(device)
    y_train = torch.from_numpy(y_train).to(device)

    crossent_softmax = nn.CrossEntropyLoss()
    mlp = MLP(x_train.shape[1], dnn_hidden_units, y_train.shape[1], bn_flag=True)
    # optimizer = torch.optim.SGD(mlp.parameters(), lr=FLAGS.learning_rate)
    optimizer = torch.optim.Adam(mlp.parameters(), weight_decay=1e-3)
    mlp.to(device)

    train_accs = []
    train_losses = []
    eval_accs = []
    eval_losses = []
    for i in np.arange(FLAGS.max_steps):
        print('\nStep: {}\n'.format(i))
        print('Training: ')
        optimizer.zero_grad()
        logits = mlp(x_train)
        train_loss = crossent_softmax(logits, y_train.argmax(dim=-1))
        train_acc = accuracy(logits, y_train)
        print('loss: {:.4f}, acc: {:.4f}\n'.format(train_loss, train_acc))

        train_loss.backward()
        optimizer.step()

        x_train, y_train = cifar10['train'].next_batch(FLAGS.batch_size)
        x_train = reshape_cifar10_mlp(x_train)
        x_train = torch.from_numpy(x_train).to(device)
        y_train = torch.from_numpy(y_train).to(device)
        if i % FLAGS.eval_freq == 0:
            mlp.eval()  # put the model (built with bn_flag=True) into evaluation mode
            with torch.no_grad():
                print('Evaluation: ')
                x_eval, y_eval = cifar10['test'].images, cifar10['test'].labels
                x_eval = reshape_cifar10_mlp(x_eval)
                x_eval = torch.from_numpy(x_eval).to(device)
                y_eval = torch.from_numpy(y_eval).to(device)

                logits = mlp(x_eval)
                eval_loss = crossent_softmax(logits, y_eval.argmax(dim=-1))
                eval_acc = accuracy(logits, y_eval)

                # store plain floats so the plots below do not hold onto CUDA tensors
                train_losses.append(train_loss.item())
                train_accs.append(train_acc)
                eval_losses.append(eval_loss.item())
                eval_accs.append(eval_acc)
                print('loss: {:.4f}, acc: {:.4f}'.format(eval_loss, eval_acc))
            mlp.train()  # back to training mode for the next steps
    print('Evaluation: ')
    x_eval, y_eval = cifar10['test'].images, cifar10['test'].labels
    x_eval = reshape_cifar10_mlp(x_eval)
    x_eval = torch.from_numpy(x_eval).to(device)
    y_eval = torch.from_numpy(y_eval).to(device)

    logits = mlp(x_eval)
    eval_loss = crossent_softmax(logits, y_eval.argmax(dim=-1))
    eval_acc = accuracy(logits, y_eval)

    train_losses.append(train_loss.item())
    train_accs.append(train_acc)
    eval_losses.append(eval_loss.item())
    eval_accs.append(eval_acc)
    print('loss: {:.4f}, acc: {:.4f}'.format(eval_loss, eval_acc))

    print('Finished training.')

    plt.figure(figsize=(10, 5))
    plt.plot(np.arange(len(train_losses)), train_losses, label='training loss')
    plt.plot(np.arange(len(eval_losses)), eval_losses, label='evaluation loss')
    plt.ylim(0, 3)
    plt.legend()
    plt.xlabel('Iterations [x{}]'.format(FLAGS.eval_freq))
    plt.savefig('results/mlp_loss_torch_adam_layers_maxstep_reg_batch.png', bbox_inches='tight')

    plt.figure(figsize=(10, 5))
    plt.plot(np.arange(len(train_accs)), train_accs, label='training accuracy')
    plt.plot(np.arange(len(eval_accs)), eval_accs, label='evaluation accuracy')
    plt.legend()
    plt.xlabel('Iterations [x{}]'.format(FLAGS.eval_freq))
    plt.savefig('results/mlp_acc_torch_adam_layers_maxstep_reg_batch.png', bbox_inches='tight')
Example #5
def train():
  """
  Performs training and evaluation of MLP model.

  TODO:
  Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)
  torch.manual_seed(42)
  # torch.backends.cudnn.deterministic = True
  # torch.backends.cudnn.benchmark = False

  ## Prepare all functions
  # Get number of units in each hidden layer specified in the string such as 100,100
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
    dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
  else:
    dnn_hidden_units = []

  # Get negative slope parameter for LeakyReLU
  neg_slope = FLAGS.neg_slope
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  # print("[DEBUG], Device ", device)

  ########################
  # PUT YOUR CODE HERE  #
  #######################
  cifar10 = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
  train_data = cifar10['train']

  # flatten images: (N, 3, 32, 32) -> (N, 3072); each input vector has 3072 features
  n_inputs = train_data.images.reshape(train_data.images.shape[0], -1).shape[1]
  n_hidden = dnn_hidden_units
  n_classes = train_data.labels.shape[1]

  # print(f"[DEBUG] n_inputs {n_inputs}, n_classes {n_classes}")

  model = MLP(n_inputs, n_hidden, n_classes, FLAGS.neg_slope)
  model.to(device)

  params = model.parameters()

  if FLAGS.optimizer == 'Adam':
    optimizer = torch.optim.Adam(params, lr=FLAGS.learning_rate)
  elif FLAGS.optimizer == 'Adamax':
    optimizer = torch.optim.Adamax(params, lr=FLAGS.learning_rate)
  elif FLAGS.optimizer == 'Adagrad':
    optimizer = torch.optim.Adagrad(params, lr=FLAGS.learning_rate)
  elif FLAGS.optimizer == 'Adadelta':
    optimizer = torch.optim.Adadelta(params, lr=FLAGS.learning_rate)
  elif FLAGS.optimizer == 'SparseAdam':
    optimizer = torch.optim.SparseAdam(params, lr=FLAGS.learning_rate)
  else:
    optimizer = torch.optim.SGD(params,lr=FLAGS.learning_rate)


  criterion = torch.nn.CrossEntropyLoss()
  train_acc_plot = []
  test_acc_plot = []
  loss_train = []
  loss_test = []
  rloss = 0
  best_accuracy = 0
  # print('[DEBUG] start training')

  for i in range(0, FLAGS.max_steps):
    x, y = cifar10['train'].next_batch(FLAGS.batch_size)
    x, y = torch.from_numpy(x).float().to(device) , torch.from_numpy(y).float().to(device)
    x = x.reshape(x.shape[0], -1)

    out = model.forward(x)
    loss = criterion.forward(out, y.argmax(1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    rloss += loss.item()

    if i % FLAGS.eval_freq == 0:
      train_accuracy =  accuracy(out, y)
      with torch.no_grad():
        test_accuracys, test_losses = [], []
        # iterate over the whole test set once rather than over max_steps batches
        n_test_batches = cifar10['test'].images.shape[0] // FLAGS.batch_size
        for j in range(n_test_batches):
          test_x, test_y = cifar10['test'].next_batch(FLAGS.batch_size)
          test_x, test_y = torch.from_numpy(test_x).float().to(device) , torch.from_numpy(test_y).float().to(device)

          test_x = test_x.reshape(test_x.shape[0], -1)

          test_out  = model.forward(test_x)
          test_loss = criterion(test_out, test_y.argmax(1))
          test_accuracy = accuracy(test_out, test_y)
          test_losses.append(test_loss.item())  # .item() works for both CPU and GPU tensors

          test_accuracys.append(test_accuracy)
        t_acc = np.array(test_accuracys).mean()
        t_loss = np.array(test_losses).mean()
        train_acc_plot.append(train_accuracy)
        test_acc_plot.append(t_acc)
        loss_train.append(rloss/(i + 1))
        loss_test.append(t_loss)
        print(f"iter {i}, train_loss_avg {rloss/(i + 1)}, test_loss_avg {t_loss}, train_acc {train_accuracy}, test_acc_avg {t_acc}")
        if t_acc > best_accuracy:
          best_accuracy = t_acc

  print(f"Best Accuracy {best_accuracy}",flush=True)
  if FLAGS.plot:
    print('Start plotting...')
    fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
    ax1.plot(np.arange(len(train_acc_plot)), train_acc_plot, label='training')
    ax1.plot(np.arange(len(test_acc_plot)), test_acc_plot, label='testing')
    ax1.set_title('Training evaluation batch size '+str(FLAGS.batch_size)+' learning rate '+str(FLAGS.learning_rate)+ '\n best accuracy '+str(best_accuracy) )
    ax1.set_ylabel('Accuracy')
    ax1.legend()
    ax2.plot(np.arange(len(loss_train)), loss_train, label='Train Loss')
    ax2.plot(np.arange(len(loss_test)), loss_test, label='Test Loss')
    ax2.set_title('Loss evaluation')
    ax2.set_ylabel('Loss')
    ax2.legend()
    plt.xlabel('Iteration')
    plt.savefig('pytorch.png')
def train():
    """
    Performs training and evaluation of MLP model.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################

    # load the test data
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir, one_hot=False)
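    # one_hot=False returns class-index labels, which nn.CrossEntropyLoss expects directly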
    test_images, test_labels = torch.from_numpy(cifar10['test'].images).to(device), \
                               torch.from_numpy(cifar10['test'].labels).to(device)

    # flatten the images for the MLP
    test_vectors = reshape_images(test_images)

    # set up the model
    mlp_model = MLP(3072, dnn_hidden_units, 10)
    mlp_model.to(device)
    loss_module = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(mlp_model.parameters(),
                                 lr=FLAGS.learning_rate)

    accuracies = []
    losses = []
    mlp_model.train()
    for i in range(FLAGS.max_steps):

        # load data
        images, labels = cifar10['train'].next_batch(FLAGS.batch_size)
        image_vectors = reshape_images(images)
        image_vectors, labels = torch.from_numpy(
            image_vectors), torch.from_numpy(labels)
        image_vectors, labels = image_vectors.to(device), labels.to(device)

        # forward pass
        model_pred = mlp_model(image_vectors)

        # calculate the loss
        loss = loss_module(model_pred, labels)

        # backward pass
        optimizer.zero_grad()
        loss.backward()

        # update the parameters
        optimizer.step()

        # evaluate the model on the data set every eval_freq steps
        mlp_model.eval()
        if i % FLAGS.eval_freq == 0:
            with torch.no_grad():
                test_pred = mlp_model(test_vectors)
                test_accuracy = accuracy(test_pred, test_labels)
                accuracies.append(test_accuracy)
                losses.append(loss.item())

        mlp_model.train()

    plot_curve(accuracies, 'Accuracy')
    plot_curve(losses, 'Loss')
def train():
    """
    Performs training and evaluation of MLP model. 
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('device', device)

    # flags
    batch_size = FLAGS.batch_size
    optim = FLAGS.optimizer
    lr = FLAGS.learning_rate

    # cifar
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    x_test_np, y_test_np = cifar10['test'].images, cifar10['test'].labels
    (test_images, height, width, colors) = x_test_np.shape
    n_inputs = height * width * colors
    (_, n_classes) = y_test_np.shape

    # prepare test tensors
    x_test_flat = x_test_np.reshape((test_images, n_inputs))
    x_test_torch = torch.from_numpy(x_test_flat).to(device)
    y_test_torch = torch.from_numpy(y_test_np).long().to(device)
    idx_test = torch.argmax(y_test_torch, dim=-1).long()

    # model
    ce = torch.nn.CrossEntropyLoss()
    model = MLP(n_inputs, dnn_hidden_units, n_classes)
    model.to(device)
    pars = model.parameters()

    # optimizer
    optim_pars = {'params': pars, 'lr': lr, 'weight_decay': FLAGS.weight_decay}
    if optim == 'adadelta':
        optimizer = torch.optim.Adadelta(**optim_pars)
    elif optim == 'adagrad':
        optimizer = torch.optim.Adagrad(**optim_pars)
    elif optim == 'rmsprop':
        optimizer = torch.optim.RMSprop(**optim_pars)
    elif optim == 'adam':
        optimizer = torch.optim.Adam(**optim_pars)
    else:
        # default is SGD, same as the numpy version
        optimizer = torch.optim.SGD(**optim_pars)

    cols = ['train_acc', 'test_acc', 'train_loss', 'test_loss', 'secs']

    # train
    results = []
    name = f'mlp-pytorch-{optim}'
    with SummaryWriter(name) as w:
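        # log metrics to TensorBoard under `name`; the writer is closed automatically when the block exits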
        for step in tqdm(range(FLAGS.max_steps)):
            # print(step)
            optimizer.zero_grad()

            # batch
            x_train_np, y_train_np = cifar10['train'].next_batch(batch_size)
            x_train_flat = x_train_np.reshape((batch_size, n_inputs))
            x_train_torch = torch.from_numpy(x_train_flat).to(device)
            y_train_torch = torch.from_numpy(y_train_np).long().to(device)
            idx_train = torch.argmax(y_train_torch, dim=-1).long()

            # results
            train_predictions = model.forward(x_train_torch)
            train_loss = ce(train_predictions, idx_train)
            train_acc = accuracy(train_predictions, idx_train)

            # evaluate
            if step % FLAGS.eval_freq == 0:
                time = int(step / FLAGS.eval_freq)
                start = timer()
                test_predictions = model.forward(x_test_torch)
                end = timer()
                secs = end - start
                test_loss = ce(test_predictions, idx_test)
                test_acc = accuracy(test_predictions, idx_test)
                vals = [train_acc, test_acc, train_loss, test_loss, secs]
                # np.asscalar is removed in recent NumPy; .item() covers both torch tensors and numpy scalars
                stats = dict(
                    zip(cols, [
                        v.item() if isinstance(v, (torch.Tensor, np.ndarray, np.generic)) else v
                        for v in vals
                    ]))
                # print(yaml.dump({k: round(i, 3) if isinstance(i, float) else i for k, i in stats.items()}))
                print(test_acc.item())
                w.add_scalars('metrics', stats, time)
                results.append(stats)

                # stop if loss has converged!
                check = 10
                if len(results) >= 2 * check:
                    threshold = 1e-6
                    losses = [item['train_loss'] for item in results]
                    current = np.mean(losses[-check:])
                    prev = np.mean(losses[-2 * check:-check])
                    if (prev - current) < threshold:
                        break

            train_loss.backward()
            optimizer.step()

        # w.add_scalars('metrics', stats)

    df = pd.DataFrame(results, columns=cols)
    meta = {
        'framework': 'pytorch',
        'algo': 'mlp',
        'optimizer': optim,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate,
        'dnn_hidden_units': FLAGS.dnn_hidden_units,
        'weight_decay': FLAGS.weight_decay,
        'max_steps': FLAGS.max_steps,
    }
    for k, v in meta.items():
        df[k] = v
    csv_file = os.path.join(
        os.getcwd(), 'results',
        f'{name}-batch={FLAGS.batch_size}-lr={FLAGS.learning_rate}-hidden={FLAGS.dnn_hidden_units}-regularization={FLAGS.weight_decay}-steps={FLAGS.max_steps}.csv'
    )
    df.to_csv(csv_file)
    csv_file = os.path.join(os.getcwd(), 'results', 'results.csv')
    if os.path.isfile(csv_file):
        df.to_csv(csv_file, header=False, mode='a')
    else:
        df.to_csv(csv_file, header=True, mode='w')
    torch_file = os.path.join(os.getcwd(), 'results', f'{name}.pth')
    torch.save(model.state_dict(), torch_file)
    print('done!')
    return test_loss
Example #8
def train():
    """
    Performs training and evaluation of MLP model.

    TODO:
    Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    torch.manual_seed(42)
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    device = torch.device("cuda")

    mlp = MLP(IMAGE_SIZE, dnn_hidden_units, 10)
    mlp.to(device)
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

    # get all train data
    x_train, y_train = cifar10["train"].images, cifar10["train"].labels
    x_train = x_train.reshape((x_train.shape[0], IMAGE_SIZE))

    x_train = torch.from_numpy(x_train).to(device)
    y_train = y_train.argmax(axis=1)
    y_train = torch.from_numpy(y_train).to(device).type(torch.long)

    # get test data
    x_test, y_test = cifar10["test"].images, cifar10["test"].labels
    x_test = x_test.reshape((x_test.shape[0], IMAGE_SIZE))

    inputs_test = torch.from_numpy(x_test).to(device)
    y_test = y_test.argmax(axis=1)
    targets_test = torch.from_numpy(y_test).to(device).type(torch.long)

    # define a loss function and an optimizer, as mentioned in the official framework's tutorial
    #  https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#sphx-glr-beginner-blitz-cifar10-tutorial-py
    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(mlp.parameters(), lr=5e-2, weight_decay=2e-3, momentum=0.8)

    losses_test = []
    losses_train = []
    accuracies_test = []
    accuracies_train = []
    for i in range(FLAGS.max_steps):
        # getting batch for forwarding
        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        x = x.reshape((FLAGS.batch_size, IMAGE_SIZE))

        class_indexes_targets = y.argmax(axis=1)  # CrossEntropyLoss expects class indices, not one-hot targets
        # making a tensor which pytorch can work with
        inputs = torch.from_numpy(x).to(device)
        targets = torch.from_numpy(class_indexes_targets).to(device).type(torch.long)

        # making gradients zero, framework tutorial states if we don't do so the gradients at each step will just add
        optimizer.zero_grad()

        predictions = mlp.forward(inputs)  # forwarding batch into the network
        loss = loss_function.forward(predictions, targets)  # calculating the Cross Entropy loss
        loss.backward()  # backwards the loss into the net, updating gradients

        optimizer.step()  # updating the weights

        if i % FLAGS.eval_freq == 0 or i == FLAGS.max_steps - 1:
            # evaluate on Test
            forward_test = mlp.forward(inputs_test)

            acc = accuracy(forward_test, targets_test)
            accuracies_test.append(acc.item())

            loss_test = loss_function.forward(forward_test, targets_test)
            losses_test.append(loss_test.item())
            print("TEST loss:" + str(round(losses_test[-1], 2)) + " acc:" + str(
                round(accuracies_test[-1], 2)) + " model:" + str(i))

            # evaluate on Train
            forward_train = mlp.forward(x_train)

            acc_train = accuracy(forward_train, y_train)
            accuracies_train.append(acc_train.item())

            loss_train = loss_function.forward(forward_train, y_train)
            losses_train.append(loss_train.item())
            print("TRAIN loss:" + str(round(losses_train[-1], 2)) + " acc:" + str(
                round(accuracies_train[-1], 2)) + " model:" + str(i))

    with open('../results/torch_mlp.pkl', 'wb') as f:
        mlp_data = dict()
        mlp_data["train_loss"] = losses_train
        mlp_data["test_loss"] = losses_test
        mlp_data["train_acc"] = accuracies_train
        mlp_data["test_acc"] = accuracies_test
        pk.dump(mlp_data, f)

    x = [i * FLAGS.eval_freq for i in range(len(accuracies_test))]
    plt.title("Torch MLP loss accuracy")
    plt.plot(x, accuracies_test, label="accuracy")
    plt.plot(x, losses_test, label="loss")
    plt.legend()
    plt.savefig("../results/pytorch_mlp.png")
    plt.show()
Example #9
def train():
    """
  Performs training and evaluation of MLP model.

  TODO:
  Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
  """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    cifar10 = cifar10_utils.get_cifar10()

    if torch.cuda.is_available():
        # print(torch.device('cpu'), torch.device("cuda"))
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    network = MLP(3072, dnn_hidden_units, 10)
    network.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(network.parameters(),
                           lr=FLAGS.learning_rate)  #, weight_decay=1/(200*9))
    # optimizer = optim.RMSprop(network.parameters(), lr=FLAGS.learning_rate)
    # optimizer = optim.SGD(network.parameters(), lr=FLAGS.learning_rate)

    # print(FLAGS.batch_size)
    # print(FLAGS.eval_freq)
    # print(FLAGS.learning_rate)
    # print(FLAGS.max_steps)

    plotting_accuracy = []
    plotting_loss = []
    plotting_accuracy_test = []
    plotting_loss_test = []

    for i in range(1, FLAGS.max_steps - 1):

        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x)
        y = torch.from_numpy(y)
        x = x.to(device)
        y = y.to(device)

        x = x.view(FLAGS.batch_size, -1)

        out = network.forward(x)
        loss = criterion(out, y.argmax(dim=1))
        # print("Batch: {} Loss {}".format(i, loss))
        # acc = accuracy(out, y)
        # print("Accuracy: {}".format(acc))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # learning_rate = 0.01
        # for f in network.parameters():
        #     f.data.sub_(f.grad.data * learning_rate)

        # if (i % FLAGS.eval_freq == 0):
        #     # print("TRAIN Batch: {} Loss {}".format(i, loss.item()))
        #     acc = accuracy(out, y)
        #     print("TRAIN Accuracy: {}".format(acc))
        #     plotting_accuracy.append(acc)
        #     plotting_loss.append(loss.item())
        #
        #     x, y = cifar10['test'].next_batch(5000)
        #     x = torch.from_numpy(x)
        #     y = torch.from_numpy(y)
        #     x = x.to(device)
        #     y = y.to(device)
        #     x = x.view(5000, -1)
        #     out = network.forward(x)
        #     loss = criterion(out, y.argmax(dim=1))
        #     # print("TEST Batch: {} Loss {}".format(i, loss))
        #     acc = accuracy(out, y)
        #     print("TEST Accuracy: {}".format(acc))
        #     # print(loss.item())
        #     # print(asdasd)
        #     plotting_accuracy_test.append(acc)
        #     plotting_loss_test.append(loss.item())

        if (i == FLAGS.max_steps - FLAGS.eval_freq):
            # final evaluation: accuracy on the last training batch and on a 5000-image test batch
            acc = accuracy(out, y)
            print("TRAIN Accuracy: {}".format(acc))
            train_accuracy = acc
            train_loss = loss.item()

            x, y = cifar10['test'].next_batch(5000)
            x = torch.from_numpy(x)
            y = torch.from_numpy(y)
            x = x.to(device)
            y = y.to(device)
            x = x.view(5000, -1)
            out = network.forward(x)
            loss = criterion(out, y.argmax(dim=1))
            acc = accuracy(out, y)
            print("TEST Accuracy: {}".format(acc))

            test_accuracy = acc
            test_loss = loss.item()

            with open('MLP_results.csv', 'a') as output_file:
                writer = csv.writer(output_file)
                writer.writerow([
                    FLAGS.dnn_hidden_units, FLAGS.learning_rate,
                    train_accuracy, train_loss, test_accuracy, test_loss
                ])
Example #10
def train():
    """
  Performs training and evaluation of MLP model. 

  TODO:
  Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
  """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################

    #load data
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

    #hyperparameters
    eta = FLAGS.learning_rate
    eps = 1e-6  # convergence criterion
    max_steps = FLAGS.max_steps
    b_size = FLAGS.batch_size

    #load test data
    x_test = cifar10["test"].images
    y_test = cifar10["test"].labels
    y_test = torch.tensor(y_test, requires_grad=False).type(dtype).to(device)

    #get usefull dimensions
    n_inputs = np.size(x_test, 0)
    n_classes = np.size(y_test, 1)
    v_size = np.size(x_test, 1) * np.size(x_test, 2) * np.size(x_test, 3)
    n_test_batches = np.size(x_test, 0) // b_size

    # x_test = x_test.reshape((n_inputs, v_size))
    # x_test = torch.tensor(x_test, requires_grad=False).type(dtype).to(device)

    #load whole train data ############################################################
    x_train = cifar10["train"].images
    x_train = x_train.reshape((np.size(x_train, 0), v_size))
    x_train = torch.tensor(x_train, requires_grad=False).type(dtype).to(device)
    n_train_batches = np.size(x_train, 0) // b_size

    #initialize the MLP model
    model = MLP(n_inputs=v_size,
                n_hidden=dnn_hidden_units,
                n_classes=n_classes,
                b_norm=FLAGS.b_norm)
    get_loss = torch.nn.CrossEntropyLoss()

    if FLAGS.optimizer == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=eta)
    elif FLAGS.optimizer == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=eta)

    model.to(device)

    train_loss = []
    test_loss = []
    train_acc = []
    test_acc = []

    for step in range(max_steps):
        #get batch
        x, y = cifar10['train'].next_batch(b_size)
        y = torch.tensor(y).type(dtype).to(device)

        #stretch input images into vectors
        x = x.reshape(b_size, v_size)
        x = torch.tensor(x).type(dtype).to(device)

        #forward pass
        pred = model.forward(x)

        #get training loss
        current_loss = get_loss(pred, y.argmax(dim=1))
        optimizer.zero_grad()

        #get training loss gradient
        current_loss.backward()

        # #get training accuracy
        # current_train_acc = accuracy(pred, y)

        optimizer.step()

        # drop references so the computation graph and batch tensors can be freed
        del pred, x, y

        #select evaluation step
        if (step % FLAGS.eval_freq) == 0:

            # c_train_loss = current_loss.data.item()
            # train_loss.append(c_train_loss)
            # train_acc.append(current_train_acc)
            #
            c_train_loss = 0
            current_train_acc = 0

            c_test_loss = 0
            current_test_acc = 0

            #loop through train set in batches ######################################################
            for test_batch in range(n_train_batches):
                #load test data
                x_train, y_train = cifar10['train'].next_batch(b_size)
                y_train = torch.tensor(
                    y_train, requires_grad=False).type(dtype).to(device)

                #stretch input images into vectors
                x_train = x_train.reshape(b_size, v_size)
                x_train = torch.tensor(
                    x_train, requires_grad=False).type(dtype).to(device)

                #get test batch results
                train_pred = model.forward(x_train)
                current_train_loss = get_loss(train_pred,
                                              y_train.argmax(dim=1))

                c_train_loss += current_train_loss.data.item()
                current_train_acc += accuracy(train_pred, y_train)

                # drop references to this batch
                del train_pred, x_train, y_train

            #loop through entire test set in batches
            for test_batch in range(n_test_batches):
                #load test data
                x_test, y_test = cifar10['test'].next_batch(b_size)
                y_test = torch.tensor(
                    y_test, requires_grad=False).type(dtype).to(device)

                #stretch input images into vectors
                x_test = x_test.reshape(b_size, v_size)
                x_test = torch.tensor(x_test).type(dtype).to(device)

                #get test batch results
                test_pred = model.forward(x_test)
                current_test_loss = get_loss(test_pred, y_test.argmax(dim=1))

                c_test_loss += current_test_loss.data.item()
                current_test_acc += accuracy(test_pred, y_test)

                # drop references to this batch
                del test_pred, x_test, y_test

            #get full training set results #########################################################
            c_train_loss = c_train_loss / n_train_batches
            current_train_acc = current_train_acc / n_train_batches
            train_loss.append(c_train_loss)
            train_acc.append(current_train_acc)

            #get full test set results
            c_test_loss = c_test_loss / n_test_batches
            current_test_acc = current_test_acc / n_test_batches
            test_loss.append(c_test_loss)
            test_acc.append(current_test_acc)

            if not FLAGS.optimize:
                print('\nStep ', step, '\n------------\nTraining Loss = ',
                      round(c_train_loss,
                            4), ', Train Accuracy = ', current_train_acc,
                      '\nTest Loss = ', round(c_test_loss, 4),
                      ', Test Accuracy = ', current_test_acc)

            if step > 0 and abs(test_loss[(int(step / FLAGS.eval_freq))] -
                                test_loss[int(step / FLAGS.eval_freq) -
                                          1]) < eps:
                break

    if not FLAGS.optimize:
        plot_graphs(train_loss,
                    'Training Loss',
                    'orange',
                    test_loss,
                    'Test Loss',
                    'blue',
                    title='Stochastic gradient descent',
                    ylabel='Loss',
                    xlabel='Steps')

        plot_graphs(train_acc,
                    'Training Accuracy',
                    'darkorange',
                    test_acc,
                    'Test Accuracy',
                    'darkred',
                    title='Stochastic gradient descent',
                    ylabel='Accuracy',
                    xlabel='Steps')

        #save results:
        path = "./results/pytorch results/"
        np.save(path + 'train_loss', train_loss)
        np.save(path + 'train_acc', train_acc)
        np.save(path + 'test_loss', test_loss)
        np.save(path + 'test_acc', test_acc)

    return train_loss, test_loss, train_acc, test_acc
def train():
    """
  Performs training and evaluation of MLP model. 

  TODO:
  Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
  """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    output_dir = FLAGS.output_dir
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    learning_rate = FLAGS.learning_rate
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    data_dir = FLAGS.data_dir
    no_write = FLAGS.no_write == 1

    # Obtain dataset
    dataset = cifar10_utils.get_cifar10(data_dir)
    n_inputs = dataset['train'].images[0].reshape(-1).shape[0]
    n_classes = dataset['train'].labels[0].shape[0]
    n_test = dataset['test'].images.shape[0]

    # Initialise MLP
    dev = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(dev)
    print("Device: " + dev)
    net = MLP(n_inputs, dnn_hidden_units, n_classes).to(device)
    loss_fn = F.cross_entropy
    print("Network architecture:\n\t{}\nLoss module:\n\t{}".format(
        str(net), str(loss_fn)))

    # Evaluation vars
    train_loss = []
    gradient_norms = []
    train_acc = []
    test_acc = []
    iteration = 0

    # Training
    optimizer = optim.SGD(net.parameters(), lr=learning_rate)
    while iteration < max_steps:
        iteration += 1

        # Sample a mini-batch
        x, y = dataset['train'].next_batch(batch_size)
        x = torch.from_numpy(x.reshape((batch_size, -1))).to(device)
        y = torch.from_numpy(y).argmax(dim=1).long().to(device)

        # Forward propagation
        prediction = net.forward(x)
        loss = loss_fn(prediction, y)
        acc = accuracy(prediction, y)
        train_acc.append((iteration, acc.tolist()))
        train_loss.append((iteration, loss.tolist()))

        # Backprop
        optimizer.zero_grad()
        loss.backward()

        # Weight update in linear modules
        optimizer.step()
        with torch.no_grad():
            # accumulate the squared L2 norm of the parameter gradients (plotted below as "Gradient norm")
            norm = 0
            for params in net.parameters():
                if params.grad is not None:
                    norm += params.grad.reshape(-1).pow(2).sum()
            gradient_norms.append((iteration, float(norm) ** 0.5))

            # Evaluation
            if iteration % eval_freq == 0:
                x = torch.from_numpy(dataset['test'].images.reshape(
                    (n_test, -1))).to(device)
                y = torch.from_numpy(
                    dataset['test'].labels).argmax(dim=1).long().to(device)
                prediction = net.forward(x)
                acc = accuracy(prediction, y)
                test_acc.append((iteration, acc.tolist()))
                print("Iteration: {}\t\tTest accuracy: {}".format(
                    iteration, acc))

    # Save or return raw output
    metrics = {
        "train_loss": train_loss,
        "gradient_norms": gradient_norms,
        "train_acc": train_acc,
        "test_acc": test_acc
    }
    raw_data = {"net": net.to(torch.device('cpu')), "metrics": metrics}
    if no_write:
        return raw_data
    # Save
    now = datetime.datetime.now()
    time_stamp = "{}{}{}{}{}".format(now.year, now.month, now.day, now.hour,
                                     now.minute)
    net_name = "torchnet"
    out_dir = os.path.join(output_dir, net_name, time_stamp)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    pickle.dump(raw_data, open(os.path.join(out_dir, "torch_raw_data"), "wb"))

    # Save plots
    # Loss
    fig, ax = plt.subplots()
    iter = [i for (i, q) in train_loss]
    loss = [q for (i, q) in train_loss]
    ax.plot(iter, loss)
    ax.set(xlabel='Iteration',
           ylabel='Loss (log)',
           title='Batch training loss')
    ax.set_yscale('log')
    ax.grid()
    fig.savefig(os.path.join(out_dir, "torch_loss.png"))
    # gradient norm
    fig, ax = plt.subplots()
    iter = [i for (i, q) in gradient_norms]
    norm = [q for (i, q) in gradient_norms]
    ax.plot(iter, norm)
    ax.set(xlabel='Iteration', ylabel='Norm', title='Gradient norm')
    ax.grid()
    fig.savefig(os.path.join(out_dir, "torch_gradient_norm.png"))
    # accuracies
    fig, ax = plt.subplots()
    iter = [i for (i, q) in train_acc]
    accu = [q for (i, q) in train_acc]
    ax.plot(iter, accu, label='Train')
    iter = [i for (i, q) in test_acc]
    accu = [q for (i, q) in test_acc]
    ax.plot(iter, accu, label='Test')
    ax.set(xlabel='Iteration',
           ylabel='Accuracy',
           title='Train and test accuracy')
    ax.legend()
    ax.grid()
    fig.savefig(os.path.join(out_dir, "torch_accuracy.png"))

    return raw_data
Example #12
def train():
    """
  Performs training and evaluation of MLP model. 

  TODO:
  Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.
  """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    x, y = cifar10['train'].next_batch(FLAGS.batch_size)
    x = torch.from_numpy(x.reshape(FLAGS.batch_size, -1)).float().to(device)
    y = torch.from_numpy(y).float().to(device)
    n_inputs = x.shape[1]
    n_classes = y.shape[1]
    n_hidden = dnn_hidden_units
    MutLP = MLP(n_inputs, n_hidden, n_classes)
    MutLP.to(device)
    if FLAGS.optimizer == 'SGD':
        optimizer = optim.SGD(MutLP.parameters(),
                              lr=FLAGS.learning_rate,
                              weight_decay=FLAGS.weight_decay)
    elif FLAGS.optimizer == 'Adam':
        optimizer = optim.Adam(MutLP.parameters(),
                               lr=FLAGS.learning_rate,
                               weight_decay=FLAGS.weight_decay)
    else:
        raise ValueError('Unknown optimizer: use SGD or Adam')
    loss = nn.CrossEntropyLoss()
    l_list = list()
    t_list = list()
    train_acc = list()
    test_acc = list()
    iterations = list()
    print('\nTraining...')
    for i in range(FLAGS.max_steps):
        optimizer.zero_grad()
        s_pred = MutLP(x)
        f_loss = loss(s_pred, y.argmax(dim=1))
        f_loss.backward()
        optimizer.step()
        if i % FLAGS.eval_freq == 0:
            l_list.append(round(f_loss.item(), 3))
            iterations.append(i + 1)
            train_acc.append(accuracy(s_pred, y))
            t_x, t_y = cifar10['test'].images, cifar10['test'].labels
            t_x = torch.from_numpy(t_x.reshape(t_x.shape[0],
                                               -1)).float().to(device)
            t_y = torch.from_numpy(t_y).float().to(device)
            t_pred = MutLP(t_x)
            t_loss = loss(t_pred, t_y.argmax(dim=1))
            t_list.append(round(t_loss.item(), 3))
            test_acc.append(accuracy(t_pred, t_y))
        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x.reshape(FLAGS.batch_size,
                                       -1)).float().to(device)
        y = torch.from_numpy(y).float().to(device)
    print('Done!\n')
    print('Training Losses:', l_list)
    print('Test Losses:', t_list)
    print('Training Accuracies:', train_acc)
    print('Test Accuracies:', test_acc)
    print('Best Test Accuracy:', max(test_acc))
    fig, axs = plt.subplots(1, 2, figsize=(10, 5))
    axs[0].plot(iterations, train_acc, iterations, test_acc)
    axs[0].set_xlabel('Iteration')
    axs[0].set_ylabel('Accuracy')
    axs[0].legend(('train', 'test'))
    axs[1].plot(iterations, l_list, iterations, t_list)
    axs[1].set_xlabel('Iteration')
    axs[1].set_ylabel('Loss')
    axs[1].legend(('train', 'test'))
    fig.tight_layout()
    plt.show()