Пример #1
0
def train():
    """
    Run a forward-pass smoke test of the MLP on one CIFAR-10 training batch.

    No optimisation is performed: a single batch of 100 images is drawn,
    each image is flattened to a (1, 3072) row, pushed through the network
    and the raw output is printed next to its label.

    The network is built from the first entry of FLAGS.dnn_hidden_units
    only, so that flag must contain at least one value.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Parse the comma-separated layer widths, e.g. "100,100" -> [100, 100]
    dnn_hidden_units = []
    if FLAGS.dnn_hidden_units:
        for width in FLAGS.dnn_hidden_units.split(","):
            dnn_hidden_units.append(int(width))

    batch_size = 100
    side, depth, n_classes = 32, 3, 10
    flat_size = side * side * depth

    splits = cifar10_utils.get_cifar10()
    train_split = splits['train']
    # The validation and test splits are looked up but never used below.
    validation_split = splits['validation']
    test_split = splits['test']

    net = MLP(flat_size, dnn_hidden_units[0], n_classes)

    images, labels = train_split.next_batch(batch_size)
    print(images.shape, labels.shape)

    # Feed the images through the network one at a time.
    for image, label in zip(images, labels):
        row = torch.tensor(np.reshape(image, (1, flat_size)))
        print(net.forward(row), label)
def train():
  """
  Train an MLP on CIFAR-10 with Adam and plot accuracy and loss curves.

  The hidden layout is forced to 600 units in the first layer (replacing
  whatever FLAGS.dnn_hidden_units put there) followed by four extra layers
  of decreasing width. Training runs for FLAGS.max_steps + 1 updates
  (steps 0..max_steps); every FLAGS.eval_freq steps the mini-batch loss and
  accuracy plus the accuracy on the whole test set are recorded.

  Side effects: prints progress and writes two PNG files (accuracy and
  loss curves) to the current working directory.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)

  ## Prepare all functions
  # Get number of units in each hidden layer specified in the string such as 100,100
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
  else:
    dnn_hidden_units = []

  # Get negative slope parameter for LeakyReLU
  neg_slope = FLAGS.neg_slope

  # Metrics recorded at every evaluation step
  loss_train = []
  acc_train = []
  acc_test = []

  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  cifar10_set = cifar10_utils.get_cifar10(FLAGS.data_dir)
  train_set = cifar10_set['train']
  test_set = cifar10_set['test']

  # The first batch is used to discover the input/output sizes and is then
  # reused as the training batch of step 0.
  x_np, t = train_set.next_batch(FLAGS.batch_size)
  print("The size of the dataset is: " + str(train_set.num_examples))
  x_np = x_np.reshape(FLAGS.batch_size, -1)
  out_dim = t.shape[1]
  in_dim = x_np.shape[1]

  # Parameters to be used in the model.
  # NOTE(review): lr and wd only label the output files; the optimizer
  # below actually uses FLAGS.learning_rate and no weight decay.
  hu = 4
  lr = 1e-4
  wd = 5e-4
  # Slice assignment replaces the first width if present and inserts it
  # otherwise, so an empty --dnn_hidden_units no longer raises IndexError.
  dnn_hidden_units[:1] = [600]
  for i in range(hu):
    dnn_hidden_units.append(int(500 - (450 * (i / hu))))
  print(dnn_hidden_units)
  mlp = MLP(in_dim, dnn_hidden_units, out_dim, neg_slope).to(device)

  print("Opt is Adam")
  optimizer = torch.optim.Adam(mlp.parameters(), lr=FLAGS.learning_rate)
  loss_funct = nn.CrossEntropyLoss()

  # Optional L1 regularisation on the weights of the linear layers
  reg_on = False
  reg_const = 0.00001

  steps = FLAGS.max_steps

  # The test set never changes, so convert it to a tensor once.
  t_test = test_set.labels
  x_test = torch.tensor(test_set.images.reshape(test_set.images.shape[0], -1),
                        dtype=torch.float32).to(device)

  for step in range(steps + 1):
    if step > 0:
      x_np, t = train_set.next_batch(FLAGS.batch_size)
      x_np = x_np.reshape(FLAGS.batch_size, -1)
    x = torch.tensor(x_np, dtype=torch.float32).to(device)

    y = mlp.forward(x)
    # Labels are one-hot; CrossEntropyLoss expects class indices
    loss = loss_funct(y, torch.LongTensor(np.argmax(t, 1)).to(device))
    if reg_on:
      for mod in mlp.modls:
        if isinstance(mod, nn.Linear):
          # Add the penalty once (the previous `loss += loss + ...`
          # doubled the loss for every linear layer).
          loss = loss + torch.sum(torch.abs(mod.weight)) * reg_const
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Evaluation and data storage step (step 0 always qualifies)
    if step % FLAGS.eval_freq == 0:
      # Store a plain float: keeping the tensor would retain its autograd
      # graph and cannot be plotted directly.
      loss_train.append(loss.item())
      acc_train.append(accuracy(y.cpu().detach().numpy(), t))
      with torch.no_grad():
        y_test = mlp.forward(x_test)
      acc_test.append(accuracy(y_test.cpu().numpy(), t_test))
      print("The accuracy at step, " + str(step) + " is : " + str(acc_test[-1]))

  # x-axis in epochs: samples seen at each evaluation / training set size
  samples_per_eval = FLAGS.eval_freq * FLAGS.batch_size
  epochs = np.arange(0, len(acc_train) * samples_per_eval, samples_per_eval) / train_set.num_examples
  file_tag = str((hu * 2) + 3) + '_learning_rate_' + str(lr) + '_weightdecay_' + str(wd) + '.png'

  # Plotting the accuracy of test and train:
  plt.figure(0)
  plt.plot(epochs, acc_train, label='Train')
  plt.plot(epochs, acc_test, label='Test')
  plt.xlabel('Epoch')
  plt.ylabel('Accuracy')
  plt.title('Accuracy of Train and Test Set Through Training')
  plt.legend()
  plt.savefig('acc_adam_' + file_tag)

  plt.figure(1)
  plt.plot(epochs, loss_train, label='Train')
  plt.xlabel('Epoch')
  plt.ylabel('Loss')
  plt.title('Loss Through Training')
  plt.legend()
  plt.savefig('loss_adam_' + file_tag)
Пример #3
0
def train():
    """
    Performs training and evaluation of MLP model.

    Trains for FLAGS.max_steps mini-batch updates and evaluates the model on
    the whole test set after the first update and after every
    FLAGS.eval_freq updates. The per-step training loss and the test
    accuracies are plotted at the end.

    Raises:
      ValueError: if FLAGS.optimizer is neither 'SGD' nor 'AdamW'.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
        torch.cuda.manual_seed_all(42)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
    else:
        dnn_hidden_units = []

    data = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
    # Named *_set so the local does not shadow this function's own name.
    train_set = data['train']
    print(train_set.images.shape)
    test_set = data['test']
    n_inputs = train_set.images[0].flatten().shape[0]
    n_classes = train_set.labels[0].shape[0]

    # Move the model to its device before the optimizer captures its parameters.
    mlp = MLP(n_inputs, dnn_hidden_units, n_classes)
    mlp.to(device)
    loss_mod = nn.CrossEntropyLoss()
    if FLAGS.optimizer == 'SGD':
        optimizer = torch.optim.SGD(mlp.parameters(), lr=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'AdamW':
        optimizer = torch.optim.AdamW(mlp.parameters(), lr=FLAGS.learning_rate)
    else:
        # Previously fell through and failed later with an unbound local.
        raise ValueError("Unsupported optimizer: {!r}".format(FLAGS.optimizer))

    loss_history = []
    acc_history = []
    eval_points = []  # number of updates done at each evaluation (0 for the first)
    for step in range(FLAGS.max_steps):
        mlp.train()
        x, y = train_set.next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x.reshape(x.shape[0], n_inputs)).to(device)
        y = torch.from_numpy(np.argmax(y, axis=1)).to(device)  # converts onehot to dense

        out = mlp(x)
        loss = loss_mod(out, y)
        # Keep a float, not the tensor: the tensor would pin its autograd
        # graph in memory and cannot be handed to matplotlib.
        loss_history.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step == 0 or (step + 1) % FLAGS.eval_freq == 0:
            mlp.eval()
            with torch.no_grad():
                x, y = test_set.images, test_set.labels
                x = torch.from_numpy(x.reshape(x.shape[0], n_inputs)).to(device)
                y = torch.from_numpy(y).to(device)
                test_out = mlp.forward(x)
                acc = accuracy(test_out, y)
                print('Accuracy:', acc)
                acc_history.append(acc)
                # Same x positions as range(0, max_steps + 1, eval_freq) in
                # the usual case, but always as long as acc_history.
                eval_points.append(0 if step == 0 else step + 1)
    print('Final loss:', loss_history[-1])
    print('Final acc:', acc_history[-1])

    plt.plot(loss_history)
    plt.step(eval_points, acc_history)
    plt.legend(['loss', 'accuracy'])
    plt.show()
Пример #4
0
def train():
    """
    Performs training and evaluation of MLP model.

    Trains for FLAGS.max_steps mini-batch updates with the optimizer named
    by FLAGS.optimizer ('ADAM', 'ADAMwd', 'SGD', 'RMS'; anything else falls
    back to SGD). Every FLAGS.eval_freq iterations, and on the last
    iteration, the mini-batch loss/accuracy and the loss/accuracy on the
    whole test set are recorded.

    Side effects: prints progress, saves the loss/accuracy figure to
    "../figures/pytorch_mlp.png" and shows it.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
    else:
        dnn_hidden_units = []

    # Use a separate local name: assigning to DATA_DIR_DEFAULT inside this
    # function made it a local, so an empty FLAGS.data_dir raised
    # UnboundLocalError instead of falling back to the module default.
    data_dir = FLAGS.data_dir or DATA_DIR_DEFAULT

    # Get negative slope parameter for LeakyReLU
    neg_slope = FLAGS.neg_slope

    batch_size = FLAGS.batch_size
    learning_rate = FLAGS.learning_rate

    cifar_data = cifar10_utils.get_cifar10(data_dir)

    train_data = cifar_data['train']
    test_data = cifar_data['test']

    n_classes = train_data.labels.shape[1]
    n_inputs = int(np.prod(train_data.images.shape[1:]))

    # Flatten the test images and turn one-hot labels into class indices
    x_test, y_test = test_data.images, test_data.labels
    x_test = torch.from_numpy(np.reshape(x_test, (x_test.shape[0], n_inputs)))
    y_test = torch.from_numpy(np.argmax(y_test, axis=1)).type(torch.LongTensor)

    criterion = nn.CrossEntropyLoss()
    model = MLP(n_inputs, dnn_hidden_units, n_classes, neg_slope)

    if FLAGS.optimizer == 'ADAM':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif FLAGS.optimizer == 'ADAMwd':
        optimizer = optim.Adam(model.parameters(),
                               lr=learning_rate,
                               weight_decay=0.02)
    elif FLAGS.optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    elif FLAGS.optimizer == 'RMS':
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)
    else:
        print("Optimizer: Used default option, SGD")
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    # Train and Test losses
    losses = [[], []]
    # Train and Test accuracies
    accuracies = [[], []]

    # True iteration for plotting
    iterations = []

    for iteration in range(FLAGS.max_steps):
        x, y = train_data.next_batch(batch_size)
        x = torch.from_numpy(np.reshape(x, (batch_size, n_inputs)))
        # argmax in order to align labels with the Cross entropy loss function
        y = torch.from_numpy(np.argmax(y, axis=1)).type(torch.LongTensor)

        train_output = model.forward(x)
        loss = criterion(train_output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if iteration % FLAGS.eval_freq == 0 or iteration == FLAGS.max_steps - 1:
            iterations.append(iteration)

            # Second forward pass for test set; no gradients are needed
            with torch.no_grad():
                test_output = model.forward(x_test)
                test_loss = criterion(test_output, y_test).item()

            # The batch loss was already computed above. Store plain floats:
            # a tensor that requires grad cannot be plotted and keeps its
            # graph alive.
            train_loss = loss.item()
            losses[0].append(train_loss)
            losses[1].append(test_loss)

            # Calculate accuracies (detached so no graph is dragged along)
            train_acc = accuracy(train_output.detach(), y)
            test_acc = accuracy(test_output, y_test)
            accuracies[0].append(train_acc)
            accuracies[1].append(test_acc)

            print(
                "Iteration {}, Train loss: {}, Train accuracy: {}, Test accuracy: {}"
                .format(iteration, train_loss, train_acc, test_acc))

    fig = plt.figure(figsize=(25, 10), dpi=200)
    fig.suptitle('PyTorch MLP: Losses and Accuracies', fontsize=40)
    ax1 = fig.add_subplot(1, 2, 1)
    ax2 = fig.add_subplot(1, 2, 2)

    ax1.plot(iterations, losses[0], linewidth=4, color="g", label="Train loss")
    ax1.plot(iterations, losses[1], linewidth=4, color="c", label="Test loss")
    ax2.plot(iterations,
             accuracies[0],
             linewidth=4,
             color="g",
             label="Train accuracy")
    ax2.plot(iterations,
             accuracies[1],
             linewidth=4,
             color="c",
             label="Test accuracy")

    ax1.set_xlabel('$Iteration$', fontsize=28)
    ax1.set_ylabel('$Loss$', fontsize=28)
    ax2.set_xlabel('$Iteration$', fontsize=28)
    ax2.set_ylabel('$Accuracy$', fontsize=28)

    ax1.legend(fontsize=22)
    ax2.legend(fontsize=22)

    plt.savefig("../figures/pytorch_mlp.png")
    plt.show()
def train():
  """
  Grid-search Adam learning rate and weight decay for the CIFAR-10 MLP.

  For every (learning rate, weight decay) pair a fresh MLP is trained for
  roughly ten epochs. Every FLAGS.eval_freq steps the mini-batch
  loss/accuracy and the accuracy on the whole test set are recorded. The
  best test accuracy of each configuration is collected.

  Side effects: prints progress, writes one accuracy and one loss figure
  per configuration into 'figs/', and saves the list of best accuracies to
  'acc_grid_srch_4.npy'.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)

  ## Prepare all functions
  # Get number of units in each hidden layer specified in the string such as 100,100
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
  else:
    dnn_hidden_units = []

  # Get negative slope parameter for LeakyReLU
  neg_slope = FLAGS.neg_slope

  acc_param_search = []

  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  cifar10_set = cifar10_utils.get_cifar10(FLAGS.data_dir)
  train_set = cifar10_set['train']
  test_set = cifar10_set['test']

  # One batch is drawn only to discover the input/output sizes.
  x, y = train_set.next_batch(FLAGS.batch_size)
  print("The size of the dataset is: " + str(train_set.num_examples))
  x = x.reshape(FLAGS.batch_size, -1)

  out_dim = y.shape[1]
  in_dim = x.shape[1]

  hu = 4
  lr_list = [1e-2, 1.5e-3, 1.25e-3, 1e-3, 1e-4]
  wd_list = [1e-4, 5e-4, 1e-5, 5e-5]
  # Slice assignment replaces the first width if present and inserts it
  # otherwise, so an empty --dnn_hidden_units no longer raises IndexError.
  dnn_hidden_units[:1] = [600]
  for i in range(hu):
    dnn_hidden_units.append(int(500 - (450 * (i / hu))))

  # The test set never changes, so convert it to a tensor once.
  t_test = test_set.labels
  x_test = torch.tensor(test_set.images.reshape(test_set.images.shape[0], -1),
                        dtype=torch.float32).to(device)

  for lr in lr_list:
    for wd in wd_list:
      loss_train = []
      acc_train = []
      acc_test = []
      config_tag = str((hu * 2) + 3) + '_learning_rate_' + str(lr) + '_weightdecay_' + str(wd)
      print('Testing Parameters layers ' + config_tag)

      mlp = MLP(in_dim, dnn_hidden_units, out_dim, neg_slope).to(device)
      print("Opt is Adam")
      optimizer = torch.optim.Adam(mlp.parameters(), lr=lr, weight_decay=wd)
      loss_funct = nn.CrossEntropyLoss()

      # Optional L1 regularisation on the weights of the linear layers
      reg_on = False
      reg_const = 0.00001
      # Ten passes over the training set
      steps = int((train_set.num_examples / FLAGS.batch_size) * 10)
      print(steps)

      for i in range(steps + 1):
        x, t = train_set.next_batch(FLAGS.batch_size)
        x = torch.tensor(x.reshape(FLAGS.batch_size, -1), dtype=torch.float32).to(device)
        y = mlp.forward(x)
        # Labels are one-hot; CrossEntropyLoss expects class indices
        loss = loss_funct(y, torch.LongTensor(np.argmax(t, 1)).to(device))
        if reg_on:
          for mod in mlp.modls:
            if isinstance(mod, nn.Linear):
              # Add the penalty once (the previous `loss += loss + ...`
              # doubled the loss for every linear layer).
              loss = loss + torch.sum(torch.abs(mod.weight)) * reg_const
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % FLAGS.eval_freq == 0:
          # Store a plain float: the tensor would keep its autograd graph
          # alive and cannot be plotted directly.
          loss_train.append(loss.item())
          acc_train.append(accuracy(y.cpu().detach().numpy(), t))
          with torch.no_grad():
            y_test = mlp.forward(x_test)
          acc_test.append(accuracy(y_test.cpu().numpy(), t_test))
      max_acc = np.array(acc_test).max()
      print('The max found for these settings: layers ' + config_tag + 'was :' + str(max_acc))
      acc_param_search.append(max_acc)

      # x-axis in epochs: samples seen at each evaluation / training set size
      samples_per_eval = FLAGS.eval_freq * FLAGS.batch_size
      epochs = np.arange(0, len(acc_train) * samples_per_eval, samples_per_eval) / train_set.num_examples

      # Plotting the accuracy of test and train. The figure is cleared
      # first so curves from earlier configurations do not pile up.
      plt.figure(0)
      plt.clf()
      plt.plot(epochs, acc_train, label='Train')
      plt.plot(epochs, acc_test, label='Test')
      plt.xlabel('Epoch')
      plt.ylabel('Accuracy')
      plt.title('Accuracy of Train and Test Set Through Training')
      plt.legend()
      # Was 'figs/loss_adam_...', i.e. the same file as the loss figure
      # below, which then overwrote the accuracy figure.
      acc_loc = 'figs/acc_adam_' + config_tag + '.png'
      plt.savefig(acc_loc)

      plt.figure(1)
      plt.clf()
      plt.plot(epochs, loss_train, label='Train')
      plt.xlabel('Epoch')
      plt.ylabel('Loss')
      plt.title('Loss Through Training')
      loss_loc = 'figs/loss_adam_' + config_tag + '.png'
      plt.savefig(loss_loc)
      ########################
      # END OF YOUR CODE    #
      #######################
  print(acc_param_search)
  # The file name must be a string; the bare name was an undefined variable.
  np.save('acc_grid_srch_4', acc_param_search)
def train():
    """
    Performs training and evaluation of MLP model.

    Trains for FLAGS.max_steps mini-batch updates. After the first step and
    every FLAGS.eval_freq steps the model is evaluated on the test split in
    mini-batches of FLAGS.batch_size, and loss and accuracy are averaged
    over those batches. The best test accuracy and the wall-clock time are
    appended as one line to "results.csv".

    Raises:
      ValueError: if FLAGS.optimizer is not 'Adam', 'SGD' or 'RMSprop'.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
    else:
        dnn_hidden_units = []

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data = cifar10_utils.get_cifar10(FLAGS.data_dir)
    n_inputs = 3 * 32 * 32
    n_classes = 10
    # Number of full test batches per evaluation; a trailing partial batch
    # is not counted.
    batches_per_epoch = data['test'].images.shape[0] // FLAGS.batch_size
    model = MLP(n_inputs, dnn_hidden_units, n_classes).to(device)
    loss_fn = nn.CrossEntropyLoss()

    if FLAGS.optimizer == "Adam":
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=FLAGS.learning_rate,
                                     weight_decay=FLAGS.weight_decay)
    elif FLAGS.optimizer == "SGD":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=FLAGS.learning_rate,
                                    weight_decay=FLAGS.weight_decay,
                                    momentum=FLAGS.momentum)
    elif FLAGS.optimizer == "RMSprop":
        optimizer = torch.optim.RMSprop(model.parameters(),
                                        lr=FLAGS.learning_rate,
                                        weight_decay=FLAGS.weight_decay,
                                        momentum=FLAGS.momentum)
    else:
        # Previously left optimizer as None and failed inside the loop.
        raise ValueError("Unsupported optimizer: {!r}".format(FLAGS.optimizer))

    max_accuracy = 0.0
    start_time = time.perf_counter()
    for step in range(1, FLAGS.max_steps + 1):
        # get_batch is a project helper; presumably it returns device
        # tensors with one-hot labels - verify against its definition.
        x, y = get_batch(data, 'train', FLAGS.batch_size, device)
        predictions = model.forward(x)
        training_loss = loss_fn(predictions, y.argmax(dim=1))
        optimizer.zero_grad()
        training_loss.backward()
        optimizer.step()
        if step == 1 or step % FLAGS.eval_freq == 0:
            with torch.no_grad():
                test_loss = 0
                test_acc = 0
                for _ in range(batches_per_epoch):
                    x, y = get_batch(data, 'test', FLAGS.batch_size, device)
                    predictions = model(x)
                    test_loss += loss_fn(predictions,
                                         y.argmax(dim=1)) / batches_per_epoch
                    test_acc += accuracy(predictions, y) / batches_per_epoch
                if test_acc > max_accuracy:
                    max_accuracy = test_acc
                print(
                    "step %d/%d: training loss: %.3f test loss: %.3f accuracy: %.1f%%"
                    % (step, FLAGS.max_steps, training_loss, test_loss,
                       test_acc * 100))

    time_taken = time.perf_counter() - start_time
    # The context manager closes the file even if formatting or writing fails.
    with open("results.csv", "a+") as results_file:
        results_file.write(
            "%s;%s;%f;%f;%f;%d;%d;%d;%f;%.3f\n" %
            (FLAGS.dnn_hidden_units, FLAGS.optimizer, FLAGS.learning_rate,
             FLAGS.momentum, FLAGS.weight_decay, FLAGS.batch_size,
             FLAGS.max_steps, FLAGS.eval_freq, max_accuracy, time_taken))
    print("Done. Scored %.1f%% in %.1f seconds." %
          (max_accuracy * 100, time_taken))
def train():
    """
    Performs training and evaluation of MLP model.

    Trains with Adam for FLAGS.max_steps mini-batch updates. The mini-batch
    loss and accuracy are printed after every step. Every FLAGS.eval_freq
    steps, and on the last step, the model is evaluated on the whole test
    set. All evaluation rows are written to a timestamped CSV file under
    'results/'.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
    else:
        dnn_hidden_units = []

    # Get negative slope parameter for LeakyReLU
    neg_slope = FLAGS.neg_slope

    def init_weights(m):
        """Optional initialiser: uniform [0, 1) weights and zero biases for linear layers.

        Currently unused - the call further down is commented out.
        """
        print(m)
        if isinstance(m, nn.Linear):
            m.weight.data.uniform_(0.0, 1.0)
            print(m.weight)
            m.bias.data.fill_(0.0)
            print(m.bias)

    lr = FLAGS.learning_rate
    eval_freq = FLAGS.eval_freq
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    input_size = 32 * 32 * 3
    output_size = 10
    # load dataset
    raw_data = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT)
    train_data = raw_data['train']
    test_data = raw_data['test']

    model = MLP(n_inputs=input_size,
                n_hidden=dnn_hidden_units,
                n_classes=output_size,
                neg_slope=neg_slope)
    print(model.layers)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_target = nn.CrossEntropyLoss()
    csv_data = [[
        'step', 'train_loss', 'test_loss', 'train_accuracy', 'test_accuracy'
    ]]
    print("initial weights as normal distribution and bias as zeros")
    # model.layers.apply(init_weights)

    for step in range(max_steps):
        x, y = train_data.next_batch(batch_size)
        x = x.reshape(batch_size, input_size)
        x = torch.tensor(x, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.long)

        output = model.forward(x)
        # Labels are one-hot; CrossEntropyLoss expects class indices
        loss = loss_target.forward(output, y.argmax(dim=1))
        loss_avg = loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc = accuracy(output, y)
        # with the \r and end = '' trick, we can print on the same line
        print('\r[{}/{}] train_loss: {}  train_accuracy: {}'.format(
            step + 1, max_steps, round(loss_avg, 3), round(train_acc, 3)),
              end='')
        # evaluate
        if step % eval_freq == 0 or step >= (max_steps - 1):
            x, y = test_data.next_batch(test_data.num_examples)
            x = x.reshape(test_data.num_examples, input_size)
            x = torch.tensor(x, dtype=torch.float32)
            y = torch.tensor(y, dtype=torch.long)
            # No gradients are needed for evaluation; skipping them avoids
            # building an autograd graph over the whole test set.
            with torch.no_grad():
                output = model.forward(x)
                test_loss = loss_target.forward(output, y.argmax(dim=1)).item()
            test_acc = accuracy(output, y)
            csv_data.append([step, loss_avg, test_loss, train_acc, test_acc])
            print(' test_loss: {}, test_accuracy: {}'.format(
                round(test_loss, 3), round(test_acc, 3)))
    # newline='' is what the csv module expects; without it extra blank
    # rows appear on platforms that translate line endings.
    with open('results/train_summary_torch_{}.csv'.format(int(time.time())),
              'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerows(csv_data)
Пример #8
0
def train():
    """
    Performs training and evaluation of MLP model.

    Trains for at most FLAGS.max_steps mini-batch updates. Every
    FLAGS.eval_freq steps, and on the last step, the mini-batch
    loss/accuracy and the loss/accuracy on the whole test set are recorded.
    Training stops early once more than 20 evaluations exist and two
    consecutive test losses differ by less than 1e-6. The recorded rows are
    appended to '<parent of cwd>/mlp_pytorch_results/mlp_pytorch.csv'.

    Returns:
      A list with one [step, train_acc, train_loss, test_acc, test_loss]
      entry per evaluation.

    Raises:
      ValueError: if FLAGS.optimizer is not 'SGD', 'Adam' or 'Adadelta'.
    """
    print_flags()
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    # use GPU if available
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
    else:
        dnn_hidden_units = []

    lr = FLAGS.learning_rate
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    data_dir = FLAGS.data_dir
    optim_type = FLAGS.optimizer
    # Early-stopping threshold on the change between consecutive test losses
    train_treshold = 1e-6

    # load input data
    cifar10 = cifar10_utils.get_cifar10(data_dir, one_hot=True)

    # get test data
    x_test = cifar10["test"].images
    y_test = cifar10["test"].labels
    train_data = cifar10["train"]

    # determine dimension of data
    x_dim = x_test.shape
    n_test_samples = x_dim[0]  # number of test samples
    n_inputs = x_dim[1] * x_dim[2] * x_dim[3]  # flattened image size
    n_classes = y_test.shape[1]

    # reshape test images to fit MLP input and convert to tensors
    x_test = x_test.reshape((n_test_samples, n_inputs))
    x_test_torch = torch.tensor(x_test, dtype=torch.float, device=device)
    y_test_torch = torch.tensor(y_test, dtype=torch.float, device=device)

    # initialize MLP model
    mlp_model = MLP(n_inputs=n_inputs,
                    n_hidden=dnn_hidden_units,
                    n_classes=n_classes).to(device)

    if optim_type == 'SGD':
        optimizer = torch.optim.SGD(mlp_model.parameters(), lr=lr)
    elif optim_type == 'Adam':
        optimizer = torch.optim.Adam(mlp_model.parameters(), lr=lr)
    elif optim_type == 'Adadelta':
        optimizer = torch.optim.Adadelta(mlp_model.parameters(), lr=lr)
    else:
        # Previously fell through and failed later with an unbound local.
        raise ValueError("Unsupported optimizer: {!r}".format(optim_type))

    # define loss function
    loss_fn = nn.CrossEntropyLoss()

    # evaluation metrics
    acc_train = []
    acc_test = []
    loss_train = []
    loss_test = []
    best_acc = 0.0
    results = []

    # train the model
    print("Start training")
    for step in range(max_steps):

        # get mini-batch
        x_train, y_train = train_data.next_batch(batch_size)
        x_train = x_train.reshape((batch_size, n_inputs))

        # transform to tensor representation
        x_train_torch = torch.tensor(x_train, dtype=torch.float, device=device)
        y_train_torch = torch.tensor(y_train, dtype=torch.float, device=device)

        # set gradients to zero
        optimizer.zero_grad()

        # forward pass mb to get predictions as output
        out = mlp_model.forward(x_train_torch)

        # compute loss (labels are one-hot; the loss expects class indices)
        loss_mb = loss_fn.forward(out, y_train_torch.argmax(dim=1))

        # backward pass
        loss_mb.backward()
        optimizer.step()

        if (step % eval_freq == 0) or (step == max_steps - 1):
            print(f"Step: {step}")
            # compute and store training metrics
            loss_train.append(loss_mb.item())
            acc_train.append(accuracy(out, y_train_torch))
            print("TRAIN acc: {0:.4f}  & loss: {1:.4f}".format(
                acc_train[-1], loss_train[-1]))

            # compute and store test metrics
            # Note that we use the test set as validation set!! Only as an exception :P
            # No gradients are needed here; skipping them avoids building
            # an autograd graph over the whole test set.
            with torch.no_grad():
                out_test = mlp_model.forward(x_test_torch)
                loss_val = loss_fn.forward(out_test,
                                           y_test_torch.argmax(dim=1))
            loss_test.append(loss_val.item())
            acc_test.append(accuracy(out_test, y_test_torch))
            print("TEST acc: {0:.4f}  & loss: {1:.4f}".format(
                acc_test[-1], loss_test[-1]))

            results.append([
                step, acc_train[-1], loss_train[-1], acc_test[-1],
                loss_test[-1]
            ])

            if acc_test[-1] > best_acc:
                best_acc = acc_test[-1]
                print("New BEST acc: {0:.4f}".format(best_acc))

            # Early stop once the test loss has stopped changing between
            # two consecutive evaluations (only after 20+ evaluations).
            if len(loss_train) > 20:
                prev_losses = loss_test[-2]
                cur_losses = loss_test[-1]
                if abs(prev_losses - cur_losses) < train_treshold:
                    print("Training stopped early at step {0}".format(step +
                                                                      1))
                    break
    print("Finished training")
    print("BEST acc: {0:.4f}".format(best_acc))

    res_path = Path.cwd().parent / 'mlp_pytorch_results'
    res_path.mkdir(parents=True, exist_ok=True)

    print("Saving results to {0}".format(res_path))

    res_path = res_path / 'mlp_pytorch.csv'

    # Append to an existing file; write the header only for a new one
    mode = 'a' if res_path.exists() else 'w'

    col_names = [
        'step', 'train_acc', 'train_loss', 'test_acc', 'test_loss', 'lr',
        'max_steps', 'batch_size', 'dnn_hidden_units', 'optimizer'
    ]
    run_settings = [lr, max_steps, batch_size, dnn_hidden_units, optim_type]

    with open(res_path, mode) as csv_file:
        if mode == 'w':
            # Header now uses the same ';' delimiter as the data rows.
            csv_file.write(';'.join(col_names) + '\n')
        for row in results:
            # Joining all fields fixes the missing ';' that previously
            # glued test_loss and lr into one column.
            csv_file.write(
                ';'.join(str(value) for value in row + run_settings) + '\n')

    return results
def train():
  """
  Train an MLP on CIFAR-10 with Adam and evaluate it on the whole test set.

  Hyper-parameters (hidden units, learning rate, max steps, batch size,
  evaluation frequency and data directory) are read from the module-level
  FLAGS. The model is evaluated every `eval_freq` steps and at the last step.
  The collected metrics and the model are written to "./torch results/".

  Returns:
    List with the test accuracy recorded at every evaluation step.
  """
  import os

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)
  torch.manual_seed(42)

  # Number of units per hidden layer, given as a string such as "100,100".
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
  else:
    dnn_hidden_units = []

  # Local aliases for readability.
  lr = FLAGS.learning_rate
  max_steps = FLAGS.max_steps
  batch_size = FLAGS.batch_size
  eval_freq = FLAGS.eval_freq
  data_dir = FLAGS.data_dir

  cifar10 = cifar10_utils.get_cifar10(data_dir)

  # Load the test data and derive the flattened image size from it.
  x_test = cifar10["test"].images
  y_test = cifar10["test"].labels
  n_samples, height, width, channels = x_test.shape[:4]
  flat_image = height * width * channels

  # Flatten the test images so the MLP can read them.
  x_test = x_test.reshape((n_samples, flat_image))

  # Convert to torch tensors (`dtype` and `device` are module-level names).
  x_test = torch.tensor(x_test, requires_grad=False).type(dtype).to(device)
  y_test = torch.tensor(y_test, requires_grad=False).type(dtype).to(device)
  y_test_idx = y_test.argmax(dim=1)

  # Labels are one-hot encoded, so the second axis is the class count.
  n_classes = y_test.shape[1]

  # NOTE(review): the inputs are moved to `device` but the model is not;
  # confirm that MLP places its own parameters on `device`.
  MLP_model = MLP(n_inputs=flat_image,
                  n_hidden=dnn_hidden_units,
                  n_classes=n_classes)

  loss_func = torch.nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(MLP_model.parameters(), lr=lr)

  # Metrics tracked during training.
  acc_train = []
  acc_test = []
  loss_train = []
  loss_test = []

  for step in range(max_steps):
    # Load a mini-batch and flatten the images.
    X, y = cifar10['train'].next_batch(batch_size)
    X = X.reshape((batch_size, flat_image))
    X = torch.from_numpy(X).type(dtype).to(device)
    y = torch.from_numpy(y).type(dtype).to(device)

    optimizer.zero_grad()
    out = MLP_model.forward(X)
    loss_t = loss_func(out, y.argmax(dim=1))
    loss_t.backward()
    optimizer.step()

    if step % eval_freq == 0 or step == max_steps - 1:
      print(f"step:{step}")

      # Store plain floats: tensors that still carry the autograd graph
      # cannot be converted by numpy (np.mean / np.save raise on them).
      loss_train.append(loss_t.item())
      acc_train.append(accuracy(out, y))
      print(f"train performance: acc = {acc_train[-1]}, loss = {loss_train[-1]}")

      # No gradients are needed for evaluation; skipping the graph saves
      # memory on the full test set.
      with torch.no_grad():
        out_test = MLP_model.forward(x_test)
        loss_test.append(loss_func(out_test, y_test_idx).item())
        acc_test.append(accuracy(out_test, y_test))
      print(f"test performance: acc = {acc_test[-1]}, loss = {loss_test[-1]}")

  # Finished training: make sure the output folder exists before saving.
  path = "./torch results/"
  os.makedirs(path, exist_ok=True)
  print("saving results in folder...")
  np.save(path + "torch_loss_train", loss_train)
  np.save(path + "torch_accuracy_train", acc_train)
  np.save(path + "torch_loss_test", loss_test)
  np.save(path + "torch_accuracy_test", acc_test)

  print("saving model in folder")
  # NOTE(review): np.save pickles the whole module object; saving
  # MLP_model.state_dict() with torch.save would be more portable.
  np.save(path + "MLP_torch_model", MLP_model)
  return acc_test
Пример #10
0
def train():
    """
    Train an MLP on CIFAR-10 and evaluate it on the whole test set.

    The optimizer is chosen by FLAGS.optimizer ('SGD', otherwise Adam with
    weight decay). Every FLAGS.eval_freq steps and at the last step, the loss
    and accuracy on the current training batch and on the full test set are
    recorded. After training, accuracy and loss curves are saved as
    'torch_accuracy.pdf' and 'torch_loss.pdf'.
    """

    # DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    data = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT)

    n_inputs = np.prod(data['train'].images.shape[1:])
    n_classes = data['train'].labels.shape[1]
    n_test = data['test'].images.shape[0]

    # Fetch the whole test set once; labels are turned into class indices.
    x_test, y_test = data['test'].next_batch(n_test)
    x_test = torch.from_numpy(x_test.reshape((n_test, n_inputs)))
    y_test = torch.from_numpy(y_test.argmax(axis=1)).long()

    net = MLP(n_inputs, dnn_hidden_units, n_classes)
    criterion = nn.CrossEntropyLoss()
    if FLAGS.optimizer == 'SGD':
        optimizer = optim.SGD(net.parameters(),
                              lr=FLAGS.learning_rate,
                              momentum=0.0)
    else:
        optimizer = optim.Adam(net.parameters(),
                               lr=FLAGS.learning_rate,
                               weight_decay=1e-2)

    losses = {'train': [], 'test': []}
    accuracies = {'train': [], 'test': []}
    eval_steps = []

    for s in range(FLAGS.max_steps):
        x, y = data['train'].next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x.reshape((FLAGS.batch_size, n_inputs)))
        y = torch.from_numpy(y.argmax(axis=1)).long()

        # Forward, backward and parameter update.
        out = net.forward(x)
        net.zero_grad()
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()

        # Evaluation
        if s % FLAGS.eval_freq == 0 or s == FLAGS.max_steps - 1:
            eval_steps.append(s)
            # Keep plain floats: tensors that require grad cannot be turned
            # into numpy arrays, which matplotlib and np.min need below.
            losses['train'].append(loss.item())
            accuracies['train'].append(accuracy(out, y))

            # The test pass needs no gradients; skip building the graph.
            with torch.no_grad():
                test_out = net.forward(x_test)
                losses['test'].append(criterion(test_out, y_test).item())
                accuracies['test'].append(accuracy(test_out, y_test))

            print('Iter {:04d}: Test: {:.2f} ({:f}), Train: {:.2f} ({:f})'.
                  format(s, 100 * accuracies['test'][-1], losses['test'][-1],
                         100 * accuracies['train'][-1], losses['train'][-1]))

    # Plotting
    for d, n in [(accuracies, 'Accuracy'), (losses, 'Loss')]:
        plt.figure()
        plt.plot(eval_steps, d['train'], label='train')
        plt.plot(eval_steps, d['test'], label='test')
        plt.xlabel('Step')
        plt.ylabel(n)
        plt.legend()
        plt.tight_layout()
        plt.savefig('torch_' + n.lower() + '.pdf')
        # Release the figure once it has been written to disk.
        plt.close()

    # np.min / np.max raise on empty input (e.g. when max_steps is 0).
    if eval_steps:
        print('Best testing loss: {:.2f} accuracy: {:.2f}'.format(
            np.min(losses['test']), 100 * np.max(accuracies['test'])))
Пример #11
0
def train():
    """
    Train an MLP on CIFAR-10 with Adam and evaluate it on the whole test set.

    All data splits are flattened and moved to the selected device up front.
    Training batches are drawn by random index sampling. Every
    FLAGS.eval_freq iterations the accuracy and loss on the current batch and
    on the full test set are recorded, printed and logged. After training,
    logs and metrics are written to 'results/' and the curves are saved as
    'acc_pytorch.png' and 'loss_pytorch.png'.
    """
    import os

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # Per-split containers for inputs, labels, accuracies and losses.
    x, y, accu, loss = {}, {}, {}, {}

    # retrieve data
    data = cifar10_utils.get_cifar10(FLAGS.data_dir)

    # determine shapes
    image_shape = data['test'].images[0].shape
    nr_pixels = image_shape[0] * image_shape[1] * image_shape[2]
    nr_labels = data['test'].labels.shape[1]

    # set standards
    tensor = torch.FloatTensor
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Flatten every split and keep it on the device as float tensors.
    for tag in data:
        nr_images = data[tag].images.shape[0]
        x_tmp = np.reshape(data[tag].images, (nr_images, nr_pixels))
        y_tmp = np.reshape(data[tag].labels, (nr_images, nr_labels))
        x[tag] = torch.tensor(x_tmp).type(tensor).to(device)
        y[tag] = torch.tensor(y_tmp).type(tensor).to(device)
        accu[tag] = []
        loss[tag] = []

    # create neural network
    neural_network = MLP(nr_pixels, dnn_hidden_units, nr_labels).to(device)
    cross_entropy = nn.CrossEntropyLoss().to(device)
    parameter_optimizer = torch.optim.Adam(neural_network.parameters(),
                                           FLAGS.learning_rate)

    # The first log entry records the run configuration.
    logs = ['\n'.join([key + ' : ' + str(value)
                       for key, value in vars(FLAGS).items()])]

    # The original loop also tested a `dx` norm that was never updated, so
    # the step count was the only effective stopping criterion.
    for i in range(1, FLAGS.max_steps + 1):

        # sample batch from data
        rand_idx = np.random.randint(x['train'].shape[0],
                                     size=FLAGS.batch_size)
        x_batch = x['train'][rand_idx]
        y_batch = y['train'][rand_idx]

        parameter_optimizer.zero_grad()

        nn_out = neural_network.forward(x_batch)
        ce_out = cross_entropy.forward(nn_out, y_batch.argmax(dim=1))
        ce_out.backward()
        parameter_optimizer.step()

        if i % FLAGS.eval_freq == 0:

            # Save train accuracy and loss. The loss is stored as a float
            # (like the test loss) so it can be written and plotted.
            accu['train'].append(accuracy(nn_out, y_batch))
            loss['train'].append(ce_out.item())

            # Evaluate on the test set without building an autograd graph.
            with torch.no_grad():
                test_out = neural_network.forward(x['test'])
                test_ce = cross_entropy.forward(test_out,
                                                y['test'].argmax(dim=1))
            accu['test'].append(accuracy(test_out, y['test']))
            loss['test'].append(test_ce.item())

            # show results in command prompt and save log
            s = (f"iteration {i} | train acc/loss "
                 f"{accu['train'][-1]:.3f}/{loss['train'][-1]:.3f}"
                 f" | test acc/loss "
                 f"{accu['test'][-1]:.3f}/{loss['test'][-1]:.3f}")

            logs.append(s)
            print(s)

    t = str(time.time())

    # The output directory may not exist on a fresh checkout.
    os.makedirs('results', exist_ok=True)

    # write logs
    with open('results/logs_' + t + '.txt', 'w') as f:
        f.writelines(['%s\n' % item for item in logs])

    # write data to file: one comma-separated row per metric
    with open('results/data_' + t + '.txt', 'w') as f:
        f.write('train accuracy')
        f.writelines([',%s' % str(item) for item in accu['train']])
        f.write('\ntrain loss')
        f.writelines([',%s' % str(item) for item in loss['train']])
        f.write('\ntest accuracy')
        f.writelines([',%s' % str(item) for item in accu['test']])
        f.write('\ntest loss')
        f.writelines([',%s' % str(item) for item in loss['test']])

    # One x position per recorded evaluation.
    axis = list(range(len(accu['test'])))

    # plot accuracy
    plt.plot(axis, accu['train'], label='train')
    plt.plot(axis, accu['test'], label='test')
    plt.legend()
    plt.savefig('acc_pytorch.png')
    plt.clf()

    # plot loss
    plt.plot(axis, loss['train'], label='train')
    plt.plot(axis, loss['test'], label='test')
    plt.legend()
    plt.savefig('loss_pytorch.png')
    plt.clf()
Пример #12
0
def train():
    """
    Train an MLP on CIFAR-10 with SGD and evaluate it on the whole test set.

    All training batches are drawn up front, then the network is trained for
    FLAGS.max_steps steps. Every FLAGS.eval_freq steps the accuracy on the
    full test set is recorded and printed. Finally the test accuracy and the
    per-step training loss are drawn into two subplots.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    loss_list = []
    batch_list = []
    accuracy_list = []
    eval_steps = []

    # Draw all batches before the model is built, flattening each sample and
    # turning the one-hot labels into class indices.
    # NOTE(review): this keeps max_steps batches in memory at once; it is
    # left as is so the order of data-loader calls stays unchanged.
    for _ in range(FLAGS.max_steps):
        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        y = np.argmax(y, axis=1)
        x = np.reshape(x, (FLAGS.batch_size, -1))
        batch_list.append((x, y))
    print('Batch list completed')
    in_features = batch_list[0][0].shape[1]
    out_features = 10  # num_classes

    x_test, y_test = cifar10['test'].images, cifar10['test'].labels
    x_test = np.reshape(x_test, (x_test.shape[0], -1))
    y_test = np.argmax(y_test, axis=1)
    print(y_test.shape)
    # Cast to float like the training inputs so both use the same dtype.
    x_test = torch.from_numpy(x_test).float()
    y_test = torch.from_numpy(y_test).long()
    print(y_test)
    net = MLP(in_features, dnn_hidden_units, out_features)
    lossfunc = nn.CrossEntropyLoss()
    optimiser = optim.SGD(net.parameters(), lr=FLAGS.learning_rate)
    print(net)
    net.train()
    for i in range(FLAGS.max_steps):
        inputs, labels = batch_list[i]
        inputs = torch.from_numpy(inputs).float()
        labels = torch.from_numpy(labels).long()

        optimiser.zero_grad()
        outputs = net.forward(inputs)

        loss = lossfunc(outputs, labels)

        # Store a plain float: tensors carrying the autograd graph cannot be
        # plotted and keep the graph alive.
        loss_list.append(loss.item())

        loss.backward()
        optimiser.step()

        if (i + 1) % FLAGS.eval_freq == 0:
            net.eval()
            # Evaluation needs no gradients.
            with torch.no_grad():
                predicted = net.forward(x_test)
            accuracy_val = accuracy(predicted, y_test)
            accuracy_list.append(accuracy_val)
            eval_steps.append(i)
            print('Accuracy on test set at step {} is {}'.format(
                i, accuracy_val))
            print('Loss of training is {}'.format(loss.item()))
            # Switch back, otherwise all later steps train in eval mode.
            net.train()

    plt.subplot(2, 1, 1)
    # Plot against the steps at which the evaluations actually happened.
    plt.plot(eval_steps, accuracy_list, 'o-')
    plt.xlabel('Step')
    plt.ylabel('Accuracy')
    #
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(len(loss_list)), loss_list)
    plt.xlabel('Step')
    plt.ylabel('Loss')
Пример #13
0
def train():
    """
    Train an MLP on CIFAR-10 with Adam and evaluate it on the whole test set.

    Every FLAGS.eval_freq steps and at the last step, the loss and accuracy
    on the current training batch and on the full test set are recorded,
    both measured after the parameter update. After training, the loss and
    accuracy curves are shown.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # Get negative slope parameter for LeakyReLU
    neg_slope = FLAGS.neg_slope

    # Import the test data and flatten the images.
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    test_x, test_y = cifar10['test'].images, cifar10['test'].labels
    test_x = test_x.reshape(test_x.shape[0], -1)

    loss_function = torch.nn.CrossEntropyLoss()
    neuralnet = MLP(test_x.shape[1], dnn_hidden_units, test_y.shape[1],
                    neg_slope)
    sgd_back = torch.optim.Adam(neuralnet.parameters(), lr=FLAGS.learning_rate)

    # create tensors
    test_x = torch.from_numpy(test_x)
    test_y = torch.from_numpy(test_y)
    print(test_x.shape, test_y.shape)
    # Class indices of the one-hot test labels, computed once.
    test_targets = test_y.argmax(dim=1)

    # lists with losses and accuracies
    train_losses = []
    train_accs = []
    test_losses = []
    test_accs = []
    graph_x = []

    for i in range(FLAGS.max_steps):
        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x.reshape(x.shape[0], -1))
        y = torch.from_numpy(y)
        targets = y.argmax(dim=1)

        # predict on the train data and calculate the gradients
        out = neuralnet.forward(x)
        train_loss = loss_function(out, targets)

        sgd_back.zero_grad()
        train_loss.backward()
        sgd_back.step()

        # save the metrics for every eval_freq-th loop and the last one
        if i % FLAGS.eval_freq == 0 or i == (FLAGS.max_steps - 1):
            # Evaluation needs no gradients; losses are stored as floats so
            # they can be plotted.
            with torch.no_grad():
                train_out = neuralnet.forward(x)
                test_out = neuralnet.forward(test_x)
                train_loss = loss_function(train_out, targets).item()
                test_loss = loss_function(test_out, test_targets).item()

            # Use the post-update outputs so accuracy and loss describe the
            # same model state (the pre-update `out` was used before).
            train_acc = accuracy(train_out, y)
            test_acc = accuracy(test_out, test_y)

            train_losses.append(train_loss)
            train_accs.append(train_acc)
            test_losses.append(test_loss)
            test_accs.append(test_acc)
            graph_x.append(i)

            print("iteration:", i)
            print("Test accuracy:", test_accs[-1])
            print("Test loss:", test_losses[-1])

    plt.figure()
    plt.subplot(1, 2, 1)
    plt.plot(graph_x, train_losses, label="train loss")
    plt.plot(graph_x, test_losses, label="test loss")
    plt.title('Losses')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(graph_x, train_accs, label="train acc")
    plt.plot(graph_x, test_accs, label="test acc")
    plt.title('Accuracies')
    plt.legend()

    # Nothing was recorded when max_steps is 0.
    if test_accs:
        print("Final test accuracy:", test_accs[-1])
        print("Final test loss:", test_losses[-1])

    plt.show()
def train():
    """
    Train an MLP on CIFAR-10 with SGD (momentum and weight decay).

    A validation split of 10000 samples is held out. Every FLAGS.eval_freq
    steps the loss and accuracy on the current training batch and on the
    validation split are recorded and printed. After training, the model is
    evaluated once on the whole test set and the curves are shown.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # Flattened size of one 32x32 image with 3 channels.
    n_inputs = 32 ** 2 * 3
    model = MLP(n_inputs, dnn_hidden_units, 10)
    print(model)

    cv_size = 10000
    cifar10 = cifar10_utils.get_cifar10('cifar10/cifar-10-batches-py',
                                        validation_size=cv_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=FLAGS.learning_rate,
                          momentum=0.9, weight_decay=0.003)

    log = defaultdict(list)

    for step in range(FLAGS.max_steps):
        optimizer.zero_grad()

        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x.reshape(FLAGS.batch_size, -1))
        y = torch.from_numpy(y)

        h = model.forward(x)

        loss = criterion(h, y.argmax(1))
        loss.backward()

        optimizer.step()

        if step % FLAGS.eval_freq == 0:
            log['step'].append(step)
            log['train_loss'].append(loss.item())
            log['train_acc'].append(accuracy(h, y))

            model.eval()

            x_cv, y_cv = cifar10['validation'].next_batch(cv_size)
            x_cv = torch.from_numpy(x_cv.reshape(-1, n_inputs))
            y_cv = torch.from_numpy(y_cv)

            # Validation needs no gradients; skipping the autograd graph
            # saves memory on the 10000-sample pass.
            with torch.no_grad():
                h_cv = model.forward(x_cv)
                cv_loss = criterion(h_cv, y_cv.argmax(1))

            log['cv_loss'].append(cv_loss.item())
            log['cv_acc'].append(accuracy(h_cv, y_cv))

            model.train()

            print(
                f"Step {step} | "
                f"Training loss: {log['train_loss'][-1]:.5f}, "
                f"accuracy: {100 * log['train_acc'][-1]:.1f}% | "
                f"CV loss: {log['cv_loss'][-1]:.5f}, "
                f"accuracy: {100 * log['cv_acc'][-1]:.1f}%")

    # Final evaluation on the whole test set.
    model.eval()
    x, y = cifar10['test'].next_batch(cifar10['test'].num_examples)
    x = torch.from_numpy(x.reshape(-1, n_inputs))
    y = torch.from_numpy(y)

    with torch.no_grad():
        h = model.forward(x)
        loss = criterion(h, y.argmax(1))

    print(f"Test loss: {loss.item()}, accuracy: {100 * accuracy(h, y):.1f}%")

    # Plot loss and accuracy against the step at which they were recorded,
    # so the "Step" axis label is accurate.
    plt.subplot(121)
    plt.title("Loss")
    plt.plot(log['step'], log['train_loss'], label="Training")
    plt.plot(log['step'], log['cv_loss'], label="Cross Validation")
    plt.xlabel("Step")
    plt.legend()

    plt.subplot(122)
    plt.title("Accuracy")
    plt.plot(log['step'], log['train_acc'], label="Training")
    plt.plot(log['step'], log['cv_acc'], label="Cross Validation")
    plt.xlabel("Step")
    plt.legend()

    plt.show()
Пример #15
0
def train():
    """
    Train an MLP on CIFAR-10 and evaluate it on the whole test set.

    The optimizer is chosen by the module-level OPTIMIZER ("SGD", otherwise
    Adam) and uses the module-level `weight_decay`. The learning rate is
    multiplied by 0.8 every LR_FREQ iterations. Train accuracy and loss are
    recorded on every step; test accuracy and loss every FLAGS.eval_freq
    steps. The results are handed to `plot_results` and the best test
    accuracy is printed.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    print(torch.cuda.is_available())
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_accuracies = []
    train_losses = []
    test_accuracies = []
    test_losses = []
    steps = []

    # load data from directory specified in the input
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

    # load test images and labels
    test_images = cifar10['test'].images
    test_targets = cifar10['test'].labels

    # Flatten every image into one vector of n_inputs values.
    n_test = test_images.shape[0]
    n_inputs = (test_images.shape[1] * test_images.shape[2]
                * test_images.shape[3])
    test_images = test_images.reshape((n_test, n_inputs))
    n_classes = 10

    # The model is not trained on the test data, so no gradients are needed.
    test_images = torch.tensor(test_images, requires_grad=False).to(device)
    test_targets = torch.tensor(test_targets, requires_grad=False).to(device)
    test_labels = test_targets.argmax(dim=1)

    # The inputs live on `device`, so the model has to be moved there too;
    # otherwise the forward pass fails when CUDA is available.
    MLP_model = MLP(n_inputs=n_inputs, n_hidden=dnn_hidden_units,
                    n_classes=n_classes, neg_slope=FLAGS.neg_slope)
    MLP_model = MLP_model.to(device)
    print(MLP_model)
    loss_module = nn.CrossEntropyLoss()

    learning_rate = FLAGS.learning_rate

    if OPTIMIZER == "SGD":
        optimizer = torch.optim.SGD(MLP_model.parameters(), lr=learning_rate,
                                    weight_decay=weight_decay)
    else:
        optimizer = torch.optim.Adam(MLP_model.parameters(), lr=learning_rate,
                                     weight_decay=weight_decay)

    batch_size = FLAGS.batch_size
    # Best test accuracy seen so far and the iteration it was reached at.
    max_acc = 0
    max_iter = 0

    for iteration in range(FLAGS.max_steps):

        train_images, train_targets = cifar10['train'].next_batch(batch_size)
        # input to MLP.forward is (batch_size, n_inputs)
        train_images = train_images.reshape((batch_size, n_inputs))

        train_images = torch.tensor(train_images).type(torch.FloatTensor).to(device)
        train_targets = torch.tensor(train_targets).type(torch.LongTensor).to(device)

        if iteration % LR_FREQ == 0:
            # Decay the learning rate in place. Re-creating the optimizer
            # here always produced SGD and discarded the chosen optimizer
            # and its state. The schedule, including the decay at
            # iteration 0, is unchanged.
            learning_rate = learning_rate * 0.8
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

        optimizer.zero_grad()

        train_predictions = MLP_model.forward(train_images)
        loss = loss_module(train_predictions, train_targets.argmax(dim=1))
        loss.backward()
        optimizer.step()

        train_accuracies.append(accuracy(train_predictions, train_targets))
        # Detach so the stored value does not keep the autograd graph alive.
        train_losses.append(loss.detach().cpu())
        steps.append(iteration)

        if iteration % 100 == 0:
            print("iteration:" + str(iteration) + "train_acc:" + str(np.mean(train_accuracies)))

        # Evaluate the current model on the whole test set.
        if iteration % FLAGS.eval_freq == 0:
            with torch.no_grad():
                test_predictions = MLP_model.forward(test_images)
                test_loss = loss_module.forward(test_predictions, test_labels)
            test_acc = accuracy(test_predictions, test_targets)
            test_accuracies.append(test_acc)
            print("iteration:" + str(iteration) + "test_acc:" + str(test_accuracies[-1]))
            test_losses.append(test_loss.cpu())
            if (max_acc < test_acc):
                max_acc = test_acc
                max_iter = iteration

    print('Training is done')
    print('Save results in folder: .')

    print('Training is done')
    print('Plot Results')

    plot_results(train_accuracies, test_accuracies, train_losses, test_losses)
    print("max accuracy: " + str(max_acc) + " at iteration: " + str(max_iter))
def train():
    """
    Train an MLP on CIFAR-10 with Adam and evaluate it on the whole test set.

    Every FLAGS.eval_freq iterations the test accuracy (measured before the
    update of that iteration) and the loss of the current training batch are
    recorded. After training, the training loss curve is saved under
    'figures/' and shown.
    """
    import os

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Number of units per hidden layer, given as a string such as "100,100".
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # Model and inputs must share one device; fall back to the CPU when
    # CUDA is unavailable instead of failing on `.cuda()`.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Get the datasets
    data = cifar10_utils.get_cifar10()
    n_classes = data['train'].labels.shape[1]
    n_inputs = (data['train'].images.shape[1] * data['train'].images.shape[2]
                * data['train'].images.shape[3])
    mlp = MLP(n_inputs, dnn_hidden_units, n_classes)
    mlp.to(device)
    loss_module = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(mlp.parameters(), lr=FLAGS.learning_rate)

    # The test set does not change, so flatten and transfer it only once.
    test_images = data['test'].images
    test_x = torch.from_numpy(
        np.reshape(test_images, (test_images.shape[0], -1))).to(device)
    test_y = torch.from_numpy(data['test'].labels).to(device)

    train_losses = []
    test_accuracies = []

    # Iterate over the batches
    for iteration in range(0, FLAGS.max_steps):
        is_eval_step = iteration % FLAGS.eval_freq == 0

        optimizer.zero_grad()

        if is_eval_step:
            print("Iteration {}...".format(iteration))
            # Evaluation needs no gradients.
            with torch.no_grad():
                test_out = mlp.forward(test_x)
            acc = accuracy(test_out, test_y)
            print("Test accuracy:", acc)
            test_accuracies.append(acc)

        batch, batch_labels = data['train'].next_batch(FLAGS.batch_size)
        reshaped_batch = np.reshape(batch, (batch.shape[0], -1))
        train_out = mlp.forward(torch.from_numpy(reshaped_batch).to(device))
        targets = torch.argmax(torch.from_numpy(batch_labels),
                               dim=1).long().to(device)
        loss = loss_module(train_out, targets)

        # Record the batch loss on evaluation steps. The list was never
        # filled before, so the plot below failed on mismatched lengths.
        if is_eval_step:
            train_losses.append(loss.item())

        loss.backward()
        optimizer.step()

    # Plot results: one point per evaluation step.
    x = range(0, len(train_losses) * FLAGS.eval_freq, FLAGS.eval_freq)
    fig, ax = plt.subplots()
    ax.plot(x, train_losses)
    ax.set(xlabel='batches',
           ylabel='loss',
           title='Loss training set after batches trained')
    ax.grid()

    # The output directory may not exist on a fresh checkout.
    os.makedirs("figures", exist_ok=True)
    fig.savefig("figures/loss_{0}_{1}_{2}_{3}.png".format(
        FLAGS.dnn_hidden_units, FLAGS.learning_rate, FLAGS.max_steps,
        FLAGS.batch_size))
    plt.show()
Пример #17
0
def train():
  """
  Train an MLP on CIFAR-10 and evaluate it on the whole test set.

  The optimizer is chosen by FLAGS.optimizer (Adam, Adamax, Adagrad,
  Adadelta, SparseAdam, otherwise SGD). Every FLAGS.eval_freq steps the
  accuracy of the current training batch, the running average of the
  training loss, and the loss and accuracy over one full pass of the test
  set are recorded and printed. If FLAGS.plot is set, the curves are saved
  as 'pytorch.png'.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)
  torch.manual_seed(42)

  # Number of units per hidden layer, given as a string such as "100,100".
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
  else:
    dnn_hidden_units = []

  # Get negative slope parameter for LeakyReLU
  neg_slope = FLAGS.neg_slope
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

  cifar10 = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
  train_data = cifar10['train']

  # Flattened image size and number of classes (labels are one-hot).
  n_inputs = train_data.images.reshape(train_data.images.shape[0], -1).shape[1]
  n_hidden = dnn_hidden_units
  n_classes = train_data.labels.shape[1]

  model = MLP(n_inputs, n_hidden, n_classes, neg_slope)
  model.to(device)

  params = model.parameters()

  if FLAGS.optimizer == 'Adam':
    optimizer = torch.optim.Adam(params, lr=FLAGS.learning_rate)
  elif FLAGS.optimizer == 'Adamax':
    optimizer = torch.optim.Adamax(params, lr=FLAGS.learning_rate)
  elif FLAGS.optimizer == 'Adagrad':
    optimizer = torch.optim.Adagrad(params, lr=FLAGS.learning_rate)
  elif FLAGS.optimizer == 'Adadelta':
    optimizer = torch.optim.Adadelta(params, lr=FLAGS.learning_rate)
  elif FLAGS.optimizer == 'SparseAdam':
    optimizer = torch.optim.SparseAdam(params, lr=FLAGS.learning_rate)
  else:
    optimizer = torch.optim.SGD(params, lr=FLAGS.learning_rate)

  # The test set is read directly so each evaluation covers every sample
  # exactly once.
  test_images = cifar10['test'].images
  test_labels = cifar10['test'].labels
  n_test = test_images.shape[0]

  criterion = torch.nn.CrossEntropyLoss()
  train_acc_plot = []
  test_acc_plot = []
  loss_train = []
  loss_test = []
  rloss = 0
  best_accuracy = 0

  for i in range(0, FLAGS.max_steps):
    x, y = cifar10['train'].next_batch(FLAGS.batch_size)
    x = torch.from_numpy(x).float().to(device)
    y = torch.from_numpy(y).float().to(device)
    x = x.reshape(x.shape[0], -1)

    out = model.forward(x)
    loss = criterion.forward(out, y.argmax(1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    rloss += loss.item()

    if i % FLAGS.eval_freq == 0:
      train_accuracy = accuracy(out, y)

      # One pass over the whole test set in chunks of batch_size. The old
      # loop drew max_steps random test batches per evaluation, which was
      # slow and only approximated the test metrics.
      weighted_acc = 0.0
      weighted_loss = 0.0
      with torch.no_grad():
        for start in range(0, n_test, FLAGS.batch_size):
          stop = start + FLAGS.batch_size
          test_x = torch.from_numpy(test_images[start:stop]).float().to(device)
          test_y = torch.from_numpy(test_labels[start:stop]).float().to(device)
          test_x = test_x.reshape(test_x.shape[0], -1)
          chunk_size = test_x.shape[0]

          test_out = model.forward(test_x)
          test_loss = criterion(test_out, test_y.argmax(1))
          # Weight by chunk size so a smaller last chunk does not skew
          # the averages.
          weighted_loss += test_loss.item() * chunk_size
          weighted_acc += float(accuracy(test_out, test_y)) * chunk_size

      t_acc = weighted_acc / max(n_test, 1)
      t_loss = weighted_loss / max(n_test, 1)
      train_acc_plot.append(train_accuracy)
      test_acc_plot.append(t_acc)
      loss_train.append(rloss/(i + 1))
      loss_test.append(t_loss)
      print(f"iter {i}, train_loss_avg {rloss/(i + 1)}, test_loss_avg {t_loss}, train_acc {train_accuracy}, test_acc_avg {t_acc}")
      if t_acc > best_accuracy:
        best_accuracy = t_acc

  print(f"Best Accuracy {best_accuracy}",flush=True)
  if FLAGS.plot:
    print('Start plotting...')
    fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
    ax1.plot(np.arange(len(train_acc_plot)), train_acc_plot, label='training')
    ax1.plot(np.arange(len(test_acc_plot)), test_acc_plot, label='testing')
    ax1.set_title('Training evaluation batch size '+str(FLAGS.batch_size)+' learning rate '+str(FLAGS.learning_rate)+ '\n best accuracy '+str(best_accuracy) )
    ax1.set_ylabel('Accuracy')
    ax1.legend()
    ax2.plot(np.arange(len(loss_train)), loss_train, label='Train Loss')
    ax2.plot(np.arange(len(loss_test)), loss_test, label='Test Loss')
    ax2.set_title('Loss evaluation')
    ax2.set_ylabel('Loss')
    ax2.legend()
    plt.xlabel('Iteration')
    plt.savefig('pytorch.png')
Пример #18
0
def train():
  """
  Performs training and evaluation of MLP model.

  Trains an MLP on flattened CIFAR-10 batches using SGD (momentum 0.9,
  weight decay 0.0005) and a StepLR schedule that multiplies the learning
  rate by 0.3 every ``FLAGS.max_steps // 6`` steps. Every ``FLAGS.eval_freq``
  steps the accuracy and cross-entropy loss of the current training batch
  and of one freshly drawn validation batch are recorded and printed. After
  the loop, accuracy on the full test set is printed and the recorded loss
  and accuracy curves are shown in a single figure with two y-axes.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)
  torch.manual_seed(42)

  ## Prepare all functions
  # Get number of units in each hidden layer specified in the string such as 100,100
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
    dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
  else:
    dnn_hidden_units = []

  ########################
  # PUT YOUR CODE HERE  #
  #######################

  # get train data
  cifar10 = cifar10_utils.get_cifar10('cifar10/cifar-10-batches-py', validation_size= 2000)
  x, y = cifar10['train'].next_batch(FLAGS.batch_size)
  x = x.reshape(np.size(x, 0), -1)

  # get input shape for network initialization
  n_input = np.size(x, 1)

  # The original code drew one validation batch here and never used it.
  # The draw is kept (result discarded) so the sequence of validation
  # batches seen during evaluation stays the same.
  cifar10['validation'].next_batch(FLAGS.batch_size)

  # create model
  net = MLP(n_input, dnn_hidden_units, 10)

  # get loss function and optimizer
  crossEntropy = nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(net.parameters(), lr=FLAGS.learning_rate, momentum=0.9, weight_decay=0.0005)

  # scheduler init and hyperparams
  # step_size must be a positive integer: with a fractional value the
  # scheduler's "last_epoch % step_size == 0" test is never true and the
  # learning rate would silently never decay.
  step_size = max(1, FLAGS.max_steps // 6)
  gamma = 0.3
  scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

  # keep track of loss and accuracy
  loss_list = []
  loss_val_list = []
  accuracy_train_list = []
  accuracy_val_list = []

  # loop for set amount of steps
  for i in range(FLAGS.max_steps):

    # convert to torch compatible input; gradients w.r.t. the inputs are
    # never used, so the batch does not need requires_grad
    inputs = torch.from_numpy(x)

    # forward pass
    out = net(inputs)

    # convert one hot vector to class indices
    label_index = torch.LongTensor(np.argmax(y, axis=1))

    # apply cross entropy
    loss = crossEntropy(out, label_index)

    # show progress
    if i % FLAGS.eval_freq == 0:
      # calculate accuracy on the current training batch
      accuracy_train = accuracy(out.detach().numpy(), y)

      # run torch without keeping track of gradients for validation
      with torch.no_grad():
        # load validation data
        x_val, y_val = cifar10['validation'].next_batch(FLAGS.batch_size)
        x_val = x_val.reshape(np.size(x_val, 0), -1)

        # run on validation batch
        val_out = net(torch.from_numpy(x_val))

        # convert one hot vector to class indices
        y_val_index = torch.LongTensor(np.argmax(y_val, axis=1))

        # calculate accuracy
        accuracy_val = accuracy(val_out.numpy(), y_val)

        # apply cross entropy
        loss_val = crossEntropy(val_out, y_val_index)

      train_loss_value = loss.detach().numpy()
      val_loss_value = loss_val.numpy()

      # save variables
      accuracy_train_list.append(accuracy_train)
      accuracy_val_list.append(accuracy_val)
      loss_list.append(train_loss_value)
      loss_val_list.append(val_loss_value)

      # show progress
      print("##############################################################")
      print("Epoch ", i)
      print("---------------------------------------------------------------")
      print("The ACCURACY on the TRAIN set is currently: ", accuracy_train)
      print("---------------------------------------------------------------")
      print("The ACCURACY on the VALIDATION set is currently:", accuracy_val)
      print("---------------------------------------------------------------")
      print("The LOSS on the TRAIN set is currently:", train_loss_value)
      print("---------------------------------------------------------------")
      print("The LOSS on the VALIDATION set is currently:", val_loss_value)
      print("---------------------------------------------------------------")
      print("###############################################################")
      print("\n")

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Advance the schedule only after the optimizer update; stepping the
    # scheduler first skips the first value of the learning-rate schedule.
    scheduler.step()

    # fetch the next training batch
    x, y = cifar10['train'].next_batch(FLAGS.batch_size)
    x = x.reshape(np.size(x, 0), -1)


  # test without tracking the gradients
  with torch.no_grad():
    # load test data
    x, y = cifar10['test'].images, cifar10['test'].labels
    x = x.reshape(np.size(x, 0), -1)

    # get output and calculate accuracy on the full test set
    out = net(torch.from_numpy(x))
    print("The accuracy on the test set is:")
    print(accuracy(out.numpy(), y))

  # plot: losses on the left axis, accuracies on a twin right axis
  fig, ax1 = plt.subplots()

  ax1.plot(loss_list, label="Train loss", color='firebrick')
  ax1.plot(loss_val_list, label="Validation loss", color='darksalmon')
  ax1.set_title("Accuracy and Loss curves")
  ax1.set_ylabel("Loss")
  ax1.set_xlabel("Evaluation")
  ax1.tick_params(axis='y', labelcolor='red')
  ax1.legend(loc='upper left')

  ax2 = ax1.twinx()

  ax2.plot(accuracy_train_list, label='Train accuracy', color='royalblue')
  ax2.plot(accuracy_val_list, label='Validation accuracy', color='lightskyblue')
  ax2.tick_params(axis='y', labelcolor='blue')
  ax2.set_ylabel("Accuracy")

  fig.tight_layout()
  ax2.legend(loc='upper right')
  plt.show()
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP with Adam on CIFAR-10 mini-batches. Every
    ``FLAGS.eval_freq`` steps the mean accuracy and cross-entropy loss over
    the training set (in chunks of 500 samples) and the accuracy and loss
    over the whole test set (one forward pass) are recorded. After training
    the extreme values are printed and the curves are saved to
    ``accuracy_mlp.jpg`` and ``loss_mlp.jpg``.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################

    # Separate copy of the dataset used only to measure training-set
    # accuracy/loss in fixed-size chunks.
    # NOTE(review): presumably loaded separately so these arrays are not
    # affected by next_batch() on the training copy below - confirm.
    batch_size_acc = 500
    data_accuracy_loss = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
    X_train_acc = data_accuracy_loss['train'].images
    y_train_acc = data_accuracy_loss['train'].labels
    X_train_acc = np.reshape(X_train_acc, (X_train_acc.shape[0], -1))
    # Integer number of full chunks; a trailing partial chunk is not evaluated.
    steps_train = int(X_train_acc.shape[0] / batch_size_acc)

    #loading data for training
    data = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
    n_classes = data['train'].labels.shape[1]
    n_inputs = data['train'].images.shape[1] * data['train'].images.shape[
        2] * data['train'].images.shape[3]
    batch_size = FLAGS.batch_size
    m_steps = FLAGS.max_steps
    alpha = FLAGS.learning_rate

    mlp = MLP(n_inputs, dnn_hidden_units, n_classes)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(mlp.parameters(), lr=alpha)

    # Whole test set, flattened once up front.
    X_test, y_test = data['test'].images, data['test'].labels
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_test = torch.from_numpy(X_test)
    y_test = torch.LongTensor(y_test)

    x_ax = []
    acc_train = []
    acc_test = []
    loss_train = []
    loss_test = []

    for step in range(m_steps):

        x, y = data['train'].next_batch(batch_size)
        n = x.shape
        x = x.reshape([n[0], n[1] * n[2] * n[3]])
        x = torch.from_numpy(x)

        y_pred = mlp(x)
        labels = torch.LongTensor(y)

        # torch.max(..., 1)[1] turns the one-hot rows into class indices.
        loss = criterion(y_pred, torch.max(labels, 1)[1])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % FLAGS.eval_freq == 0:
            print('Iteration ', step)

            x_ax.append(step)

            # Evaluation never calls backward(), so run it without autograd
            # to avoid building graphs over the whole train and test sets.
            with torch.no_grad():
                acc_ = []
                loss_ = []
                for i in range(steps_train):
                    lo = i * batch_size_acc
                    hi = (i + 1) * batch_size_acc
                    x_acc = torch.from_numpy(X_train_acc[lo:hi])
                    y_acc = torch.LongTensor(y_train_acc[lo:hi])

                    y_pred = mlp.forward(x_acc)
                    acc_.append(accuracy(y_pred, y_acc))
                    loss_.append(
                        float(criterion(y_pred, torch.max(y_acc, 1)[1])))

                acc_train.append(np.mean(acc_))
                loss_train.append(np.mean(loss_))

                predictions = mlp.forward(X_test)
                acc_test.append(accuracy(predictions, y_test))

                loss_te = criterion(predictions, torch.max(y_test, 1)[1])
                loss_test.append(float(loss_te))

            print('Max train accuracy ', max(acc_train))
            print('Max test accuracy ', max(acc_test))
            print('Min train loss ', min(loss_train))
            print('Min test loss ', min(loss_test))

    x_ax = np.array(x_ax)
    acc_test = np.array(acc_test)
    acc_train = np.array(acc_train)
    loss_test = np.array(loss_test)
    loss_train = np.array(loss_train)

    print('Max train accuracy ', max(acc_train))
    print('Max test accuracy ', max(acc_test))
    print('Min train loss ', min(loss_train))
    print('Min test loss ', min(loss_test))

    # Accuracy curves
    fig = plt.figure()
    ax = plt.axes()

    plt.title("MLP Pytorch. Accuracy curves")
    ax.plot(x_ax, acc_train, label='train')
    ax.plot(x_ax, acc_test, label='test')
    ax.set_xlabel('Step')
    ax.set_ylabel('Accuracy')
    plt.legend()
    plt.savefig('accuracy_mlp.jpg')

    # Loss curves (y-axis clipped to [1, 10])
    fig = plt.figure()
    ax = plt.axes()
    plt.title("MLP Pytorch. Loss curves")
    ax.plot(x_ax, loss_train, label='train')
    ax.plot(x_ax, loss_test, label='test')
    ax.set_xlabel('Step')
    ax.set_ylabel('Loss')
    ax.set_ylim(top=10, bottom=1)
    plt.legend()
    plt.savefig('loss_mlp.jpg')
Пример #20
0
def train():
  """
  Performs training and evaluation of MLP model.

  Trains an MLP with plain SGD on CIFAR-10 mini-batches. Every
  ``FLAGS.eval_freq`` steps the accuracy on the current training batch and on
  the whole test set is printed and recorded. After training the final test
  accuracy is printed and two figures are saved: ``accuracy`` (train/test
  accuracy per evaluation) and ``loss`` (training loss per step).
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)

  ## Prepare all functions
  # Get number of units in each hidden layer specified in the string such as 100,100
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
    dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
  else:
    dnn_hidden_units = []

  ########################
  # PUT YOUR CODE HERE  #
  #######################
  if torch.cuda.is_available():
    device = torch.device("cuda")
  else:
    device = torch.device("cpu")

  dataset_dict = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT)
  train_loader = dataset_dict['train']
  test_loader = dataset_dict['test']
  # Flatten the test images and move the test set to the device once,
  # instead of on every evaluation.
  test_images = torch.tensor(
    test_loader.images.reshape(test_loader.images.shape[0], -1)).to(device)
  test_labels = torch.tensor(test_loader.labels).to(device)
  model = MLP(n_inputs=32 * 32 * 3, n_hidden=dnn_hidden_units, n_classes=10).to(device)
  #optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.learning_rate)
  optimizer = torch.optim.SGD(model.parameters(), lr=FLAGS.learning_rate)
  criterion = nn.CrossEntropyLoss()

  eval_steps = []  # step index of every recorded evaluation (x-axis for the accuracy plot)
  test_accs = []
  train_accs = []
  losses = []

  for epoch in range(FLAGS.max_steps):
    batch_x, batch_y = train_loader.next_batch(FLAGS.batch_size)
    batch_x = torch.tensor(batch_x).to(device)
    batch_y = torch.tensor(batch_y).to(device)
    optimizer.zero_grad()
    out = model.forward(batch_x.reshape(FLAGS.batch_size, -1))
    # batch_y.max(1)[1] converts the one-hot rows into class indices
    loss = criterion(out, batch_y.max(1)[1])
    losses.append(round(float(loss), 3))
    loss.backward()
    optimizer.step()

    if epoch % FLAGS.eval_freq == 0:
      # print accuracy on test and train set
      train_acc = accuracy(out, batch_y)
      with torch.no_grad():
        test_out = model.forward(test_images)
      test_acc = accuracy(test_out, test_labels)
      print(
        'Train Epoch: {}/{}\tLoss: {:.6f}\tTrain accuracy: {:.6f}\tTest accuracy: {:.6f}'.format(
          epoch, FLAGS.max_steps, loss, train_acc, test_acc))
      eval_steps.append(epoch)
      test_accs.append(float(test_acc))
      train_accs.append(float(train_acc))

  with torch.no_grad():
    out = model.forward(test_images)
  test_acc = accuracy(out, test_labels)
  print('FINAL Test accuracy: {:.6f}'.format(test_acc))

  import matplotlib.pyplot as plt
  # The x-axes are derived from what was actually recorded. Building them
  # from MAX_STEPS_DEFAULT / EVAL_FREQ_DEFAULT made plt.plot fail with a
  # length mismatch whenever the flags differed from the defaults.
  plt.figure()
  plt.plot(eval_steps, train_accs)
  plt.plot(eval_steps, test_accs)
  plt.legend(["train", "test"])
  plt.ylabel("accuracy")
  plt.xlabel("epoch")
  plt.savefig("accuracy")
  plt.figure()
  plt.plot(list(range(len(losses))), losses)
  plt.legend(["loss"])
  plt.ylabel("loss")
  plt.xlabel("epoch")
  plt.savefig("loss")
Пример #21
0
def train(running_loss=0.0):
    """
    Performs training and evaluation of MLP model.

    Trains an MLP with Adam on CIFAR-10 mini-batches. Every
    ``FLAGS.eval_freq`` steps, loss and accuracy are estimated on randomly
    sampled 64-image batches (70 batches from the training set, 15 from the
    test set), logged, and appended to the result logs. After training the
    four logs are saved as ``.npy`` files under ``./mlp_results_pytorch/``
    with a timestamp prefix.

    Args:
        running_loss: Initial value only; it is overwritten with the loss of
            the most recent training batch on every step and is not returned.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    # Set path to data
    data_dir = FLAGS.data_dir

    data = cifar10_utils.get_cifar10(data_dir)

    # Image geometry is read from the test set: (num_images, d1, d2, d3).
    input_dims_test = data['test'].images.shape
    height = input_dims_test[1]
    width = input_dims_test[2]
    channels = input_dims_test[3]
    num_images_test = input_dims_test[0]
    image_dims_ravel = height * width * channels

    # `dtype` and `device` are not defined in this function; they are
    # expected to exist at module level.
    X_test = data["test"].images.reshape((num_images_test, image_dims_ravel))
    Y_test = data["test"].labels
    X_test = torch.tensor(X_test, requires_grad=False).type(dtype).to(device)
    Y_test = torch.tensor(Y_test, requires_grad=False).type(dtype).to(device)

    # The full training set is converted once, before the loop; it used to
    # be rebuilt and moved to the device on every training step.
    X_train = data['train'].images.reshape((data['train'].images.shape[0],
                                            image_dims_ravel))
    Y_train = data['train'].labels
    X_train = torch.tensor(X_train, requires_grad=False).type(dtype).to(device)
    Y_train = torch.tensor(Y_train, requires_grad=False).type(dtype).to(device)

    model = MLP(n_inputs=image_dims_ravel, n_hidden=dnn_hidden_units, n_classes=10)
    # Follow the same device as the tensors instead of forcing CUDA, which
    # raised on machines without a GPU.
    model.to(device)

    accuracy_train_log = list()
    accuracy_test_log = list()
    loss_train_log = list()
    loss_test_log = list()

    batch_size = FLAGS.batch_size
    numb_iterations = FLAGS.max_steps
    learning_rate = FLAGS.learning_rate
    evaluation_freq = FLAGS.eval_freq
    # "%2d" formatted the (fractional) learning rate as an integer, i.e. 0.
    logging.info("learning rate: %g ", learning_rate)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    def _sampled_eval(inputs, onehot_labels, pool, rounds):
        """Return mean (loss, accuracy) over `rounds` random 64-sample batches.

        Indices are sampled without replacement from range(1, pool).
        NOTE(review): only the leading `pool` samples (and never index 0)
        are ever evaluated - confirm this subsampling is intended.
        """
        batch_losses = list()
        batch_accs = list()
        with torch.no_grad():  # evaluation only, no graph needed
            for _ in range(rounds):
                selection = random.sample(range(1, pool), 64)
                targets = onehot_labels[selection].argmax(dim=1)
                outputs = model(inputs[selection])
                batch_losses.append(criterion(outputs, targets).item())
                batch_accs.append(accuracy(outputs, onehot_labels[selection]))
        return np.mean(batch_losses), np.mean(batch_accs)

    for step in range(numb_iterations):

        X_batch, Y_batch = data['train'].next_batch(batch_size)

        X_batch = X_batch.reshape((batch_size, image_dims_ravel))

        # Convert to tensors which are handled by the device
        X_batch = torch.from_numpy(X_batch).type(dtype).to(device)
        Y_batch = torch.from_numpy(Y_batch).type(dtype).to(device)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        targs = Y_batch.argmax(dim=1)
        outputs = model.forward(X_batch)
        loss_current = criterion(outputs, targs)
        loss_current.backward()
        optimizer.step()

        running_loss = loss_current.detach().item()

        if step % evaluation_freq == 0:
            train_loss, train_acc = _sampled_eval(X_train, Y_train, 5000, 70)
            loss_train_log.append(train_loss)
            accuracy_train_log.append(train_acc)
            logging.info("train performance: loss = %4f, accuracy = %4f ", loss_train_log[-1], accuracy_train_log[-1])

            test_loss, test_acc = _sampled_eval(X_test, Y_test, 1000, 15)
            loss_test_log.append(test_loss)
            accuracy_test_log.append(test_acc)
            logging.info("test performance: loss = %4f , accuracy = %4f\n", loss_test_log[-1], accuracy_test_log[-1])

    path = "./mlp_results_pytorch/"
    # np.save does not create missing directories.
    os.makedirs(path, exist_ok=True)
    date_time = datetime.now().replace(second=0, microsecond=0).strftime(format="%Y-%m-%d-%H-%M")
    np.save(os.path.join(path, date_time + "accuracy_test"), accuracy_test_log)
    np.save(os.path.join(path, date_time + "loss_test"), loss_test_log)
    np.save(os.path.join(path, date_time + "loss_train"), loss_train_log)
    np.save(os.path.join(path, date_time + "accuracy_train"), accuracy_train_log)
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP on CIFAR-10 mini-batches with the optimizer named by
    ``FLAGS.optimizer`` (SGD when the name is not recognised). Every
    ``FLAGS.eval_freq`` steps the model is evaluated on the whole test set;
    the metrics are written to a ``SummaryWriter`` and collected. Training
    stops early once the mean training loss of the last 10 evaluations has
    improved by less than 1e-6 over the 10 before them. Results are written
    as CSV files and the model weights as ``.pth`` under ``./results``.

    Returns:
        The test loss tensor from the most recent evaluation, or ``None`` if
        no evaluation was run.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################

    def _to_python_scalar(value):
        """Convert a tensor / numpy value to a plain Python scalar.

        Uses ``.item()``; ``np.asscalar`` was removed from NumPy.
        """
        if isinstance(value, torch.Tensor):
            return value.detach().cpu().numpy().take(0).item()
        if isinstance(value, (np.ndarray, np.generic)):
            return value.item()
        return value

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('device', device)

    # flags
    batch_size = FLAGS.batch_size
    optimizer_name = FLAGS.optimizer
    lr = FLAGS.learning_rate

    # cifar
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    x_test_np, y_test_np = cifar10['test'].images, cifar10['test'].labels
    (test_images, height, width, colors) = x_test_np.shape
    n_inputs = height * width * colors
    (_, n_classes) = y_test_np.shape

    # flatten the test set and move it to the device once
    x_test_flat = x_test_np.reshape((test_images, n_inputs))
    x_test_torch = torch.from_numpy(x_test_flat).to(device)
    y_test_torch = torch.from_numpy(y_test_np).long().to(device)
    idx_test = torch.argmax(y_test_torch, dim=-1).long()

    # model
    ce = torch.nn.CrossEntropyLoss()
    model = MLP(n_inputs, dnn_hidden_units, n_classes)
    model.to(device)
    pars = model.parameters()

    # optimizer
    optim_pars = {'params': pars, 'lr': lr, 'weight_decay': FLAGS.weight_decay}
    if optimizer_name == 'adadelta':
        optimizer = torch.optim.Adadelta(**optim_pars)
    elif optimizer_name == 'adagrad':
        optimizer = torch.optim.Adagrad(**optim_pars)
    elif optimizer_name == 'rmsprop':
        optimizer = torch.optim.RMSprop(**optim_pars)
    elif optimizer_name == 'adam':
        optimizer = torch.optim.Adam(**optim_pars)
    else:
        # default is SGD, same as the numpy version
        optimizer = torch.optim.SGD(**optim_pars)

    cols = ['train_acc', 'test_acc', 'train_loss', 'test_loss', 'secs']

    # train
    results = []
    test_loss = None  # stays None when the loop never evaluates
    name = f'mlp-pytorch-{optimizer_name}'
    with SummaryWriter(name) as w:
        for step in tqdm(range(FLAGS.max_steps)):
            optimizer.zero_grad()

            # batch
            x_train_np, y_train_np = cifar10['train'].next_batch(batch_size)
            x_train_flat = x_train_np.reshape((batch_size, n_inputs))
            x_train_torch = torch.from_numpy(x_train_flat).to(device)
            y_train_torch = torch.from_numpy(y_train_np).long().to(device)
            idx_train = torch.argmax(y_train_torch, dim=-1).long()

            # results
            train_predictions = model.forward(x_train_torch)
            train_loss = ce(train_predictions, idx_train)
            train_acc = accuracy(train_predictions, idx_train)

            # evaluate
            if step % FLAGS.eval_freq == 0:
                eval_index = int(step / FLAGS.eval_freq)
                # No backward pass is run on the test outputs, so skip
                # building the autograd graph for the whole test set.
                with torch.no_grad():
                    start = timer()
                    test_predictions = model.forward(x_test_torch)
                    end = timer()
                    test_loss = ce(test_predictions, idx_test)
                secs = end - start  # duration of the test forward pass only
                test_acc = accuracy(test_predictions, idx_test)
                vals = [train_acc, test_acc, train_loss, test_loss, secs]
                stats = dict(
                    zip(cols, [_to_python_scalar(val) for val in vals]))
                print(test_acc.item())
                w.add_scalars('metrics', stats, eval_index)
                results.append(stats)

                # stop if loss has converged!
                check = 10
                if len(results) >= 2 * check:
                    threshold = 1e-6
                    losses = [item['train_loss'] for item in results]
                    current = np.mean(losses[-check:])
                    prev = np.mean(losses[-2 * check:-check])
                    if (prev - current) < threshold:
                        break

            train_loss.backward()
            optimizer.step()

    df = pd.DataFrame(results, columns=cols)
    meta = {
        'framework': 'pytorch',
        'algo': 'mlp',
        'optimizer': optimizer_name,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate,
        'dnn_hidden_units': FLAGS.dnn_hidden_units,
        'weight_decay': FLAGS.weight_decay,
        'max_steps': FLAGS.max_steps,
    }
    for k, v in meta.items():
        df[k] = v

    results_dir = os.path.join(os.getcwd(), 'results')
    # to_csv / torch.save do not create missing directories.
    os.makedirs(results_dir, exist_ok=True)

    # per-run file
    csv_file = os.path.join(
        results_dir,
        f'{name}-batch={FLAGS.batch_size}-lr={FLAGS.learning_rate}-hidden={FLAGS.dnn_hidden_units}-regularization={FLAGS.weight_decay}-steps={FLAGS.max_steps}.csv'
    )
    df.to_csv(csv_file)

    # cumulative file: append without header if it already exists
    csv_file = os.path.join(results_dir, 'results.csv')
    if os.path.isfile(csv_file):
        df.to_csv(csv_file, header=False, mode='a')
    else:
        df.to_csv(csv_file, header=True, mode='w')

    torch_file = os.path.join(results_dir, f'{name}.pth')
    torch.save(model.state_dict(), torch_file)
    print('done!')
    return test_loss
Пример #23
0
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP with Adam on CIFAR-10 mini-batches whose images are first
    passed through a ``BatchNorm2d(3)`` layer. Training accuracy and loss are
    written to the module-level ``writer`` on every step. Every
    ``FLAGS.eval_freq`` steps the accuracy on the whole test set is printed
    and logged, together with the mean training loss since the previous
    evaluation.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    ########################
    # PUT YOUR CODE HERE  #
    #######################
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

    # The original code drew one training batch here and never used it.
    # The draw is kept (result discarded) so the sequence of training
    # batches is unchanged.
    cifar10['train'].next_batch(FLAGS.batch_size)

    MLP_net = MLP(n_inputs=1 * 3 * 32 * 32,
                  n_hidden=dnn_hidden_units,
                  n_classes=10)

    params = MLP_net.parameters()
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(params, lr=FLAGS.learning_rate)
    print(MLP_net)

    # Input normalisation. Its parameters are not handed to the optimizer.
    # NOTE(review): the layer is never switched to eval mode, so test images
    # are normalised with the statistics of the test set itself - confirm
    # this is intended.
    batch_norm = torch.nn.BatchNorm2d(3)

    # Plain floats, not tensors: storing the loss tensors kept every step's
    # autograd graph alive until the next evaluation.
    loss_list = []
    for step in range(FLAGS.max_steps):
        # Get batch and reshape input to vector
        x_train, y_train = cifar10['train'].next_batch(FLAGS.batch_size)

        x_train = batch_norm(torch.from_numpy(x_train)).detach().numpy()

        x_train = np.reshape(x_train, (FLAGS.batch_size, -1))

        net_output = MLP_net.forward(torch.from_numpy(x_train))

        batch_accuracy = accuracy(net_output.detach().numpy(), y_train)

        y_train = torch.from_numpy(y_train)
        y_train = y_train.type(torch.LongTensor)

        # torch.max(..., 1)[1] converts the one-hot rows into class indices
        loss = criterion(net_output, torch.max(y_train, 1)[1])
        loss_value = loss.item()
        loss_list.append(loss_value)

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()

        if (step + 1) % FLAGS.eval_freq == 0:
            x_test, y_test = cifar10['test'].images, cifar10['test'].labels
            # Evaluation only: no gradients are needed for the test pass.
            with torch.no_grad():
                x_test = batch_norm(torch.from_numpy(x_test)).numpy()
                x_test = np.reshape(x_test, (x_test.shape[0], -1))
                net_test_output = MLP_net.forward(torch.from_numpy(x_test))
            # Compute the test accuracy once and reuse it for print and log.
            test_accuracy = accuracy(net_test_output.numpy(), y_test)
            print("test set accuracy for step " + str(step + 1) + " : " +
                  str(test_accuracy))
            print("loss : ", sum(loss_list) / len(loss_list))
            loss_list = []
            writer.add_scalar('Test_accuracy', test_accuracy, step)

        writer.add_scalar('Train_accuracy', batch_accuracy, step)
        writer.add_scalar('Train_loss', loss_value, step)
Пример #24
0
def train():
  """
  Train an MLP on CIFAR-10 with SGD and periodically evaluate it on the test set.

  Hyper-parameters are read from the module-level FLAGS object
  (dnn_hidden_units, learning_rate, data_dir, max_steps, batch_size,
  eval_freq). Every eval_freq steps, starting at step 0, the whole test set
  is pushed through the network and the accuracy is printed.

  Side effects:
    Appends the list of recorded test accuracies to 'accuracy_torch.txt' and
    the list of per-step training losses to 'loss_torch.txt'.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)

  ## Prepare all functions
  # Get number of units in each hidden layer specified in the string such as 100,100
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = [
      int(n_units) for n_units in FLAGS.dnn_hidden_units.split(",")
    ]
  else:
    dnn_hidden_units = []

  ce_loss = nn.CrossEntropyLoss()
  n_inputs = 3 * 32 * 32
  n_classes = 10
  mlp = MLP(n_inputs, dnn_hidden_units, n_classes)
  optimizer = optim.SGD(
    mlp.parameters(), lr=FLAGS.learning_rate, weight_decay=0.001)

  c10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

  # Flatten the test images once, up front, so that every evaluation can reuse
  # the same tensor.
  test_images = c10['test'].images
  test_data = torch.tensor(test_images.reshape(test_images.shape[0], -1))
  test_targets = c10['test'].labels

  acc_values = []
  loss_values = []

  for i in range(FLAGS.max_steps):
    x, y = c10['train'].next_batch(FLAGS.batch_size)
    x = torch.tensor(x.reshape(FLAGS.batch_size, -1))
    # CrossEntropyLoss wants class indices; the batch labels carry one row per
    # sample (presumably one-hot), so reduce them with an argmax over axis 1.
    y = torch.tensor(y.argmax(axis=1))

    optimizer.zero_grad()
    out = mlp(x)
    loss = ce_loss(out, y)
    loss.backward()
    optimizer.step()
    loss_values.append(loss.item())

    # evaluate
    if i % FLAGS.eval_freq == 0:
      # No gradients are needed for evaluation; skipping the autograd graph
      # avoids holding activations for the complete test set in memory.
      with torch.no_grad():
        predictions = mlp(test_data).numpy()
      acc = accuracy(predictions, test_targets)
      print('acc', acc, 'loss', loss.item())
      acc_values.append(acc)

  # save loss and accuracy to file
  with open('accuracy_torch.txt', 'a') as f_acc:
    print(acc_values, file=f_acc)
  with open('loss_torch.txt', 'a') as f_loss:
    print(loss_values, file=f_loss)
Пример #25
0
def train():
    """
    Train an MLP classifier on CIFAR-10, evaluate it on the test set and log the results.

    Hyper-parameters come from the module-level FLAGS object (dnn_hidden_units,
    data_dir, batch_size, learning_rate, max_steps, eval_freq). The function
    additionally reads the names ``batchnorm``, ``dropout``, ``weight_decay``,
    ``device`` and ``dtype``; none of them is defined here, so they must exist
    at module level.

    Side effects:
        * writes TensorBoard scalars under ``tensorboard/<run_id>``;
        * prints the training accuracy at step 1 and every 100 steps, and the
          test accuracy after every evaluation;
        * pickles the accuracy/loss histories and ``eval_freq`` to
          ``results/<run_id>_results.pkl``.
    """

    # DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(n_units) for n_units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # -------------------------- UNCHECKED -------------------
    # initialize tensorboard
    run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_mlp")
    if batchnorm:
        run_id = run_id + '_batchnorm'
    log_dir = 'tensorboard/' + run_id
    writer = SummaryWriter(log_dir=log_dir)

    # get the dataset
    data_set = cifar10_utils.get_cifar10(FLAGS.data_dir)

    # Number of full test batches; examples beyond the last full batch are not
    # evaluated.
    n_test_batches = int(data_set['test']._num_examples / FLAGS.batch_size)

    # derive the network dimensions from the data itself
    image_shape = data_set['train'].images[0].shape
    n_inputs = image_shape[0] * image_shape[1] * image_shape[2]
    n_classes = data_set['train'].labels[0].shape[0]

    # get the necessary components
    classifier = MLP(n_inputs, dnn_hidden_units, n_classes, dropout,
                     batchnorm).to(device)
    loss_function = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(classifier.parameters(),
                                 lr=FLAGS.learning_rate,
                                 weight_decay=weight_decay)

    # list of training accuracies and losses
    train_accuracies = []
    train_losses = []

    # list of test accuracies and losses
    test_accuracies = []
    test_losses = []

    # Most recent test results; they stay at 0 until the first evaluation ran.
    epoch_test_accuracy = 0
    epoch_test_loss = 0

    # training loop
    for step in range(FLAGS.max_steps):

        # get current batch...
        images, labels = data_set['train'].next_batch(FLAGS.batch_size)
        images = images.reshape(FLAGS.batch_size, n_inputs)

        # ...in the gpu
        images = torch.from_numpy(images).type(dtype).to(device=device)
        labels = torch.from_numpy(labels).type(dtype).to(device=device)

        # forward pass (train mode is re-enabled because evaluation switches
        # the classifier to eval mode)
        classifier.train()
        predictions = classifier.forward(images)

        # compute loss; the criterion needs class indices, not label rows
        class_labels = labels.argmax(dim=1)
        loss = loss_function(predictions, class_labels)

        # reset gradients before backwards pass
        optimizer.zero_grad()

        # backward pass
        loss.backward()

        # update weights
        optimizer.step()

        # get accuracy and loss for the batch
        train_accuracy = accuracy(predictions, labels)
        train_accuracies.append(train_accuracy)
        writer.add_scalar("Training accuracy vs steps", train_accuracy, step)

        train_losses.append(loss.item())
        writer.add_scalar("Training loss vs steps", loss.item(), step)

        if ((step + 1) % 100) == 0 or step == 0:
            print("\nStep", step + 1)
            print("\tTRAIN:", round(train_accuracy * 100, 1), "%")

        # run evaluation every eval_freq steps and after the very last step
        if (step + 1) % FLAGS.eval_freq == 0 or (step + 1) == FLAGS.max_steps:

            # list of test batch accuracies and losses for this step
            step_test_accuracies = []
            step_test_losses = []

            # get accuracy on the test set
            classifier.eval()
            # Evaluation needs no gradients; disabling autograd avoids
            # building a graph for every test batch.
            with torch.no_grad():
                for _ in range(n_test_batches):
                    # get current batch...
                    test_images, test_labels = data_set['test'].next_batch(
                        FLAGS.batch_size)
                    test_images = test_images.reshape(FLAGS.batch_size,
                                                      n_inputs)

                    # ...in the gpu
                    test_images = torch.from_numpy(test_images).type(
                        dtype).to(device=device)
                    test_labels = torch.from_numpy(test_labels).type(
                        dtype).to(device=device)

                    # forward pass
                    test_predictions = classifier(test_images)

                    # compute loss
                    test_loss = loss_function(test_predictions,
                                              test_labels.argmax(dim=1))

                    # get accuracy and loss for the batch
                    step_test_accuracies.append(
                        accuracy(test_predictions, test_labels))
                    step_test_losses.append(test_loss.item())

            # store accuracy and loss
            epoch_test_accuracy = np.mean(step_test_accuracies)
            test_accuracies.append(epoch_test_accuracy)

            epoch_test_loss = np.mean(step_test_losses)
            test_losses.append(epoch_test_loss)

            print("\tTEST:", round(epoch_test_accuracy * 100, 1), "%")

        # Logged at every step (not only on evaluation steps): between two
        # evaluations the curve simply repeats the latest test result.
        writer.add_scalar("Test accuracy vs epochs", epoch_test_accuracy, step)
        writer.add_scalar("Test loss vs epochs", epoch_test_loss, step)

    # max() raises on an empty list, which happens when max_steps is 0 and no
    # evaluation was ever performed.
    if test_accuracies:
        print("\nBest TEST:", round(max(test_accuracies) * 100, 1), "%")

    # save results
    results = {
        'train_accuracies': train_accuracies,
        'train_losses': train_losses,
        'test_accuracies': test_accuracies,
        'test_losses': test_losses,
        'eval_freq': FLAGS.eval_freq
    }

    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs("results/", exist_ok=True)
    with open("results/" + run_id + "_results.pkl", "wb") as file:
        pkl.dump(results, file)

    writer.close()
Пример #26
0
def train():
    """
    Train an MLP on CIFAR-10 and record train/test accuracy and loss.

    Settings are taken from the module-level FLAGS object: learning_rate,
    max_steps, batch_size, eval_freq, optimizer ('sgd' or 'adam'),
    regularizer (used as weight decay) and momentum (SGD only). The data is
    loaded from DATA_DIR_DEFAULT. Every eval_freq steps, starting at step 0,
    the whole test set is evaluated; the collected histories are printed once
    training has finished.

    Raises:
        ValueError: if FLAGS.optimizer is neither 'sgd' nor 'adam'.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(n_units) for n_units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # Everything stays on the CPU: training was fast enough without a GPU.
    learning_rate = FLAGS.learning_rate
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    optimizer_name = FLAGS.optimizer

    # self-added settings
    weight_decay = FLAGS.regularizer
    momentum = FLAGS.momentum

    # The test data is loaded first because its shape determines the model size.
    cifar10 = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT)
    x_test, y_test = cifar10['test'].images, cifar10['test'].labels

    input_size = np.shape(x_test)[1] * np.shape(x_test)[2] * np.shape(
        x_test)[3]
    class_size = np.shape(y_test)[1]

    x_test = torch.from_numpy(
        x_test.reshape([np.shape(x_test)[0], input_size]))
    y_test = torch.from_numpy(y_test)
    # Class indices for the criterion; computed once instead of per evaluation.
    y_test_classes = y_test.argmax(dim=1)

    net = MLP(n_inputs=input_size,
              n_hidden=dnn_hidden_units,
              n_classes=class_size)

    criterion = torch.nn.CrossEntropyLoss()

    eval_accuracies = []
    train_accuracies = []

    eval_loss = []
    train_loss = []

    # choose between optimizers
    if optimizer_name == 'sgd':
        optimizer = optim.SGD(net.parameters(),
                              lr=learning_rate,
                              momentum=momentum,
                              weight_decay=weight_decay)
    elif optimizer_name == 'adam':
        optimizer = optim.Adam(net.parameters(),
                               lr=learning_rate,
                               weight_decay=weight_decay)
    else:
        # Fail right here instead of hitting an unbound 'optimizer' in the loop.
        raise ValueError(
            "unsupported optimizer {!r}; expected 'sgd' or 'adam'".format(
                optimizer_name))

    for step in range(max_steps):
        x, y = cifar10['train'].next_batch(batch_size)
        x = torch.from_numpy(x.reshape([np.shape(x)[0], input_size]))
        y = torch.from_numpy(y)
        optimizer.zero_grad()

        out = net.forward(x)

        # The criterion expects class indices, hence the argmax over the
        # label rows.
        loss = criterion(out, y.argmax(dim=1))
        loss.backward()
        optimizer.step()

        if step % eval_freq == 0:
            # Evaluation needs no gradients; skipping autograd avoids keeping
            # a graph for the complete test set.
            with torch.no_grad():
                test_out = net.forward(x_test)
                test_loss = criterion(test_out, y_test_classes)

            eval_accuracies.append(accuracy(test_out, y_test))
            train_accuracies.append(accuracy(out, y))

            eval_loss.append(test_loss.item())
            # 'loss' already is the criterion value for this batch, so it is
            # reused rather than recomputed.
            train_loss.append(loss.item())

    print("EVAL ACCURACY")
    print(eval_accuracies)
    print("train ACCURACY")
    print(train_accuracies)
    print("EVAL loss")
    print(eval_loss)
    print("train loss")
    print(train_loss)
Пример #27
0
def train(n_hidden_1, dropout, lr, wdecay, _run):
    """
    Train a two-hidden-layer MLP on CIFAR-10 with Adam and log metrics to ``_run``.

    The hidden layers are fixed to ``[n_hidden_1, 400]``; FLAGS.dnn_hidden_units
    is not consulted. FLAGS.max_steps, FLAGS.batch_size and FLAGS.eval_freq
    control the loop, and the data is read from DATA_DIR_DEFAULT with integer
    (non one-hot) labels.

    Args:
        n_hidden_1: number of units in the first hidden layer.
        dropout: forwarded to MLP as its ``dropout`` argument.
        lr: learning rate for Adam.
        wdecay: weight decay for Adam.
        _run: object providing ``log_scalar(name, value, step)``
            (presumably a Sacred run - confirm against the caller).

    Side effects:
        Prints and logs the mean training loss/accuracy every 50 steps, and the
        test loss/accuracy every FLAGS.eval_freq steps and after the last step.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_xy_tensors(batch):
        """Turn an (images, labels) batch into flattened float inputs and long targets on ``device``."""
        x, y = batch
        x = torch.tensor(x.reshape(-1, 3072), dtype=torch.float32).to(device)
        y = torch.tensor(y, dtype=torch.long).to(device)
        return x, y

    datasets = cifar10_utils.read_data_sets(DATA_DIR_DEFAULT, one_hot=False)
    train_data = datasets['train']
    test_data = datasets['test']
    model = MLP(n_inputs=3072,
                n_hidden=[n_hidden_1, 400],
                n_classes=10,
                dropout=dropout).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wdecay)

    log_every = 50
    # Sums since the last log line together with the number of steps they
    # cover. Dividing by the real count keeps the very first report (step 0,
    # a single step) correct; dividing by log_every would shrink it 50-fold.
    loss_sum = 0.0
    acc_sum = 0.0
    n_accumulated = 0
    for step in range(FLAGS.max_steps):
        x, y = get_xy_tensors(train_data.next_batch(FLAGS.batch_size))

        # Forward and backward passes
        optimizer.zero_grad()
        out = model(x)
        loss = loss_fn(out, y)
        loss.backward()

        # Parameter updates
        optimizer.step()

        loss_sum += loss.item()
        acc_sum += accuracy(out, y)
        n_accumulated += 1
        if step % log_every == 0:
            avg_loss = loss_sum / n_accumulated
            avg_acc = acc_sum / n_accumulated
            print('[{}/{}] train loss: {:.6f}  train acc: {:.6f}'.format(
                step, FLAGS.max_steps, avg_loss, avg_acc))
            _run.log_scalar('train-loss', avg_loss, step)
            _run.log_scalar('train-acc', avg_acc, step)
            loss_sum = 0.0
            acc_sum = 0.0
            n_accumulated = 0

        # Evaluate
        if step % FLAGS.eval_freq == 0 or step == (FLAGS.max_steps - 1):
            x_test, y_test = get_xy_tensors(
                test_data.next_batch(test_data.num_examples))
            model.eval()
            # No gradients are needed here; this avoids building an autograd
            # graph over the complete test set.
            with torch.no_grad():
                test_out = model(x_test)
                test_loss = loss_fn(test_out, y_test).item()
            model.train()
            test_acc = accuracy(test_out, y_test)
            print('[{}/{}]  test accuracy: {:6f}'.format(
                step, FLAGS.max_steps, test_acc))

            _run.log_scalar('test-loss', test_loss, step)
            _run.log_scalar('test-acc', test_acc, step)
def train():
    """
    Train an MLP on CIFAR-10 with plain SGD, then plot accuracy and loss curves.

    Reads dnn_hidden_units, learning_rate, max_steps, batch_size and eval_freq
    from the module-level FLAGS object. Training runs on the GPU when one is
    available and on the CPU otherwise. Every eval_freq steps, starting at
    step 0, the accuracy on the current batch and on the whole test set is
    recorded and printed.

    Side effects:
        Prints progress and the final test accuracy, and saves a figure with
        the accuracy and loss curves as "mlp-pytorch-results".
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(n_units) for n_units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []
    print("arch: ", dnn_hidden_units)

    # Fall back to the CPU so the script also runs on machines without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    dataset = cifar10_utils.get_cifar10()
    training = dataset['train']
    test = dataset['test']

    # Flatten the test set and move it to the device once, instead of
    # transferring it again for every evaluation.
    test_images = torch.tensor(
        test.images.reshape(test.images.shape[0], -1)).to(device)
    test_labels = torch.tensor(test.labels).to(device)

    model = MLP(n_inputs=32 * 32 * 3, n_hidden=dnn_hidden_units,
                n_classes=10).to(device)

    opt = torch.optim.SGD(model.parameters(), lr=FLAGS.learning_rate)
    ce = nn.CrossEntropyLoss()

    test_accuracy = []
    train_accuracy = []
    loss_list = []

    for step in range(FLAGS.max_steps):

        x, y = training.next_batch(FLAGS.batch_size)
        x = torch.tensor(x).to(device)
        y = torch.tensor(y).to(device)

        opt.zero_grad()
        out = model(x.reshape(FLAGS.batch_size, -1))
        # The criterion expects class indices: take the position of the
        # largest entry of every label row.
        loss = ce(out, y.max(1)[1])
        loss_list.append(loss.item())
        loss.backward()
        opt.step()

        if step % FLAGS.eval_freq == 0:
            train_accuracy.append(accuracy(out, y))
            # Evaluation needs no gradients; skip the autograd graph for the
            # complete test set.
            with torch.no_grad():
                test_out = model(test_images)
            test_accuracy.append(accuracy(test_out, test_labels))
            print('Epoch: ', step, 'Loss: ', loss, 'Accuracy: ',
                  train_accuracy[-1], 'Test ac.:', test_accuracy[-1])

    with torch.no_grad():
        final_out = model(test_images)
    print('Test accuracy: ', accuracy(final_out, test_labels))

    import seaborn as sns
    import matplotlib.pyplot as plt

    # The x-axes must follow the flags that actually drove the loop; using the
    # *_DEFAULT constants mismatches the list lengths for non-default flags.
    eval_steps = np.arange(0, FLAGS.max_steps, FLAGS.eval_freq)
    all_steps = np.arange(FLAGS.max_steps)

    f, axes = plt.subplots(1, 2)
    # x/y are passed by keyword: recent seaborn releases reject positional data.
    ax = sns.lineplot(x=eval_steps, y=train_accuracy, ax=axes[0])
    ax = sns.lineplot(x=eval_steps, y=test_accuracy, ax=axes[0])
    ax.set_title('Training and test accuracy')
    ax.legend(['training', 'test'])
    ax = sns.lineplot(x=all_steps, y=loss_list, ax=axes[1])
    ax.set_title('Loss')
    figure = ax.get_figure()
    figure.savefig("mlp-pytorch-results")
Пример #29
0
def train():
  """
  Train an MLP (LeakyReLU slope taken from FLAGS.neg_slope) on CIFAR-10 with Adam/AMSGrad.

  Reads dnn_hidden_units, neg_slope, learning_rate, max_steps, batch_size and
  eval_freq from the module-level FLAGS object and loads the data from
  DATA_DIR_DEFAULT. Every eval_freq steps, starting at step 0, the accuracy on
  a 10000-example test batch is printed; the final accuracy is printed once
  training has finished.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)

  ## Prepare all functions
  # Get number of units in each hidden layer specified in the string such as 100,100
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = [
      int(n_units) for n_units in FLAGS.dnn_hidden_units.split(",")
    ]
  else:
    dnn_hidden_units = []

  # Get negative slope parameter for LeakyReLU
  neg_slope = FLAGS.neg_slope

  cifar10 = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT)

  # A single training example is drawn only to read off the flattened input
  # width and the number of label columns.
  x, y = cifar10['train'].next_batch(1)
  n_inputs = x.reshape(x.shape[0], -1).shape[1]
  n_classes = y.shape[1]

  x_test, y_test = cifar10['test'].next_batch(10000)
  x_test = torch.tensor(x_test.reshape(x_test.shape[0], -1))
  y_test = torch.tensor(y_test)

  model = MLP(n_inputs, dnn_hidden_units, n_classes, neg_slope)
  crossEntropy = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.learning_rate, amsgrad=True)

  # batch gradient descent
  for i in range(FLAGS.max_steps):
    x, y = cifar10['train'].next_batch(FLAGS.batch_size)
    x = torch.tensor(x.reshape(x.shape[0], -1))
    y = torch.LongTensor(y)

    prediction = model(x)

    # The criterion expects class indices: take the position of the largest
    # entry of every label row.
    loss = crossEntropy(prediction, torch.max(y, 1)[1])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if i % FLAGS.eval_freq == 0:
      # Evaluation needs no gradients; skip the autograd graph for the test set.
      with torch.no_grad():
        # dim=1 is what the implicit-dim rule picks for a 2-D input; naming it
        # explicitly avoids the deprecation warning.
        test_prediction = nn.functional.softmax(model(x_test), dim=1)
      print('Accuracy after '+ str(i) +' steps ' + str(accuracy(test_prediction, y_test)))

  with torch.no_grad():
    test_prediction = model(x_test)
  print('Final accuracy')
  print(accuracy(test_prediction, y_test))
def train():
    """
    Train an MLP on CIFAR-10 and report the best test accuracy that was seen.

    Reads dnn_hidden_units, optimizer, learning_rate, max_steps, batch_size and
    eval_freq from the module-level FLAGS object and loads the data from
    DATA_DIR_DEFAULT. FLAGS.optimizer == 'SGD' selects plain SGD; any other
    value selects Adam with a weight decay of 1e-2. Every eval_freq steps,
    starting at step 0, the whole test set is evaluated.

    Side effects:
        Prints the highest test accuracy together with the test loss measured
        at that same evaluation (labelled "minimum loss" in the output).
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(n_units) for n_units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    dataset = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT)
    # Network dimensions follow from the data: flattened image size and the
    # number of label columns.
    a, b, c = dataset['train'].images.shape[1:]
    n_classes = dataset['train'].labels.shape[1]
    n_inputs = a * b * c

    mlp = MLP(n_inputs, dnn_hidden_units, n_classes)

    if FLAGS.optimizer == 'SGD':
        optimizer = optim.SGD(mlp.parameters(), lr=FLAGS.learning_rate)
    else:
        optimizer = optim.Adam(mlp.parameters(),
                               lr=FLAGS.learning_rate,
                               weight_decay=1e-2)

    crossentropy = nn.CrossEntropyLoss()

    # Prepare the test set once: flattened inputs and class-index targets.
    test_input, test_labels = dataset['test'].images, dataset['test'].labels
    test_labels = torch.from_numpy(np.argmax(test_labels, axis=1)).long()
    test_input = torch.from_numpy(
        np.reshape(test_input, (test_input.shape[0], n_inputs)))

    max_accuracy = 0
    min_loss = 0
    for step in range(FLAGS.max_steps):
        batch_input, labels = dataset['train'].next_batch(FLAGS.batch_size)
        labels = torch.from_numpy(np.argmax(labels, axis=1)).long()
        batch_input = torch.from_numpy(
            np.reshape(batch_input, (FLAGS.batch_size, n_inputs)))
        predictions = mlp.forward(batch_input)

        loss = crossentropy(predictions, labels)
        # clean up old gradients
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % FLAGS.eval_freq == 0:
            # Evaluation needs no gradients; the loss is stored as a plain
            # float so that no autograd graph of the test set is kept alive.
            with torch.no_grad():
                test_prediction = mlp.forward(test_input)
                test_loss = crossentropy(test_prediction, test_labels).item()
            test_accuracy = accuracy(test_prediction, test_labels)
            if max_accuracy < test_accuracy:
                max_accuracy = test_accuracy
                min_loss = test_loss

    print("max accuracy{:f}, minimum loss{:f}".format(max_accuracy, min_loss))